applied-cli 0.6.8__tar.gz → 0.6.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. {applied_cli-0.6.8 → applied_cli-0.6.10}/PKG-INFO +11 -1
  2. {applied_cli-0.6.8 → applied_cli-0.6.10}/README.md +10 -0
  3. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/cli.py +40 -0
  4. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/tools.py +91 -0
  5. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/domains.py +1 -0
  6. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/scenarios.py +63 -0
  7. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli.egg-info/PKG-INFO +11 -1
  8. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli.egg-info/SOURCES.txt +1 -0
  9. {applied_cli-0.6.8 → applied_cli-0.6.10}/pyproject.toml +1 -1
  10. applied_cli-0.6.10/tests/test_scenario_create_bulk.py +112 -0
  11. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/__init__.py +0 -0
  12. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/agent_scoped_flows.py +0 -0
  13. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/auth.py +0 -0
  14. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/client.py +0 -0
  15. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/conversation_lookup.py +0 -0
  16. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/conversations.py +0 -0
  17. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/credentials.py +0 -0
  18. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/flow_helpers.py +0 -0
  19. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/formatters.py +0 -0
  20. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/mcp.py +0 -0
  21. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/recovery.py +0 -0
  22. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/toolkit.py +0 -0
  23. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/__init__.py +0 -0
  24. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/agents.py +0 -0
  25. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/articles.py +0 -0
  26. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/catalog.py +0 -0
  27. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/connectors.py +0 -0
  28. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/content.py +0 -0
  29. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/conversations.py +0 -0
  30. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/flows.py +0 -0
  31. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/knowledge.py +0 -0
  32. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/manifest.py +0 -0
  33. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/products.py +0 -0
  34. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/taxonomy.py +0 -0
  35. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/tickets.py +0 -0
  36. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli.egg-info/dependency_links.txt +0 -0
  37. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli.egg-info/entry_points.txt +0 -0
  38. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli.egg-info/requires.txt +0 -0
  39. {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli.egg-info/top_level.txt +0 -0
  40. {applied_cli-0.6.8 → applied_cli-0.6.10}/setup.cfg +0 -0
  41. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_agent_scoped_flows.py +0 -0
  42. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_audit_tools.py +0 -0
  43. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_auth_context.py +0 -0
  44. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_benchmark_clone.py +0 -0
  45. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_benchmark_delete_guardrail.py +0 -0
  46. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_benchmark_list_with_results.py +0 -0
  47. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_benchmark_results.py +0 -0
  48. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_benchmark_scenario_tools.py +0 -0
  49. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_cli.py +0 -0
  50. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_cli_v2.py +0 -0
  51. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_client.py +0 -0
  52. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_client_v2.py +0 -0
  53. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_conversation_tools.py +0 -0
  54. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_flow_tools.py +0 -0
  55. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_knowledge_content_tools.py +0 -0
  56. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_recovery.py +0 -0
  57. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_scenario_bulk_cancel.py +0 -0
  58. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_scenario_bulk_run_contact.py +0 -0
  59. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_scenario_bulk_run_wait.py +0 -0
  60. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_toolkit_contract.py +0 -0
  61. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_agents.py +0 -0
  62. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_articles.py +0 -0
  63. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_catalog_and_mcp.py +0 -0
  64. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_connectors.py +0 -0
  65. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_content.py +0 -0
  66. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_conversations.py +0 -0
  67. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_flows.py +0 -0
  68. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_knowledge.py +0 -0
  69. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_products.py +0 -0
  70. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_scenarios.py +0 -0
  71. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_taxonomy.py +0 -0
  72. {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_tickets.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: applied-cli
3
- Version: 0.6.8
3
+ Version: 0.6.10
4
4
  Summary: CLI and shared client library for Applied Labs AI support agents
5
5
  Author: Applied Labs
6
6
  License-Expression: MIT
@@ -97,6 +97,11 @@ applied benchmark-create --agent-id <agent_id> --name "Cancel Regression"
97
97
  applied scenario-create --input-conversation-id <conversation_id> --name "<name>" \
98
98
  --benchmark-id <benchmark_id>
99
99
 
100
+ # Build a suite fast from several real conversations at once. Each scenario is
101
+ # named from its conversation's title (or "<prefix> N" with --name-prefix).
102
+ applied scenario-create-bulk --conversation-ids <id1>,<id2>,<id3> \
103
+ --benchmark-id <benchmark_id>
104
+
100
105
  # Port a suite to another agent (e.g. email -> chat). Cross-agent recreates the
101
106
  # scenarios under the destination agent; same-agent just tags them in.
102
107
  # Dry-run by default; add --apply to write.
@@ -116,6 +121,9 @@ applied scenario-bulk-cancel <job_id> --apply
116
121
  # Review pass/fail health (pass_status reflects the latest run per scenario)
117
122
  applied benchmark-results <benchmark_id> --format json
118
123
 
124
+ # Portfolio go/no-go: pass rates across all of an agent's benchmarks at a glance
125
+ applied benchmarks --agent-id <agent_id> --with-results --format json
126
+
119
127
  # Rate scenarios as you evaluate
120
128
  applied scenario-update <scenario_id> --pass-status pass --feedback "<note>"
121
129
 
@@ -167,6 +175,8 @@ conversations = await tools.conversation_query(
167
175
  | `benchmark_clone` | Copy all scenarios from one benchmark into another |
168
176
  | `benchmark_delete` | Delete a benchmark (guards against wiping scenarios) |
169
177
  | `benchmark_results` | Pass/fail/unrated tally and pass rate for a benchmark |
178
+ | `benchmark_list` | List benchmarks (with per-benchmark pass rates via `with_results`) |
179
+ | `scenario_create_bulk` | Build scenarios from several conversations at once |
170
180
  | `scenario_bulk_run` | Run scenarios (contact override + wait-to-completion) |
171
181
  | `scenario_bulk_cancel` | Cancel a stuck bulk run's queued/running scenario runs |
172
182
 
@@ -71,6 +71,11 @@ applied benchmark-create --agent-id <agent_id> --name "Cancel Regression"
71
71
  applied scenario-create --input-conversation-id <conversation_id> --name "<name>" \
72
72
  --benchmark-id <benchmark_id>
73
73
 
74
+ # Build a suite fast from several real conversations at once. Each scenario is
75
+ # named from its conversation's title (or "<prefix> N" with --name-prefix).
76
+ applied scenario-create-bulk --conversation-ids <id1>,<id2>,<id3> \
77
+ --benchmark-id <benchmark_id>
78
+
74
79
  # Port a suite to another agent (e.g. email -> chat). Cross-agent recreates the
75
80
  # scenarios under the destination agent; same-agent just tags them in.
76
81
  # Dry-run by default; add --apply to write.
@@ -90,6 +95,9 @@ applied scenario-bulk-cancel <job_id> --apply
90
95
  # Review pass/fail health (pass_status reflects the latest run per scenario)
91
96
  applied benchmark-results <benchmark_id> --format json
92
97
 
98
+ # Portfolio go/no-go: pass rates across all of an agent's benchmarks at a glance
99
+ applied benchmarks --agent-id <agent_id> --with-results --format json
100
+
93
101
  # Rate scenarios as you evaluate
94
102
  applied scenario-update <scenario_id> --pass-status pass --feedback "<note>"
95
103
 
@@ -141,6 +149,8 @@ conversations = await tools.conversation_query(
141
149
  | `benchmark_clone` | Copy all scenarios from one benchmark into another |
142
150
  | `benchmark_delete` | Delete a benchmark (guards against wiping scenarios) |
143
151
  | `benchmark_results` | Pass/fail/unrated tally and pass rate for a benchmark |
152
+ | `benchmark_list` | List benchmarks (with per-benchmark pass rates via `with_results`) |
153
+ | `scenario_create_bulk` | Build scenarios from several conversations at once |
144
154
  | `scenario_bulk_run` | Run scenarios (contact override + wait-to-completion) |
145
155
  | `scenario_bulk_cancel` | Cancel a stuck bulk run's queued/running scenario runs |
146
156
 
@@ -1785,6 +1785,46 @@ def scenario_create(
1785
1785
  typer.echo(result)
1786
1786
 
1787
1787
 
1788
+ @app.command("scenario-create-bulk")
1789
+ def scenario_create_bulk(
1790
+ conversation_ids: str = typer.Option(
1791
+ ..., "--conversation-ids", help="Comma-separated source conversation IDs"
1792
+ ),
1793
+ benchmark_id: str = typer.Option(
1794
+ None, "--benchmark-id", help="Attach the scenarios to an existing benchmark"
1795
+ ),
1796
+ benchmark_name: str = typer.Option(
1797
+ None, "--benchmark-name", help="Create or reuse a benchmark by name"
1798
+ ),
1799
+ agent_id: str = typer.Option(
1800
+ None, "--agent-id", help="Required when --benchmark-name is used"
1801
+ ),
1802
+ name_prefix: str = typer.Option(
1803
+ None,
1804
+ "--name-prefix",
1805
+ help="Name scenarios '<prefix> N' instead of deriving from conversation titles",
1806
+ ),
1807
+ shop_id: str = typer.Option(None, "--shop-id", help="Override shop ID"),
1808
+ format: str = typer.Option(
1809
+ "text", "--format", "-f", help="Output format: text or json"
1810
+ ),
1811
+ ) -> None:
1812
+ """Create scenarios from several conversations at once (build a suite fast)."""
1813
+ client = get_client(shop_id=shop_id)
1814
+ result = asyncio.run(
1815
+ tools.scenario_create_bulk(
1816
+ client,
1817
+ conversation_ids=_parse_csv_option(conversation_ids),
1818
+ benchmark_id=benchmark_id,
1819
+ benchmark_name=benchmark_name,
1820
+ agent_id=agent_id,
1821
+ name_prefix=name_prefix,
1822
+ output_format=format,
1823
+ )
1824
+ )
1825
+ typer.echo(result)
1826
+
1827
+
1788
1828
  @app.command("scenario-update")
1789
1829
  def scenario_update_cmd(
1790
1830
  id: str = typer.Argument(..., help="Scenario ID"),
@@ -5984,6 +5984,97 @@ async def scenario_create(
5984
5984
  return result
5985
5985
 
5986
5986
 
5987
+ async def scenario_create_bulk(
5988
+ client: AppliedClient,
5989
+ conversation_ids: list[str],
5990
+ *,
5991
+ benchmark_id: str | None = None,
5992
+ benchmark_name: str | None = None,
5993
+ agent_id: str | None = None,
5994
+ name_prefix: str | None = None,
5995
+ output_format: str = "text",
5996
+ ) -> str:
5997
+ """
5998
+ Create scenarios from several conversations at once and attach them to a
5999
+ benchmark — the fast way to build a regression suite from real conversations.
6000
+
6001
+ Each scenario's name is derived from its source conversation's title; pass
6002
+ name_prefix to instead name them "<prefix> 1", "<prefix> 2", … (skips the
6003
+ per-conversation title lookup). Names are de-duplicated server-side.
6004
+
6005
+ Args:
6006
+ client: Authenticated AppliedClient
6007
+ conversation_ids: Source conversation UUIDs (one scenario each)
6008
+ benchmark_id: Attach the scenarios to this existing benchmark
6009
+ benchmark_name: Create or reuse a benchmark by name (requires agent_id)
6010
+ agent_id: Required when benchmark_name is used
6011
+ name_prefix: Name scenarios "<prefix> N" instead of using conversation titles
6012
+ output_format: 'text' (default) or 'json'
6013
+
6014
+ Returns:
6015
+ Summary of created scenarios and any per-conversation errors.
6016
+ """
6017
+ if not conversation_ids:
6018
+ return _format_argument_error("Pass at least one conversation id.")
6019
+
6020
+ created: list[dict[str, Any]] = []
6021
+ errors: list[dict[str, Any]] = []
6022
+ for index, conversation_id in enumerate(conversation_ids):
6023
+ if name_prefix:
6024
+ name = f"{name_prefix} {index + 1}"
6025
+ else:
6026
+ try:
6027
+ conversation = await client.get_conversation(conversation_id)
6028
+ name = conversation.get("title") or f"Scenario {str(conversation_id)[:8]}"
6029
+ except AppliedAPIError:
6030
+ name = f"Scenario {str(conversation_id)[:8]}"
6031
+
6032
+ try:
6033
+ scenario = await client.create_scenario(
6034
+ input_conversation_id=conversation_id,
6035
+ name=name,
6036
+ benchmark_id=benchmark_id,
6037
+ benchmark_name=benchmark_name,
6038
+ agent_id=agent_id,
6039
+ )
6040
+ created.append(
6041
+ {
6042
+ "id": scenario.get("id"),
6043
+ "name": scenario.get("name"),
6044
+ "conversation_id": conversation_id,
6045
+ }
6046
+ )
6047
+ except AppliedAPIError as e:
6048
+ errors.append({"conversation_id": conversation_id, "error": str(e)})
6049
+
6050
+ summary = {
6051
+ "requested": len(conversation_ids),
6052
+ "created": len(created),
6053
+ "failed": len(errors),
6054
+ "scenarios": created,
6055
+ "errors": errors,
6056
+ }
6057
+
6058
+ if output_format == "json":
6059
+ return to_json(summary)
6060
+
6061
+ lines = [
6062
+ "# Bulk Scenario Create",
6063
+ f"requested: {summary['requested']}",
6064
+ f"created: {summary['created']}",
6065
+ f"failed: {summary['failed']}",
6066
+ ]
6067
+ if created:
6068
+ lines.append("\n# Created")
6069
+ lines.extend(f" - {s['name']} ({s['id']})" for s in created[:50])
6070
+ if len(created) > 50:
6071
+ lines.append(f" ... and {len(created) - 50} more")
6072
+ if errors:
6073
+ lines.append(f"\n# Errors ({len(errors)})")
6074
+ lines.extend(f" - {e['conversation_id']}: {e['error']}" for e in errors[:20])
6075
+ return "\n".join(lines)
6076
+
6077
+
5987
6078
  async def scenario_update(
5988
6079
  client: AppliedClient,
5989
6080
  scenario_id: str,
@@ -112,6 +112,7 @@ DOMAIN_TOOL_RENAMES: dict[str, dict[str, str]] = {
112
112
  "scenario_list": "scenarios_list",
113
113
  "scenario_get": "scenarios_get",
114
114
  "scenario_create": "scenarios_create",
115
+ "scenario_create_bulk": "scenarios_create_bulk",
115
116
  "scenario_update": "scenarios_update",
116
117
  "scenario_delete": "scenarios_delete",
117
118
  "scenario_run_list": "scenarios_runs_list",
@@ -98,6 +98,14 @@ class ScenariosCreateInput(StrictInput):
98
98
  agent_id: str | None = None
99
99
 
100
100
 
101
+ class ScenariosCreateBulkInput(StrictInput):
102
+ conversation_ids: list[str]
103
+ benchmark_id: str | None = None
104
+ benchmark_name: str | None = None
105
+ agent_id: str | None = None
106
+ name_prefix: str | None = None
107
+
108
+
101
109
  class ScenariosUpdateInput(StrictInput):
102
110
  scenario_id: str
103
111
  name: str | None = None
@@ -651,6 +659,46 @@ async def scenarios_create_handler(
651
659
  )
652
660
 
653
661
 
662
+ async def scenarios_create_bulk_handler(
663
+ client: AppliedClient,
664
+ params: ScenariosCreateBulkInput,
665
+ ) -> ToolResult[Any]:
666
+ from applied_cli import tools as legacy_tools
667
+
668
+ raw = await legacy_tools.scenario_create_bulk(
669
+ client,
670
+ conversation_ids=params.conversation_ids,
671
+ benchmark_id=params.benchmark_id,
672
+ benchmark_name=params.benchmark_name,
673
+ agent_id=params.agent_id,
674
+ name_prefix=params.name_prefix,
675
+ output_format="json",
676
+ )
677
+ try:
678
+ data = json.loads(raw)
679
+ except (json.JSONDecodeError, TypeError):
680
+ return ToolResult(data={"message": raw}, summary=str(raw))
681
+
682
+ next_actions = []
683
+ if data.get("created"):
684
+ next_actions.append(
685
+ "Use scenarios_bulk_run with the benchmark_id to run the new scenarios."
686
+ )
687
+ return ToolResult(
688
+ data=data,
689
+ summary=(
690
+ f"Created {data.get('created', 0)}/{data.get('requested', 0)} "
691
+ f"scenarios ({data.get('failed', 0)} failed)."
692
+ ),
693
+ warnings=(
694
+ [f"{data['failed']} conversation(s) failed to convert."]
695
+ if data.get("failed")
696
+ else []
697
+ ),
698
+ next_actions=next_actions,
699
+ )
700
+
701
+
654
702
  async def scenarios_get_handler(
655
703
  client: AppliedClient,
656
704
  params: ScenariosGetInput,
@@ -1116,6 +1164,21 @@ def scenario_specs() -> list[ToolSpec]:
1116
1164
  read_write_mode="write",
1117
1165
  tags=["scenario_create", "native"],
1118
1166
  ),
1167
+ ToolSpec(
1168
+ name="scenarios_create_bulk",
1169
+ namespace="scenarios",
1170
+ description=(
1171
+ "Create scenarios from several conversations at once and attach "
1172
+ "them to a benchmark — build a regression suite from real "
1173
+ "conversations. Names derive from each conversation's title "
1174
+ "unless name_prefix is given."
1175
+ ),
1176
+ input_model=ScenariosCreateBulkInput,
1177
+ output_model=None,
1178
+ handler=scenarios_create_bulk_handler,
1179
+ read_write_mode="write",
1180
+ tags=["scenario_create_bulk", "native"],
1181
+ ),
1119
1182
  ToolSpec(
1120
1183
  name="scenarios_update",
1121
1184
  namespace="scenarios",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: applied-cli
3
- Version: 0.6.8
3
+ Version: 0.6.10
4
4
  Summary: CLI and shared client library for Applied Labs AI support agents
5
5
  Author: Applied Labs
6
6
  License-Expression: MIT
@@ -97,6 +97,11 @@ applied benchmark-create --agent-id <agent_id> --name "Cancel Regression"
97
97
  applied scenario-create --input-conversation-id <conversation_id> --name "<name>" \
98
98
  --benchmark-id <benchmark_id>
99
99
 
100
+ # Build a suite fast from several real conversations at once. Each scenario is
101
+ # named from its conversation's title (or "<prefix> N" with --name-prefix).
102
+ applied scenario-create-bulk --conversation-ids <id1>,<id2>,<id3> \
103
+ --benchmark-id <benchmark_id>
104
+
100
105
  # Port a suite to another agent (e.g. email -> chat). Cross-agent recreates the
101
106
  # scenarios under the destination agent; same-agent just tags them in.
102
107
  # Dry-run by default; add --apply to write.
@@ -116,6 +121,9 @@ applied scenario-bulk-cancel <job_id> --apply
116
121
  # Review pass/fail health (pass_status reflects the latest run per scenario)
117
122
  applied benchmark-results <benchmark_id> --format json
118
123
 
124
+ # Portfolio go/no-go: pass rates across all of an agent's benchmarks at a glance
125
+ applied benchmarks --agent-id <agent_id> --with-results --format json
126
+
119
127
  # Rate scenarios as you evaluate
120
128
  applied scenario-update <scenario_id> --pass-status pass --feedback "<note>"
121
129
 
@@ -167,6 +175,8 @@ conversations = await tools.conversation_query(
167
175
  | `benchmark_clone` | Copy all scenarios from one benchmark into another |
168
176
  | `benchmark_delete` | Delete a benchmark (guards against wiping scenarios) |
169
177
  | `benchmark_results` | Pass/fail/unrated tally and pass rate for a benchmark |
178
+ | `benchmark_list` | List benchmarks (with per-benchmark pass rates via `with_results`) |
179
+ | `scenario_create_bulk` | Build scenarios from several conversations at once |
170
180
  | `scenario_bulk_run` | Run scenarios (contact override + wait-to-completion) |
171
181
  | `scenario_bulk_cancel` | Cancel a stuck bulk run's queued/running scenario runs |
172
182
 
@@ -54,6 +54,7 @@ tests/test_recovery.py
54
54
  tests/test_scenario_bulk_cancel.py
55
55
  tests/test_scenario_bulk_run_contact.py
56
56
  tests/test_scenario_bulk_run_wait.py
57
+ tests/test_scenario_create_bulk.py
57
58
  tests/test_toolkit_contract.py
58
59
  tests/test_v2_agents.py
59
60
  tests/test_v2_articles.py
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "applied-cli"
3
- version = "0.6.8"
3
+ version = "0.6.10"
4
4
  description = "CLI and shared client library for Applied Labs AI support agents"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -0,0 +1,112 @@
1
+ import json
2
+
3
+ import pytest
4
+
5
+ from applied_cli import tools
6
+ from applied_cli.client import AppliedAPIError
7
+
8
+
9
+ class FakeBulkCreateClient:
10
+ def __init__(self, titles=None, fail_ids=None):
11
+ self._titles = titles or {}
12
+ self._fail_ids = set(fail_ids or [])
13
+ self.get_conversation_calls = []
14
+ self.created = []
15
+
16
+ async def get_conversation(self, conversation_id, *, shop_id=None):
17
+ self.get_conversation_calls.append(conversation_id)
18
+ return {"id": conversation_id, "title": self._titles.get(conversation_id)}
19
+
20
+ async def create_scenario(
21
+ self,
22
+ input_conversation_id,
23
+ name,
24
+ benchmark_id=None,
25
+ benchmark_name=None,
26
+ agent_id=None,
27
+ ):
28
+ if input_conversation_id in self._fail_ids:
29
+ raise AppliedAPIError("boom", status_code=400)
30
+ rec = {
31
+ "id": f"scn-{len(self.created) + 1}",
32
+ "name": name,
33
+ "input_conversation_id": input_conversation_id,
34
+ "benchmark_id": benchmark_id,
35
+ }
36
+ self.created.append(rec)
37
+ return rec
38
+
39
+
40
+ @pytest.mark.asyncio
41
+ async def test_bulk_create_names_from_conversation_titles():
42
+ client = FakeBulkCreateClient(
43
+ titles={"c1": "Cancel order BP123", "c2": "Refund request"}
44
+ )
45
+ out = await tools.scenario_create_bulk(
46
+ client, ["c1", "c2"], benchmark_id="b1", output_format="json"
47
+ )
48
+ data = json.loads(out)
49
+ assert data["created"] == 2
50
+ assert [s["name"] for s in client.created] == ["Cancel order BP123", "Refund request"]
51
+ assert all(s["benchmark_id"] == "b1" for s in client.created)
52
+ assert client.get_conversation_calls == ["c1", "c2"]
53
+
54
+
55
+ @pytest.mark.asyncio
56
+ async def test_name_prefix_skips_title_lookup():
57
+ client = FakeBulkCreateClient()
58
+ await tools.scenario_create_bulk(
59
+ client, ["c1", "c2", "c3"], name_prefix="DG Cancel", output_format="json"
60
+ )
61
+ assert [s["name"] for s in client.created] == [
62
+ "DG Cancel 1",
63
+ "DG Cancel 2",
64
+ "DG Cancel 3",
65
+ ]
66
+ # No per-conversation fetches when a prefix is supplied.
67
+ assert client.get_conversation_calls == []
68
+
69
+
70
+ @pytest.mark.asyncio
71
+ async def test_missing_title_falls_back_to_short_id():
72
+ client = FakeBulkCreateClient(titles={"abcdef12-0000": None})
73
+ await tools.scenario_create_bulk(
74
+ client, ["abcdef12-0000"], output_format="json"
75
+ )
76
+ assert client.created[0]["name"] == "Scenario abcdef12"
77
+
78
+
79
+ @pytest.mark.asyncio
80
+ async def test_partial_failures_are_reported_not_fatal():
81
+ client = FakeBulkCreateClient(
82
+ titles={"c1": "A", "c2": "B", "c3": "C"}, fail_ids=["c2"]
83
+ )
84
+ out = await tools.scenario_create_bulk(
85
+ client, ["c1", "c2", "c3"], output_format="json"
86
+ )
87
+ data = json.loads(out)
88
+ assert data["created"] == 2
89
+ assert data["failed"] == 1
90
+ assert data["errors"][0]["conversation_id"] == "c2"
91
+
92
+
93
+ @pytest.mark.asyncio
94
+ async def test_empty_list_is_an_argument_error():
95
+ client = FakeBulkCreateClient()
96
+ out = await tools.scenario_create_bulk(client, [], output_format="text")
97
+ assert "at least one conversation id" in out.lower()
98
+
99
+
100
+ @pytest.mark.asyncio
101
+ async def test_v2_scenarios_create_bulk_handler():
102
+ from applied_cli.v2.scenarios import (
103
+ ScenariosCreateBulkInput,
104
+ scenarios_create_bulk_handler,
105
+ )
106
+
107
+ client = FakeBulkCreateClient(titles={"c1": "A"})
108
+ result = await scenarios_create_bulk_handler(
109
+ client, ScenariosCreateBulkInput(conversation_ids=["c1"], benchmark_id="b1")
110
+ )
111
+ assert result.data["created"] == 1
112
+ assert "scenarios" in " ".join(result.next_actions).lower()
File without changes