applied-cli 0.6.8__tar.gz → 0.6.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {applied_cli-0.6.8 → applied_cli-0.6.10}/PKG-INFO +11 -1
- {applied_cli-0.6.8 → applied_cli-0.6.10}/README.md +10 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/cli.py +40 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/tools.py +91 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/domains.py +1 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/scenarios.py +63 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli.egg-info/PKG-INFO +11 -1
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli.egg-info/SOURCES.txt +1 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/pyproject.toml +1 -1
- applied_cli-0.6.10/tests/test_scenario_create_bulk.py +112 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/__init__.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/agent_scoped_flows.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/auth.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/client.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/conversation_lookup.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/conversations.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/credentials.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/flow_helpers.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/formatters.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/mcp.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/recovery.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/toolkit.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/__init__.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/agents.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/articles.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/catalog.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/connectors.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/content.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/conversations.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/flows.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/knowledge.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/manifest.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/products.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/taxonomy.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli/v2/tickets.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli.egg-info/dependency_links.txt +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli.egg-info/entry_points.txt +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli.egg-info/requires.txt +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/applied_cli.egg-info/top_level.txt +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/setup.cfg +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_agent_scoped_flows.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_audit_tools.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_auth_context.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_benchmark_clone.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_benchmark_delete_guardrail.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_benchmark_list_with_results.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_benchmark_results.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_benchmark_scenario_tools.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_cli.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_cli_v2.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_client.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_client_v2.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_conversation_tools.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_flow_tools.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_knowledge_content_tools.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_recovery.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_scenario_bulk_cancel.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_scenario_bulk_run_contact.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_scenario_bulk_run_wait.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_toolkit_contract.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_agents.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_articles.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_catalog_and_mcp.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_connectors.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_content.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_conversations.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_flows.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_knowledge.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_products.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_scenarios.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_taxonomy.py +0 -0
- {applied_cli-0.6.8 → applied_cli-0.6.10}/tests/test_v2_tickets.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: applied-cli
|
|
3
|
-
Version: 0.6.8
|
|
3
|
+
Version: 0.6.10
|
|
4
4
|
Summary: CLI and shared client library for Applied Labs AI support agents
|
|
5
5
|
Author: Applied Labs
|
|
6
6
|
License-Expression: MIT
|
|
@@ -97,6 +97,11 @@ applied benchmark-create --agent-id <agent_id> --name "Cancel Regression"
|
|
|
97
97
|
applied scenario-create --input-conversation-id <conversation_id> --name "<name>" \
|
|
98
98
|
--benchmark-id <benchmark_id>
|
|
99
99
|
|
|
100
|
+
# Build a suite fast from several real conversations at once. Each scenario is
|
|
101
|
+
# named from its conversation's title (or "<prefix> N" with --name-prefix).
|
|
102
|
+
applied scenario-create-bulk --conversation-ids <id1>,<id2>,<id3> \
|
|
103
|
+
--benchmark-id <benchmark_id>
|
|
104
|
+
|
|
100
105
|
# Port a suite to another agent (e.g. email -> chat). Cross-agent recreates the
|
|
101
106
|
# scenarios under the destination agent; same-agent just tags them in.
|
|
102
107
|
# Dry-run by default; add --apply to write.
|
|
@@ -116,6 +121,9 @@ applied scenario-bulk-cancel <job_id> --apply
|
|
|
116
121
|
# Review pass/fail health (pass_status reflects the latest run per scenario)
|
|
117
122
|
applied benchmark-results <benchmark_id> --format json
|
|
118
123
|
|
|
124
|
+
# Portfolio go/no-go: pass rates across all of an agent's benchmarks at a glance
|
|
125
|
+
applied benchmarks --agent-id <agent_id> --with-results --format json
|
|
126
|
+
|
|
119
127
|
# Rate scenarios as you evaluate
|
|
120
128
|
applied scenario-update <scenario_id> --pass-status pass --feedback "<note>"
|
|
121
129
|
|
|
@@ -167,6 +175,8 @@ conversations = await tools.conversation_query(
|
|
|
167
175
|
| `benchmark_clone` | Copy all scenarios from one benchmark into another |
|
|
168
176
|
| `benchmark_delete` | Delete a benchmark (guards against wiping scenarios) |
|
|
169
177
|
| `benchmark_results` | Pass/fail/unrated tally and pass rate for a benchmark |
|
|
178
|
+
| `benchmark_list` | List benchmarks (with per-benchmark pass rates via `with_results`) |
|
|
179
|
+
| `scenario_create_bulk` | Build scenarios from several conversations at once |
|
|
170
180
|
| `scenario_bulk_run` | Run scenarios (contact override + wait-to-completion) |
|
|
171
181
|
| `scenario_bulk_cancel` | Cancel a stuck bulk run's queued/running scenario runs |
|
|
172
182
|
|
|
@@ -71,6 +71,11 @@ applied benchmark-create --agent-id <agent_id> --name "Cancel Regression"
|
|
|
71
71
|
applied scenario-create --input-conversation-id <conversation_id> --name "<name>" \
|
|
72
72
|
--benchmark-id <benchmark_id>
|
|
73
73
|
|
|
74
|
+
# Build a suite fast from several real conversations at once. Each scenario is
|
|
75
|
+
# named from its conversation's title (or "<prefix> N" with --name-prefix).
|
|
76
|
+
applied scenario-create-bulk --conversation-ids <id1>,<id2>,<id3> \
|
|
77
|
+
--benchmark-id <benchmark_id>
|
|
78
|
+
|
|
74
79
|
# Port a suite to another agent (e.g. email -> chat). Cross-agent recreates the
|
|
75
80
|
# scenarios under the destination agent; same-agent just tags them in.
|
|
76
81
|
# Dry-run by default; add --apply to write.
|
|
@@ -90,6 +95,9 @@ applied scenario-bulk-cancel <job_id> --apply
|
|
|
90
95
|
# Review pass/fail health (pass_status reflects the latest run per scenario)
|
|
91
96
|
applied benchmark-results <benchmark_id> --format json
|
|
92
97
|
|
|
98
|
+
# Portfolio go/no-go: pass rates across all of an agent's benchmarks at a glance
|
|
99
|
+
applied benchmarks --agent-id <agent_id> --with-results --format json
|
|
100
|
+
|
|
93
101
|
# Rate scenarios as you evaluate
|
|
94
102
|
applied scenario-update <scenario_id> --pass-status pass --feedback "<note>"
|
|
95
103
|
|
|
@@ -141,6 +149,8 @@ conversations = await tools.conversation_query(
|
|
|
141
149
|
| `benchmark_clone` | Copy all scenarios from one benchmark into another |
|
|
142
150
|
| `benchmark_delete` | Delete a benchmark (guards against wiping scenarios) |
|
|
143
151
|
| `benchmark_results` | Pass/fail/unrated tally and pass rate for a benchmark |
|
|
152
|
+
| `benchmark_list` | List benchmarks (with per-benchmark pass rates via `with_results`) |
|
|
153
|
+
| `scenario_create_bulk` | Build scenarios from several conversations at once |
|
|
144
154
|
| `scenario_bulk_run` | Run scenarios (contact override + wait-to-completion) |
|
|
145
155
|
| `scenario_bulk_cancel` | Cancel a stuck bulk run's queued/running scenario runs |
|
|
146
156
|
|
|
@@ -1785,6 +1785,46 @@ def scenario_create(
|
|
|
1785
1785
|
typer.echo(result)
|
|
1786
1786
|
|
|
1787
1787
|
|
|
1788
|
+
@app.command("scenario-create-bulk")
|
|
1789
|
+
def scenario_create_bulk(
|
|
1790
|
+
conversation_ids: str = typer.Option(
|
|
1791
|
+
..., "--conversation-ids", help="Comma-separated source conversation IDs"
|
|
1792
|
+
),
|
|
1793
|
+
benchmark_id: str = typer.Option(
|
|
1794
|
+
None, "--benchmark-id", help="Attach the scenarios to an existing benchmark"
|
|
1795
|
+
),
|
|
1796
|
+
benchmark_name: str = typer.Option(
|
|
1797
|
+
None, "--benchmark-name", help="Create or reuse a benchmark by name"
|
|
1798
|
+
),
|
|
1799
|
+
agent_id: str = typer.Option(
|
|
1800
|
+
None, "--agent-id", help="Required when --benchmark-name is used"
|
|
1801
|
+
),
|
|
1802
|
+
name_prefix: str = typer.Option(
|
|
1803
|
+
None,
|
|
1804
|
+
"--name-prefix",
|
|
1805
|
+
help="Name scenarios '<prefix> N' instead of deriving from conversation titles",
|
|
1806
|
+
),
|
|
1807
|
+
shop_id: str = typer.Option(None, "--shop-id", help="Override shop ID"),
|
|
1808
|
+
format: str = typer.Option(
|
|
1809
|
+
"text", "--format", "-f", help="Output format: text or json"
|
|
1810
|
+
),
|
|
1811
|
+
) -> None:
|
|
1812
|
+
"""Create scenarios from several conversations at once (build a suite fast)."""
|
|
1813
|
+
client = get_client(shop_id=shop_id)
|
|
1814
|
+
result = asyncio.run(
|
|
1815
|
+
tools.scenario_create_bulk(
|
|
1816
|
+
client,
|
|
1817
|
+
conversation_ids=_parse_csv_option(conversation_ids),
|
|
1818
|
+
benchmark_id=benchmark_id,
|
|
1819
|
+
benchmark_name=benchmark_name,
|
|
1820
|
+
agent_id=agent_id,
|
|
1821
|
+
name_prefix=name_prefix,
|
|
1822
|
+
output_format=format,
|
|
1823
|
+
)
|
|
1824
|
+
)
|
|
1825
|
+
typer.echo(result)
|
|
1826
|
+
|
|
1827
|
+
|
|
1788
1828
|
@app.command("scenario-update")
|
|
1789
1829
|
def scenario_update_cmd(
|
|
1790
1830
|
id: str = typer.Argument(..., help="Scenario ID"),
|
|
@@ -5984,6 +5984,97 @@ async def scenario_create(
|
|
|
5984
5984
|
return result
|
|
5985
5985
|
|
|
5986
5986
|
|
|
5987
|
+
async def scenario_create_bulk(
|
|
5988
|
+
client: AppliedClient,
|
|
5989
|
+
conversation_ids: list[str],
|
|
5990
|
+
*,
|
|
5991
|
+
benchmark_id: str | None = None,
|
|
5992
|
+
benchmark_name: str | None = None,
|
|
5993
|
+
agent_id: str | None = None,
|
|
5994
|
+
name_prefix: str | None = None,
|
|
5995
|
+
output_format: str = "text",
|
|
5996
|
+
) -> str:
|
|
5997
|
+
"""
|
|
5998
|
+
Create scenarios from several conversations at once and attach them to a
|
|
5999
|
+
benchmark — the fast way to build a regression suite from real conversations.
|
|
6000
|
+
|
|
6001
|
+
Each scenario's name is derived from its source conversation's title; pass
|
|
6002
|
+
name_prefix to instead name them "<prefix> 1", "<prefix> 2", … (skips the
|
|
6003
|
+
per-conversation title lookup). Names are de-duplicated server-side.
|
|
6004
|
+
|
|
6005
|
+
Args:
|
|
6006
|
+
client: Authenticated AppliedClient
|
|
6007
|
+
conversation_ids: Source conversation UUIDs (one scenario each)
|
|
6008
|
+
benchmark_id: Attach the scenarios to this existing benchmark
|
|
6009
|
+
benchmark_name: Create or reuse a benchmark by name (requires agent_id)
|
|
6010
|
+
agent_id: Required when benchmark_name is used
|
|
6011
|
+
name_prefix: Name scenarios "<prefix> N" instead of using conversation titles
|
|
6012
|
+
output_format: 'text' (default) or 'json'
|
|
6013
|
+
|
|
6014
|
+
Returns:
|
|
6015
|
+
Summary of created scenarios and any per-conversation errors.
|
|
6016
|
+
"""
|
|
6017
|
+
if not conversation_ids:
|
|
6018
|
+
return _format_argument_error("Pass at least one conversation id.")
|
|
6019
|
+
|
|
6020
|
+
created: list[dict[str, Any]] = []
|
|
6021
|
+
errors: list[dict[str, Any]] = []
|
|
6022
|
+
for index, conversation_id in enumerate(conversation_ids):
|
|
6023
|
+
if name_prefix:
|
|
6024
|
+
name = f"{name_prefix} {index + 1}"
|
|
6025
|
+
else:
|
|
6026
|
+
try:
|
|
6027
|
+
conversation = await client.get_conversation(conversation_id)
|
|
6028
|
+
name = conversation.get("title") or f"Scenario {str(conversation_id)[:8]}"
|
|
6029
|
+
except AppliedAPIError:
|
|
6030
|
+
name = f"Scenario {str(conversation_id)[:8]}"
|
|
6031
|
+
|
|
6032
|
+
try:
|
|
6033
|
+
scenario = await client.create_scenario(
|
|
6034
|
+
input_conversation_id=conversation_id,
|
|
6035
|
+
name=name,
|
|
6036
|
+
benchmark_id=benchmark_id,
|
|
6037
|
+
benchmark_name=benchmark_name,
|
|
6038
|
+
agent_id=agent_id,
|
|
6039
|
+
)
|
|
6040
|
+
created.append(
|
|
6041
|
+
{
|
|
6042
|
+
"id": scenario.get("id"),
|
|
6043
|
+
"name": scenario.get("name"),
|
|
6044
|
+
"conversation_id": conversation_id,
|
|
6045
|
+
}
|
|
6046
|
+
)
|
|
6047
|
+
except AppliedAPIError as e:
|
|
6048
|
+
errors.append({"conversation_id": conversation_id, "error": str(e)})
|
|
6049
|
+
|
|
6050
|
+
summary = {
|
|
6051
|
+
"requested": len(conversation_ids),
|
|
6052
|
+
"created": len(created),
|
|
6053
|
+
"failed": len(errors),
|
|
6054
|
+
"scenarios": created,
|
|
6055
|
+
"errors": errors,
|
|
6056
|
+
}
|
|
6057
|
+
|
|
6058
|
+
if output_format == "json":
|
|
6059
|
+
return to_json(summary)
|
|
6060
|
+
|
|
6061
|
+
lines = [
|
|
6062
|
+
"# Bulk Scenario Create",
|
|
6063
|
+
f"requested: {summary['requested']}",
|
|
6064
|
+
f"created: {summary['created']}",
|
|
6065
|
+
f"failed: {summary['failed']}",
|
|
6066
|
+
]
|
|
6067
|
+
if created:
|
|
6068
|
+
lines.append("\n# Created")
|
|
6069
|
+
lines.extend(f" - {s['name']} ({s['id']})" for s in created[:50])
|
|
6070
|
+
if len(created) > 50:
|
|
6071
|
+
lines.append(f" ... and {len(created) - 50} more")
|
|
6072
|
+
if errors:
|
|
6073
|
+
lines.append(f"\n# Errors ({len(errors)})")
|
|
6074
|
+
lines.extend(f" - {e['conversation_id']}: {e['error']}" for e in errors[:20])
|
|
6075
|
+
return "\n".join(lines)
|
|
6076
|
+
|
|
6077
|
+
|
|
5987
6078
|
async def scenario_update(
|
|
5988
6079
|
client: AppliedClient,
|
|
5989
6080
|
scenario_id: str,
|
|
@@ -112,6 +112,7 @@ DOMAIN_TOOL_RENAMES: dict[str, dict[str, str]] = {
|
|
|
112
112
|
"scenario_list": "scenarios_list",
|
|
113
113
|
"scenario_get": "scenarios_get",
|
|
114
114
|
"scenario_create": "scenarios_create",
|
|
115
|
+
"scenario_create_bulk": "scenarios_create_bulk",
|
|
115
116
|
"scenario_update": "scenarios_update",
|
|
116
117
|
"scenario_delete": "scenarios_delete",
|
|
117
118
|
"scenario_run_list": "scenarios_runs_list",
|
|
@@ -98,6 +98,14 @@ class ScenariosCreateInput(StrictInput):
|
|
|
98
98
|
agent_id: str | None = None
|
|
99
99
|
|
|
100
100
|
|
|
101
|
+
class ScenariosCreateBulkInput(StrictInput):
|
|
102
|
+
conversation_ids: list[str]
|
|
103
|
+
benchmark_id: str | None = None
|
|
104
|
+
benchmark_name: str | None = None
|
|
105
|
+
agent_id: str | None = None
|
|
106
|
+
name_prefix: str | None = None
|
|
107
|
+
|
|
108
|
+
|
|
101
109
|
class ScenariosUpdateInput(StrictInput):
|
|
102
110
|
scenario_id: str
|
|
103
111
|
name: str | None = None
|
|
@@ -651,6 +659,46 @@ async def scenarios_create_handler(
|
|
|
651
659
|
)
|
|
652
660
|
|
|
653
661
|
|
|
662
|
+
async def scenarios_create_bulk_handler(
|
|
663
|
+
client: AppliedClient,
|
|
664
|
+
params: ScenariosCreateBulkInput,
|
|
665
|
+
) -> ToolResult[Any]:
|
|
666
|
+
from applied_cli import tools as legacy_tools
|
|
667
|
+
|
|
668
|
+
raw = await legacy_tools.scenario_create_bulk(
|
|
669
|
+
client,
|
|
670
|
+
conversation_ids=params.conversation_ids,
|
|
671
|
+
benchmark_id=params.benchmark_id,
|
|
672
|
+
benchmark_name=params.benchmark_name,
|
|
673
|
+
agent_id=params.agent_id,
|
|
674
|
+
name_prefix=params.name_prefix,
|
|
675
|
+
output_format="json",
|
|
676
|
+
)
|
|
677
|
+
try:
|
|
678
|
+
data = json.loads(raw)
|
|
679
|
+
except (json.JSONDecodeError, TypeError):
|
|
680
|
+
return ToolResult(data={"message": raw}, summary=str(raw))
|
|
681
|
+
|
|
682
|
+
next_actions = []
|
|
683
|
+
if data.get("created"):
|
|
684
|
+
next_actions.append(
|
|
685
|
+
"Use scenarios_bulk_run with the benchmark_id to run the new scenarios."
|
|
686
|
+
)
|
|
687
|
+
return ToolResult(
|
|
688
|
+
data=data,
|
|
689
|
+
summary=(
|
|
690
|
+
f"Created {data.get('created', 0)}/{data.get('requested', 0)} "
|
|
691
|
+
f"scenarios ({data.get('failed', 0)} failed)."
|
|
692
|
+
),
|
|
693
|
+
warnings=(
|
|
694
|
+
[f"{data['failed']} conversation(s) failed to convert."]
|
|
695
|
+
if data.get("failed")
|
|
696
|
+
else []
|
|
697
|
+
),
|
|
698
|
+
next_actions=next_actions,
|
|
699
|
+
)
|
|
700
|
+
|
|
701
|
+
|
|
654
702
|
async def scenarios_get_handler(
|
|
655
703
|
client: AppliedClient,
|
|
656
704
|
params: ScenariosGetInput,
|
|
@@ -1116,6 +1164,21 @@ def scenario_specs() -> list[ToolSpec]:
|
|
|
1116
1164
|
read_write_mode="write",
|
|
1117
1165
|
tags=["scenario_create", "native"],
|
|
1118
1166
|
),
|
|
1167
|
+
ToolSpec(
|
|
1168
|
+
name="scenarios_create_bulk",
|
|
1169
|
+
namespace="scenarios",
|
|
1170
|
+
description=(
|
|
1171
|
+
"Create scenarios from several conversations at once and attach "
|
|
1172
|
+
"them to a benchmark — build a regression suite from real "
|
|
1173
|
+
"conversations. Names derive from each conversation's title "
|
|
1174
|
+
"unless name_prefix is given."
|
|
1175
|
+
),
|
|
1176
|
+
input_model=ScenariosCreateBulkInput,
|
|
1177
|
+
output_model=None,
|
|
1178
|
+
handler=scenarios_create_bulk_handler,
|
|
1179
|
+
read_write_mode="write",
|
|
1180
|
+
tags=["scenario_create_bulk", "native"],
|
|
1181
|
+
),
|
|
1119
1182
|
ToolSpec(
|
|
1120
1183
|
name="scenarios_update",
|
|
1121
1184
|
namespace="scenarios",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: applied-cli
|
|
3
|
-
Version: 0.6.8
|
|
3
|
+
Version: 0.6.10
|
|
4
4
|
Summary: CLI and shared client library for Applied Labs AI support agents
|
|
5
5
|
Author: Applied Labs
|
|
6
6
|
License-Expression: MIT
|
|
@@ -97,6 +97,11 @@ applied benchmark-create --agent-id <agent_id> --name "Cancel Regression"
|
|
|
97
97
|
applied scenario-create --input-conversation-id <conversation_id> --name "<name>" \
|
|
98
98
|
--benchmark-id <benchmark_id>
|
|
99
99
|
|
|
100
|
+
# Build a suite fast from several real conversations at once. Each scenario is
|
|
101
|
+
# named from its conversation's title (or "<prefix> N" with --name-prefix).
|
|
102
|
+
applied scenario-create-bulk --conversation-ids <id1>,<id2>,<id3> \
|
|
103
|
+
--benchmark-id <benchmark_id>
|
|
104
|
+
|
|
100
105
|
# Port a suite to another agent (e.g. email -> chat). Cross-agent recreates the
|
|
101
106
|
# scenarios under the destination agent; same-agent just tags them in.
|
|
102
107
|
# Dry-run by default; add --apply to write.
|
|
@@ -116,6 +121,9 @@ applied scenario-bulk-cancel <job_id> --apply
|
|
|
116
121
|
# Review pass/fail health (pass_status reflects the latest run per scenario)
|
|
117
122
|
applied benchmark-results <benchmark_id> --format json
|
|
118
123
|
|
|
124
|
+
# Portfolio go/no-go: pass rates across all of an agent's benchmarks at a glance
|
|
125
|
+
applied benchmarks --agent-id <agent_id> --with-results --format json
|
|
126
|
+
|
|
119
127
|
# Rate scenarios as you evaluate
|
|
120
128
|
applied scenario-update <scenario_id> --pass-status pass --feedback "<note>"
|
|
121
129
|
|
|
@@ -167,6 +175,8 @@ conversations = await tools.conversation_query(
|
|
|
167
175
|
| `benchmark_clone` | Copy all scenarios from one benchmark into another |
|
|
168
176
|
| `benchmark_delete` | Delete a benchmark (guards against wiping scenarios) |
|
|
169
177
|
| `benchmark_results` | Pass/fail/unrated tally and pass rate for a benchmark |
|
|
178
|
+
| `benchmark_list` | List benchmarks (with per-benchmark pass rates via `with_results`) |
|
|
179
|
+
| `scenario_create_bulk` | Build scenarios from several conversations at once |
|
|
170
180
|
| `scenario_bulk_run` | Run scenarios (contact override + wait-to-completion) |
|
|
171
181
|
| `scenario_bulk_cancel` | Cancel a stuck bulk run's queued/running scenario runs |
|
|
172
182
|
|
|
@@ -54,6 +54,7 @@ tests/test_recovery.py
|
|
|
54
54
|
tests/test_scenario_bulk_cancel.py
|
|
55
55
|
tests/test_scenario_bulk_run_contact.py
|
|
56
56
|
tests/test_scenario_bulk_run_wait.py
|
|
57
|
+
tests/test_scenario_create_bulk.py
|
|
57
58
|
tests/test_toolkit_contract.py
|
|
58
59
|
tests/test_v2_agents.py
|
|
59
60
|
tests/test_v2_articles.py
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from applied_cli import tools
|
|
6
|
+
from applied_cli.client import AppliedAPIError
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class FakeBulkCreateClient:
|
|
10
|
+
def __init__(self, titles=None, fail_ids=None):
|
|
11
|
+
self._titles = titles or {}
|
|
12
|
+
self._fail_ids = set(fail_ids or [])
|
|
13
|
+
self.get_conversation_calls = []
|
|
14
|
+
self.created = []
|
|
15
|
+
|
|
16
|
+
async def get_conversation(self, conversation_id, *, shop_id=None):
|
|
17
|
+
self.get_conversation_calls.append(conversation_id)
|
|
18
|
+
return {"id": conversation_id, "title": self._titles.get(conversation_id)}
|
|
19
|
+
|
|
20
|
+
async def create_scenario(
|
|
21
|
+
self,
|
|
22
|
+
input_conversation_id,
|
|
23
|
+
name,
|
|
24
|
+
benchmark_id=None,
|
|
25
|
+
benchmark_name=None,
|
|
26
|
+
agent_id=None,
|
|
27
|
+
):
|
|
28
|
+
if input_conversation_id in self._fail_ids:
|
|
29
|
+
raise AppliedAPIError("boom", status_code=400)
|
|
30
|
+
rec = {
|
|
31
|
+
"id": f"scn-{len(self.created) + 1}",
|
|
32
|
+
"name": name,
|
|
33
|
+
"input_conversation_id": input_conversation_id,
|
|
34
|
+
"benchmark_id": benchmark_id,
|
|
35
|
+
}
|
|
36
|
+
self.created.append(rec)
|
|
37
|
+
return rec
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@pytest.mark.asyncio
|
|
41
|
+
async def test_bulk_create_names_from_conversation_titles():
|
|
42
|
+
client = FakeBulkCreateClient(
|
|
43
|
+
titles={"c1": "Cancel order BP123", "c2": "Refund request"}
|
|
44
|
+
)
|
|
45
|
+
out = await tools.scenario_create_bulk(
|
|
46
|
+
client, ["c1", "c2"], benchmark_id="b1", output_format="json"
|
|
47
|
+
)
|
|
48
|
+
data = json.loads(out)
|
|
49
|
+
assert data["created"] == 2
|
|
50
|
+
assert [s["name"] for s in client.created] == ["Cancel order BP123", "Refund request"]
|
|
51
|
+
assert all(s["benchmark_id"] == "b1" for s in client.created)
|
|
52
|
+
assert client.get_conversation_calls == ["c1", "c2"]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@pytest.mark.asyncio
|
|
56
|
+
async def test_name_prefix_skips_title_lookup():
|
|
57
|
+
client = FakeBulkCreateClient()
|
|
58
|
+
await tools.scenario_create_bulk(
|
|
59
|
+
client, ["c1", "c2", "c3"], name_prefix="DG Cancel", output_format="json"
|
|
60
|
+
)
|
|
61
|
+
assert [s["name"] for s in client.created] == [
|
|
62
|
+
"DG Cancel 1",
|
|
63
|
+
"DG Cancel 2",
|
|
64
|
+
"DG Cancel 3",
|
|
65
|
+
]
|
|
66
|
+
# No per-conversation fetches when a prefix is supplied.
|
|
67
|
+
assert client.get_conversation_calls == []
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@pytest.mark.asyncio
|
|
71
|
+
async def test_missing_title_falls_back_to_short_id():
|
|
72
|
+
client = FakeBulkCreateClient(titles={"abcdef12-0000": None})
|
|
73
|
+
await tools.scenario_create_bulk(
|
|
74
|
+
client, ["abcdef12-0000"], output_format="json"
|
|
75
|
+
)
|
|
76
|
+
assert client.created[0]["name"] == "Scenario abcdef12"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@pytest.mark.asyncio
|
|
80
|
+
async def test_partial_failures_are_reported_not_fatal():
|
|
81
|
+
client = FakeBulkCreateClient(
|
|
82
|
+
titles={"c1": "A", "c2": "B", "c3": "C"}, fail_ids=["c2"]
|
|
83
|
+
)
|
|
84
|
+
out = await tools.scenario_create_bulk(
|
|
85
|
+
client, ["c1", "c2", "c3"], output_format="json"
|
|
86
|
+
)
|
|
87
|
+
data = json.loads(out)
|
|
88
|
+
assert data["created"] == 2
|
|
89
|
+
assert data["failed"] == 1
|
|
90
|
+
assert data["errors"][0]["conversation_id"] == "c2"
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@pytest.mark.asyncio
|
|
94
|
+
async def test_empty_list_is_an_argument_error():
|
|
95
|
+
client = FakeBulkCreateClient()
|
|
96
|
+
out = await tools.scenario_create_bulk(client, [], output_format="text")
|
|
97
|
+
assert "at least one conversation id" in out.lower()
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@pytest.mark.asyncio
|
|
101
|
+
async def test_v2_scenarios_create_bulk_handler():
|
|
102
|
+
from applied_cli.v2.scenarios import (
|
|
103
|
+
ScenariosCreateBulkInput,
|
|
104
|
+
scenarios_create_bulk_handler,
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
client = FakeBulkCreateClient(titles={"c1": "A"})
|
|
108
|
+
result = await scenarios_create_bulk_handler(
|
|
109
|
+
client, ScenariosCreateBulkInput(conversation_ids=["c1"], benchmark_id="b1")
|
|
110
|
+
)
|
|
111
|
+
assert result.data["created"] == 1
|
|
112
|
+
assert "scenarios" in " ".join(result.next_actions).lower()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|