applied-cli 0.6.3__tar.gz → 0.6.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {applied_cli-0.6.3 → applied_cli-0.6.5}/PKG-INFO +1 -1
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/cli.py +27 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/tools.py +143 -8
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/v2/scenarios.py +62 -2
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli.egg-info/PKG-INFO +1 -1
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli.egg-info/SOURCES.txt +2 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/pyproject.toml +1 -1
- applied_cli-0.6.5/tests/test_scenario_bulk_run_contact.py +116 -0
- applied_cli-0.6.5/tests/test_scenario_bulk_run_wait.py +107 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/README.md +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/__init__.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/agent_scoped_flows.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/auth.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/client.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/conversation_lookup.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/conversations.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/credentials.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/flow_helpers.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/formatters.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/mcp.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/recovery.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/toolkit.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/v2/__init__.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/v2/agents.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/v2/articles.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/v2/catalog.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/v2/connectors.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/v2/content.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/v2/conversations.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/v2/domains.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/v2/flows.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/v2/knowledge.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/v2/manifest.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/v2/products.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/v2/taxonomy.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli/v2/tickets.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli.egg-info/dependency_links.txt +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli.egg-info/entry_points.txt +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli.egg-info/requires.txt +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/applied_cli.egg-info/top_level.txt +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/setup.cfg +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_agent_scoped_flows.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_audit_tools.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_auth_context.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_benchmark_clone.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_benchmark_delete_guardrail.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_benchmark_scenario_tools.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_cli.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_cli_v2.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_client.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_client_v2.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_conversation_tools.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_flow_tools.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_knowledge_content_tools.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_recovery.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_scenario_bulk_cancel.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_toolkit_contract.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_v2_agents.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_v2_articles.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_v2_catalog_and_mcp.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_v2_connectors.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_v2_content.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_v2_conversations.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_v2_flows.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_v2_knowledge.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_v2_products.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_v2_scenarios.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_v2_taxonomy.py +0 -0
- {applied_cli-0.6.3 → applied_cli-0.6.5}/tests/test_v2_tickets.py +0 -0
|
@@ -1916,6 +1916,27 @@ def scenario_bulk_run(
|
|
|
1916
1916
|
target_agent_id: str = typer.Option(
|
|
1917
1917
|
None, "--target-agent-id", help="Optional target agent for reruns"
|
|
1918
1918
|
),
|
|
1919
|
+
contact_email: str = typer.Option(
|
|
1920
|
+
None,
|
|
1921
|
+
"--contact-email",
|
|
1922
|
+
help="Run as a contact with this email (resolves/creates it) so test "
|
|
1923
|
+
"conversations carry an email — fixes 'Email is not present' failures",
|
|
1924
|
+
),
|
|
1925
|
+
contact_id: str = typer.Option(
|
|
1926
|
+
None, "--contact-id", help="Run scenarios as this existing contact UUID"
|
|
1927
|
+
),
|
|
1928
|
+
anonymous: bool = typer.Option(
|
|
1929
|
+
False, "--anonymous", help="Run with an anonymous contact"
|
|
1930
|
+
),
|
|
1931
|
+
wait: bool = typer.Option(
|
|
1932
|
+
False, "--wait", help="Poll until all runs finish, then print final status"
|
|
1933
|
+
),
|
|
1934
|
+
wait_timeout: float = typer.Option(
|
|
1935
|
+
300.0, "--wait-timeout", help="Max seconds to wait with --wait (default 300)"
|
|
1936
|
+
),
|
|
1937
|
+
poll_interval: float = typer.Option(
|
|
1938
|
+
3.0, "--poll-interval", help="Seconds between status polls with --wait"
|
|
1939
|
+
),
|
|
1919
1940
|
shop_id: str = typer.Option(None, "--shop-id", help="Override shop ID"),
|
|
1920
1941
|
format: str = typer.Option(
|
|
1921
1942
|
"text", "--format", "-f", help="Output format: text or json"
|
|
@@ -1929,6 +1950,12 @@ def scenario_bulk_run(
|
|
|
1929
1950
|
scenario_ids=_parse_csv_option(scenario_ids),
|
|
1930
1951
|
benchmark_id=benchmark_id,
|
|
1931
1952
|
target_agent_id=target_agent_id,
|
|
1953
|
+
contact_email=contact_email,
|
|
1954
|
+
contact_id=contact_id,
|
|
1955
|
+
anonymous=anonymous,
|
|
1956
|
+
wait=wait,
|
|
1957
|
+
wait_timeout=wait_timeout,
|
|
1958
|
+
poll_interval=poll_interval,
|
|
1932
1959
|
output_format=format,
|
|
1933
1960
|
)
|
|
1934
1961
|
)
|
|
@@ -8,6 +8,7 @@ import asyncio
|
|
|
8
8
|
import difflib
|
|
9
9
|
import json
|
|
10
10
|
import re
|
|
11
|
+
import time
|
|
11
12
|
from contextlib import suppress
|
|
12
13
|
from html.parser import HTMLParser
|
|
13
14
|
from typing import Any
|
|
@@ -6082,26 +6083,116 @@ async def scenario_run_delete(
|
|
|
6082
6083
|
return f"Scenario run {run_id} deleted successfully."
|
|
6083
6084
|
|
|
6084
6085
|
|
|
6086
|
+
def _bulk_status_counts(counts: dict | None) -> dict[str, int]:
|
|
6087
|
+
"""Normalize bulk-status counts to lowercase keys with int values."""
|
|
6088
|
+
lowered: dict[str, int] = {}
|
|
6089
|
+
for key, value in (counts or {}).items():
|
|
6090
|
+
lowered[str(key).lower()] = int(value or 0)
|
|
6091
|
+
return lowered
|
|
6092
|
+
|
|
6093
|
+
|
|
6094
|
+
def _bulk_pending_count(counts: dict | None) -> int:
    """Return the number of runs that have not finished yet.

    The counts are first normalized with ``_bulk_status_counts``, so status
    names are matched case-insensitively. The result is the queued count plus
    the running count; a status missing from the mapping counts as 0.
    """
    by_status = _bulk_status_counts(counts)
    return sum(by_status.get(state, 0) for state in ("queued", "running"))
|
|
6098
|
+
|
|
6099
|
+
|
|
6100
|
+
async def _await_bulk_run(
    client: AppliedClient,
    job_id: str,
    *,
    timeout: float,
    poll_interval: float,
) -> tuple[dict, bool]:
    """Poll a bulk run until no runs are queued/running or the timeout elapses.

    The status is always fetched at least once, even when ``timeout`` is 0.

    Args:
        client: Client exposing ``get_scenario_bulk_run_status(job_id)``.
        job_id: Identifier of the bulk job to poll.
        timeout: Maximum number of seconds to keep polling, measured with
            ``time.monotonic()`` from the moment this coroutine starts.
        poll_interval: Seconds to sleep between two status requests. It is
            passed through unchanged apart from the deadline clamp below.

    Returns:
        ``(latest_status_payload, timed_out)``. ``timed_out`` is True when
        runs were still pending once the timeout had elapsed.

    Raises:
        Whatever ``client.get_scenario_bulk_run_status`` raises; nothing is
        caught here.
    """
    deadline = time.monotonic() + timeout
    status = await client.get_scenario_bulk_run_status(job_id)
    while _bulk_pending_count(status.get("counts")) > 0:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return status, True
        # Never sleep past the deadline: with a fixed poll_interval the wait
        # could overshoot ``timeout`` by almost a full interval. Clamping also
        # guarantees one last poll right at the deadline, so the payload
        # returned on timeout is as fresh as possible.
        await asyncio.sleep(min(poll_interval, remaining))
        status = await client.get_scenario_bulk_run_status(job_id)
    return status, False
|
|
6119
|
+
|
|
6120
|
+
|
|
6121
|
+
async def _resolve_contact_override(
    client: AppliedClient,
    *,
    contact_override: dict | None,
    contact_id: str | None,
    contact_email: str | None,
    anonymous: bool,
) -> dict | None:
    """Build the contact_override payload from the convenience arguments.

    Precedence, highest first: a non-empty explicit ``contact_override`` dict,
    then ``anonymous``, then ``contact_id``, then ``contact_email``. Only the
    ``contact_email`` path talks to the API: it calls
    ``client.get_or_create_contact(email=...)`` and uses the ``id`` of the
    returned contact.

    Args:
        client: Client exposing ``get_or_create_contact(email=...)``.
        contact_override: Raw override payload; returned as-is when truthy.
        contact_id: Identifier of an existing contact to run as.
        contact_email: Email used to look up (or create) the contact to run as.
        anonymous: When True, request an anonymous contact.

    Returns:
        The override dict to send with the bulk run, or ``None`` when none of
        the arguments selects a contact.

    Raises:
        AppliedAPIError: With ``status_code=404`` when the email lookup does
            not produce a contact with a usable ``id``.
    """
    if contact_override:
        return contact_override
    if anonymous:
        return {"mode": "anonymous"}
    if contact_id:
        return {"mode": "contact", "contact_id": contact_id}
    if not contact_email:
        return None

    contact = await client.get_or_create_contact(email=contact_email)
    # A response that is not a dict (e.g. None) is treated like a response
    # without an id, so it surfaces as the AppliedAPIError below instead of
    # an AttributeError from calling .get() on it.
    resolved_id = contact.get("id") if isinstance(contact, dict) else None
    if not resolved_id:
        raise AppliedAPIError(
            f"Could not resolve a contact for email {contact_email}.",
            status_code=404,
        )
    return {"mode": "contact", "contact_id": str(resolved_id)}
|
|
6151
|
+
|
|
6152
|
+
|
|
6085
6153
|
async def scenario_bulk_run(
|
|
6086
6154
|
client: AppliedClient,
|
|
6087
6155
|
scenario_ids: list[str] | None = None,
|
|
6088
6156
|
benchmark_id: str | None = None,
|
|
6089
6157
|
target_agent_id: str | None = None,
|
|
6090
6158
|
contact_override: dict | None = None,
|
|
6159
|
+
contact_id: str | None = None,
|
|
6160
|
+
contact_email: str | None = None,
|
|
6161
|
+
anonymous: bool = False,
|
|
6162
|
+
wait: bool = False,
|
|
6163
|
+
wait_timeout: float = 300.0,
|
|
6164
|
+
poll_interval: float = 3.0,
|
|
6091
6165
|
output_format: str = "text",
|
|
6092
6166
|
) -> str:
|
|
6093
6167
|
"""
|
|
6094
6168
|
Run multiple scenarios at once.
|
|
6095
6169
|
|
|
6170
|
+
By default a scenario run reuses the input conversation's contact, which on
|
|
6171
|
+
test/benchmark conversations often has no email — causing agents to respond
|
|
6172
|
+
"Email is not present in the conversation." Pass contact_email or contact_id
|
|
6173
|
+
to run the scenarios as a contact that has an email, so the test conversation
|
|
6174
|
+
carries it.
|
|
6175
|
+
|
|
6176
|
+
With wait=True, this polls until every run finishes (or the timeout elapses)
|
|
6177
|
+
and returns the final status, so you can run a benchmark and read results in
|
|
6178
|
+
one call instead of polling scenario_bulk_status yourself.
|
|
6179
|
+
|
|
6096
6180
|
Args:
|
|
6097
6181
|
client: Authenticated AppliedClient
|
|
6098
6182
|
scenario_ids: List of scenario UUIDs to run
|
|
6099
6183
|
benchmark_id: Run all scenarios in this benchmark
|
|
6100
6184
|
target_agent_id: Optional agent to run against (for A/B testing)
|
|
6101
|
-
contact_override:
|
|
6185
|
+
contact_override: Raw override, e.g. {"mode": "contact", "contact_id": "<uuid>"}
|
|
6186
|
+
(takes precedence over the convenience args below)
|
|
6187
|
+
contact_id: Run scenarios as this existing contact (gives test convos its email)
|
|
6188
|
+
contact_email: Resolve/create a contact with this email and run as them
|
|
6189
|
+
anonymous: Run with an anonymous contact (mode='anonymous')
|
|
6190
|
+
wait: Poll until all runs finish (or wait_timeout elapses)
|
|
6191
|
+
wait_timeout: Max seconds to wait when wait=True (default 300)
|
|
6192
|
+
poll_interval: Seconds between status polls when wait=True (default 3)
|
|
6102
6193
|
|
|
6103
6194
|
Returns:
|
|
6104
|
-
Summary of runs created
|
|
6195
|
+
Summary of runs created (plus the final status when wait=True)
|
|
6105
6196
|
"""
|
|
6106
6197
|
resolved_scenario_ids = list(scenario_ids or [])
|
|
6107
6198
|
if not resolved_scenario_ids:
|
|
@@ -6123,10 +6214,17 @@ async def scenario_bulk_run(
|
|
|
6123
6214
|
)
|
|
6124
6215
|
|
|
6125
6216
|
try:
|
|
6217
|
+
effective_override = await _resolve_contact_override(
|
|
6218
|
+
client,
|
|
6219
|
+
contact_override=contact_override,
|
|
6220
|
+
contact_id=contact_id,
|
|
6221
|
+
contact_email=contact_email,
|
|
6222
|
+
anonymous=anonymous,
|
|
6223
|
+
)
|
|
6126
6224
|
result = await client.bulk_run_scenarios(
|
|
6127
6225
|
scenario_ids=resolved_scenario_ids,
|
|
6128
6226
|
target_agent_id=target_agent_id,
|
|
6129
|
-
contact_override=
|
|
6227
|
+
contact_override=effective_override,
|
|
6130
6228
|
)
|
|
6131
6229
|
except AppliedAPIError as e:
|
|
6132
6230
|
return _format_error(e)
|
|
@@ -6144,6 +6242,26 @@ async def scenario_bulk_run(
|
|
|
6144
6242
|
"contact_override": result.get("contact_override"),
|
|
6145
6243
|
}
|
|
6146
6244
|
|
|
6245
|
+
job_id = payload.get("job_id")
|
|
6246
|
+
final_status: dict | None = None
|
|
6247
|
+
timed_out = False
|
|
6248
|
+
if wait and job_id:
|
|
6249
|
+
try:
|
|
6250
|
+
final_status, timed_out = await _await_bulk_run(
|
|
6251
|
+
client,
|
|
6252
|
+
str(job_id),
|
|
6253
|
+
timeout=wait_timeout,
|
|
6254
|
+
poll_interval=poll_interval,
|
|
6255
|
+
)
|
|
6256
|
+
except AppliedAPIError as e:
|
|
6257
|
+
return _format_error(e)
|
|
6258
|
+
counts = _bulk_status_counts(final_status.get("counts"))
|
|
6259
|
+
payload["final_counts"] = counts
|
|
6260
|
+
payload["timed_out"] = timed_out
|
|
6261
|
+
payload["duration_seconds"] = final_status.get("duration_seconds")
|
|
6262
|
+
payload["completed_at"] = final_status.get("completed_at")
|
|
6263
|
+
payload["failed"] = final_status.get("failed") or []
|
|
6264
|
+
|
|
6147
6265
|
if output_format == "json":
|
|
6148
6266
|
return to_json(payload)
|
|
6149
6267
|
|
|
@@ -6160,6 +6278,23 @@ async def scenario_bulk_run(
|
|
|
6160
6278
|
output += f"scenario_run_ids: {preview_ids}\n"
|
|
6161
6279
|
if len(run_ids) > 10:
|
|
6162
6280
|
output += f"more_runs: {len(run_ids) - 10}\n"
|
|
6281
|
+
|
|
6282
|
+
if final_status is not None:
|
|
6283
|
+
counts = payload["final_counts"]
|
|
6284
|
+
output += "\n# Final Status\n"
|
|
6285
|
+
output += "timed_out: " + ("true (still pending)" if timed_out else "false") + "\n"
|
|
6286
|
+
output += f"completed: {counts.get('completed', 0)}\n"
|
|
6287
|
+
output += f"failed: {counts.get('failed', 0)}\n"
|
|
6288
|
+
pending = counts.get("queued", 0) + counts.get("running", 0)
|
|
6289
|
+
output += f"still_pending: {pending}\n"
|
|
6290
|
+
if payload.get("duration_seconds") is not None:
|
|
6291
|
+
output += f"duration_seconds: {payload['duration_seconds']}\n"
|
|
6292
|
+
failed_runs = payload.get("failed") or []
|
|
6293
|
+
if failed_runs:
|
|
6294
|
+
output += f"\n# Failed Runs ({len(failed_runs)})\n"
|
|
6295
|
+
output += to_json(failed_runs)
|
|
6296
|
+
return output
|
|
6297
|
+
|
|
6163
6298
|
output += "\nTip: use scenario_bulk_status(job_id, include_runs=True) or scenario_run_list(bulk_job_id=job_id) to get per-run details with scenario mappings."
|
|
6164
6299
|
return output
|
|
6165
6300
|
|
|
@@ -6193,14 +6328,14 @@ async def scenario_bulk_status(
|
|
|
6193
6328
|
payload.pop("runs", None)
|
|
6194
6329
|
return to_json(payload)
|
|
6195
6330
|
|
|
6196
|
-
counts = result.get("counts")
|
|
6331
|
+
counts = _bulk_status_counts(result.get("counts"))
|
|
6197
6332
|
output = "# Bulk Run Status\n"
|
|
6198
6333
|
output += f"job_id: {result.get('job_id')}\n"
|
|
6199
6334
|
output += f"total: {result.get('total')}\n"
|
|
6200
|
-
output += f"queued: {counts.get('
|
|
6201
|
-
output += f"running: {counts.get('
|
|
6202
|
-
output += f"completed: {counts.get('
|
|
6203
|
-
output += f"failed: {counts.get('
|
|
6335
|
+
output += f"queued: {counts.get('queued', 0)}\n"
|
|
6336
|
+
output += f"running: {counts.get('running', 0)}\n"
|
|
6337
|
+
output += f"completed: {counts.get('completed', 0)}\n"
|
|
6338
|
+
output += f"failed: {counts.get('failed', 0)}\n"
|
|
6204
6339
|
output += f"created_at: {result.get('created_at')}\n"
|
|
6205
6340
|
output += f"updated_at: {result.get('updated_at')}\n"
|
|
6206
6341
|
if result.get("completed_at"):
|
|
@@ -27,6 +27,12 @@ class ScenariosBulkRunInput(StrictInput):
|
|
|
27
27
|
benchmark_id: str | None = None
|
|
28
28
|
target_agent_id: str | None = None
|
|
29
29
|
contact_override: dict[str, Any] | None = None
|
|
30
|
+
contact_id: str | None = None
|
|
31
|
+
contact_email: str | None = None
|
|
32
|
+
anonymous: bool = False
|
|
33
|
+
wait: bool = False
|
|
34
|
+
wait_timeout: float = 300.0
|
|
35
|
+
poll_interval: float = 3.0
|
|
30
36
|
|
|
31
37
|
|
|
32
38
|
class ScenariosBulkCancelInput(StrictInput):
|
|
@@ -796,10 +802,19 @@ async def scenarios_bulk_run_handler(
|
|
|
796
802
|
)
|
|
797
803
|
|
|
798
804
|
try:
|
|
805
|
+
from applied_cli.tools import _resolve_contact_override
|
|
806
|
+
|
|
807
|
+
effective_override = await _resolve_contact_override(
|
|
808
|
+
client,
|
|
809
|
+
contact_override=params.contact_override,
|
|
810
|
+
contact_id=params.contact_id,
|
|
811
|
+
contact_email=params.contact_email,
|
|
812
|
+
anonymous=params.anonymous,
|
|
813
|
+
)
|
|
799
814
|
result = await client.bulk_run_scenarios(
|
|
800
815
|
scenario_ids=resolved_scenario_ids,
|
|
801
816
|
target_agent_id=params.target_agent_id,
|
|
802
|
-
contact_override=
|
|
817
|
+
contact_override=effective_override,
|
|
803
818
|
)
|
|
804
819
|
except AppliedAPIError as exc:
|
|
805
820
|
return _api_error_result(exc)
|
|
@@ -816,6 +831,48 @@ async def scenarios_bulk_run_handler(
|
|
|
816
831
|
"duplicated_scenarios": result.get("duplicated_scenarios"),
|
|
817
832
|
"contact_override": result.get("contact_override"),
|
|
818
833
|
}
|
|
834
|
+
|
|
835
|
+
job_id = payload.get("job_id")
|
|
836
|
+
if params.wait and job_id:
|
|
837
|
+
from applied_cli.tools import _await_bulk_run, _bulk_status_counts
|
|
838
|
+
|
|
839
|
+
try:
|
|
840
|
+
final_status, timed_out = await _await_bulk_run(
|
|
841
|
+
client,
|
|
842
|
+
str(job_id),
|
|
843
|
+
timeout=params.wait_timeout,
|
|
844
|
+
poll_interval=params.poll_interval,
|
|
845
|
+
)
|
|
846
|
+
except AppliedAPIError as exc:
|
|
847
|
+
return _api_error_result(exc)
|
|
848
|
+
counts = _bulk_status_counts(final_status.get("counts"))
|
|
849
|
+
payload["final_counts"] = counts
|
|
850
|
+
payload["timed_out"] = timed_out
|
|
851
|
+
payload["duration_seconds"] = final_status.get("duration_seconds")
|
|
852
|
+
payload["failed"] = final_status.get("failed") or []
|
|
853
|
+
pending = counts.get("queued", 0) + counts.get("running", 0)
|
|
854
|
+
summary = (
|
|
855
|
+
f"Bulk job {job_id} "
|
|
856
|
+
+ ("timed out with " if timed_out else "finished: ")
|
|
857
|
+
+ f"{counts.get('completed', 0)} completed, "
|
|
858
|
+
+ f"{counts.get('failed', 0)} failed"
|
|
859
|
+
+ (f", {pending} still pending" if pending else "")
|
|
860
|
+
+ "."
|
|
861
|
+
)
|
|
862
|
+
warnings = []
|
|
863
|
+
if counts.get("failed"):
|
|
864
|
+
warnings.append(f"{counts['failed']} run(s) failed.")
|
|
865
|
+
if timed_out:
|
|
866
|
+
warnings.append("Timed out before all runs finished.")
|
|
867
|
+
return ToolResult(
|
|
868
|
+
data=payload,
|
|
869
|
+
summary=summary,
|
|
870
|
+
warnings=warnings,
|
|
871
|
+
next_actions=[
|
|
872
|
+
"Use scenarios_bulk_status with include_runs=true to inspect runs.",
|
|
873
|
+
],
|
|
874
|
+
)
|
|
875
|
+
|
|
819
876
|
queued = payload.get("queued") or 0
|
|
820
877
|
return ToolResult(
|
|
821
878
|
data=payload,
|
|
@@ -1050,7 +1107,10 @@ def scenario_specs() -> list[ToolSpec]:
|
|
|
1050
1107
|
namespace="scenarios",
|
|
1051
1108
|
description=(
|
|
1052
1109
|
"Run selected scenarios or every scenario in a benchmark and "
|
|
1053
|
-
"return the queued job metadata."
|
|
1110
|
+
"return the queued job metadata. Pass contact_email or contact_id "
|
|
1111
|
+
"to run as a contact with an email (fixes 'Email is not present' "
|
|
1112
|
+
"failures on test conversations). Pass wait=true to block until "
|
|
1113
|
+
"all runs finish and return the final status in one call."
|
|
1054
1114
|
),
|
|
1055
1115
|
input_model=ScenariosBulkRunInput,
|
|
1056
1116
|
output_model=None,
|
|
@@ -50,6 +50,8 @@ tests/test_flow_tools.py
|
|
|
50
50
|
tests/test_knowledge_content_tools.py
|
|
51
51
|
tests/test_recovery.py
|
|
52
52
|
tests/test_scenario_bulk_cancel.py
|
|
53
|
+
tests/test_scenario_bulk_run_contact.py
|
|
54
|
+
tests/test_scenario_bulk_run_wait.py
|
|
53
55
|
tests/test_toolkit_contract.py
|
|
54
56
|
tests/test_v2_agents.py
|
|
55
57
|
tests/test_v2_articles.py
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from applied_cli import tools
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class FakeRunClient:
    """In-memory stand-in for the API client used by the contact tests.

    It records the keyword arguments of the last ``bulk_run_scenarios`` call
    in ``bulk_kwargs`` and every email passed to ``get_or_create_contact`` in
    ``get_or_create_calls`` so tests can assert on them.
    """

    def __init__(self):
        self.get_or_create_calls = []
        self.bulk_kwargs = None

    async def list_scenarios(self, benchmark_id=None, limit=500, **kwargs):
        # Always the same two scenarios, regardless of the filter arguments.
        return [{"id": scenario_id} for scenario_id in ("s1", "s2")]

    async def get_or_create_contact(self, email=None, name=None, phone=None):
        self.get_or_create_calls.append(email)
        contact = {"id": "contact-123"}
        contact["email"] = email
        return contact

    async def bulk_run_scenarios(
        self, scenario_ids=None, target_agent_id=None, contact_override=None
    ):
        # Remember exactly what the caller sent for later assertions.
        self.bulk_kwargs = dict(
            scenario_ids=scenario_ids,
            target_agent_id=target_agent_id,
            contact_override=contact_override,
        )
        scenario_count = len(scenario_ids or [])
        return {
            "job_id": "job-1",
            "total": scenario_count,
            "queued": scenario_count,
            "scenario_run_ids": ["r1", "r2"],
            "contact_override": contact_override,
        }
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@pytest.mark.asyncio
|
|
38
|
+
async def test_no_contact_args_sends_no_override():
|
|
39
|
+
client = FakeRunClient()
|
|
40
|
+
await tools.scenario_bulk_run(client, benchmark_id="bench-1", output_format="json")
|
|
41
|
+
assert client.bulk_kwargs["contact_override"] is None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@pytest.mark.asyncio
|
|
45
|
+
async def test_contact_id_builds_override():
|
|
46
|
+
client = FakeRunClient()
|
|
47
|
+
await tools.scenario_bulk_run(
|
|
48
|
+
client, benchmark_id="bench-1", contact_id="c-9", output_format="json"
|
|
49
|
+
)
|
|
50
|
+
assert client.bulk_kwargs["contact_override"] == {
|
|
51
|
+
"mode": "contact",
|
|
52
|
+
"contact_id": "c-9",
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@pytest.mark.asyncio
|
|
57
|
+
async def test_contact_email_resolves_then_overrides():
|
|
58
|
+
client = FakeRunClient()
|
|
59
|
+
result = await tools.scenario_bulk_run(
|
|
60
|
+
client,
|
|
61
|
+
benchmark_id="bench-1",
|
|
62
|
+
contact_email="casey@example.com",
|
|
63
|
+
output_format="json",
|
|
64
|
+
)
|
|
65
|
+
assert client.get_or_create_calls == ["casey@example.com"]
|
|
66
|
+
assert client.bulk_kwargs["contact_override"] == {
|
|
67
|
+
"mode": "contact",
|
|
68
|
+
"contact_id": "contact-123",
|
|
69
|
+
}
|
|
70
|
+
assert json.loads(result)["job_id"] == "job-1"
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@pytest.mark.asyncio
|
|
74
|
+
async def test_anonymous_mode():
|
|
75
|
+
client = FakeRunClient()
|
|
76
|
+
await tools.scenario_bulk_run(
|
|
77
|
+
client, benchmark_id="bench-1", anonymous=True, output_format="json"
|
|
78
|
+
)
|
|
79
|
+
assert client.bulk_kwargs["contact_override"] == {"mode": "anonymous"}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@pytest.mark.asyncio
|
|
83
|
+
async def test_explicit_override_wins_over_convenience_args():
|
|
84
|
+
client = FakeRunClient()
|
|
85
|
+
await tools.scenario_bulk_run(
|
|
86
|
+
client,
|
|
87
|
+
benchmark_id="bench-1",
|
|
88
|
+
contact_override={"mode": "contact", "contact_id": "raw"},
|
|
89
|
+
contact_email="ignored@example.com",
|
|
90
|
+
output_format="json",
|
|
91
|
+
)
|
|
92
|
+
# The raw override is used; email resolution is skipped.
|
|
93
|
+
assert client.get_or_create_calls == []
|
|
94
|
+
assert client.bulk_kwargs["contact_override"] == {
|
|
95
|
+
"mode": "contact",
|
|
96
|
+
"contact_id": "raw",
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@pytest.mark.asyncio
|
|
101
|
+
async def test_v2_handler_threads_contact_email():
|
|
102
|
+
from applied_cli.v2.scenarios import (
|
|
103
|
+
ScenariosBulkRunInput,
|
|
104
|
+
scenarios_bulk_run_handler,
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
client = FakeRunClient()
|
|
108
|
+
result = await scenarios_bulk_run_handler(
|
|
109
|
+
client,
|
|
110
|
+
ScenariosBulkRunInput(benchmark_id="bench-1", contact_email="x@example.com"),
|
|
111
|
+
)
|
|
112
|
+
assert client.bulk_kwargs["contact_override"] == {
|
|
113
|
+
"mode": "contact",
|
|
114
|
+
"contact_id": "contact-123",
|
|
115
|
+
}
|
|
116
|
+
assert result.data["job_id"] == "job-1"
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from applied_cli import tools
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class FakeWaitClient:
    """Fake bulk client that replays a scripted list of status payloads.

    Each status request returns the next payload of the sequence; once the
    sequence is exhausted the last payload is returned forever.
    ``status_calls`` counts how many status requests were made.
    """

    def __init__(self, status_sequence):
        self.status_calls = 0
        self._poll = 0
        self._status_sequence = [*status_sequence]

    async def list_scenarios(self, benchmark_id=None, limit=500, **kwargs):
        # Always the same two scenarios, regardless of the filter arguments.
        return [{"id": scenario_id} for scenario_id in ("s1", "s2")]

    async def bulk_run_scenarios(
        self, scenario_ids=None, target_agent_id=None, contact_override=None
    ):
        scenario_count = len(scenario_ids or [])
        return {
            "job_id": "job-1",
            "total": scenario_count,
            "queued": scenario_count,
            "scenario_run_ids": ["r1", "r2"],
        }

    async def get_scenario_bulk_run_status(self, job_id):
        self.status_calls += 1
        # Stick to the final payload once every scripted one has been served.
        last_index = len(self._status_sequence) - 1
        position = self._poll if self._poll < last_index else last_index
        self._poll += 1
        return self._status_sequence[position]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@pytest.mark.asyncio
|
|
37
|
+
async def test_wait_polls_until_no_pending(monkeypatch):
|
|
38
|
+
# Avoid real sleeping between polls.
|
|
39
|
+
async def _no_sleep(_seconds):
|
|
40
|
+
return None
|
|
41
|
+
|
|
42
|
+
monkeypatch.setattr(tools.asyncio, "sleep", _no_sleep)
|
|
43
|
+
|
|
44
|
+
client = FakeWaitClient(
|
|
45
|
+
status_sequence=[
|
|
46
|
+
{"counts": {"queued": 2, "running": 0, "completed": 0, "failed": 0}},
|
|
47
|
+
{"counts": {"queued": 0, "running": 1, "completed": 1, "failed": 0}},
|
|
48
|
+
{
|
|
49
|
+
"counts": {"queued": 0, "running": 0, "completed": 2, "failed": 0},
|
|
50
|
+
"duration_seconds": 12.5,
|
|
51
|
+
"completed_at": "2026-06-05T10:00:00Z",
|
|
52
|
+
"failed": [],
|
|
53
|
+
},
|
|
54
|
+
]
|
|
55
|
+
)
|
|
56
|
+
result = await tools.scenario_bulk_run(
|
|
57
|
+
client, benchmark_id="bench-1", wait=True, output_format="json"
|
|
58
|
+
)
|
|
59
|
+
data = json.loads(result)
|
|
60
|
+
assert data["timed_out"] is False
|
|
61
|
+
assert data["final_counts"]["completed"] == 2
|
|
62
|
+
assert data["duration_seconds"] == 12.5
|
|
63
|
+
assert client.status_calls == 3
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@pytest.mark.asyncio
|
|
67
|
+
async def test_wait_times_out_when_runs_stay_pending(monkeypatch):
|
|
68
|
+
async def _no_sleep(_seconds):
|
|
69
|
+
return None
|
|
70
|
+
|
|
71
|
+
monkeypatch.setattr(tools.asyncio, "sleep", _no_sleep)
|
|
72
|
+
|
|
73
|
+
# Always pending → must hit the timeout path.
|
|
74
|
+
client = FakeWaitClient(
|
|
75
|
+
status_sequence=[
|
|
76
|
+
{"counts": {"queued": 2, "running": 0, "completed": 0, "failed": 0}}
|
|
77
|
+
]
|
|
78
|
+
)
|
|
79
|
+
result = await tools.scenario_bulk_run(
|
|
80
|
+
client,
|
|
81
|
+
benchmark_id="bench-1",
|
|
82
|
+
wait=True,
|
|
83
|
+
wait_timeout=0.0, # immediate timeout after first poll
|
|
84
|
+
output_format="json",
|
|
85
|
+
)
|
|
86
|
+
data = json.loads(result)
|
|
87
|
+
assert data["timed_out"] is True
|
|
88
|
+
assert data["final_counts"]["queued"] == 2
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@pytest.mark.asyncio
|
|
92
|
+
async def test_no_wait_returns_started_summary():
|
|
93
|
+
client = FakeWaitClient(status_sequence=[{"counts": {}}])
|
|
94
|
+
result = await tools.scenario_bulk_run(
|
|
95
|
+
client, benchmark_id="bench-1", output_format="json"
|
|
96
|
+
)
|
|
97
|
+
data = json.loads(result)
|
|
98
|
+
assert "final_counts" not in data
|
|
99
|
+
assert client.status_calls == 0 # no polling when wait is False
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def test_bulk_status_counts_normalizes_case_and_types():
|
|
103
|
+
assert tools._bulk_status_counts({"QUEUED": 2, "Running": "1"}) == {
|
|
104
|
+
"queued": 2,
|
|
105
|
+
"running": 1,
|
|
106
|
+
}
|
|
107
|
+
assert tools._bulk_pending_count({"QUEUED": 3, "RUNNING": 4, "COMPLETED": 9}) == 7
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|