applied-cli 0.6.1__tar.gz → 0.6.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {applied_cli-0.6.1 → applied_cli-0.6.3}/PKG-INFO +1 -1
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/cli.py +53 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/tools.py +177 -4
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/domains.py +1 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/scenarios.py +82 -6
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli.egg-info/PKG-INFO +1 -1
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli.egg-info/SOURCES.txt +2 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/pyproject.toml +1 -1
- applied_cli-0.6.3/tests/test_benchmark_delete_guardrail.py +83 -0
- applied_cli-0.6.3/tests/test_scenario_bulk_cancel.py +87 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_scenarios.py +34 -12
- {applied_cli-0.6.1 → applied_cli-0.6.3}/README.md +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/__init__.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/agent_scoped_flows.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/auth.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/client.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/conversation_lookup.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/conversations.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/credentials.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/flow_helpers.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/formatters.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/mcp.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/recovery.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/toolkit.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/__init__.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/agents.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/articles.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/catalog.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/connectors.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/content.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/conversations.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/flows.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/knowledge.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/manifest.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/products.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/taxonomy.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/tickets.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli.egg-info/dependency_links.txt +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli.egg-info/entry_points.txt +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli.egg-info/requires.txt +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli.egg-info/top_level.txt +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/setup.cfg +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_agent_scoped_flows.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_audit_tools.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_auth_context.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_benchmark_clone.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_benchmark_scenario_tools.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_cli.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_cli_v2.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_client.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_client_v2.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_conversation_tools.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_flow_tools.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_knowledge_content_tools.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_recovery.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_toolkit_contract.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_agents.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_articles.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_catalog_and_mcp.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_connectors.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_content.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_conversations.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_flows.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_knowledge.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_products.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_taxonomy.py +0 -0
- {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_tickets.py +0 -0
|
@@ -1640,6 +1640,38 @@ def benchmark_clone(
|
|
|
1640
1640
|
typer.echo(result)
|
|
1641
1641
|
|
|
1642
1642
|
|
|
1643
|
+
@app.command("benchmark-delete")
|
|
1644
|
+
def benchmark_delete(
|
|
1645
|
+
id: str = typer.Argument(..., help="Benchmark ID"),
|
|
1646
|
+
force: bool = typer.Option(
|
|
1647
|
+
False,
|
|
1648
|
+
"--force",
|
|
1649
|
+
help="Acknowledge and proceed with the cascade delete of scenarios/runs",
|
|
1650
|
+
),
|
|
1651
|
+
detach_scenarios: bool = typer.Option(
|
|
1652
|
+
False,
|
|
1653
|
+
"--detach-scenarios",
|
|
1654
|
+
help="Preserve scenarios by unlinking them before deleting the benchmark",
|
|
1655
|
+
),
|
|
1656
|
+
shop_id: str = typer.Option(None, "--shop-id", help="Override shop ID"),
|
|
1657
|
+
format: str = typer.Option(
|
|
1658
|
+
"text", "--format", "-f", help="Output format: text or json"
|
|
1659
|
+
),
|
|
1660
|
+
) -> None:
|
|
1661
|
+
"""Delete a benchmark; refuses to silently wipe scenarios (see --force / --detach-scenarios)."""
|
|
1662
|
+
client = get_client(shop_id=shop_id)
|
|
1663
|
+
result = asyncio.run(
|
|
1664
|
+
tools.benchmark_delete(
|
|
1665
|
+
client,
|
|
1666
|
+
benchmark_id=id,
|
|
1667
|
+
force=force,
|
|
1668
|
+
detach_scenarios=detach_scenarios,
|
|
1669
|
+
output_format=format,
|
|
1670
|
+
)
|
|
1671
|
+
)
|
|
1672
|
+
typer.echo(result)
|
|
1673
|
+
|
|
1674
|
+
|
|
1643
1675
|
@app.command()
|
|
1644
1676
|
def scenarios(
|
|
1645
1677
|
benchmark_id: str = typer.Option(
|
|
@@ -1924,6 +1956,27 @@ def scenario_bulk_status(
|
|
|
1924
1956
|
typer.echo(result)
|
|
1925
1957
|
|
|
1926
1958
|
|
|
1959
|
+
@app.command("scenario-bulk-cancel")
|
|
1960
|
+
def scenario_bulk_cancel(
|
|
1961
|
+
job_id: str = typer.Argument(..., help="Bulk run job ID"),
|
|
1962
|
+
apply: bool = typer.Option(
|
|
1963
|
+
False, "--apply", help="Cancel the pending runs (default is a dry-run plan)"
|
|
1964
|
+
),
|
|
1965
|
+
shop_id: str = typer.Option(None, "--shop-id", help="Override shop ID"),
|
|
1966
|
+
format: str = typer.Option(
|
|
1967
|
+
"text", "--format", "-f", help="Output format: text or json"
|
|
1968
|
+
),
|
|
1969
|
+
) -> None:
|
|
1970
|
+
"""Cancel a stuck bulk run by deleting its queued/running scenario runs."""
|
|
1971
|
+
client = get_client(shop_id=shop_id)
|
|
1972
|
+
result = asyncio.run(
|
|
1973
|
+
tools.scenario_bulk_cancel(
|
|
1974
|
+
client, job_id=job_id, apply=apply, output_format=format
|
|
1975
|
+
)
|
|
1976
|
+
)
|
|
1977
|
+
typer.echo(result)
|
|
1978
|
+
|
|
1979
|
+
|
|
1927
1980
|
@app.command("audit-metric-fields")
|
|
1928
1981
|
def audit_metric_fields(
|
|
1929
1982
|
is_active: bool = typer.Option(
|
|
@@ -5409,23 +5409,107 @@ async def benchmark_create(
|
|
|
5409
5409
|
async def benchmark_delete(
|
|
5410
5410
|
client: AppliedClient,
|
|
5411
5411
|
benchmark_id: str,
|
|
5412
|
+
*,
|
|
5413
|
+
force: bool = False,
|
|
5414
|
+
detach_scenarios: bool = False,
|
|
5415
|
+
output_format: str = "text",
|
|
5412
5416
|
) -> str:
|
|
5413
5417
|
"""
|
|
5414
|
-
Delete a benchmark.
|
|
5418
|
+
Delete a benchmark, with a guardrail against silently wiping scenarios.
|
|
5419
|
+
|
|
5420
|
+
On the platform, deleting a benchmark cascades and permanently deletes every
|
|
5421
|
+
scenario and scenario run beneath it. To prevent accidental data loss this
|
|
5422
|
+
refuses to delete a benchmark that still has scenarios unless you either:
|
|
5423
|
+
|
|
5424
|
+
- pass detach_scenarios=True to first unlink the scenarios (they survive
|
|
5425
|
+
under their agent, untagged) and then delete the now-empty benchmark, or
|
|
5426
|
+
- pass force=True to acknowledge and proceed with the cascade delete.
|
|
5427
|
+
|
|
5428
|
+
An empty benchmark is deleted directly.
|
|
5415
5429
|
|
|
5416
5430
|
Args:
|
|
5417
5431
|
client: Authenticated AppliedClient
|
|
5418
5432
|
benchmark_id: The benchmark UUID
|
|
5433
|
+
force: Acknowledge and proceed with the cascade delete of scenarios/runs
|
|
5434
|
+
detach_scenarios: Preserve scenarios by unlinking them before deleting
|
|
5435
|
+
output_format: 'text' (default) or 'json'
|
|
5419
5436
|
|
|
5420
5437
|
Returns:
|
|
5421
|
-
|
|
5438
|
+
Result message (or refusal with the impact that would occur).
|
|
5422
5439
|
"""
|
|
5423
5440
|
try:
|
|
5424
|
-
await client.
|
|
5441
|
+
scenarios = await client.list_scenarios(
|
|
5442
|
+
benchmark_id=benchmark_id, fetch_all=True
|
|
5443
|
+
)
|
|
5425
5444
|
except AppliedAPIError as e:
|
|
5426
5445
|
return _format_error(e)
|
|
5427
5446
|
|
|
5428
|
-
|
|
5447
|
+
scenario_count = len(scenarios)
|
|
5448
|
+
run_count = sum(int(s.get("run_count") or 0) for s in scenarios)
|
|
5449
|
+
result: dict[str, Any] = {
|
|
5450
|
+
"benchmark_id": benchmark_id,
|
|
5451
|
+
"scenario_count": scenario_count,
|
|
5452
|
+
"run_count": run_count,
|
|
5453
|
+
"deleted": False,
|
|
5454
|
+
"refused": False,
|
|
5455
|
+
"detached_scenarios": 0,
|
|
5456
|
+
}
|
|
5457
|
+
|
|
5458
|
+
# Preserve scenarios: unlink each from this benchmark, then delete it empty.
|
|
5459
|
+
if scenario_count and detach_scenarios:
|
|
5460
|
+
detached = 0
|
|
5461
|
+
try:
|
|
5462
|
+
for scenario in scenarios:
|
|
5463
|
+
detail = await client.get_scenario(str(scenario["id"]))
|
|
5464
|
+
remaining = [
|
|
5465
|
+
str(b["id"])
|
|
5466
|
+
for b in (detail.get("benchmarks") or [])
|
|
5467
|
+
if b.get("id") and str(b["id"]) != benchmark_id
|
|
5468
|
+
]
|
|
5469
|
+
await client.update_scenario(
|
|
5470
|
+
str(scenario["id"]), benchmark_ids=remaining
|
|
5471
|
+
)
|
|
5472
|
+
detached += 1
|
|
5473
|
+
await client.delete_benchmark(benchmark_id)
|
|
5474
|
+
except AppliedAPIError as e:
|
|
5475
|
+
result["detached_scenarios"] = detached
|
|
5476
|
+
result["message"] = _format_error(e)
|
|
5477
|
+
return to_json(result) if output_format == "json" else result["message"]
|
|
5478
|
+
result["deleted"] = True
|
|
5479
|
+
result["detached_scenarios"] = detached
|
|
5480
|
+
result["message"] = (
|
|
5481
|
+
f"Detached {detached} scenario(s) and deleted empty benchmark "
|
|
5482
|
+
f"{benchmark_id}. Scenarios preserved under their agent."
|
|
5483
|
+
)
|
|
5484
|
+
return to_json(result) if output_format == "json" else result["message"]
|
|
5485
|
+
|
|
5486
|
+
# Refuse to cascade-delete a non-empty benchmark unless explicitly forced.
|
|
5487
|
+
if scenario_count and not force:
|
|
5488
|
+
result["refused"] = True
|
|
5489
|
+
result["message"] = (
|
|
5490
|
+
f"Refusing to delete benchmark {benchmark_id}: it has "
|
|
5491
|
+
f"{scenario_count} scenario(s) and {run_count} run(s) that would be "
|
|
5492
|
+
f"PERMANENTLY deleted by the cascade. Re-run with detach_scenarios=True "
|
|
5493
|
+
f"to preserve the scenarios, or force=True to delete them anyway."
|
|
5494
|
+
)
|
|
5495
|
+
return to_json(result) if output_format == "json" else result["message"]
|
|
5496
|
+
|
|
5497
|
+
try:
|
|
5498
|
+
await client.delete_benchmark(benchmark_id)
|
|
5499
|
+
except AppliedAPIError as e:
|
|
5500
|
+
result["message"] = _format_error(e)
|
|
5501
|
+
return to_json(result) if output_format == "json" else result["message"]
|
|
5502
|
+
|
|
5503
|
+
result["deleted"] = True
|
|
5504
|
+
result["message"] = (
|
|
5505
|
+
f"Benchmark {benchmark_id} deleted successfully"
|
|
5506
|
+
+ (
|
|
5507
|
+
f" (cascaded {scenario_count} scenario(s), {run_count} run(s))."
|
|
5508
|
+
if scenario_count
|
|
5509
|
+
else "."
|
|
5510
|
+
)
|
|
5511
|
+
)
|
|
5512
|
+
return to_json(result) if output_format == "json" else result["message"]
|
|
5429
5513
|
|
|
5430
5514
|
|
|
5431
5515
|
def _scenario_input_conversation_id(scenario: dict) -> str | None:
|
|
@@ -6146,6 +6230,95 @@ async def scenario_bulk_status(
|
|
|
6146
6230
|
return output
|
|
6147
6231
|
|
|
6148
6232
|
|
|
6233
|
+
_PENDING_RUN_STATUSES = ("queued", "running")
|
|
6234
|
+
|
|
6235
|
+
|
|
6236
|
+
async def scenario_bulk_cancel(
|
|
6237
|
+
client: AppliedClient,
|
|
6238
|
+
job_id: str,
|
|
6239
|
+
*,
|
|
6240
|
+
apply: bool = False,
|
|
6241
|
+
output_format: str = "text",
|
|
6242
|
+
) -> str:
|
|
6243
|
+
"""
|
|
6244
|
+
Cancel a stuck bulk scenario run job.
|
|
6245
|
+
|
|
6246
|
+
A bulk run can get stuck with runs left in 'queued' or 'running' forever, so
|
|
6247
|
+
the job never reports complete. This cancels the job by deleting those pending
|
|
6248
|
+
runs; completed and failed runs (and their result conversations) are preserved.
|
|
6249
|
+
Once no pending runs remain, the job reports as finished.
|
|
6250
|
+
|
|
6251
|
+
This replaces the manual workaround of deleting the 'agent-test-bulk-job:'
|
|
6252
|
+
key from browser localStorage.
|
|
6253
|
+
|
|
6254
|
+
Args:
|
|
6255
|
+
client: Authenticated AppliedClient
|
|
6256
|
+
job_id: The bulk run job UUID
|
|
6257
|
+
apply: If True, delete the pending runs; otherwise just report the plan
|
|
6258
|
+
output_format: 'text' (default) or 'json'
|
|
6259
|
+
|
|
6260
|
+
Returns:
|
|
6261
|
+
Summary of pending runs cancelled (or that would be cancelled).
|
|
6262
|
+
"""
|
|
6263
|
+
try:
|
|
6264
|
+
runs = await client.list_scenario_runs(
|
|
6265
|
+
bulk_job_id=job_id, latest=False, limit=1000
|
|
6266
|
+
)
|
|
6267
|
+
except AppliedAPIError as e:
|
|
6268
|
+
return _format_error(e)
|
|
6269
|
+
|
|
6270
|
+
pending = [r for r in runs if str(r.get("status") or "") in _PENDING_RUN_STATUSES]
|
|
6271
|
+
result: dict[str, Any] = {
|
|
6272
|
+
"job_id": job_id,
|
|
6273
|
+
"apply": apply,
|
|
6274
|
+
"total_runs": len(runs),
|
|
6275
|
+
"pending_runs": len(pending),
|
|
6276
|
+
"terminal_runs": len(runs) - len(pending),
|
|
6277
|
+
"cancelled": 0,
|
|
6278
|
+
"errors": [],
|
|
6279
|
+
}
|
|
6280
|
+
|
|
6281
|
+
if not runs:
|
|
6282
|
+
result["message"] = (
|
|
6283
|
+
f"No runs found for job {job_id} — it may already be cleared or the "
|
|
6284
|
+
f"job id is invalid. Nothing to cancel."
|
|
6285
|
+
)
|
|
6286
|
+
return to_json(result) if output_format == "json" else result["message"]
|
|
6287
|
+
|
|
6288
|
+
if not pending:
|
|
6289
|
+
result["message"] = (
|
|
6290
|
+
f"Job {job_id} has no pending (queued/running) runs — nothing to "
|
|
6291
|
+
f"cancel. {result['terminal_runs']} run(s) already finished."
|
|
6292
|
+
)
|
|
6293
|
+
return to_json(result) if output_format == "json" else result["message"]
|
|
6294
|
+
|
|
6295
|
+
if not apply:
|
|
6296
|
+
result["message"] = (
|
|
6297
|
+
f"Would cancel {len(pending)} pending run(s) (queued/running) for job "
|
|
6298
|
+
f"{job_id}; {result['terminal_runs']} finished run(s) preserved. "
|
|
6299
|
+
f"Re-run with --apply to cancel."
|
|
6300
|
+
)
|
|
6301
|
+
return to_json(result) if output_format == "json" else result["message"]
|
|
6302
|
+
|
|
6303
|
+
cancelled = 0
|
|
6304
|
+
for run in pending:
|
|
6305
|
+
run_id = run.get("id")
|
|
6306
|
+
if not run_id:
|
|
6307
|
+
continue
|
|
6308
|
+
try:
|
|
6309
|
+
await client.delete_scenario_run(str(run_id))
|
|
6310
|
+
cancelled += 1
|
|
6311
|
+
except AppliedAPIError as e:
|
|
6312
|
+
result["errors"].append({"run_id": str(run_id), "error": str(e)})
|
|
6313
|
+
result["cancelled"] = cancelled
|
|
6314
|
+
result["message"] = (
|
|
6315
|
+
f"Cancelled {cancelled} pending run(s) for job {job_id}; "
|
|
6316
|
+
f"{result['terminal_runs']} finished run(s) preserved."
|
|
6317
|
+
+ (f" {len(result['errors'])} error(s)." if result["errors"] else "")
|
|
6318
|
+
)
|
|
6319
|
+
return to_json(result) if output_format == "json" else result["message"]
|
|
6320
|
+
|
|
6321
|
+
|
|
6149
6322
|
async def _load_audit_target_summaries(
|
|
6150
6323
|
client: AppliedClient,
|
|
6151
6324
|
ratings: list[dict[str, Any]],
|
|
@@ -119,6 +119,7 @@ DOMAIN_TOOL_RENAMES: dict[str, dict[str, str]] = {
|
|
|
119
119
|
"scenario_run_delete": "scenarios_runs_delete",
|
|
120
120
|
"scenario_bulk_run": "scenarios_bulk_run",
|
|
121
121
|
"scenario_bulk_status": "scenarios_bulk_status",
|
|
122
|
+
"scenario_bulk_cancel": "scenarios_bulk_cancel",
|
|
122
123
|
},
|
|
123
124
|
"taxonomy": {
|
|
124
125
|
"taxonomy_list": "taxonomy_items_list",
|
|
@@ -29,6 +29,11 @@ class ScenariosBulkRunInput(StrictInput):
|
|
|
29
29
|
contact_override: dict[str, Any] | None = None
|
|
30
30
|
|
|
31
31
|
|
|
32
|
+
class ScenariosBulkCancelInput(StrictInput):
|
|
33
|
+
job_id: str
|
|
34
|
+
apply: bool = False
|
|
35
|
+
|
|
36
|
+
|
|
32
37
|
class BenchmarksListInput(StrictInput):
|
|
33
38
|
agent_id: str | None = None
|
|
34
39
|
limit: int = 50
|
|
@@ -48,6 +53,8 @@ class BenchmarksCreateInput(StrictInput):
|
|
|
48
53
|
|
|
49
54
|
class BenchmarksDeleteInput(StrictInput):
|
|
50
55
|
benchmark_id: str
|
|
56
|
+
force: bool = False
|
|
57
|
+
detach_scenarios: bool = False
|
|
51
58
|
|
|
52
59
|
|
|
53
60
|
class BenchmarksCloneInput(StrictInput):
|
|
@@ -494,14 +501,33 @@ async def benchmarks_delete_handler(
|
|
|
494
501
|
client: AppliedClient,
|
|
495
502
|
params: BenchmarksDeleteInput,
|
|
496
503
|
) -> ToolResult[Any]:
|
|
504
|
+
from applied_cli import tools as legacy_tools
|
|
505
|
+
|
|
506
|
+
raw = await legacy_tools.benchmark_delete(
|
|
507
|
+
client,
|
|
508
|
+
benchmark_id=params.benchmark_id,
|
|
509
|
+
force=params.force,
|
|
510
|
+
detach_scenarios=params.detach_scenarios,
|
|
511
|
+
output_format="json",
|
|
512
|
+
)
|
|
497
513
|
try:
|
|
498
|
-
|
|
499
|
-
except
|
|
500
|
-
return
|
|
514
|
+
data = json.loads(raw)
|
|
515
|
+
except (json.JSONDecodeError, TypeError):
|
|
516
|
+
return ToolResult(data={"message": raw}, summary=str(raw))
|
|
517
|
+
|
|
518
|
+
if data.get("refused"):
|
|
519
|
+
return ToolResult(
|
|
520
|
+
data=data,
|
|
521
|
+
summary=data.get("message", "Refused to delete a non-empty benchmark."),
|
|
522
|
+
next_actions=[
|
|
523
|
+
"Re-run with detach_scenarios=true to preserve the scenarios.",
|
|
524
|
+
"Or re-run with force=true to delete the scenarios and runs too.",
|
|
525
|
+
],
|
|
526
|
+
)
|
|
501
527
|
|
|
502
528
|
return ToolResult(
|
|
503
|
-
data=
|
|
504
|
-
summary=f"Deleted benchmark {params.benchmark_id}.",
|
|
529
|
+
data=data,
|
|
530
|
+
summary=data.get("message", f"Deleted benchmark {params.benchmark_id}."),
|
|
505
531
|
next_actions=[
|
|
506
532
|
"Use benchmarks_list to confirm the benchmark was removed.",
|
|
507
533
|
],
|
|
@@ -829,6 +855,38 @@ async def scenarios_bulk_status_handler(
|
|
|
829
855
|
)
|
|
830
856
|
|
|
831
857
|
|
|
858
|
+
async def scenarios_bulk_cancel_handler(
|
|
859
|
+
client: AppliedClient,
|
|
860
|
+
params: ScenariosBulkCancelInput,
|
|
861
|
+
) -> ToolResult[Any]:
|
|
862
|
+
from applied_cli import tools as legacy_tools
|
|
863
|
+
|
|
864
|
+
raw = await legacy_tools.scenario_bulk_cancel(
|
|
865
|
+
client,
|
|
866
|
+
job_id=params.job_id,
|
|
867
|
+
apply=params.apply,
|
|
868
|
+
output_format="json",
|
|
869
|
+
)
|
|
870
|
+
try:
|
|
871
|
+
data = json.loads(raw)
|
|
872
|
+
except (json.JSONDecodeError, TypeError):
|
|
873
|
+
return ToolResult(data={"message": raw}, summary=str(raw))
|
|
874
|
+
|
|
875
|
+
next_actions = []
|
|
876
|
+
if not params.apply and data.get("pending_runs"):
|
|
877
|
+
next_actions.append("Re-run with apply=true to cancel the pending runs.")
|
|
878
|
+
elif data.get("cancelled"):
|
|
879
|
+
next_actions.append(
|
|
880
|
+
f"Poll scenarios_bulk_status with job_id='{params.job_id}' to confirm "
|
|
881
|
+
f"the job now reports finished."
|
|
882
|
+
)
|
|
883
|
+
return ToolResult(
|
|
884
|
+
data=data,
|
|
885
|
+
summary=data.get("message", "Bulk cancel processed."),
|
|
886
|
+
next_actions=next_actions,
|
|
887
|
+
)
|
|
888
|
+
|
|
889
|
+
|
|
832
890
|
def scenario_specs() -> list[ToolSpec]:
|
|
833
891
|
return [
|
|
834
892
|
ToolSpec(
|
|
@@ -866,7 +924,11 @@ def scenario_specs() -> list[ToolSpec]:
|
|
|
866
924
|
ToolSpec(
|
|
867
925
|
name="benchmarks_delete",
|
|
868
926
|
namespace="benchmarks",
|
|
869
|
-
description=
|
|
927
|
+
description=(
|
|
928
|
+
"Delete a benchmark. Refuses to delete a benchmark that still "
|
|
929
|
+
"has scenarios (the platform cascade would permanently delete "
|
|
930
|
+
"them) unless detach_scenarios=true (preserve them) or force=true."
|
|
931
|
+
),
|
|
870
932
|
input_model=BenchmarksDeleteInput,
|
|
871
933
|
output_model=None,
|
|
872
934
|
handler=benchmarks_delete_handler,
|
|
@@ -1009,4 +1071,18 @@ def scenario_specs() -> list[ToolSpec]:
|
|
|
1009
1071
|
read_write_mode="read",
|
|
1010
1072
|
tags=["scenario_bulk_status", "native"],
|
|
1011
1073
|
),
|
|
1074
|
+
ToolSpec(
|
|
1075
|
+
name="scenarios_bulk_cancel",
|
|
1076
|
+
namespace="scenarios",
|
|
1077
|
+
description=(
|
|
1078
|
+
"Cancel a stuck bulk scenario run job by deleting its queued/"
|
|
1079
|
+
"running runs (completed/failed runs are preserved). Dry-run by "
|
|
1080
|
+
"default; set apply=true to cancel."
|
|
1081
|
+
),
|
|
1082
|
+
input_model=ScenariosBulkCancelInput,
|
|
1083
|
+
output_model=None,
|
|
1084
|
+
handler=scenarios_bulk_cancel_handler,
|
|
1085
|
+
read_write_mode="write",
|
|
1086
|
+
tags=["scenario_bulk_cancel", "native"],
|
|
1087
|
+
),
|
|
1012
1088
|
]
|
|
@@ -39,6 +39,7 @@ tests/test_agent_scoped_flows.py
|
|
|
39
39
|
tests/test_audit_tools.py
|
|
40
40
|
tests/test_auth_context.py
|
|
41
41
|
tests/test_benchmark_clone.py
|
|
42
|
+
tests/test_benchmark_delete_guardrail.py
|
|
42
43
|
tests/test_benchmark_scenario_tools.py
|
|
43
44
|
tests/test_cli.py
|
|
44
45
|
tests/test_cli_v2.py
|
|
@@ -48,6 +49,7 @@ tests/test_conversation_tools.py
|
|
|
48
49
|
tests/test_flow_tools.py
|
|
49
50
|
tests/test_knowledge_content_tools.py
|
|
50
51
|
tests/test_recovery.py
|
|
52
|
+
tests/test_scenario_bulk_cancel.py
|
|
51
53
|
tests/test_toolkit_contract.py
|
|
52
54
|
tests/test_v2_agents.py
|
|
53
55
|
tests/test_v2_articles.py
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from applied_cli import tools
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class FakeDeleteClient:
|
|
9
|
+
def __init__(self, scenarios):
|
|
10
|
+
self._scenarios = scenarios
|
|
11
|
+
self.deleted = None
|
|
12
|
+
self.updates = []
|
|
13
|
+
|
|
14
|
+
async def list_scenarios(self, benchmark_id=None, fetch_all=True, **kwargs):
|
|
15
|
+
return list(self._scenarios)
|
|
16
|
+
|
|
17
|
+
async def get_scenario(self, scenario_id):
|
|
18
|
+
for s in self._scenarios:
|
|
19
|
+
if s["id"] == scenario_id:
|
|
20
|
+
return s
|
|
21
|
+
raise KeyError(scenario_id)
|
|
22
|
+
|
|
23
|
+
async def update_scenario(self, scenario_id, **updates):
|
|
24
|
+
self.updates.append({"id": scenario_id, **updates})
|
|
25
|
+
return {"id": scenario_id, **updates}
|
|
26
|
+
|
|
27
|
+
async def delete_benchmark(self, benchmark_id):
|
|
28
|
+
self.deleted = benchmark_id
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
SCENARIOS = [
|
|
32
|
+
{"id": "s1", "run_count": 3, "benchmarks": [{"id": "bench-1"}]},
|
|
33
|
+
{"id": "s2", "run_count": 1, "benchmarks": [{"id": "bench-1"}, {"id": "bench-2"}]},
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@pytest.mark.asyncio
|
|
38
|
+
async def test_empty_benchmark_deletes_directly():
|
|
39
|
+
client = FakeDeleteClient(scenarios=[])
|
|
40
|
+
result = await tools.benchmark_delete(client, "bench-1", output_format="json")
|
|
41
|
+
data = json.loads(result)
|
|
42
|
+
assert data["deleted"] is True
|
|
43
|
+
assert data["refused"] is False
|
|
44
|
+
assert client.deleted == "bench-1"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@pytest.mark.asyncio
|
|
48
|
+
async def test_nonempty_benchmark_refuses_and_discloses_impact():
|
|
49
|
+
client = FakeDeleteClient(scenarios=SCENARIOS)
|
|
50
|
+
result = await tools.benchmark_delete(client, "bench-1") # text, no force
|
|
51
|
+
assert "Refusing to delete" in result
|
|
52
|
+
assert "2 scenario(s)" in result
|
|
53
|
+
assert "4 run(s)" in result # 3 + 1
|
|
54
|
+
assert client.deleted is None # nothing wiped
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@pytest.mark.asyncio
|
|
58
|
+
async def test_force_cascades_and_reports_counts():
|
|
59
|
+
client = FakeDeleteClient(scenarios=SCENARIOS)
|
|
60
|
+
result = await tools.benchmark_delete(
|
|
61
|
+
client, "bench-1", force=True, output_format="json"
|
|
62
|
+
)
|
|
63
|
+
data = json.loads(result)
|
|
64
|
+
assert data["deleted"] is True
|
|
65
|
+
assert data["scenario_count"] == 2
|
|
66
|
+
assert data["run_count"] == 4
|
|
67
|
+
assert client.deleted == "bench-1"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@pytest.mark.asyncio
|
|
71
|
+
async def test_detach_unlinks_then_deletes_empty_benchmark():
|
|
72
|
+
client = FakeDeleteClient(scenarios=SCENARIOS)
|
|
73
|
+
result = await tools.benchmark_delete(
|
|
74
|
+
client, "bench-1", detach_scenarios=True, output_format="json"
|
|
75
|
+
)
|
|
76
|
+
data = json.loads(result)
|
|
77
|
+
assert data["deleted"] is True
|
|
78
|
+
assert data["detached_scenarios"] == 2
|
|
79
|
+
# bench-1 removed from each scenario's links; bench-2 kept on s2.
|
|
80
|
+
by_id = {u["id"]: u["benchmark_ids"] for u in client.updates}
|
|
81
|
+
assert by_id["s1"] == []
|
|
82
|
+
assert by_id["s2"] == ["bench-2"]
|
|
83
|
+
assert client.deleted == "bench-1"
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from applied_cli import tools
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class FakeBulkClient:
|
|
9
|
+
def __init__(self, runs):
|
|
10
|
+
self._runs = runs
|
|
11
|
+
self.deleted = []
|
|
12
|
+
|
|
13
|
+
async def list_scenario_runs(
|
|
14
|
+
self, scenario_id=None, benchmark_id=None, bulk_job_id=None, latest=False,
|
|
15
|
+
limit=50,
|
|
16
|
+
):
|
|
17
|
+
return list(self._runs)
|
|
18
|
+
|
|
19
|
+
async def delete_scenario_run(self, run_id):
|
|
20
|
+
self.deleted.append(run_id)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
STUCK_RUNS = [
|
|
24
|
+
{"id": "r1", "status": "completed"},
|
|
25
|
+
{"id": "r2", "status": "queued"},
|
|
26
|
+
{"id": "r3", "status": "running"},
|
|
27
|
+
{"id": "r4", "status": "failed"},
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@pytest.mark.asyncio
|
|
32
|
+
async def test_dry_run_reports_pending_without_deleting():
|
|
33
|
+
client = FakeBulkClient(STUCK_RUNS)
|
|
34
|
+
result = await tools.scenario_bulk_cancel(client, "job-1", output_format="json")
|
|
35
|
+
data = json.loads(result)
|
|
36
|
+
assert data["pending_runs"] == 2
|
|
37
|
+
assert data["terminal_runs"] == 2
|
|
38
|
+
assert data["cancelled"] == 0
|
|
39
|
+
assert client.deleted == [] # dry run deletes nothing
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@pytest.mark.asyncio
|
|
43
|
+
async def test_apply_deletes_only_pending_runs():
|
|
44
|
+
client = FakeBulkClient(STUCK_RUNS)
|
|
45
|
+
result = await tools.scenario_bulk_cancel(
|
|
46
|
+
client, "job-1", apply=True, output_format="json"
|
|
47
|
+
)
|
|
48
|
+
data = json.loads(result)
|
|
49
|
+
assert data["cancelled"] == 2
|
|
50
|
+
# Only the queued/running runs are deleted; completed/failed preserved.
|
|
51
|
+
assert set(client.deleted) == {"r2", "r3"}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@pytest.mark.asyncio
|
|
55
|
+
async def test_no_pending_runs_is_a_noop():
|
|
56
|
+
client = FakeBulkClient(
|
|
57
|
+
[{"id": "r1", "status": "completed"}, {"id": "r2", "status": "failed"}]
|
|
58
|
+
)
|
|
59
|
+
result = await tools.scenario_bulk_cancel(
|
|
60
|
+
client, "job-1", apply=True, output_format="text"
|
|
61
|
+
)
|
|
62
|
+
assert "nothing to" in result.lower()
|
|
63
|
+
assert client.deleted == []
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@pytest.mark.asyncio
|
|
67
|
+
async def test_unknown_job_reports_no_runs():
|
|
68
|
+
client = FakeBulkClient([])
|
|
69
|
+
result = await tools.scenario_bulk_cancel(client, "missing", output_format="text")
|
|
70
|
+
assert "No runs found" in result
|
|
71
|
+
assert client.deleted == []
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@pytest.mark.asyncio
|
|
75
|
+
async def test_v2_scenarios_bulk_cancel_handler_structured():
|
|
76
|
+
from applied_cli.v2.scenarios import (
|
|
77
|
+
ScenariosBulkCancelInput,
|
|
78
|
+
scenarios_bulk_cancel_handler,
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
client = FakeBulkClient(STUCK_RUNS)
|
|
82
|
+
result = await scenarios_bulk_cancel_handler(
|
|
83
|
+
client, ScenariosBulkCancelInput(job_id="job-1", apply=False)
|
|
84
|
+
)
|
|
85
|
+
assert result.data["pending_runs"] == 2
|
|
86
|
+
assert "apply=true" in " ".join(result.next_actions)
|
|
87
|
+
assert client.deleted == []
|
|
@@ -429,24 +429,46 @@ async def test_benchmarks_get_includes_structured_scenarios_without_legacy_wrapp
|
|
|
429
429
|
|
|
430
430
|
|
|
431
431
|
@pytest.mark.asyncio
|
|
432
|
-
async def
|
|
433
|
-
monkeypatch,
|
|
434
|
-
):
|
|
435
|
-
async def fail_legacy_wrapper(*args, **kwargs):
|
|
436
|
-
raise AssertionError("legacy benchmark_delete wrapper should not run")
|
|
437
|
-
|
|
438
|
-
monkeypatch.setattr("applied_cli.tools.benchmark_delete", fail_legacy_wrapper)
|
|
432
|
+
async def test_benchmarks_delete_refuses_to_wipe_scenarios_without_force():
|
|
439
433
|
client = FakeScenarioClient()
|
|
440
434
|
spec = get_tool_catalog()["benchmarks_delete"]
|
|
441
435
|
|
|
442
436
|
result = await spec.run(client, {"benchmark_id": "bench-1"})
|
|
443
437
|
|
|
444
438
|
assert result.ok is True
|
|
445
|
-
assert result.
|
|
446
|
-
assert result.data
|
|
447
|
-
assert result.
|
|
448
|
-
|
|
449
|
-
|
|
439
|
+
assert result.data["refused"] is True
|
|
440
|
+
assert result.data["deleted"] is False
|
|
441
|
+
assert result.data["scenario_count"] >= 1
|
|
442
|
+
assert "Refusing to delete" in result.summary
|
|
443
|
+
# Guardrail must not have deleted the benchmark.
|
|
444
|
+
assert client.delete_benchmark_id is None
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
@pytest.mark.asyncio
|
|
448
|
+
async def test_benchmarks_delete_force_cascades():
|
|
449
|
+
client = FakeScenarioClient()
|
|
450
|
+
spec = get_tool_catalog()["benchmarks_delete"]
|
|
451
|
+
|
|
452
|
+
result = await spec.run(client, {"benchmark_id": "bench-1", "force": True})
|
|
453
|
+
|
|
454
|
+
assert result.data["deleted"] is True
|
|
455
|
+
assert client.delete_benchmark_id == "bench-1"
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
@pytest.mark.asyncio
|
|
459
|
+
async def test_benchmarks_delete_detach_preserves_scenarios():
|
|
460
|
+
client = FakeScenarioClient()
|
|
461
|
+
spec = get_tool_catalog()["benchmarks_delete"]
|
|
462
|
+
|
|
463
|
+
result = await spec.run(
|
|
464
|
+
client, {"benchmark_id": "bench-1", "detach_scenarios": True}
|
|
465
|
+
)
|
|
466
|
+
|
|
467
|
+
assert result.data["deleted"] is True
|
|
468
|
+
assert result.data["detached_scenarios"] >= 1
|
|
469
|
+
# Scenarios were unlinked (bench-1 removed from their benchmark list).
|
|
470
|
+
assert client.update_scenario_kwargs is not None
|
|
471
|
+
assert "bench-1" not in client.update_scenario_kwargs["updates"]["benchmark_ids"]
|
|
450
472
|
assert client.delete_benchmark_id == "bench-1"
|
|
451
473
|
|
|
452
474
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|