applied-cli 0.6.1__tar.gz → 0.6.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. {applied_cli-0.6.1 → applied_cli-0.6.3}/PKG-INFO +1 -1
  2. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/cli.py +53 -0
  3. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/tools.py +177 -4
  4. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/domains.py +1 -0
  5. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/scenarios.py +82 -6
  6. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli.egg-info/PKG-INFO +1 -1
  7. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli.egg-info/SOURCES.txt +2 -0
  8. {applied_cli-0.6.1 → applied_cli-0.6.3}/pyproject.toml +1 -1
  9. applied_cli-0.6.3/tests/test_benchmark_delete_guardrail.py +83 -0
  10. applied_cli-0.6.3/tests/test_scenario_bulk_cancel.py +87 -0
  11. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_scenarios.py +34 -12
  12. {applied_cli-0.6.1 → applied_cli-0.6.3}/README.md +0 -0
  13. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/__init__.py +0 -0
  14. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/agent_scoped_flows.py +0 -0
  15. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/auth.py +0 -0
  16. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/client.py +0 -0
  17. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/conversation_lookup.py +0 -0
  18. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/conversations.py +0 -0
  19. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/credentials.py +0 -0
  20. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/flow_helpers.py +0 -0
  21. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/formatters.py +0 -0
  22. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/mcp.py +0 -0
  23. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/recovery.py +0 -0
  24. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/toolkit.py +0 -0
  25. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/__init__.py +0 -0
  26. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/agents.py +0 -0
  27. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/articles.py +0 -0
  28. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/catalog.py +0 -0
  29. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/connectors.py +0 -0
  30. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/content.py +0 -0
  31. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/conversations.py +0 -0
  32. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/flows.py +0 -0
  33. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/knowledge.py +0 -0
  34. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/manifest.py +0 -0
  35. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/products.py +0 -0
  36. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/taxonomy.py +0 -0
  37. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli/v2/tickets.py +0 -0
  38. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli.egg-info/dependency_links.txt +0 -0
  39. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli.egg-info/entry_points.txt +0 -0
  40. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli.egg-info/requires.txt +0 -0
  41. {applied_cli-0.6.1 → applied_cli-0.6.3}/applied_cli.egg-info/top_level.txt +0 -0
  42. {applied_cli-0.6.1 → applied_cli-0.6.3}/setup.cfg +0 -0
  43. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_agent_scoped_flows.py +0 -0
  44. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_audit_tools.py +0 -0
  45. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_auth_context.py +0 -0
  46. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_benchmark_clone.py +0 -0
  47. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_benchmark_scenario_tools.py +0 -0
  48. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_cli.py +0 -0
  49. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_cli_v2.py +0 -0
  50. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_client.py +0 -0
  51. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_client_v2.py +0 -0
  52. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_conversation_tools.py +0 -0
  53. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_flow_tools.py +0 -0
  54. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_knowledge_content_tools.py +0 -0
  55. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_recovery.py +0 -0
  56. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_toolkit_contract.py +0 -0
  57. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_agents.py +0 -0
  58. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_articles.py +0 -0
  59. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_catalog_and_mcp.py +0 -0
  60. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_connectors.py +0 -0
  61. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_content.py +0 -0
  62. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_conversations.py +0 -0
  63. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_flows.py +0 -0
  64. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_knowledge.py +0 -0
  65. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_products.py +0 -0
  66. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_taxonomy.py +0 -0
  67. {applied_cli-0.6.1 → applied_cli-0.6.3}/tests/test_v2_tickets.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: applied-cli
3
- Version: 0.6.1
3
+ Version: 0.6.3
4
4
  Summary: CLI and shared client library for Applied Labs AI support agents
5
5
  Author: Applied Labs
6
6
  License-Expression: MIT
@@ -1640,6 +1640,38 @@ def benchmark_clone(
1640
1640
  typer.echo(result)
1641
1641
 
1642
1642
 
1643
@app.command("benchmark-delete")
def benchmark_delete(
    id: str = typer.Argument(..., help="Benchmark ID"),
    force: bool = typer.Option(
        False,
        "--force",
        help="Acknowledge and proceed with the cascade delete of scenarios/runs",
    ),
    detach_scenarios: bool = typer.Option(
        False,
        "--detach-scenarios",
        help="Preserve scenarios by unlinking them before deleting the benchmark",
    ),
    shop_id: str = typer.Option(None, "--shop-id", help="Override shop ID"),
    format: str = typer.Option(
        "text", "--format", "-f", help="Output format: text or json"
    ),
) -> None:
    """Delete a benchmark; refuses to silently wipe scenarios (see --force / --detach-scenarios)."""
    # NOTE(review): the docstring is kept verbatim because Typer presumably
    # surfaces it as the command's help text. `id` and `format` shadow builtins,
    # but they are part of the command's signature, so they are left alone.

    # Build the coroutine first, then drive it to completion: the guardrail
    # logic (refuse / detach / force) lives entirely in tools.benchmark_delete.
    delete_call = tools.benchmark_delete(
        get_client(shop_id=shop_id),
        benchmark_id=id,
        force=force,
        detach_scenarios=detach_scenarios,
        output_format=format,
    )
    typer.echo(asyncio.run(delete_call))
1673
+
1674
+
1643
1675
  @app.command()
1644
1676
  def scenarios(
1645
1677
  benchmark_id: str = typer.Option(
@@ -1924,6 +1956,27 @@ def scenario_bulk_status(
1924
1956
  typer.echo(result)
1925
1957
 
1926
1958
 
1959
@app.command("scenario-bulk-cancel")
def scenario_bulk_cancel(
    job_id: str = typer.Argument(..., help="Bulk run job ID"),
    apply: bool = typer.Option(
        False, "--apply", help="Cancel the pending runs (default is a dry-run plan)"
    ),
    shop_id: str = typer.Option(None, "--shop-id", help="Override shop ID"),
    format: str = typer.Option(
        "text", "--format", "-f", help="Output format: text or json"
    ),
) -> None:
    """Cancel a stuck bulk run by deleting its queued/running scenario runs."""
    # NOTE(review): docstring kept verbatim; Typer presumably shows it as help.

    # Without --apply the tool only reports what it would cancel.
    cancel_call = tools.scenario_bulk_cancel(
        get_client(shop_id=shop_id),
        job_id=job_id,
        apply=apply,
        output_format=format,
    )
    typer.echo(asyncio.run(cancel_call))
1978
+
1979
+
1927
1980
  @app.command("audit-metric-fields")
1928
1981
  def audit_metric_fields(
1929
1982
  is_active: bool = typer.Option(
@@ -5409,23 +5409,107 @@ async def benchmark_create(
5409
5409
async def benchmark_delete(
    client: AppliedClient,
    benchmark_id: str,
    *,
    force: bool = False,
    detach_scenarios: bool = False,
    output_format: str = "text",
) -> str:
    """
    Delete a benchmark, with a guardrail against silently wiping scenarios.

    On the platform, deleting a benchmark cascades and permanently deletes every
    scenario and scenario run beneath it. To prevent accidental data loss this
    refuses to delete a benchmark that still has scenarios unless you either:

    - pass detach_scenarios=True to first unlink the scenarios (they survive
      under their agent, untagged) and then delete the now-empty benchmark, or
    - pass force=True to acknowledge and proceed with the cascade delete.

    An empty benchmark is deleted directly. If both flags are set,
    detach_scenarios wins (the scenarios are preserved).

    Args:
        client: Authenticated AppliedClient
        benchmark_id: The benchmark UUID
        force: Acknowledge and proceed with the cascade delete of scenarios/runs
        detach_scenarios: Preserve scenarios by unlinking them before deleting
        output_format: 'text' (default) or 'json'

    Returns:
        Result message (or refusal with the impact that would occur). In 'json'
        mode this is a JSON object with benchmark_id, scenario_count, run_count,
        deleted, refused, detached_scenarios and message. A failure while
        listing the scenarios returns the formatted error text in either mode.
    """
    try:
        scenarios = await client.list_scenarios(
            benchmark_id=benchmark_id, fetch_all=True
        )
    except AppliedAPIError as e:
        return _format_error(e)

    scenario_count = len(scenarios)
    run_count = sum(int(s.get("run_count") or 0) for s in scenarios)
    result: dict[str, Any] = {
        "benchmark_id": benchmark_id,
        "scenario_count": scenario_count,
        "run_count": run_count,
        "deleted": False,
        "refused": False,
        "detached_scenarios": 0,
    }

    def _render() -> str:
        # Single place that turns the accumulated result into the caller's format.
        return to_json(result) if output_format == "json" else result["message"]

    # Preserve scenarios: unlink each from this benchmark, then delete it empty.
    if scenario_count and detach_scenarios:
        # Validate before touching anything: a scenario we cannot address would
        # be left attached and then destroyed by the cascade, which is exactly
        # what detach_scenarios is meant to prevent.
        unaddressable = sum(1 for s in scenarios if not s.get("id"))
        if unaddressable:
            result["message"] = (
                f"Cannot detach scenarios from benchmark {benchmark_id}: "
                f"{unaddressable} scenario(s) were returned without an id. "
                f"Nothing was changed and the benchmark was not deleted."
            )
            return _render()

        detached = 0
        try:
            for scenario in scenarios:
                scenario_id = str(scenario["id"])
                detail = await client.get_scenario(scenario_id)
                # Keep every other benchmark link; drop only this benchmark.
                remaining = [
                    str(b["id"])
                    for b in (detail.get("benchmarks") or [])
                    if b.get("id") and str(b["id"]) != benchmark_id
                ]
                await client.update_scenario(scenario_id, benchmark_ids=remaining)
                detached += 1
            await client.delete_benchmark(benchmark_id)
        except AppliedAPIError as e:
            # Spell out the partial state: text-mode callers otherwise only see
            # the API error and cannot tell that some scenarios are already
            # unlinked while the benchmark itself still exists.
            result["detached_scenarios"] = detached
            result["message"] = (
                f"{_format_error(e)}\n"
                f"Benchmark {benchmark_id} was NOT deleted; {detached} of "
                f"{scenario_count} scenario(s) had already been detached."
            )
            return _render()
        result["deleted"] = True
        result["detached_scenarios"] = detached
        result["message"] = (
            f"Detached {detached} scenario(s) and deleted empty benchmark "
            f"{benchmark_id}. Scenarios preserved under their agent."
        )
        return _render()

    # Refuse to cascade-delete a non-empty benchmark unless explicitly forced.
    if scenario_count and not force:
        result["refused"] = True
        result["message"] = (
            f"Refusing to delete benchmark {benchmark_id}: it has "
            f"{scenario_count} scenario(s) and {run_count} run(s) that would be "
            f"PERMANENTLY deleted by the cascade. Re-run with detach_scenarios=True "
            f"to preserve the scenarios, or force=True to delete them anyway."
        )
        return _render()

    # Either the benchmark is empty or the caller passed force=True.
    try:
        await client.delete_benchmark(benchmark_id)
    except AppliedAPIError as e:
        result["message"] = _format_error(e)
        return _render()

    result["deleted"] = True
    result["message"] = (
        f"Benchmark {benchmark_id} deleted successfully"
        + (
            f" (cascaded {scenario_count} scenario(s), {run_count} run(s))."
            if scenario_count
            else "."
        )
    )
    return _render()
5429
5513
 
5430
5514
 
5431
5515
  def _scenario_input_conversation_id(scenario: dict) -> str | None:
@@ -6146,6 +6230,95 @@ async def scenario_bulk_status(
6146
6230
  return output
6147
6231
 
6148
6232
 
6233
# Run statuses that keep a bulk job from reporting complete.
_PENDING_RUN_STATUSES = ("queued", "running")


async def scenario_bulk_cancel(
    client: AppliedClient,
    job_id: str,
    *,
    apply: bool = False,
    output_format: str = "text",
) -> str:
    """
    Cancel a stuck bulk scenario run job.

    A bulk run can get stuck with runs left in 'queued' or 'running' forever, so
    the job never reports complete. This cancels the job by deleting those pending
    runs; completed and failed runs (and their result conversations) are preserved.
    Once no pending runs remain, the job reports as finished.

    This replaces the manual workaround of deleting the 'agent-test-bulk-job:'
    key from browser localStorage.

    Args:
        client: Authenticated AppliedClient
        job_id: The bulk run job UUID
        apply: If True, delete the pending runs; otherwise just report the plan
        output_format: 'text' (default) or 'json'

    Returns:
        Summary of pending runs cancelled (or that would be cancelled). In
        'json' mode this is a JSON object with job_id, apply, total_runs,
        pending_runs, terminal_runs, cancelled, errors, truncated and message.
        A failure while listing the runs returns the formatted error text in
        either mode.
    """
    # Upper bound passed to the listing call; a job with at least this many
    # runs may have more runs than this function gets to examine.
    run_limit = 1000
    try:
        runs = await client.list_scenario_runs(
            bulk_job_id=job_id, latest=False, limit=run_limit
        )
    except AppliedAPIError as e:
        return _format_error(e)

    pending = [r for r in runs if str(r.get("status") or "") in _PENDING_RUN_STATUSES]
    truncated = len(runs) >= run_limit
    result: dict[str, Any] = {
        "job_id": job_id,
        "apply": apply,
        "total_runs": len(runs),
        "pending_runs": len(pending),
        "terminal_runs": len(runs) - len(pending),
        "cancelled": 0,
        "errors": [],
        "truncated": truncated,
    }
    # Surfaced in every non-empty outcome so a capped listing is never silent.
    truncation_note = (
        f" Note: the listing returned the maximum of {run_limit} runs, so the "
        f"job may have more runs than were examined."
        if truncated
        else ""
    )

    def _render() -> str:
        return to_json(result) if output_format == "json" else result["message"]

    if not runs:
        result["message"] = (
            f"No runs found for job {job_id} — it may already be cleared or the "
            f"job id is invalid. Nothing to cancel."
        )
        return _render()

    if not pending:
        result["message"] = (
            f"Job {job_id} has no pending (queued/running) runs — nothing to "
            f"cancel. {result['terminal_runs']} run(s) already finished."
            + truncation_note
        )
        return _render()

    if not apply:
        result["message"] = (
            f"Would cancel {len(pending)} pending run(s) (queued/running) for job "
            f"{job_id}; {result['terminal_runs']} finished run(s) preserved. "
            f"Re-run with --apply to cancel."
            + truncation_note
        )
        return _render()

    cancelled = 0
    for run in pending:
        run_id = run.get("id")
        if not run_id:
            # Record it instead of skipping silently, so pending_runs and
            # cancelled cannot disagree without an explanation.
            result["errors"].append(
                {"run_id": None, "error": "Pending run has no id; cannot delete it."}
            )
            continue
        try:
            await client.delete_scenario_run(str(run_id))
            cancelled += 1
        except AppliedAPIError as e:
            # Best effort: keep cancelling the remaining runs.
            result["errors"].append({"run_id": str(run_id), "error": str(e)})
    result["cancelled"] = cancelled
    result["message"] = (
        f"Cancelled {cancelled} pending run(s) for job {job_id}; "
        f"{result['terminal_runs']} finished run(s) preserved."
        + (f" {len(result['errors'])} error(s)." if result["errors"] else "")
        + truncation_note
    )
    return _render()
6320
+
6321
+
6149
6322
  async def _load_audit_target_summaries(
6150
6323
  client: AppliedClient,
6151
6324
  ratings: list[dict[str, Any]],
@@ -119,6 +119,7 @@ DOMAIN_TOOL_RENAMES: dict[str, dict[str, str]] = {
119
119
  "scenario_run_delete": "scenarios_runs_delete",
120
120
  "scenario_bulk_run": "scenarios_bulk_run",
121
121
  "scenario_bulk_status": "scenarios_bulk_status",
122
+ "scenario_bulk_cancel": "scenarios_bulk_cancel",
122
123
  },
123
124
  "taxonomy": {
124
125
  "taxonomy_list": "taxonomy_items_list",
@@ -29,6 +29,11 @@ class ScenariosBulkRunInput(StrictInput):
29
29
  contact_override: dict[str, Any] | None = None
30
30
 
31
31
 
32
# Input model registered for the scenarios_bulk_cancel tool.
class ScenariosBulkCancelInput(StrictInput):
    # Identifier of the bulk run job whose pending runs should be cancelled.
    job_id: str
    # False (default) only reports the plan; True deletes the pending runs.
    apply: bool = False
35
+
36
+
32
37
  class BenchmarksListInput(StrictInput):
33
38
  agent_id: str | None = None
34
39
  limit: int = 50
@@ -48,6 +53,8 @@ class BenchmarksCreateInput(StrictInput):
48
53
 
49
54
# Input model registered for the benchmarks_delete tool.
class BenchmarksDeleteInput(StrictInput):
    # Identifier of the benchmark to delete.
    benchmark_id: str
    # Acknowledge the cascade: delete the benchmark's scenarios and runs too.
    force: bool = False
    # Unlink the scenarios first so they survive the benchmark's deletion.
    detach_scenarios: bool = False
51
58
 
52
59
 
53
60
  class BenchmarksCloneInput(StrictInput):
@@ -494,14 +501,33 @@ async def benchmarks_delete_handler(
494
501
  client: AppliedClient,
495
502
  params: BenchmarksDeleteInput,
496
503
  ) -> ToolResult[Any]:
504
+ from applied_cli import tools as legacy_tools
505
+
506
+ raw = await legacy_tools.benchmark_delete(
507
+ client,
508
+ benchmark_id=params.benchmark_id,
509
+ force=params.force,
510
+ detach_scenarios=params.detach_scenarios,
511
+ output_format="json",
512
+ )
497
513
  try:
498
- await client.delete_benchmark(params.benchmark_id)
499
- except AppliedAPIError as exc:
500
- return _api_error_result(exc)
514
+ data = json.loads(raw)
515
+ except (json.JSONDecodeError, TypeError):
516
+ return ToolResult(data={"message": raw}, summary=str(raw))
517
+
518
+ if data.get("refused"):
519
+ return ToolResult(
520
+ data=data,
521
+ summary=data.get("message", "Refused to delete a non-empty benchmark."),
522
+ next_actions=[
523
+ "Re-run with detach_scenarios=true to preserve the scenarios.",
524
+ "Or re-run with force=true to delete the scenarios and runs too.",
525
+ ],
526
+ )
501
527
 
502
528
  return ToolResult(
503
- data={"benchmark_id": params.benchmark_id, "deleted": True},
504
- summary=f"Deleted benchmark {params.benchmark_id}.",
529
+ data=data,
530
+ summary=data.get("message", f"Deleted benchmark {params.benchmark_id}."),
505
531
  next_actions=[
506
532
  "Use benchmarks_list to confirm the benchmark was removed.",
507
533
  ],
@@ -829,6 +855,38 @@ async def scenarios_bulk_status_handler(
829
855
  )
830
856
 
831
857
 
858
async def scenarios_bulk_cancel_handler(
    client: AppliedClient,
    params: ScenariosBulkCancelInput,
) -> ToolResult[Any]:
    """Run the legacy bulk-cancel tool and wrap its JSON report in a ToolResult.

    Args:
        client: Authenticated AppliedClient, passed through to the legacy tool.
        params: Job id plus the apply flag (dry-run when False).

    Returns:
        A ToolResult whose data is the decoded report, whose summary is the
        report's message, and whose next_actions suggest the follow-up step.
        When the legacy tool returns something that is not a JSON object
        (for example a plain-text error), the raw text is returned as both
        the data message and the summary.
    """
    # Imported lazily, as in the original handler.
    from applied_cli import tools as legacy_tools

    raw = await legacy_tools.scenario_bulk_cancel(
        client,
        job_id=params.job_id,
        apply=params.apply,
        output_format="json",
    )
    try:
        data = json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        return ToolResult(data={"message": raw}, summary=str(raw))
    if not isinstance(data, dict):
        # json.loads also accepts bare scalars; those have no .get(), so treat
        # them like the undecodable case rather than crashing below.
        return ToolResult(data={"message": raw}, summary=str(raw))

    next_actions: list[str] = []
    if not params.apply and data.get("pending_runs"):
        next_actions.append("Re-run with apply=true to cancel the pending runs.")
    elif data.get("cancelled"):
        next_actions.append(
            f"Poll scenarios_bulk_status with job_id='{params.job_id}' to confirm "
            f"the job now reports finished."
        )
    if data.get("errors"):
        # Failed deletions leave pending runs behind; point the caller at them.
        next_actions.append(
            "Some pending runs could not be deleted (see data['errors']); "
            "re-run with apply=true to retry them."
        )
    return ToolResult(
        data=data,
        summary=data.get("message", "Bulk cancel processed."),
        next_actions=next_actions,
    )
888
+
889
+
832
890
  def scenario_specs() -> list[ToolSpec]:
833
891
  return [
834
892
  ToolSpec(
@@ -866,7 +924,11 @@ def scenario_specs() -> list[ToolSpec]:
866
924
  ToolSpec(
867
925
  name="benchmarks_delete",
868
926
  namespace="benchmarks",
869
- description="Delete a benchmark and return a structured acknowledgement.",
927
+ description=(
928
+ "Delete a benchmark. Refuses to delete a benchmark that still "
929
+ "has scenarios (the platform cascade would permanently delete "
930
+ "them) unless detach_scenarios=true (preserve them) or force=true."
931
+ ),
870
932
  input_model=BenchmarksDeleteInput,
871
933
  output_model=None,
872
934
  handler=benchmarks_delete_handler,
@@ -1009,4 +1071,18 @@ def scenario_specs() -> list[ToolSpec]:
1009
1071
  read_write_mode="read",
1010
1072
  tags=["scenario_bulk_status", "native"],
1011
1073
  ),
1074
+ ToolSpec(
1075
+ name="scenarios_bulk_cancel",
1076
+ namespace="scenarios",
1077
+ description=(
1078
+ "Cancel a stuck bulk scenario run job by deleting its queued/"
1079
+ "running runs (completed/failed runs are preserved). Dry-run by "
1080
+ "default; set apply=true to cancel."
1081
+ ),
1082
+ input_model=ScenariosBulkCancelInput,
1083
+ output_model=None,
1084
+ handler=scenarios_bulk_cancel_handler,
1085
+ read_write_mode="write",
1086
+ tags=["scenario_bulk_cancel", "native"],
1087
+ ),
1012
1088
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: applied-cli
3
- Version: 0.6.1
3
+ Version: 0.6.3
4
4
  Summary: CLI and shared client library for Applied Labs AI support agents
5
5
  Author: Applied Labs
6
6
  License-Expression: MIT
@@ -39,6 +39,7 @@ tests/test_agent_scoped_flows.py
39
39
  tests/test_audit_tools.py
40
40
  tests/test_auth_context.py
41
41
  tests/test_benchmark_clone.py
42
+ tests/test_benchmark_delete_guardrail.py
42
43
  tests/test_benchmark_scenario_tools.py
43
44
  tests/test_cli.py
44
45
  tests/test_cli_v2.py
@@ -48,6 +49,7 @@ tests/test_conversation_tools.py
48
49
  tests/test_flow_tools.py
49
50
  tests/test_knowledge_content_tools.py
50
51
  tests/test_recovery.py
52
+ tests/test_scenario_bulk_cancel.py
51
53
  tests/test_toolkit_contract.py
52
54
  tests/test_v2_agents.py
53
55
  tests/test_v2_articles.py
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "applied-cli"
3
- version = "0.6.1"
3
+ version = "0.6.3"
4
4
  description = "CLI and shared client library for Applied Labs AI support agents"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -0,0 +1,83 @@
1
+ import json
2
+
3
+ import pytest
4
+
5
+ from applied_cli import tools
6
+
7
+
8
class FakeDeleteClient:
    """In-memory stand-in for the API client used by the benchmark-delete tests.

    Records what the tool asked for: `updates` collects every update_scenario
    call and `deleted` holds the id passed to the last delete_benchmark call
    (None until one happens).
    """

    def __init__(self, scenarios):
        self._scenarios = scenarios
        self.deleted = None
        self.updates = []

    @staticmethod
    def _linked_to(scenario, benchmark_id):
        # Scenarios that do not declare their links are treated as matching,
        # so fixtures without a "benchmarks" key keep working.
        links = scenario.get("benchmarks")
        if links is None:
            return True
        return any(str(link.get("id")) == str(benchmark_id) for link in links)

    async def list_scenarios(self, benchmark_id=None, fetch_all=True, **kwargs):
        # Honour the benchmark filter so a call made with the wrong benchmark
        # id does not see scenarios that belong to another benchmark.
        if benchmark_id is None:
            return list(self._scenarios)
        return [s for s in self._scenarios if self._linked_to(s, benchmark_id)]

    async def get_scenario(self, scenario_id):
        for s in self._scenarios:
            if s["id"] == scenario_id:
                return s
        raise KeyError(scenario_id)

    async def update_scenario(self, scenario_id, **updates):
        self.updates.append({"id": scenario_id, **updates})
        return {"id": scenario_id, **updates}

    async def delete_benchmark(self, benchmark_id):
        self.deleted = benchmark_id


# Two scenarios under bench-1 (4 runs in total); s2 is also linked to bench-2.
SCENARIOS = [
    {"id": "s1", "run_count": 3, "benchmarks": [{"id": "bench-1"}]},
    {"id": "s2", "run_count": 1, "benchmarks": [{"id": "bench-1"}, {"id": "bench-2"}]},
]
35
+
36
+
37
@pytest.mark.asyncio
async def test_empty_benchmark_deletes_directly():
    """A benchmark with no scenarios is deleted without force or detach."""
    fake = FakeDeleteClient(scenarios=[])
    raw = await tools.benchmark_delete(fake, "bench-1", output_format="json")
    report = json.loads(raw)
    assert report["deleted"] is True
    assert report["refused"] is False
    assert fake.deleted == "bench-1"
45
+
46
+
47
@pytest.mark.asyncio
async def test_nonempty_benchmark_refuses_and_discloses_impact():
    """Without force, a non-empty benchmark is refused and the impact is stated."""
    fake = FakeDeleteClient(scenarios=SCENARIOS)
    # Default text output, no force and no detach.
    refusal = await tools.benchmark_delete(fake, "bench-1")
    # The run total is 3 + 1 across the two fixture scenarios.
    for fragment in ("Refusing to delete", "2 scenario(s)", "4 run(s)"):
        assert fragment in refusal
    # Nothing may have been wiped.
    assert fake.deleted is None
55
+
56
+
57
@pytest.mark.asyncio
async def test_force_cascades_and_reports_counts():
    """force=True deletes a non-empty benchmark and reports what was cascaded."""
    fake = FakeDeleteClient(scenarios=SCENARIOS)
    report = json.loads(
        await tools.benchmark_delete(
            fake, "bench-1", force=True, output_format="json"
        )
    )
    assert report["deleted"] is True
    assert report["scenario_count"] == 2
    assert report["run_count"] == 4
    assert fake.deleted == "bench-1"
68
+
69
+
70
@pytest.mark.asyncio
async def test_detach_unlinks_then_deletes_empty_benchmark():
    """detach_scenarios=True unlinks every scenario, then deletes the benchmark."""
    fake = FakeDeleteClient(scenarios=SCENARIOS)
    raw = await tools.benchmark_delete(
        fake, "bench-1", detach_scenarios=True, output_format="json"
    )
    report = json.loads(raw)
    assert report["deleted"] is True
    assert report["detached_scenarios"] == 2
    # bench-1 is dropped from each scenario's links; s2 keeps bench-2.
    links_after = {}
    for update in fake.updates:
        links_after[update["id"]] = update["benchmark_ids"]
    assert links_after["s1"] == []
    assert links_after["s2"] == ["bench-2"]
    assert fake.deleted == "bench-1"
@@ -0,0 +1,87 @@
1
+ import json
2
+
3
+ import pytest
4
+
5
+ from applied_cli import tools
6
+
7
+
8
class FakeBulkClient:
    """In-memory stand-in for the API client used by the bulk-cancel tests.

    `deleted` collects the ids passed to delete_scenario_run, and `list_calls`
    records the arguments of every list_scenario_runs call so tests can check
    what the tool actually asked for.
    """

    def __init__(self, runs):
        self._runs = runs
        self.deleted = []
        self.list_calls = []

    async def list_scenario_runs(
        self, scenario_id=None, benchmark_id=None, bulk_job_id=None, latest=False,
        limit=50,
    ):
        self.list_calls.append(
            {
                "scenario_id": scenario_id,
                "benchmark_id": benchmark_id,
                "bulk_job_id": bulk_job_id,
                "latest": latest,
                "limit": limit,
            }
        )
        # Respect the requested cap (limit=None returns everything).
        return list(self._runs)[:limit]

    async def delete_scenario_run(self, run_id):
        self.deleted.append(run_id)


# One run per status: two terminal (completed, failed) and two pending.
STUCK_RUNS = [
    {"id": "r1", "status": "completed"},
    {"id": "r2", "status": "queued"},
    {"id": "r3", "status": "running"},
    {"id": "r4", "status": "failed"},
]
29
+
30
+
31
@pytest.mark.asyncio
async def test_dry_run_reports_pending_without_deleting():
    """Without apply, the tool reports the plan and deletes nothing."""
    fake = FakeBulkClient(STUCK_RUNS)
    raw = await tools.scenario_bulk_cancel(fake, "job-1", output_format="json")
    plan = json.loads(raw)
    assert plan["pending_runs"] == 2
    assert plan["terminal_runs"] == 2
    assert plan["cancelled"] == 0
    # A dry run must not delete anything.
    assert fake.deleted == []
40
+
41
+
42
@pytest.mark.asyncio
async def test_apply_deletes_only_pending_runs():
    """With apply, only queued/running runs are deleted."""
    fake = FakeBulkClient(STUCK_RUNS)
    report = json.loads(
        await tools.scenario_bulk_cancel(
            fake, "job-1", apply=True, output_format="json"
        )
    )
    assert report["cancelled"] == 2
    # Completed and failed runs (r1, r4) must be left alone.
    removed = set(fake.deleted)
    assert removed == {"r2", "r3"}
52
+
53
+
54
@pytest.mark.asyncio
async def test_no_pending_runs_is_a_noop():
    """A job whose runs are all terminal is left untouched, even with apply."""
    finished_runs = [
        {"id": "r1", "status": "completed"},
        {"id": "r2", "status": "failed"},
    ]
    fake = FakeBulkClient(finished_runs)
    message = await tools.scenario_bulk_cancel(
        fake, "job-1", apply=True, output_format="text"
    )
    assert "nothing to" in message.lower()
    assert fake.deleted == []
64
+
65
+
66
@pytest.mark.asyncio
async def test_unknown_job_reports_no_runs():
    """A job with no runs at all yields the 'No runs found' message."""
    fake = FakeBulkClient([])
    message = await tools.scenario_bulk_cancel(
        fake, "missing", output_format="text"
    )
    assert "No runs found" in message
    assert fake.deleted == []
72
+
73
+
74
@pytest.mark.asyncio
async def test_v2_scenarios_bulk_cancel_handler_structured():
    """The v2 handler returns structured data and suggests apply=true on a dry run."""
    from applied_cli.v2.scenarios import (
        ScenariosBulkCancelInput,
        scenarios_bulk_cancel_handler,
    )

    fake = FakeBulkClient(STUCK_RUNS)
    dry_run_params = ScenariosBulkCancelInput(job_id="job-1", apply=False)
    outcome = await scenarios_bulk_cancel_handler(fake, dry_run_params)
    assert outcome.data["pending_runs"] == 2
    suggestions = " ".join(outcome.next_actions)
    assert "apply=true" in suggestions
    assert fake.deleted == []
@@ -429,24 +429,46 @@ async def test_benchmarks_get_includes_structured_scenarios_without_legacy_wrapp
429
429
 
430
430
 
431
431
@pytest.mark.asyncio
async def test_benchmarks_delete_refuses_to_wipe_scenarios_without_force():
    """benchmarks_delete without force reports a refusal and deletes nothing."""
    fake = FakeScenarioClient()
    delete_tool = get_tool_catalog()["benchmarks_delete"]

    outcome = await delete_tool.run(fake, {"benchmark_id": "bench-1"})

    assert outcome.ok is True
    report = outcome.data
    assert report["refused"] is True
    assert report["deleted"] is False
    assert report["scenario_count"] >= 1
    assert "Refusing to delete" in outcome.summary
    # The guardrail must not have reached the client's delete call.
    assert fake.delete_benchmark_id is None
445
+
446
+
447
@pytest.mark.asyncio
async def test_benchmarks_delete_force_cascades():
    """force=True lets benchmarks_delete go through to the client."""
    fake = FakeScenarioClient()
    delete_tool = get_tool_catalog()["benchmarks_delete"]

    forced_args = {"benchmark_id": "bench-1", "force": True}
    outcome = await delete_tool.run(fake, forced_args)

    assert outcome.data["deleted"] is True
    assert fake.delete_benchmark_id == "bench-1"
456
+
457
+
458
@pytest.mark.asyncio
async def test_benchmarks_delete_detach_preserves_scenarios():
    """detach_scenarios=True unlinks the scenarios before the benchmark goes."""
    fake = FakeScenarioClient()
    delete_tool = get_tool_catalog()["benchmarks_delete"]

    detach_args = {"benchmark_id": "bench-1", "detach_scenarios": True}
    outcome = await delete_tool.run(fake, detach_args)

    assert outcome.data["deleted"] is True
    assert outcome.data["detached_scenarios"] >= 1
    # The recorded update must no longer list bench-1 among the scenario's links.
    assert fake.update_scenario_kwargs is not None
    remaining_links = fake.update_scenario_kwargs["updates"]["benchmark_ids"]
    assert "bench-1" not in remaining_links
    assert fake.delete_benchmark_id == "bench-1"
451
473
 
452
474
 
File without changes
File without changes