PyPI - applied-cli - Versions diffs - 0.6.4__tar.gz → 0.6.6__tar.gz - Mend

applied-cli 0.6.4__tar.gz → 0.6.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70)

{applied_cli-0.6.4 → applied_cli-0.6.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: applied-cli
-Version: 0.6.4
+Version: 0.6.6
 Summary: CLI and shared client library for Applied Labs AI support agents
 Author: Applied Labs
 License-Expression: MIT

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/cli.py RENAMED Viewed

@@ -1672,6 +1672,22 @@ def benchmark_delete(
     typer.echo(result)
+@app.command("benchmark-results")
+def benchmark_results(
+    id: str = typer.Argument(..., help="Benchmark ID"),
+    shop_id: str = typer.Option(None, "--shop-id", help="Override shop ID"),
+    format: str = typer.Option(
+        "text", "--format", "-f", help="Output format: text or json"
+    ),
+) -> None:
+    """Summarize a benchmark's pass/fail/unrated health and pass rate."""
+    client = get_client(shop_id=shop_id)
+    result = asyncio.run(
+        tools.benchmark_results(client, benchmark_id=id, output_format=format)
+    )
+    typer.echo(result)
 @app.command()
 def scenarios(
     benchmark_id: str = typer.Option(
@@ -1928,6 +1944,15 @@ def scenario_bulk_run(
     anonymous: bool = typer.Option(
         False, "--anonymous", help="Run with an anonymous contact"
     ),
+    wait: bool = typer.Option(
+        False, "--wait", help="Poll until all runs finish, then print final status"
+    ),
+    wait_timeout: float = typer.Option(
+        300.0, "--wait-timeout", help="Max seconds to wait with --wait (default 300)"
+    ),
+    poll_interval: float = typer.Option(
+        3.0, "--poll-interval", help="Seconds between status polls with --wait"
+    ),
     shop_id: str = typer.Option(None, "--shop-id", help="Override shop ID"),
     format: str = typer.Option(
         "text", "--format", "-f", help="Output format: text or json"
@@ -1944,6 +1969,9 @@ def scenario_bulk_run(
             contact_email=contact_email,
             contact_id=contact_id,
             anonymous=anonymous,
+            wait=wait,
+            wait_timeout=wait_timeout,
+            poll_interval=poll_interval,
             output_format=format,
         )
     )

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/tools.py RENAMED Viewed

@@ -8,6 +8,7 @@ import asyncio
 import difflib
 import json
 import re
+import time
 from contextlib import suppress
 from html.parser import HTMLParser
 from typing import Any
@@ -5709,6 +5710,93 @@ async def benchmark_clone(
     return "\n".join(lines)
+async def benchmark_results(
+    client: AppliedClient,
+    benchmark_id: str,
+    *,
+    output_format: str = "text",
+) -> str:
+    """
+    Summarize a benchmark's pass/fail health.
+    Tallies the pass_status across the benchmark's scenarios (pass / fail /
+    unrated), computes the pass rate among rated scenarios, and lists the failing
+    and still-unrated scenarios so you know what to fix or evaluate next.
+    Args:
+        client: Authenticated AppliedClient
+        benchmark_id: The benchmark UUID
+        output_format: 'text' (default) or 'json'
+    Returns:
+        Pass-rate summary with failing and unrated scenario lists.
+    """
+    try:
+        benchmark = await client.get_benchmark(benchmark_id)
+        scenarios = await client.list_scenarios(
+            benchmark_id=benchmark_id, fetch_all=True
+        )
+    except AppliedAPIError as e:
+        return _format_error(e)
+    tally = {"pass": 0, "fail": 0, "unrated": 0}
+    failing: list[dict[str, Any]] = []
+    unrated: list[dict[str, Any]] = []
+    for scenario in scenarios:
+        status = str(scenario.get("pass_status") or "unrated").lower()
+        if status not in tally:
+            status = "unrated"
+        tally[status] += 1
+        entry = {"id": scenario.get("id"), "name": scenario.get("name")}
+        if status == "fail":
+            failing.append(entry)
+        elif status == "unrated":
+            unrated.append(entry)
+    rated = tally["pass"] + tally["fail"]
+    pass_rate = round(tally["pass"] / rated, 4) if rated else None
+    summary = {
+        "benchmark_id": benchmark_id,
+        "benchmark_name": benchmark.get("name"),
+        "total_scenarios": len(scenarios),
+        "passed": tally["pass"],
+        "failed": tally["fail"],
+        "unrated": tally["unrated"],
+        "rated": rated,
+        "pass_rate": pass_rate,
+        "failing_scenarios": failing,
+        "unrated_scenarios": unrated,
+    }
+    if output_format == "json":
+        return to_json(summary)
+    pass_rate_str = (
+        f"{pass_rate * 100:.1f}% ({tally['pass']}/{rated} rated)"
+        if pass_rate is not None
+        else "n/a (no rated scenarios yet)"
+    )
+    lines = [
+        f"# Benchmark Results: {benchmark.get('name')} ({benchmark_id})",
+        f"total_scenarios: {summary['total_scenarios']}",
+        f"passed: {tally['pass']}",
+        f"failed: {tally['fail']}",
+        f"unrated: {tally['unrated']}",
+        f"pass_rate: {pass_rate_str}",
+    ]
+    if failing:
+        lines.append(f"\n# Failing ({len(failing)})")
+        lines.extend(f"  - {s['name']} ({s['id']})" for s in failing[:50])
+        if len(failing) > 50:
+            lines.append(f"  ... and {len(failing) - 50} more")
+    if unrated:
+        lines.append(f"\n# Unrated ({len(unrated)}) — evaluate these next")
+        lines.extend(f"  - {s['name']} ({s['id']})" for s in unrated[:50])
+        if len(unrated) > 50:
+            lines.append(f"  ... and {len(unrated) - 50} more")
+    return "\n".join(lines)
 # -----------------------------------------------------------------------------
 # Scenarios
 # -----------------------------------------------------------------------------
@@ -6082,6 +6170,41 @@ async def scenario_run_delete(
     return f"Scenario run {run_id} deleted successfully."
+def _bulk_status_counts(counts: dict | None) -> dict[str, int]:
+    """Normalize bulk-status counts to lowercase keys with int values."""
+    lowered: dict[str, int] = {}
+    for key, value in (counts or {}).items():
+        lowered[str(key).lower()] = int(value or 0)
+    return lowered
+def _bulk_pending_count(counts: dict | None) -> int:
+    """Count runs still queued or running (case-insensitive)."""
+    normalized = _bulk_status_counts(counts)
+    return normalized.get("queued", 0) + normalized.get("running", 0)
+async def _await_bulk_run(
+    client: AppliedClient,
+    job_id: str,
+    *,
+    timeout: float,
+    poll_interval: float,
+) -> tuple[dict, bool]:
+    """Poll a bulk run until no runs are queued/running or the timeout elapses.
+    Returns (latest_status_payload, timed_out).
+    """
+    start = time.monotonic()
+    status = await client.get_scenario_bulk_run_status(job_id)
+    while _bulk_pending_count(status.get("counts")) > 0:
+        if time.monotonic() - start >= timeout:
+            return status, True
+        await asyncio.sleep(poll_interval)
+        status = await client.get_scenario_bulk_run_status(job_id)
+    return status, False
 async def _resolve_contact_override(
     client: AppliedClient,
     *,
@@ -6123,6 +6246,9 @@ async def scenario_bulk_run(
     contact_id: str | None = None,
     contact_email: str | None = None,
     anonymous: bool = False,
+    wait: bool = False,
+    wait_timeout: float = 300.0,
+    poll_interval: float = 3.0,
     output_format: str = "text",
 ) -> str:
     """
@@ -6134,6 +6260,10 @@ async def scenario_bulk_run(
     to run the scenarios as a contact that has an email, so the test conversation
     carries it.
+    With wait=True, this polls until every run finishes (or the timeout elapses)
+    and returns the final status, so you can run a benchmark and read results in
+    one call instead of polling scenario_bulk_status yourself.
     Args:
         client: Authenticated AppliedClient
         scenario_ids: List of scenario UUIDs to run
@@ -6144,9 +6274,12 @@ async def scenario_bulk_run(
         contact_id: Run scenarios as this existing contact (gives test convos its email)
         contact_email: Resolve/create a contact with this email and run as them
         anonymous: Run with an anonymous contact (mode='anonymous')
+        wait: Poll until all runs finish (or wait_timeout elapses)
+        wait_timeout: Max seconds to wait when wait=True (default 300)
+        poll_interval: Seconds between status polls when wait=True (default 3)
     Returns:
-        Summary of runs created
+        Summary of runs created (plus the final status when wait=True)
     """
     resolved_scenario_ids = list(scenario_ids or [])
     if not resolved_scenario_ids:
@@ -6196,6 +6329,26 @@ async def scenario_bulk_run(
         "contact_override": result.get("contact_override"),
     }
+    job_id = payload.get("job_id")
+    final_status: dict | None = None
+    timed_out = False
+    if wait and job_id:
+        try:
+            final_status, timed_out = await _await_bulk_run(
+                client,
+                str(job_id),
+                timeout=wait_timeout,
+                poll_interval=poll_interval,
+            )
+        except AppliedAPIError as e:
+            return _format_error(e)
+        counts = _bulk_status_counts(final_status.get("counts"))
+        payload["final_counts"] = counts
+        payload["timed_out"] = timed_out
+        payload["duration_seconds"] = final_status.get("duration_seconds")
+        payload["completed_at"] = final_status.get("completed_at")
+        payload["failed"] = final_status.get("failed") or []
     if output_format == "json":
         return to_json(payload)
@@ -6212,6 +6365,23 @@ async def scenario_bulk_run(
         output += f"scenario_run_ids: {preview_ids}\n"
         if len(run_ids) > 10:
             output += f"more_runs: {len(run_ids) - 10}\n"
+    if final_status is not None:
+        counts = payload["final_counts"]
+        output += "\n# Final Status\n"
+        output += "timed_out: " + ("true (still pending)" if timed_out else "false") + "\n"
+        output += f"completed: {counts.get('completed', 0)}\n"
+        output += f"failed: {counts.get('failed', 0)}\n"
+        pending = counts.get("queued", 0) + counts.get("running", 0)
+        output += f"still_pending: {pending}\n"
+        if payload.get("duration_seconds") is not None:
+            output += f"duration_seconds: {payload['duration_seconds']}\n"
+        failed_runs = payload.get("failed") or []
+        if failed_runs:
+            output += f"\n# Failed Runs ({len(failed_runs)})\n"
+            output += to_json(failed_runs)
+        return output
     output += "\nTip: use scenario_bulk_status(job_id, include_runs=True) or scenario_run_list(bulk_job_id=job_id) to get per-run details with scenario mappings."
     return output
@@ -6245,14 +6415,14 @@ async def scenario_bulk_status(
             payload.pop("runs", None)
         return to_json(payload)
-    counts = result.get("counts") or {}
+    counts = _bulk_status_counts(result.get("counts"))
     output = "# Bulk Run Status\n"
     output += f"job_id: {result.get('job_id')}\n"
     output += f"total: {result.get('total')}\n"
-    output += f"queued: {counts.get('QUEUED', 0)}\n"
-    output += f"running: {counts.get('RUNNING', 0)}\n"
-    output += f"completed: {counts.get('COMPLETED', 0)}\n"
-    output += f"failed: {counts.get('FAILED', 0)}\n"
+    output += f"queued: {counts.get('queued', 0)}\n"
+    output += f"running: {counts.get('running', 0)}\n"
+    output += f"completed: {counts.get('completed', 0)}\n"
+    output += f"failed: {counts.get('failed', 0)}\n"
     output += f"created_at: {result.get('created_at')}\n"
     output += f"updated_at: {result.get('updated_at')}\n"
     if result.get("completed_at"):

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/v2/domains.py RENAMED Viewed

@@ -43,6 +43,7 @@ DOMAIN_TOOL_RENAMES: dict[str, dict[str, str]] = {
         "benchmark_create": "benchmarks_create",
         "benchmark_delete": "benchmarks_delete",
         "benchmark_clone": "benchmarks_clone",
+        "benchmark_results": "benchmarks_results",
     },
     "connectors": {
         "connector_types": "connectors_types_list",

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/v2/scenarios.py RENAMED Viewed

@@ -30,6 +30,9 @@ class ScenariosBulkRunInput(StrictInput):
     contact_id: str | None = None
     contact_email: str | None = None
     anonymous: bool = False
+    wait: bool = False
+    wait_timeout: float = 300.0
+    poll_interval: float = 3.0
 class ScenariosBulkCancelInput(StrictInput):
@@ -68,6 +71,10 @@ class BenchmarksCloneInput(StrictInput):
     apply: bool = False
+class BenchmarksResultsInput(StrictInput):
+    benchmark_id: str
 class ScenariosListInput(StrictInput):
     benchmark_id: str | None = None
     agent_id: str | None = None
@@ -500,6 +507,44 @@ async def benchmarks_clone_handler(
     )
+async def benchmarks_results_handler(
+    client: AppliedClient,
+    params: BenchmarksResultsInput,
+) -> ToolResult[Any]:
+    from applied_cli import tools as legacy_tools
+    raw = await legacy_tools.benchmark_results(
+        client, benchmark_id=params.benchmark_id, output_format="json"
+    )
+    try:
+        data = json.loads(raw)
+    except (json.JSONDecodeError, TypeError):
+        return ToolResult(data={"message": raw}, summary=str(raw))
+    pass_rate = data.get("pass_rate")
+    rate_str = (
+        f"{pass_rate * 100:.1f}%" if pass_rate is not None else "n/a (no rated yet)"
+    )
+    next_actions = []
+    if data.get("unrated"):
+        next_actions.append(
+            "Rate the unrated scenarios with scenarios_update (pass_status)."
+        )
+    if data.get("failed"):
+        next_actions.append(
+            "Inspect failing scenarios with scenarios_get / conversations_debug_bundle."
+        )
+    return ToolResult(
+        data=data,
+        summary=(
+            f"{data.get('benchmark_name') or params.benchmark_id}: pass rate "
+            f"{rate_str} — {data.get('passed', 0)} passed, "
+            f"{data.get('failed', 0)} failed, {data.get('unrated', 0)} unrated."
+        ),
+        next_actions=next_actions,
+    )
 async def benchmarks_delete_handler(
     client: AppliedClient,
     params: BenchmarksDeleteInput,
@@ -828,6 +873,48 @@ async def scenarios_bulk_run_handler(
         "duplicated_scenarios": result.get("duplicated_scenarios"),
         "contact_override": result.get("contact_override"),
     }
+    job_id = payload.get("job_id")
+    if params.wait and job_id:
+        from applied_cli.tools import _await_bulk_run, _bulk_status_counts
+        try:
+            final_status, timed_out = await _await_bulk_run(
+                client,
+                str(job_id),
+                timeout=params.wait_timeout,
+                poll_interval=params.poll_interval,
+            )
+        except AppliedAPIError as exc:
+            return _api_error_result(exc)
+        counts = _bulk_status_counts(final_status.get("counts"))
+        payload["final_counts"] = counts
+        payload["timed_out"] = timed_out
+        payload["duration_seconds"] = final_status.get("duration_seconds")
+        payload["failed"] = final_status.get("failed") or []
+        pending = counts.get("queued", 0) + counts.get("running", 0)
+        summary = (
+            f"Bulk job {job_id} "
+            + ("timed out with " if timed_out else "finished: ")
+            + f"{counts.get('completed', 0)} completed, "
+            + f"{counts.get('failed', 0)} failed"
+            + (f", {pending} still pending" if pending else "")
+            + "."
+        )
+        warnings = []
+        if counts.get("failed"):
+            warnings.append(f"{counts['failed']} run(s) failed.")
+        if timed_out:
+            warnings.append("Timed out before all runs finished.")
+        return ToolResult(
+            data=payload,
+            summary=summary,
+            warnings=warnings,
+            next_actions=[
+                "Use scenarios_bulk_status with include_runs=true to inspect runs.",
+            ],
+        )
     queued = payload.get("queued") or 0
     return ToolResult(
         data=payload,
@@ -961,6 +1048,19 @@ def scenario_specs() -> list[ToolSpec]:
             read_write_mode="write",
             tags=["benchmark_clone", "native"],
         ),
+        ToolSpec(
+            name="benchmarks_results",
+            namespace="benchmarks",
+            description=(
+                "Summarize a benchmark's pass/fail/unrated health and pass rate, "
+                "with the failing and unrated scenario lists."
+            ),
+            input_model=BenchmarksResultsInput,
+            output_model=None,
+            handler=benchmarks_results_handler,
+            read_write_mode="read",
+            tags=["benchmark_results", "native"],
+        ),
         ToolSpec(
             name="scenarios_list",
             namespace="scenarios",
@@ -1064,7 +1164,8 @@ def scenario_specs() -> list[ToolSpec]:
                 "Run selected scenarios or every scenario in a benchmark and "
                 "return the queued job metadata. Pass contact_email or contact_id "
                 "to run as a contact with an email (fixes 'Email is not present' "
-                "failures on test conversations)."
+                "failures on test conversations). Pass wait=true to block until "
+                "all runs finish and return the final status in one call."
             ),
             input_model=ScenariosBulkRunInput,
             output_model=None,

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: applied-cli
-Version: 0.6.4
+Version: 0.6.6
 Summary: CLI and shared client library for Applied Labs AI support agents
 Author: Applied Labs
 License-Expression: MIT

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli.egg-info/SOURCES.txt RENAMED Viewed

@@ -40,6 +40,7 @@ tests/test_audit_tools.py
 tests/test_auth_context.py
 tests/test_benchmark_clone.py
 tests/test_benchmark_delete_guardrail.py
+tests/test_benchmark_results.py
 tests/test_benchmark_scenario_tools.py
 tests/test_cli.py
 tests/test_cli_v2.py
@@ -51,6 +52,7 @@ tests/test_knowledge_content_tools.py
 tests/test_recovery.py
 tests/test_scenario_bulk_cancel.py
 tests/test_scenario_bulk_run_contact.py
+tests/test_scenario_bulk_run_wait.py
 tests/test_toolkit_contract.py
 tests/test_v2_agents.py
 tests/test_v2_articles.py

{applied_cli-0.6.4 → applied_cli-0.6.6}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "applied-cli"
-version = "0.6.4"
+version = "0.6.6"
 description = "CLI and shared client library for Applied Labs AI support agents"
 readme = "README.md"
 requires-python = ">=3.11"

applied_cli-0.6.6/tests/test_benchmark_results.py ADDED Viewed

@@ -0,0 +1,78 @@
+import json
+import pytest
+from applied_cli import tools
+BENCHMARK = {"id": "bench-1", "name": "Cancel Regression"}
+SCENARIOS = [
+    {"id": "s1", "name": "Cancel order", "pass_status": "pass"},
+    {"id": "s2", "name": "Refund flow", "pass_status": "pass"},
+    {"id": "s3", "name": "Pause subscription", "pass_status": "fail"},
+    {"id": "s4", "name": "Address change", "pass_status": "unrated"},
+    {"id": "s5", "name": "No status field"},  # missing -> unrated
+]
+class FakeResultsClient:
+    def __init__(self, scenarios=SCENARIOS):
+        self._scenarios = scenarios
+    async def get_benchmark(self, benchmark_id):
+        return BENCHMARK
+    async def list_scenarios(self, benchmark_id=None, fetch_all=True, **kwargs):
+        return list(self._scenarios)
+@pytest.mark.asyncio
+async def test_results_tally_and_pass_rate():
+    client = FakeResultsClient()
+    data = json.loads(
+        await tools.benchmark_results(client, "bench-1", output_format="json")
+    )
+    assert data["total_scenarios"] == 5
+    assert data["passed"] == 2
+    assert data["failed"] == 1
+    assert data["unrated"] == 2
+    assert data["rated"] == 3
+    # 2 passed / 3 rated
+    assert data["pass_rate"] == round(2 / 3, 4)
+    assert [s["id"] for s in data["failing_scenarios"]] == ["s3"]
+    assert {s["id"] for s in data["unrated_scenarios"]} == {"s4", "s5"}
+@pytest.mark.asyncio
+async def test_results_no_rated_scenarios_pass_rate_none():
+    client = FakeResultsClient(
+        scenarios=[{"id": "s1", "name": "A", "pass_status": "unrated"}]
+    )
+    text = await tools.benchmark_results(client, "bench-1", output_format="text")
+    assert "n/a (no rated scenarios yet)" in text
+@pytest.mark.asyncio
+async def test_results_text_lists_failing_and_unrated():
+    client = FakeResultsClient()
+    text = await tools.benchmark_results(client, "bench-1")
+    assert "# Failing (1)" in text
+    assert "Pause subscription" in text
+    assert "# Unrated (2)" in text
+@pytest.mark.asyncio
+async def test_v2_benchmarks_results_handler_summary():
+    from applied_cli.v2.scenarios import (
+        BenchmarksResultsInput,
+        benchmarks_results_handler,
+    )
+    client = FakeResultsClient()
+    result = await benchmarks_results_handler(
+        client, BenchmarksResultsInput(benchmark_id="bench-1")
+    )
+    assert result.data["passed"] == 2
+    assert "pass rate" in result.summary
+    # Has unrated + failing → both follow-up actions surfaced.
+    assert len(result.next_actions) == 2

applied_cli-0.6.6/tests/test_scenario_bulk_run_wait.py ADDED Viewed

@@ -0,0 +1,107 @@
+import json
+import pytest
+from applied_cli import tools
+class FakeWaitClient:
+    """Bulk client whose status transitions to done after N polls."""
+    def __init__(self, status_sequence):
+        self._status_sequence = list(status_sequence)
+        self._poll = 0
+        self.status_calls = 0
+    async def list_scenarios(self, benchmark_id=None, limit=500, **kwargs):
+        return [{"id": "s1"}, {"id": "s2"}]
+    async def bulk_run_scenarios(
+        self, scenario_ids=None, target_agent_id=None, contact_override=None
+    ):
+        return {
+            "job_id": "job-1",
+            "total": len(scenario_ids or []),
+            "queued": len(scenario_ids or []),
+            "scenario_run_ids": ["r1", "r2"],
+        }
+    async def get_scenario_bulk_run_status(self, job_id):
+        self.status_calls += 1
+        idx = min(self._poll, len(self._status_sequence) - 1)
+        self._poll += 1
+        return self._status_sequence[idx]
+@pytest.mark.asyncio
+async def test_wait_polls_until_no_pending(monkeypatch):
+    # Avoid real sleeping between polls.
+    async def _no_sleep(_seconds):
+        return None
+    monkeypatch.setattr(tools.asyncio, "sleep", _no_sleep)
+    client = FakeWaitClient(
+        status_sequence=[
+            {"counts": {"queued": 2, "running": 0, "completed": 0, "failed": 0}},
+            {"counts": {"queued": 0, "running": 1, "completed": 1, "failed": 0}},
+            {
+                "counts": {"queued": 0, "running": 0, "completed": 2, "failed": 0},
+                "duration_seconds": 12.5,
+                "completed_at": "2026-06-05T10:00:00Z",
+                "failed": [],
+            },
+        ]
+    )
+    result = await tools.scenario_bulk_run(
+        client, benchmark_id="bench-1", wait=True, output_format="json"
+    )
+    data = json.loads(result)
+    assert data["timed_out"] is False
+    assert data["final_counts"]["completed"] == 2
+    assert data["duration_seconds"] == 12.5
+    assert client.status_calls == 3
+@pytest.mark.asyncio
+async def test_wait_times_out_when_runs_stay_pending(monkeypatch):
+    async def _no_sleep(_seconds):
+        return None
+    monkeypatch.setattr(tools.asyncio, "sleep", _no_sleep)
+    # Always pending → must hit the timeout path.
+    client = FakeWaitClient(
+        status_sequence=[
+            {"counts": {"queued": 2, "running": 0, "completed": 0, "failed": 0}}
+        ]
+    )
+    result = await tools.scenario_bulk_run(
+        client,
+        benchmark_id="bench-1",
+        wait=True,
+        wait_timeout=0.0,  # immediate timeout after first poll
+        output_format="json",
+    )
+    data = json.loads(result)
+    assert data["timed_out"] is True
+    assert data["final_counts"]["queued"] == 2
+@pytest.mark.asyncio
+async def test_no_wait_returns_started_summary():
+    client = FakeWaitClient(status_sequence=[{"counts": {}}])
+    result = await tools.scenario_bulk_run(
+        client, benchmark_id="bench-1", output_format="json"
+    )
+    data = json.loads(result)
+    assert "final_counts" not in data
+    assert client.status_calls == 0  # no polling when wait is False
+def test_bulk_status_counts_normalizes_case_and_types():
+    assert tools._bulk_status_counts({"QUEUED": 2, "Running": "1"}) == {
+        "queued": 2,
+        "running": 1,
+    }
+    assert tools._bulk_pending_count({"QUEUED": 3, "RUNNING": 4, "COMPLETED": 9}) == 7

{applied_cli-0.6.4 → applied_cli-0.6.6}/README.md RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/__init__.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/agent_scoped_flows.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/auth.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/client.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/conversation_lookup.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/conversations.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/credentials.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/flow_helpers.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/formatters.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/mcp.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/recovery.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/toolkit.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/v2/__init__.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/v2/agents.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/v2/articles.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/v2/catalog.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/v2/connectors.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/v2/content.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/v2/conversations.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/v2/flows.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/v2/knowledge.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/v2/manifest.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/v2/products.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/v2/taxonomy.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli/v2/tickets.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli.egg-info/entry_points.txt RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli.egg-info/requires.txt RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/applied_cli.egg-info/top_level.txt RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/setup.cfg RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_agent_scoped_flows.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_audit_tools.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_auth_context.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_benchmark_clone.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_benchmark_delete_guardrail.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_benchmark_scenario_tools.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_cli.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_cli_v2.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_client.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_client_v2.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_conversation_tools.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_flow_tools.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_knowledge_content_tools.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_recovery.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_scenario_bulk_cancel.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_scenario_bulk_run_contact.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_toolkit_contract.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_v2_agents.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_v2_articles.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_v2_catalog_and_mcp.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_v2_connectors.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_v2_content.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_v2_conversations.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_v2_flows.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_v2_knowledge.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_v2_products.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_v2_scenarios.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_v2_taxonomy.py RENAMED Viewed

File without changes

{applied_cli-0.6.4 → applied_cli-0.6.6}/tests/test_v2_tickets.py RENAMED Viewed

File without changes

applied-cli 0.6.4__tar.gz → 0.6.6__tar.gz

applied-cli 0.6.4__tar.gz → 0.6.6__tar.gz