applied-cli 0.6.7__tar.gz → 0.6.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {applied_cli-0.6.7 → applied_cli-0.6.8}/PKG-INFO +1 -1
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/cli.py +13 -2
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/tools.py +75 -33
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/v2/scenarios.py +23 -2
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli.egg-info/PKG-INFO +1 -1
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli.egg-info/SOURCES.txt +1 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/pyproject.toml +1 -1
- applied_cli-0.6.8/tests/test_benchmark_list_with_results.py +104 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/README.md +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/__init__.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/agent_scoped_flows.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/auth.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/client.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/conversation_lookup.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/conversations.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/credentials.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/flow_helpers.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/formatters.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/mcp.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/recovery.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/toolkit.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/v2/__init__.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/v2/agents.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/v2/articles.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/v2/catalog.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/v2/connectors.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/v2/content.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/v2/conversations.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/v2/domains.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/v2/flows.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/v2/knowledge.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/v2/manifest.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/v2/products.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/v2/taxonomy.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli/v2/tickets.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli.egg-info/dependency_links.txt +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli.egg-info/entry_points.txt +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli.egg-info/requires.txt +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/applied_cli.egg-info/top_level.txt +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/setup.cfg +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_agent_scoped_flows.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_audit_tools.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_auth_context.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_benchmark_clone.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_benchmark_delete_guardrail.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_benchmark_results.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_benchmark_scenario_tools.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_cli.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_cli_v2.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_client.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_client_v2.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_conversation_tools.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_flow_tools.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_knowledge_content_tools.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_recovery.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_scenario_bulk_cancel.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_scenario_bulk_run_contact.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_scenario_bulk_run_wait.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_toolkit_contract.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_v2_agents.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_v2_articles.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_v2_catalog_and_mcp.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_v2_connectors.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_v2_content.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_v2_conversations.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_v2_flows.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_v2_knowledge.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_v2_products.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_v2_scenarios.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_v2_taxonomy.py +0 -0
- {applied_cli-0.6.7 → applied_cli-0.6.8}/tests/test_v2_tickets.py +0 -0
|
@@ -1540,15 +1540,26 @@ def send_message_cmd(
|
|
|
1540
1540
|
@app.command()
|
|
1541
1541
|
def benchmarks(
|
|
1542
1542
|
agent_id: str = typer.Option(None, "--agent-id", help="Filter by agent ID"),
|
|
1543
|
+
with_results: bool = typer.Option(
|
|
1544
|
+
False,
|
|
1545
|
+
"--with-results",
|
|
1546
|
+
help="Include each benchmark's pass/fail/unrated tally and pass rate "
|
|
1547
|
+
"(one scenario fetch per benchmark) — a go/no-go portfolio view",
|
|
1548
|
+
),
|
|
1543
1549
|
shop_id: str = typer.Option(None, "--shop-id", help="Override shop ID"),
|
|
1544
1550
|
format: str = typer.Option(
|
|
1545
1551
|
"csv", "--format", "-f", help="Output format: csv or json"
|
|
1546
1552
|
),
|
|
1547
1553
|
) -> None:
|
|
1548
|
-
"""List benchmarks."""
|
|
1554
|
+
"""List benchmarks (optionally with per-benchmark pass rates via --with-results)."""
|
|
1549
1555
|
client = get_client(shop_id=shop_id)
|
|
1550
1556
|
result = asyncio.run(
|
|
1551
|
-
tools.benchmark_list(client, agent_id=agent_id, output_format=format)
|
|
1557
|
+
tools.benchmark_list(
|
|
1558
|
+
client,
|
|
1559
|
+
agent_id=agent_id,
|
|
1560
|
+
output_format=format,
|
|
1561
|
+
with_results=with_results,
|
|
1562
|
+
)
|
|
1552
1563
|
)
|
|
1553
1564
|
typer.echo(result)
|
|
1554
1565
|
|
|
@@ -5306,6 +5306,7 @@ async def benchmark_list(
|
|
|
5306
5306
|
client: AppliedClient,
|
|
5307
5307
|
agent_id: str | None = None,
|
|
5308
5308
|
output_format: str = "csv",
|
|
5309
|
+
with_results: bool = False,
|
|
5309
5310
|
) -> str:
|
|
5310
5311
|
"""
|
|
5311
5312
|
List conversation benchmarks.
|
|
@@ -5314,26 +5315,45 @@ async def benchmark_list(
|
|
|
5314
5315
|
client: Authenticated AppliedClient
|
|
5315
5316
|
agent_id: Optional - filter by agent UUID
|
|
5316
5317
|
output_format: 'csv' or 'json'
|
|
5318
|
+
with_results: Also compute each benchmark's pass/fail/unrated tally and
|
|
5319
|
+
pass rate (one extra scenario fetch per benchmark) — a go/no-go
|
|
5320
|
+
portfolio view across all benchmarks
|
|
5317
5321
|
|
|
5318
5322
|
Returns:
|
|
5319
|
-
List of benchmarks with id, name, agent, scenario count
|
|
5323
|
+
List of benchmarks with id, name, agent, scenario count (and pass-rate
|
|
5324
|
+
columns when with_results is set)
|
|
5320
5325
|
"""
|
|
5321
5326
|
benchmarks = await client.list_benchmarks(agent_id=agent_id)
|
|
5322
|
-
mapped = [
|
|
5323
|
-
{
|
|
5327
|
+
mapped = []
|
|
5328
|
+
for b in benchmarks:
|
|
5329
|
+
row = {
|
|
5324
5330
|
"id": b.get("id"),
|
|
5325
5331
|
"name": b.get("name"),
|
|
5326
5332
|
"agent_name": b.get("agent", {}).get("name", ""),
|
|
5327
5333
|
"scenario_count": b.get("scenario_count", 0),
|
|
5328
5334
|
"description": str(b.get("description", ""))[:80],
|
|
5329
5335
|
}
|
|
5330
|
-
for b in benchmarks
|
|
5331
|
-
]
|
|
5336
|
+
if with_results:
|
|
5337
|
+
scenarios = await client.list_scenarios(
|
|
5338
|
+
benchmark_id=b.get("id"), fetch_all=True
|
|
5339
|
+
)
|
|
5340
|
+
tally = _pass_status_tally(scenarios)
|
|
5341
|
+
pass_rate = tally["pass_rate"]
|
|
5342
|
+
row["passed"] = tally["passed"]
|
|
5343
|
+
row["failed"] = tally["failed"]
|
|
5344
|
+
row["unrated"] = tally["unrated"]
|
|
5345
|
+
row["pass_rate"] = (
|
|
5346
|
+
f"{pass_rate * 100:.1f}%" if pass_rate is not None else "n/a"
|
|
5347
|
+
)
|
|
5348
|
+
mapped.append(row)
|
|
5349
|
+
|
|
5350
|
+
columns = ["id", "name", "agent_name", "scenario_count"]
|
|
5351
|
+
if with_results:
|
|
5352
|
+
columns += ["passed", "failed", "unrated", "pass_rate"]
|
|
5353
|
+
columns.append("description")
|
|
5332
5354
|
|
|
5333
5355
|
if output_format == "csv":
|
|
5334
|
-
return to_csv(
|
|
5335
|
-
mapped, ["id", "name", "agent_name", "scenario_count", "description"]
|
|
5336
|
-
)
|
|
5356
|
+
return to_csv(mapped, columns)
|
|
5337
5357
|
return to_json(mapped)
|
|
5338
5358
|
|
|
5339
5359
|
|
|
@@ -5710,6 +5730,40 @@ async def benchmark_clone(
|
|
|
5710
5730
|
return "\n".join(lines)
|
|
5711
5731
|
|
|
5712
5732
|
|
|
5733
|
+
def _pass_status_tally(scenarios: list[dict]) -> dict[str, Any]:
|
|
5734
|
+
"""Tally scenarios by pass_status and compute the pass rate among rated.
|
|
5735
|
+
|
|
5736
|
+
Scenario pass_status from the API is the *effective* value (the latest run's
|
|
5737
|
+
pass_status when present, else the scenario's own), so this reflects the most
|
|
5738
|
+
recent run per scenario.
|
|
5739
|
+
"""
|
|
5740
|
+
tally = {"pass": 0, "fail": 0, "unrated": 0}
|
|
5741
|
+
failing: list[dict[str, Any]] = []
|
|
5742
|
+
unrated: list[dict[str, Any]] = []
|
|
5743
|
+
for scenario in scenarios:
|
|
5744
|
+
status = str(scenario.get("pass_status") or "unrated").lower()
|
|
5745
|
+
if status not in tally:
|
|
5746
|
+
status = "unrated"
|
|
5747
|
+
tally[status] += 1
|
|
5748
|
+
entry = {"id": scenario.get("id"), "name": scenario.get("name")}
|
|
5749
|
+
if status == "fail":
|
|
5750
|
+
failing.append(entry)
|
|
5751
|
+
elif status == "unrated":
|
|
5752
|
+
unrated.append(entry)
|
|
5753
|
+
|
|
5754
|
+
rated = tally["pass"] + tally["fail"]
|
|
5755
|
+
return {
|
|
5756
|
+
"total": len(scenarios),
|
|
5757
|
+
"passed": tally["pass"],
|
|
5758
|
+
"failed": tally["fail"],
|
|
5759
|
+
"unrated": tally["unrated"],
|
|
5760
|
+
"rated": rated,
|
|
5761
|
+
"pass_rate": round(tally["pass"] / rated, 4) if rated else None,
|
|
5762
|
+
"failing_scenarios": failing,
|
|
5763
|
+
"unrated_scenarios": unrated,
|
|
5764
|
+
}
|
|
5765
|
+
|
|
5766
|
+
|
|
5713
5767
|
async def benchmark_results(
|
|
5714
5768
|
client: AppliedClient,
|
|
5715
5769
|
benchmark_id: str,
|
|
@@ -5739,30 +5793,18 @@ async def benchmark_results(
|
|
|
5739
5793
|
except AppliedAPIError as e:
|
|
5740
5794
|
return _format_error(e)
|
|
5741
5795
|
|
|
5742
|
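-
tally = {"pass": 0, "fail": 0, "unrated": 0}
|
|
5743
|
-
failing: list[dict[str, Any]] = []
|
|
5744
|
-
unrated: list[dict[str, Any]] = []
|
|
5745
|
-
for scenario in scenarios: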
|
|
5746
|
-
status = str(scenario.get("pass_status") or "unrated").lower()
|
|
5747
|
-
if status not in tally:
|
|
5748
|
-
status = "unrated"
|
|
5749
|
-
tally[status] += 1
|
|
5750
|
-
entry = {"id": scenario.get("id"), "name": scenario.get("name")}
|
|
5751
|
-
if status == "fail":
|
|
5752
|
-
failing.append(entry)
|
|
5753
|
-
elif status == "unrated":
|
|
5754
|
-
unrated.append(entry)
|
|
5755
|
-
|
|
5756
|
-
rated = tally["pass"] + tally["fail"]
|
|
5757
|
-
pass_rate = round(tally["pass"] / rated, 4) if rated else None
|
|
5796
|
+
t = _pass_status_tally(scenarios)
|
|
5797
|
+
failing = t["failing_scenarios"]
|
|
5798
|
+
unrated = t["unrated_scenarios"]
|
|
5799
|
+
pass_rate = t["pass_rate"]
|
|
5758
5800
|
summary = {
|
|
5759
5801
|
"benchmark_id": benchmark_id,
|
|
5760
5802
|
"benchmark_name": benchmark.get("name"),
|
|
5761
|
-
"total_scenarios": len(scenarios),
|
|
5762
|
-
"passed": tally["pass"],
|
|
5763
|
-
"failed": tally["fail"],
|
|
5764
|
-
"unrated": tally["unrated"],
|
|
5765
|
-
"rated": rated,
|
|
5803
|
+
"total_scenarios": t["total"],
|
|
5804
|
+
"passed": t["passed"],
|
|
5805
|
+
"failed": t["failed"],
|
|
5806
|
+
"unrated": t["unrated"],
|
|
5807
|
+
"rated": t["rated"],
|
|
5766
5808
|
"pass_rate": pass_rate,
|
|
5767
5809
|
"failing_scenarios": failing,
|
|
5768
5810
|
"unrated_scenarios": unrated,
|
|
@@ -5772,16 +5814,16 @@ async def benchmark_results(
|
|
|
5772
5814
|
return to_json(summary)
|
|
5773
5815
|
|
|
5774
5816
|
pass_rate_str = (
|
|
5775
|
-
f"{pass_rate * 100:.1f}% ({tally['pass']}/{rated} rated)"
|
|
5817
|
+
f"{pass_rate * 100:.1f}% ({t['passed']}/{t['rated']} rated)"
|
|
5776
5818
|
if pass_rate is not None
|
|
5777
5819
|
else "n/a (no rated scenarios yet)"
|
|
5778
5820
|
)
|
|
5779
5821
|
lines = [
|
|
5780
5822
|
f"# Benchmark Results: {benchmark.get('name')} ({benchmark_id})",
|
|
5781
5823
|
f"total_scenarios: {summary['total_scenarios']}",
|
|
5782
|
-
f"passed: {tally['pass']}",
|
|
5783
|
-
f"failed: {tally['fail']}",
|
|
5784
|
-
f"unrated: {tally['unrated']}",
|
|
5824
|
+
f"passed: {t['passed']}",
|
|
5825
|
+
f"failed: {t['failed']}",
|
|
5826
|
+
f"unrated: {t['unrated']}",
|
|
5785
5827
|
f"pass_rate: {pass_rate_str}",
|
|
5786
5828
|
]
|
|
5787
5829
|
if failing:
|
|
@@ -43,6 +43,7 @@ class ScenariosBulkCancelInput(StrictInput):
|
|
|
43
43
|
class BenchmarksListInput(StrictInput):
|
|
44
44
|
agent_id: str | None = None
|
|
45
45
|
limit: int = 50
|
|
46
|
+
with_results: bool = False
|
|
46
47
|
|
|
47
48
|
|
|
48
49
|
class BenchmarksGetInput(StrictInput):
|
|
@@ -395,10 +396,26 @@ async def benchmarks_list_handler(
|
|
|
395
396
|
agent_id=params.agent_id,
|
|
396
397
|
limit=params.limit,
|
|
397
398
|
)
|
|
399
|
+
payload = []
|
|
400
|
+
for benchmark in benchmarks:
|
|
401
|
+
row = _project_benchmark_payload(benchmark)
|
|
402
|
+
if params.with_results:
|
|
403
|
+
from applied_cli.tools import _pass_status_tally
|
|
404
|
+
|
|
405
|
+
scenarios = await client.list_scenarios(
|
|
406
|
+
benchmark_id=benchmark.get("id"), fetch_all=True
|
|
407
|
+
)
|
|
408
|
+
tally = _pass_status_tally(scenarios)
|
|
409
|
+
row["results"] = {
|
|
410
|
+
"passed": tally["passed"],
|
|
411
|
+
"failed": tally["failed"],
|
|
412
|
+
"unrated": tally["unrated"],
|
|
413
|
+
"pass_rate": tally["pass_rate"],
|
|
414
|
+
}
|
|
415
|
+
payload.append(row)
|
|
398
416
|
except AppliedAPIError as exc:
|
|
399
417
|
return _api_error_result(exc)
|
|
400
418
|
|
|
401
|
-
payload = [_project_benchmark_payload(benchmark) for benchmark in benchmarks]
|
|
402
419
|
return ToolResult(
|
|
403
420
|
data=payload,
|
|
404
421
|
summary=_count_summary(len(payload), "benchmark"),
|
|
@@ -991,7 +1008,11 @@ def scenario_specs() -> list[ToolSpec]:
|
|
|
991
1008
|
ToolSpec(
|
|
992
1009
|
name="benchmarks_list",
|
|
993
1010
|
namespace="benchmarks",
|
|
994
|
-
description=
|
|
1011
|
+
description=(
|
|
1012
|
+
"List conversation benchmarks as structured rows. Set "
|
|
1013
|
+
"with_results=true for each benchmark's pass/fail/unrated tally "
|
|
1014
|
+
"and pass rate (a go/no-go portfolio view)."
|
|
1015
|
+
),
|
|
995
1016
|
input_model=BenchmarksListInput,
|
|
996
1017
|
output_model=None,
|
|
997
1018
|
handler=benchmarks_list_handler,
|
|
@@ -40,6 +40,7 @@ tests/test_audit_tools.py
|
|
|
40
40
|
tests/test_auth_context.py
|
|
41
41
|
tests/test_benchmark_clone.py
|
|
42
42
|
tests/test_benchmark_delete_guardrail.py
|
|
43
|
+
tests/test_benchmark_list_with_results.py
|
|
43
44
|
tests/test_benchmark_results.py
|
|
44
45
|
tests/test_benchmark_scenario_tools.py
|
|
45
46
|
tests/test_cli.py
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from applied_cli import tools
|
|
6
|
+
|
|
7
|
+
BENCHMARKS = [
|
|
8
|
+
{"id": "b1", "name": "Cancel", "agent": {"name": "August"}, "scenario_count": 3},
|
|
9
|
+
{"id": "b2", "name": "Refund", "agent": {"name": "August"}, "scenario_count": 1},
|
|
10
|
+
]
|
|
11
|
+
|
|
12
|
+
SCENARIOS_BY_BENCHMARK = {
|
|
13
|
+
"b1": [
|
|
14
|
+
{"id": "s1", "name": "a", "pass_status": "pass"},
|
|
15
|
+
{"id": "s2", "name": "b", "pass_status": "fail"},
|
|
16
|
+
{"id": "s3", "name": "c", "pass_status": "unrated"},
|
|
17
|
+
],
|
|
18
|
+
"b2": [{"id": "s4", "name": "d", "pass_status": "pass"}],
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class FakeListClient:
|
|
23
|
+
def __init__(self):
|
|
24
|
+
self.list_scenarios_calls = 0
|
|
25
|
+
|
|
26
|
+
async def list_benchmarks(self, agent_id=None, limit=50):
|
|
27
|
+
return list(BENCHMARKS)
|
|
28
|
+
|
|
29
|
+
async def list_scenarios(self, benchmark_id=None, fetch_all=True, **kwargs):
|
|
30
|
+
self.list_scenarios_calls += 1
|
|
31
|
+
return list(SCENARIOS_BY_BENCHMARK.get(benchmark_id, []))
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@pytest.mark.asyncio
|
|
35
|
+
async def test_list_without_results_does_not_fetch_scenarios():
|
|
36
|
+
client = FakeListClient()
|
|
37
|
+
out = await tools.benchmark_list(client, output_format="json")
|
|
38
|
+
rows = json.loads(out)
|
|
39
|
+
assert client.list_scenarios_calls == 0
|
|
40
|
+
assert "pass_rate" not in rows[0]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@pytest.mark.asyncio
|
|
44
|
+
async def test_list_with_results_adds_pass_rate_per_benchmark():
|
|
45
|
+
client = FakeListClient()
|
|
46
|
+
out = await tools.benchmark_list(
|
|
47
|
+
client, output_format="json", with_results=True
|
|
48
|
+
)
|
|
49
|
+
rows = {r["id"]: r for r in json.loads(out)}
|
|
50
|
+
# One scenario fetch per benchmark.
|
|
51
|
+
assert client.list_scenarios_calls == 2
|
|
52
|
+
# b1: 1 pass / 2 rated = 50%
|
|
53
|
+
assert rows["b1"]["passed"] == 1
|
|
54
|
+
assert rows["b1"]["failed"] == 1
|
|
55
|
+
assert rows["b1"]["unrated"] == 1
|
|
56
|
+
assert rows["b1"]["pass_rate"] == "50.0%"
|
|
57
|
+
# b2: 1 pass / 1 rated = 100%
|
|
58
|
+
assert rows["b2"]["pass_rate"] == "100.0%"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@pytest.mark.asyncio
|
|
62
|
+
async def test_list_with_results_csv_has_columns():
|
|
63
|
+
client = FakeListClient()
|
|
64
|
+
out = await tools.benchmark_list(client, output_format="csv", with_results=True)
|
|
65
|
+
header = out.splitlines()[0]
|
|
66
|
+
for col in ("passed", "failed", "unrated", "pass_rate"):
|
|
67
|
+
assert col in header
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_pass_status_tally_pure():
|
|
71
|
+
tally = tools._pass_status_tally(
|
|
72
|
+
[
|
|
73
|
+
{"id": "1", "pass_status": "pass"},
|
|
74
|
+
{"id": "2", "pass_status": "PASS"}, # case-insensitive
|
|
75
|
+
{"id": "3", "pass_status": "fail"},
|
|
76
|
+
{"id": "4"}, # missing -> unrated
|
|
77
|
+
]
|
|
78
|
+
)
|
|
79
|
+
assert tally["passed"] == 2
|
|
80
|
+
assert tally["failed"] == 1
|
|
81
|
+
assert tally["unrated"] == 1
|
|
82
|
+
assert tally["rated"] == 3
|
|
83
|
+
assert tally["pass_rate"] == round(2 / 3, 4)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def test_pass_status_tally_no_rated():
|
|
87
|
+
tally = tools._pass_status_tally([{"id": "1", "pass_status": "unrated"}])
|
|
88
|
+
assert tally["pass_rate"] is None
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@pytest.mark.asyncio
|
|
92
|
+
async def test_v2_benchmarks_list_with_results():
|
|
93
|
+
from applied_cli.v2.scenarios import (
|
|
94
|
+
BenchmarksListInput,
|
|
95
|
+
benchmarks_list_handler,
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
client = FakeListClient()
|
|
99
|
+
result = await benchmarks_list_handler(
|
|
100
|
+
client, BenchmarksListInput(with_results=True)
|
|
101
|
+
)
|
|
102
|
+
by_id = {r["id"]: r for r in result.data}
|
|
103
|
+
assert by_id["b1"]["results"]["passed"] == 1
|
|
104
|
+
assert by_id["b2"]["results"]["pass_rate"] == 1.0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|