PyPI - applied-cli - Versions diffs - 0.1.0__py3-none-any.whl - Mend

applied-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

applied_cli/__init__.py +2 -0
applied_cli/auth_store.py +263 -0
applied_cli/commands/__init__.py +2 -0
applied_cli/commands/_hints.py +11 -0
applied_cli/commands/_normalize.py +79 -0
applied_cli/commands/_parsers.py +58 -0
applied_cli/commands/_ui.py +33 -0
applied_cli/commands/agent.py +1231 -0
applied_cli/commands/auth.py +739 -0
applied_cli/commands/chat.py +379 -0
applied_cli/commands/coverage.py +348 -0
applied_cli/commands/discover.py +1006 -0
applied_cli/commands/fix.py +1204 -0
applied_cli/commands/insights.py +614 -0
applied_cli/commands/intents.py +447 -0
applied_cli/commands/rate.py +508 -0
applied_cli/commands/responses.py +604 -0
applied_cli/commands/shop.py +1757 -0
applied_cli/commands/simulate.py +330 -0
applied_cli/commands/spec.py +238 -0
applied_cli/config.py +50 -0
applied_cli/error_reporting.py +38 -0
applied_cli/http.py +1614 -0
applied_cli/main.py +90 -0
applied_cli/mcp_server.py +738 -0
applied_cli/presets/demo.yaml +170 -0
applied_cli/runtime.py +53 -0
applied_cli/shop_spec.py +398 -0
applied_cli/spec_workflow.py +432 -0
applied_cli-0.1.0.dist-info/METADATA +176 -0
applied_cli-0.1.0.dist-info/RECORD +34 -0
applied_cli-0.1.0.dist-info/WHEEL +5 -0
applied_cli-0.1.0.dist-info/entry_points.txt +3 -0
applied_cli-0.1.0.dist-info/top_level.txt +1 -0

applied_cli/commands/rate.py ADDED Viewed

@@ -0,0 +1,508 @@
+import uuid
+from datetime import datetime
+from typing import Any, Optional
+import typer
+from applied_cli.commands._hints import suggest_value
+from applied_cli.commands._ui import confirm_or_exit, emit_success, show_target
+from applied_cli.error_reporting import render_api_error
+from applied_cli.http import (
+    APIError,
+    create_conversation_benchmark,
+    create_conversation_scenario,
+    create_scenario_run,
+    get_conversation,
+    list_conversation_benchmarks,
+    list_conversation_messages,
+    list_conversation_references,
+    list_conversation_scenarios,
+    list_scenario_runs,
+    patch_conversation_scenario,
+    patch_scenario_run,
+)
+from applied_cli.runtime import resolve_runtime
# Typer application object; the `conversation` command below registers itself on it.
app = typer.Typer(help="Rate conversations and persist results in Test Coverage.")
# Default value of the `benchmark_name` option of the `conversation` command.
DEFAULT_BENCHMARK_NAME = "CLI Self-Rated Conversations"
+def _parse_dt(value: str) -> datetime:
+    normalized = value.replace("Z", "+00:00")
+    return datetime.fromisoformat(normalized)
+def _safe_float(value: Any) -> Optional[float]:
+    if value is None:
+        return None
+    try:
+        return float(value)
+    except Exception:
+        return None
+def _auto_rate(
+    *,
+    messages: list[dict[str, Any]],
+    references: list[dict[str, Any]],
+) -> dict[str, Any]:
+    user_messages = [m for m in messages if m.get("role") == "user"]
+    assistant_messages = [m for m in messages if m.get("role") == "assistant"]
+    pass_status = "pass"
+    csat_score = 4.0
+    feedback_parts: list[str] = []
+    if not assistant_messages:
+        pass_status = "fail"
+        csat_score = 1.0
+        feedback_parts.append("No assistant message was generated.")
+    else:
+        latest_assistant = assistant_messages[-1]
+        latest_text = str(
+            latest_assistant.get("text") or latest_assistant.get("content") or ""
+        ).strip()
+        if not latest_text:
+            pass_status = "fail"
+            csat_score = min(csat_score, 2.0)
+            feedback_parts.append("Latest assistant response is empty.")
+        if len(assistant_messages) < len(user_messages):
+            pass_status = "fail"
+            csat_score = min(csat_score, 2.0)
+            feedback_parts.append(
+                "Assistant replies are fewer than user turns in transcript."
+            )
+    assistant_message_ids = {str(m.get("id")) for m in assistant_messages}
+    assistant_refs = [
+        ref for ref in references if str(ref.get("message_id")) in assistant_message_ids
+    ]
+    relevance_values = [
+        score
+        for score in (_safe_float(r.get("relevance_score")) for r in assistant_refs)
+        if score is not None
+    ]
+    average_relevance = (
+        sum(relevance_values) / len(relevance_values) if relevance_values else None
+    )
+    if assistant_messages and not assistant_refs:
+        csat_score = min(csat_score, 3.0)
+        feedback_parts.append("No message references were found for assistant outputs.")
+    elif average_relevance is not None and average_relevance < 0.35:
+        pass_status = "fail"
+        csat_score = min(csat_score, 2.0)
+        feedback_parts.append(
+            f"Low reference relevance average ({average_relevance:.2f}) indicates weak grounding."
+        )
+    elif average_relevance is not None:
+        feedback_parts.append(
+            f"References found with average relevance {average_relevance:.2f}."
+        )
+    if not feedback_parts:
+        feedback_parts.append("Response appears grounded and complete.")
+    reasons: list[str] = []
+    for ref in assistant_refs:
+        reason = str(ref.get("reason") or "").strip()
+        if reason:
+            reasons.append(reason)
+    if reasons:
+        preview = "; ".join(reasons[:3])
+        feedback_parts.append(f"Top reference reasons: {preview}")
+    return {
+        "pass_status": pass_status,
+        "csat_score": float(max(1.0, min(5.0, round(csat_score, 1)))),
+        "feedback": " ".join(feedback_parts),
+        "reference_score": round(average_relevance, 3)
+        if average_relevance is not None
+        else None,
+        "reference_notes": f"assistant_refs={len(assistant_refs)} total_refs={len(references)}",
+    }
+def _extract_agent_id(conversation: dict[str, Any]) -> Optional[str]:
+    nested_agent = conversation.get("agent")
+    if isinstance(nested_agent, dict) and nested_agent.get("id"):
+        return str(nested_agent["id"])
+    flat_agent_id = conversation.get("agent_id")
+    if flat_agent_id:
+        return str(flat_agent_id)
+    return None
+def _require_id(value: Any, *, label: str) -> str:
+    if not value:
+        raise APIError(
+            f"Expected {label} in API response.",
+            code="MISSING_RESPONSE_FIELD",
+            hint="Server returned an unexpected payload shape; inspect the raw API response.",
+            retryable=False,
+        )
+    return str(value)
def _find_or_create_benchmark(
    *,
    base_url: str,
    shop_id: str,
    api_token: str,
    agent_id: str,
    benchmark_name: str,
) -> Optional[dict[str, Any]]:
    """Return the agent's benchmark named ``benchmark_name``, creating it if needed.

    Names are compared case-insensitively with surrounding whitespace removed
    on both sides, so a padded ``benchmark_name`` still matches an existing
    benchmark instead of triggering the creation of a duplicate.

    Returns:
        The existing or newly created benchmark, or ``None`` when creation
        fails with an ``APIError``.

    Raises:
        APIError: If listing the existing benchmarks fails.
    """
    wanted = benchmark_name.strip().lower()
    benchmarks = list_conversation_benchmarks(
        base_url=base_url,
        shop_id=shop_id,
        api_token=api_token,
        agent_id=agent_id,
    )
    for benchmark in benchmarks:
        # `or ""` keeps a null name from being compared as the string "None".
        if str(benchmark.get("name") or "").strip().lower() == wanted:
            return benchmark
    try:
        return create_conversation_benchmark(
            base_url=base_url,
            shop_id=shop_id,
            api_token=api_token,
            agent_id=agent_id,
            name=benchmark_name,
            description="Scenario collection created by applied-cli rating workflow.",
        )
    except APIError:
        # Deliberate best-effort: ``None`` signals "no benchmark available" so
        # the rating can still be persisted without benchmark linkage.
        return None
def _find_or_create_scenario(
    *,
    base_url: str,
    shop_id: str,
    api_token: str,
    agent_id: str,
    benchmark_id: Optional[str],
    conversation_id: str,
) -> dict[str, Any]:
    """Return the scenario named ``CLI Rated <conversation_id>``, creating it if needed.

    An existing scenario is always reused. When ``benchmark_id`` is given and
    the scenario is not yet linked to that benchmark, the scenario is patched
    so that its ``benchmark_ids`` include it. Previously a matching scenario
    was only returned when ``benchmark_id`` was set, so running without a
    benchmark created a duplicate scenario every time.

    Returns:
        The existing (possibly patched) or newly created scenario.

    Raises:
        APIError: If an API call fails or a matching scenario has no id.
    """
    scenario_name = f"CLI Rated {conversation_id}"
    scenarios = list_conversation_scenarios(
        base_url=base_url,
        shop_id=shop_id,
        api_token=api_token,
        agent_id=agent_id,
        name=scenario_name,
    )
    for scenario in scenarios:
        # The listing is filtered by name, but only an exact match is accepted.
        if str(scenario.get("name", "")).strip() != scenario_name:
            continue
        if not benchmark_id:
            return scenario
        linked = scenario.get("benchmarks")
        benchmark_ids = [
            str(item["id"])
            for item in (linked if isinstance(linked, list) else [])
            if isinstance(item, dict) and item.get("id")
        ]
        if benchmark_id in benchmark_ids:
            return scenario
        return patch_conversation_scenario(
            base_url=base_url,
            shop_id=shop_id,
            api_token=api_token,
            scenario_id=_require_id(scenario.get("id"), label="scenario id"),
            payload={"benchmark_ids": [*benchmark_ids, benchmark_id]},
        )
    return create_conversation_scenario(
        base_url=base_url,
        shop_id=shop_id,
        api_token=api_token,
        agent_id=agent_id,
        benchmark_id=benchmark_id,
        name=scenario_name,
        input_conversation_id=conversation_id,
    )
def _find_or_create_run(
    *,
    base_url: str,
    shop_id: str,
    api_token: str,
    scenario_id: str,
    conversation_id: str,
) -> dict[str, Any]:
    """Return the first run listed for the scenario, creating one if none exists.

    Runs are listed with ``latest_only=True``. A new run is created with
    ``conversation_id`` as its output conversation only when that listing is
    empty.
    """
    target = {
        "base_url": base_url,
        "shop_id": shop_id,
        "api_token": api_token,
        "scenario_id": scenario_id,
    }
    existing_runs = list_scenario_runs(**target, latest_only=True)
    if not existing_runs:
        return create_scenario_run(**target, output_conversation_id=conversation_id)
    return existing_runs[0]
+def _validate_manual_values(
+    *,
+    pass_status: Optional[str],
+    csat_score: Optional[float],
+) -> None:
+    if pass_status is not None and pass_status not in {"pass", "fail"}:
+        suggestion = suggest_value(pass_status, ["pass", "fail"])
+        hint = f" Did you mean '{suggestion}'?" if suggestion else ""
+        raise typer.BadParameter(f"pass-status must be one of: pass, fail.{hint}")
+    if csat_score is not None and (csat_score < 1.0 or csat_score > 5.0):
+        raise typer.BadParameter("csat-score must be between 1 and 5")
@app.command(
    "conversation",
    help=(
        "Rate a conversation and persist in Test Coverage. Example: applied-cli test scenarios rate "
        "--conversation-id <uuid> --agent-id <uuid> --auto --yes"
    ),
)
def conversation(
    conversation_id: str = typer.Option(
        ..., "--conversation-id", "--conversation", "--id", help="Conversation UUID to rate."
    ),
    agent_id: Optional[str] = typer.Option(
        None,
        "--agent-id",
        "--agent",
        help="Target agent UUID override (defaults to conversation agent).",
    ),
    benchmark_name: str = typer.Option(
        DEFAULT_BENCHMARK_NAME,
        help="Benchmark collection name used for persisted scenarios.",
    ),
    auto: bool = typer.Option(
        True,
        "--auto/--manual",
        help="Auto-compute rating or provide manual score values.",
    ),
    include_references: bool = typer.Option(
        True,
        "--include-references/--no-include-references",
        help="Include MessageReference attribution context in rating.",
    ),
    pass_status: Optional[str] = typer.Option(
        None, "--pass-status", help="Manual pass/fail result (required for --manual)."
    ),
    csat_score: Optional[float] = typer.Option(
        None, "--csat-score", help="Manual CSAT score between 1 and 5."
    ),
    feedback: Optional[str] = typer.Option(None, "--feedback", help="Manual feedback notes."),
    reference_score: Optional[float] = typer.Option(
        None, help="Optional reference quality score."
    ),
    reference_notes: Optional[str] = typer.Option(
        None, help="Optional notes about reference quality."
    ),
    base_url: Optional[str] = typer.Option(None, help="Applied base URL."),
    shop_id: Optional[str] = typer.Option(None, help="Target shop UUID."),
    api_token: Optional[str] = typer.Option(None, help="Applied API token."),
    dry_run: bool = typer.Option(False, help="Show rating and payloads without persisting."),
    yes: bool = typer.Option(
        False, "--yes", "-y", help="Skip pre-execution confirmation prompt."
    ),
) -> None:
    """Rate one conversation and persist the rating as a scenario run.

    Steps:
      1. Validate the ids and any manually supplied rating values.
      2. Resolve runtime settings and fetch the conversation, its messages
         and (optionally) its references.
      3. Compute the automatic rating, then apply every explicitly supplied
         value on top of it. In ``--manual`` mode a missing pass status is
         prompted for and validated.
      4. Print the target and the rating. With ``--dry-run`` stop here;
         otherwise ask for confirmation (skipped with ``--yes``).
      5. Find or create the benchmark, scenario and run, then patch the run
         and the scenario with the rating.

    Raises:
        typer.BadParameter: For invalid ids or rating values.
        typer.Exit: With code 1 on API failures or when no agent id can be
            determined; with code 0 after a dry run.
    """
    try:
        uuid.UUID(conversation_id)
    except ValueError as exc:
        raise typer.BadParameter("conversation-id must be a valid UUID.") from exc
    if agent_id:
        try:
            uuid.UUID(agent_id)
        except ValueError as exc:
            raise typer.BadParameter("agent-id must be a valid UUID.") from exc
    _validate_manual_values(pass_status=pass_status, csat_score=csat_score)
    if not auto and yes and not pass_status:
        raise typer.BadParameter(
            "manual mode with --yes requires --pass-status to avoid interactive prompts."
        )

    try:
        resolved_base_url, resolved_shop_id, resolved_token = resolve_runtime(
            base_url=base_url,
            shop_id=shop_id,
            api_token=api_token,
        )
    except APIError as exc:
        typer.echo(render_api_error(exc, action="resolve runtime for rating"), err=True)
        raise typer.Exit(code=1) from exc

    try:
        conversation_data = get_conversation(
            base_url=resolved_base_url,
            shop_id=resolved_shop_id,
            api_token=resolved_token,
            conversation_id=conversation_id,
        )
        messages = list_conversation_messages(
            base_url=resolved_base_url,
            shop_id=resolved_shop_id,
            api_token=resolved_token,
            conversation_id=conversation_id,
        )
        references: list[dict[str, Any]] = []
        if include_references:
            references = list_conversation_references(
                base_url=resolved_base_url,
                shop_id=resolved_shop_id,
                api_token=resolved_token,
                conversation_id=conversation_id,
            )
    except APIError as exc:
        typer.echo(render_api_error(exc, action="read conversation for rating"), err=True)
        raise typer.Exit(code=1) from exc

    # Sort into a new list so a failure cannot leave `messages` half-sorted.
    # ValueError: malformed timestamp; TypeError: naive and aware values mixed.
    try:
        messages = sorted(
            messages,
            key=lambda item: _parse_dt(
                str(item.get("created_at") or "1970-01-01T00:00:00")
            ),
        )
    except (TypeError, ValueError):
        typer.echo(
            "Warning: could not order messages by created_at; using API order.",
            err=True,
        )

    resolved_agent_id = agent_id or _extract_agent_id(conversation_data)
    if not resolved_agent_id:
        typer.echo(
            "Could not determine agent_id from conversation. Provide --agent-id.",
            err=True,
        )
        raise typer.Exit(code=1)

    computed = _auto_rate(messages=messages, references=references)
    if not auto and not pass_status:
        pass_status = typer.prompt("Pass status (pass/fail)").strip().lower()
        # The prompted value bypassed the up-front validation; check it now so
        # an arbitrary string is never persisted as the pass status.
        _validate_manual_values(pass_status=pass_status, csat_score=None)

    # Explicitly supplied values win over the computed ones in both modes.
    overrides: dict[str, Any] = {
        "pass_status": pass_status,
        "csat_score": None if csat_score is None else float(csat_score),
        "feedback": feedback,
        "reference_score": None if reference_score is None else float(reference_score),
        "reference_notes": reference_notes,
    }
    for field_name, override in overrides.items():
        if override is not None:
            computed[field_name] = override

    show_target(
        {
            "base_url": resolved_base_url,
            "shop_id": resolved_shop_id,
            "conversation_id": conversation_id,
            "agent_id": resolved_agent_id,
            "benchmark_name": benchmark_name,
            "mode": "auto" if auto else "manual",
            "include_references": include_references,
            "dry_run": dry_run,
        }
    )
    typer.echo("Computed rating:")
    typer.echo(f"- pass_status: {computed['pass_status']}")
    typer.echo(f"- csat_score: {computed['csat_score']}")
    typer.echo(f"- feedback: {computed['feedback']}")
    typer.echo(f"- reference_score: {computed.get('reference_score')}")
    typer.echo(f"- reference_notes: {computed.get('reference_notes')}")

    # A dry run writes nothing, so it must not ask for permission to persist.
    if dry_run:
        typer.echo("Dry run complete. No records were written.")
        raise typer.Exit(code=0)
    confirm_or_exit(yes=yes, prompt="Continue and persist rating to Test Coverage?")

    try:
        benchmark = _find_or_create_benchmark(
            base_url=resolved_base_url,
            shop_id=resolved_shop_id,
            api_token=resolved_token,
            agent_id=resolved_agent_id,
            benchmark_name=benchmark_name,
        )
        benchmark_id = None
        if benchmark is not None:
            benchmark_id = _require_id(benchmark.get("id"), label="benchmark id")
        else:
            # Diagnostics go to stderr, like the error messages in this command.
            typer.echo(
                "Warning: could not create benchmark with current credentials. "
                "Proceeding without benchmark linkage.",
                err=True,
            )
        scenario = _find_or_create_scenario(
            base_url=resolved_base_url,
            shop_id=resolved_shop_id,
            api_token=resolved_token,
            agent_id=resolved_agent_id,
            benchmark_id=benchmark_id,
            conversation_id=conversation_id,
        )
        scenario_id = _require_id(scenario.get("id"), label="scenario id")
        run = _find_or_create_run(
            base_url=resolved_base_url,
            shop_id=resolved_shop_id,
            api_token=resolved_token,
            scenario_id=scenario_id,
            conversation_id=conversation_id,
        )
        run_id = _require_id(run.get("id"), label="run id")
        run_payload = {
            "pass_status": computed["pass_status"],
            "csat_score": computed["csat_score"],
            "feedback": computed["feedback"],
            "reference_score": computed.get("reference_score"),
            "reference_notes": computed.get("reference_notes"),
        }
        # The scenario receives the rating without the reference fields.
        scenario_payload = {
            "pass_status": computed["pass_status"],
            "csat_score": computed["csat_score"],
            "feedback": computed["feedback"],
        }
        updated_run = patch_scenario_run(
            base_url=resolved_base_url,
            shop_id=resolved_shop_id,
            api_token=resolved_token,
            run_id=run_id,
            payload=run_payload,
        )
        patch_conversation_scenario(
            base_url=resolved_base_url,
            shop_id=resolved_shop_id,
            api_token=resolved_token,
            scenario_id=scenario_id,
            payload=scenario_payload,
        )
    except APIError as exc:
        typer.echo(render_api_error(exc, action="persist rating"), err=True)
        raise typer.Exit(code=1) from exc

    emit_success(
        output_json=False,
        payload={},
        fields={
            "benchmark_id": benchmark_id or "(none)",
            "scenario_id": scenario_id,
            "run_id": run_id,
            "evaluated_at": updated_run.get("evaluated_at"),
        },
    )