PyPI - applied-cli - Versions diffs - 0.1.0__py3-none-any.whl - Mend

applied-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

applied_cli/__init__.py +2 -0
applied_cli/auth_store.py +263 -0
applied_cli/commands/__init__.py +2 -0
applied_cli/commands/_hints.py +11 -0
applied_cli/commands/_normalize.py +79 -0
applied_cli/commands/_parsers.py +58 -0
applied_cli/commands/_ui.py +33 -0
applied_cli/commands/agent.py +1231 -0
applied_cli/commands/auth.py +739 -0
applied_cli/commands/chat.py +379 -0
applied_cli/commands/coverage.py +348 -0
applied_cli/commands/discover.py +1006 -0
applied_cli/commands/fix.py +1204 -0
applied_cli/commands/insights.py +614 -0
applied_cli/commands/intents.py +447 -0
applied_cli/commands/rate.py +508 -0
applied_cli/commands/responses.py +604 -0
applied_cli/commands/shop.py +1757 -0
applied_cli/commands/simulate.py +330 -0
applied_cli/commands/spec.py +238 -0
applied_cli/config.py +50 -0
applied_cli/error_reporting.py +38 -0
applied_cli/http.py +1614 -0
applied_cli/main.py +90 -0
applied_cli/mcp_server.py +738 -0
applied_cli/presets/demo.yaml +170 -0
applied_cli/runtime.py +53 -0
applied_cli/shop_spec.py +398 -0
applied_cli/spec_workflow.py +432 -0
applied_cli-0.1.0.dist-info/METADATA +176 -0
applied_cli-0.1.0.dist-info/RECORD +34 -0
applied_cli-0.1.0.dist-info/WHEEL +5 -0
applied_cli-0.1.0.dist-info/entry_points.txt +3 -0
applied_cli-0.1.0.dist-info/top_level.txt +1 -0

applied_cli/commands/shop.py ADDED Viewed

@@ -0,0 +1,1757 @@
+"""
+applied-cli shop — Create and configure shops end-to-end.
+Commands:
+  shop create   — Create a new shop using the current auth credentials.
+  shop setup    — Full shop setup from a spec file (agents, KB, CSV, insights, simulation).
+  shop template — Print the demo spec template to stdout.
+  shop test     — Run smoke tests for configured agents and rate responses.
+"""
+from __future__ import annotations
+import json
+import os
+import re
+import time
+import uuid
+import webbrowser
+from pathlib import Path
+from typing import Any, Optional
+import httpx
+import typer
+from applied_cli.auth_store import save_credentials
+from applied_cli.commands.agent import _upsert_inline_responses as upsert_inline_responses
+from applied_cli.config import Credentials
+from applied_cli.error_reporting import render_api_error
+from applied_cli.http import (
+    APIError,
+    check_superuser,
+    create_agent,
+    create_content_source,
+    create_escalation_flow,
+    create_property_choice,
+    create_conversation_scenario,
+    create_scenario_run,
+    create_shop,
+    import_conversations_bulk,
+    insights_generate,
+    list_agents,
+    list_conversation_messages,
+    list_conversation_references,
+    list_conversation_scenarios,
+    patch_conversation_scenario,
+    patch_scenario_run,
+    poll_cli_device_login,
+    populate_demo_shop,
+    start_cli_device_login,
+    update_agent,
+    validate_api_token,
+)
+from applied_cli.runtime import resolve_runtime
+from applied_cli.shop_spec import load_and_validate_shop_spec
+# Typer sub-application for the `shop` command group; the commands in this
+# module register themselves on it via `@app.command(...)`.
+app = typer.Typer(
+    help=(
+        "Create and configure shops.\n\n"
+        "Typical AI-agent workflow:\n"
+        "  1. Human: applied-cli auth login\n"
+        "  2. Agent: applied-cli shop setup --spec fabfitfun.yaml --json"
+    )
+)
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+def _slugify(name: str) -> str:
+    """Derive a profile slug from a shop name (e.g. 'FabFitFun Demo' → 'fabfitfun-demo').
+
+    The name is lower-cased, every run of characters outside ``a-z0-9`` is
+    collapsed into a single hyphen, and leading/trailing hyphens are removed.
+    Returns ``"shop"`` when nothing usable remains.
+    """
+    hyphenated = re.sub(r"[^a-z0-9]+", "-", name.lower().strip())
+    slug = hyphenated.strip("-")
+    return slug if slug else "shop"
+def _emit(step: dict[str, Any], *, output_json: bool) -> None:
+    """Report one progress step.
+
+    In JSON mode the whole ``step`` dict is written as a single JSON line.
+    In text mode a human-readable line is written instead: skipped steps show
+    their reason, other steps list every key except the bookkeeping ones
+    (``step``, ``skipped``, ``reason``) as ``key=value`` pairs.
+    """
+    if output_json:
+        typer.echo(json.dumps(step))
+        return
+    label = step.get("step", "")
+    if step.get("skipped", False):
+        why = step.get("reason", "")
+        typer.echo(f"  → {label}: skipped ({why})")
+        return
+    bookkeeping = {"step", "skipped", "reason"}
+    details = [f"{key}={value}" for key, value in step.items() if key not in bookkeeping]
+    typer.echo("  ".join([f"  → {label}", *details]))
+def _device_auth_for_shop(
+    *,
+    base_url: str,
+    output_json: bool,
+    no_browser: bool = False,
+    timeout_seconds: float = 20.0,
+    poll_interval: float = 3.0,
+    expires_in: int = 600,
+) -> tuple[str, str]:
+    """Run the device auth flow and return (api_token, shop_id) for the new shop.
+
+    Emits a pending_auth JSONL line so an agent can relay the code+URL to the human.
+
+    Args:
+        base_url: Applied base URL passed to the device-login endpoints.
+        output_json: Emit the pending-auth notice as JSON instead of text.
+        no_browser: When true, do not try to open the approval URL locally.
+        timeout_seconds: Per-request timeout for the start and poll calls.
+        poll_interval: Seconds between polls when the server gives no ``interval``.
+        expires_in: Overall wait budget in seconds when the server gives no ``expires_in``.
+
+    Returns:
+        ``(token, shop_id)`` as strings, once a poll response carries both.
+
+    Raises:
+        APIError: If starting or polling the login fails (a 428 poll status is
+            treated as "still pending"), or with code ``DEVICE_AUTH_TIMEOUT``
+            when the wait budget runs out.
+    """
+    # Errors from the start call propagate unchanged; callers render them.
+    device_data = start_cli_device_login(
+        base_url=base_url,
+        timeout_seconds=timeout_seconds,
+    )
+    token_page: str = device_data.get("verification_uri_complete") or device_data.get(
+        "verification_uri", ""
+    )
+    device_code: str = device_data.get("device_code", "")
+    user_code: str = device_data.get("user_code", "")
+    # Fall back to the defaults when the field is absent OR explicitly null;
+    # int(None) / float(None) would otherwise raise TypeError.
+    raw_expires = device_data.get("expires_in")
+    expires_in = int(raw_expires) if raw_expires is not None else expires_in
+    raw_interval = device_data.get("interval")
+    interval = float(raw_interval) if raw_interval is not None else float(poll_interval)
+    # A zero/negative interval would make the loop below spin without sleeping.
+    interval = max(interval, 0.5)
+    if output_json:
+        typer.echo(
+            json.dumps(
+                {
+                    "step": "pending_auth",
+                    "approval_url": token_page,
+                    "user_code": user_code or None,
+                    "expires_in": expires_in,
+                    "message": "Approve in browser and select the new shop",
+                }
+            )
+        )
+    else:
+        typer.echo(f"\nApproval URL:       {token_page}")
+        if user_code:
+            typer.echo(f"Verification code:  {user_code}")
+            typer.echo("Enter this code in the browser when prompted.")
+    if not no_browser:
+        # Skip the open call entirely when the server returned no URL.
+        browser_opened = bool(token_page) and webbrowser.open(token_page)
+        if not output_json:
+            if browser_opened:
+                typer.echo("(Browser opened automatically.)")
+            else:
+                typer.echo("(Could not open browser — open the URL above manually.)")
+    if not output_json:
+        typer.echo("Waiting for browser approval...")
+    # Poll against a wall-clock deadline so time spent inside slow poll
+    # requests counts toward the budget, not only the sleeps.
+    deadline = time.monotonic() + expires_in
+    while time.monotonic() < deadline:
+        time.sleep(interval)
+        try:
+            poll_result = poll_cli_device_login(
+                base_url=base_url,
+                device_code=device_code,
+                timeout_seconds=timeout_seconds,
+            )
+        except APIError as exc:
+            if exc.status_code == 428:
+                continue  # still pending
+            raise
+        raw_token = poll_result.get("token") or poll_result.get("access_token", "")
+        new_shop_id = poll_result.get("shop_id", "")
+        # Keep polling until the response carries both the token and the shop id.
+        if raw_token and new_shop_id:
+            return str(raw_token), str(new_shop_id)
+    raise APIError(
+        "Device auth timed out — the approval URL expired.",
+        code="DEVICE_AUTH_TIMEOUT",
+        hint="Run the setup command again to get a fresh URL.",
+        retryable=True,
+    )
+# ---------------------------------------------------------------------------
+# shop create
+# ---------------------------------------------------------------------------
+@app.command(
+    "create",
+    help=(
+        "Create a new shop using current auth credentials (must be an Applied team account). "
+        "The backend auto-mints an API token for the new shop — no second browser login needed."
+    ),
+)
+def create(
+    name: str = typer.Option(..., "--name", help="New shop name."),
+    no_auth: bool = typer.Option(
+        False,
+        "--no-auth",
+        help="Skip saving credentials for the new shop (useful if you will `auth login` separately).",
+    ),
+    base_url: Optional[str] = typer.Option(None, help="Applied base URL."),
+    shop_id: Optional[str] = typer.Option(None, help="Admin shop UUID (auth scope)."),
+    api_token: Optional[str] = typer.Option(None, help="Applied API token."),
+    output_json: bool = typer.Option(False, "--json", help="Emit JSON output."),
+) -> None:
+    # Flow: resolve the caller's credentials, create the shop, report the
+    # result, then (unless --no-auth) store the returned setup token under a
+    # profile named after the shop and make that profile active.
+    # Any APIError is rendered to stderr and turned into exit code 1.
+    try:
+        resolved_base_url, resolved_shop_id, resolved_token = resolve_runtime(
+            base_url=base_url, shop_id=shop_id, api_token=api_token
+        )
+    except APIError as exc:
+        typer.echo(render_api_error(exc, action="resolve runtime for shop create"), err=True)
+        raise typer.Exit(code=1) from exc
+    try:
+        shop_data = create_shop(
+            base_url=resolved_base_url,
+            shop_id=resolved_shop_id,
+            api_token=resolved_token,
+            name=name,
+        )
+    except APIError as exc:
+        typer.echo(render_api_error(exc, action="create shop"), err=True)
+        if exc.status_code in {400, 422}:
+            typer.echo(
+                "Hint: shop creation is restricted to Applied team accounts.\n"
+                "If your current credentials are not for an Applied team shop, run:\n"
+                "  applied-cli auth login  (and select an Applied team shop)\n"
+                "Then retry this command.",
+                err=True,
+            )
+        raise typer.Exit(code=1) from exc
+    # `or ""` matters: an explicit null in the response would otherwise become
+    # the truthy string "None" and be reported/saved as a real id or token.
+    new_shop_id = str(shop_data.get("id") or "")
+    setup_token = str(shop_data.get("setup_token") or "")
+    profile = _slugify(name)
+    if output_json:
+        out: dict[str, Any] = {
+            "shop_id": new_shop_id,
+            "name": name,
+            "token_minted": bool(setup_token),
+        }
+        typer.echo(json.dumps(out, indent=2))
+    else:
+        typer.echo(f"Shop created: {name} ({new_shop_id})")
+    if no_auth:
+        return
+    if not setup_token:
+        # Nothing to store; say so on stderr instead of finishing silently
+        # (stdout stays untouched so --json consumers are unaffected).
+        typer.echo(
+            "Warning: no setup token was returned for the new shop; credentials were not saved.\n"
+            "Run `applied-cli auth login` and select the new shop to authenticate.",
+            err=True,
+        )
+        return
+    save_credentials(
+        Credentials(
+            base_url=resolved_base_url,
+            shop_id=new_shop_id,
+            api_token=setup_token,
+        ),
+        profile=profile,
+        set_active=True,
+    )
+    if output_json:
+        typer.echo(
+            json.dumps({"credentials_saved": True, "profile": profile, "shop_id": new_shop_id})
+        )
+    else:
+        typer.echo(f"Credentials saved as profile '{profile}' and set as active.")
+# ---------------------------------------------------------------------------
+# shop setup
+# ---------------------------------------------------------------------------
+@app.command(
+    "setup",
+    help=(
+        "Full shop setup from a YAML/JSON spec file. "
+        "Creates the shop, configures agents, optionally uploads classified conversations, "
+        "runs insights, simulation, and syncs knowledge base. "
+        "Emits JSONL progress in --json mode so an AI agent can track each step.\n\n"
+        "Example: applied-cli shop setup --spec fabfitfun.yaml --json"
+    ),
+)
+def setup(  # noqa: C901 (acceptable complexity for orchestration command)
+    spec_path: str = typer.Option(..., "--spec", help="Path to YAML or JSON spec file."),
+    shop_id: Optional[str] = typer.Option(
+        None,
+        "--shop-id",
+        help="Existing shop UUID — skip shop creation and configure this shop instead.",
+    ),
+    base_url: Optional[str] = typer.Option(None, help="Applied base URL."),
+    api_token: Optional[str] = typer.Option(None, help="Applied API token."),
+    dry_run: bool = typer.Option(False, "--dry-run", help="Validate spec and show plan, no writes."),
+    yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt."),
+    output_json: bool = typer.Option(False, "--json", help="Emit JSONL progress output."),
+) -> None:
+    # --- 1. Load + validate spec ---
+    try:
+        spec = load_and_validate_shop_spec(spec_path)
+    except ValueError as exc:
+        typer.echo(f"Spec error: {exc}", err=True)
+        raise typer.Exit(code=1) from exc
+    shop_name: str = spec["name"]
+    agents_spec: list[dict[str, Any]] = spec["agents"]
+    csv_spec: dict[str, Any] | None = spec["conversations_csv"]
+    taxonomy_spec: dict[str, Any] | None = spec.get("taxonomy")
+    kb_spec: dict[str, Any] | None = spec["knowledge_base"]
+    sim_spec: dict[str, Any] | None = spec["simulation"]
+    has_csv = csv_spec is not None
+    has_taxonomy = taxonomy_spec is not None
+    has_kb = kb_spec is not None
+    has_sim = sim_spec is not None
+    _emit(
+        {
+            "step": "spec_loaded",
+            "name": shop_name,
+            "agent_count": len(agents_spec),
+            "has_csv": has_csv,
+            "has_taxonomy": has_taxonomy,
+            "has_kb": has_kb,
+            "has_simulation": has_sim,
+        },
+        output_json=output_json,
+    )
+    if not output_json:
+        typer.echo(f"\nSpec: {spec_path}")
+        typer.echo(f"  Shop:   {shop_name}")
+        typer.echo(f"  Agents: {', '.join(a['modality'] for a in agents_spec)}")
+        typer.echo(f"  CSV:    {'yes' if has_csv else 'no'}")
+        typer.echo(f"  KB:     {'yes' if has_kb else 'no'}")
+        typer.echo(f"  Sim:    {'yes' if has_sim else 'no'}")
+        if dry_run:
+            typer.echo("\n[dry-run] No API calls will be made.\n")
+        elif not yes:
+            typer.confirm("Proceed?", abort=True)
+    # Resolve admin (current user) credentials for shop creation
+    try:
+        resolved_base_url, admin_shop_id, resolved_admin_token = resolve_runtime(
+            base_url=base_url, shop_id=None, api_token=api_token
+        )
+    except APIError as exc:
+        typer.echo(render_api_error(exc, action="resolve runtime for shop setup"), err=True)
+        raise typer.Exit(code=1) from exc
+    # --- 2. Create shop (or use existing) ---
+    new_shop_id: str
+    new_shop_token: str
+    if shop_id:
+        # Use existing shop
+        new_shop_id = shop_id
+        new_shop_token = resolved_admin_token  # assume current token is for this shop
+        _emit(
+            {"step": "shop_reused", "shop_id": new_shop_id},
+            output_json=output_json,
+        )
+    else:
+        if dry_run:
+            new_shop_id = "(dry-run)"
+            new_shop_token = "(dry-run)"
+            _emit(
+                {"step": "shop_created", "shop_id": new_shop_id, "name": shop_name, "dry_run": True},
+                output_json=output_json,
+            )
+        else:
+            try:
+                shop_data = create_shop(
+                    base_url=resolved_base_url,
+                    shop_id=admin_shop_id,
+                    api_token=resolved_admin_token,
+                    name=shop_name,
+                )
+            except APIError as exc:
+                typer.echo(render_api_error(exc, action="create shop"), err=True)
+                if exc.status_code in {400, 422}:
+                    typer.echo(
+                        "Hint: shop creation is restricted to Applied team accounts.\n"
+                        "Make sure you are logged into an Applied team shop:\n"
+                        "  applied-cli auth login",
+                        err=True,
+                    )
+                raise typer.Exit(code=1) from exc
+            new_shop_id = str(shop_data.get("id", ""))
+            new_shop_token = str(shop_data.get("setup_token", ""))
+            if not new_shop_token:
+                # Fallback: start device auth so user can mint a token for the new shop
+                if not output_json:
+                    typer.echo(
+                        "\nNo auto-minted token returned. Starting device auth for new shop..."
+                    )
+                try:
+                    new_shop_token, confirmed_shop_id = _device_auth_for_shop(
+                        base_url=resolved_base_url,
+                        output_json=output_json,
+                    )
+                    new_shop_id = confirmed_shop_id
+                except APIError as exc:
+                    typer.echo(render_api_error(exc, action="device auth for new shop"), err=True)
+                    raise typer.Exit(code=1) from exc
+            _emit(
+                {
+                    "step": "shop_created",
+                    "shop_id": new_shop_id,
+                    "name": shop_name,
+                    "token_minted": bool(new_shop_token),
+                },
+                output_json=output_json,
+            )
+            # Save credentials for new shop
+            profile = _slugify(shop_name)
+            save_credentials(
+                Credentials(
+                    base_url=resolved_base_url,
+                    shop_id=new_shop_id,
+                    api_token=new_shop_token,
+                ),
+                profile=profile,
+                set_active=True,
+            )
+            _emit(
+                {"step": "credentials_saved", "profile": profile, "shop_id": new_shop_id},
+                output_json=output_json,
+            )
+    # From here on, use the new shop's credentials
+    active_shop_id = new_shop_id
+    active_token = new_shop_token
+    # --- 3. Find + configure agents ---
+    if not dry_run:
+        try:
+            existing_agents = list_agents(
+                base_url=resolved_base_url,
+                shop_id=active_shop_id,
+                api_token=active_token,
+                limit=100,
+            )
+        except APIError as exc:
+            typer.echo(render_api_error(exc, action="list agents for new shop"), err=True)
+            raise typer.Exit(code=1) from exc
+        default_agent_id = str(existing_agents[0].get("id")) if existing_agents else None
+    else:
+        default_agent_id = "(dry-run)"
+    configured_agents: list[dict[str, Any]] = []
+    for idx, agent_spec in enumerate(agents_spec):
+        agent_payload: dict[str, Any] = {
+            "modality": agent_spec["modality"],
+            "name": agent_spec["name"],
+            "type": "Customer Support",
+            "auto_reply": agent_spec.get("auto_reply", True),
+        }
+        if agent_spec.get("description"):
+            agent_payload["description"] = agent_spec["description"]
+        if agent_spec.get("guardrail"):
+            agent_payload["guardrail"] = agent_spec["guardrail"]
+        if agent_spec.get("escalation_mode"):
+            agent_payload["escalation_mode"] = agent_spec["escalation_mode"]
+        if agent_spec.get("response_delay_in_seconds") is not None:
+            agent_payload["response_delay_in_seconds"] = agent_spec["response_delay_in_seconds"]
+        responses_spec: list[dict[str, Any]] = agent_spec.get("responses") or []
+        if dry_run:
+            agent_id = f"(dry-run-agent-{idx})"
+            response_summary = {"created": len(responses_spec), "updated": 0, "unchanged": 0}
+        else:
+            if idx == 0 and default_agent_id:
+                # Update the default agent created with the shop
+                try:
+                    updated = update_agent(
+                        base_url=resolved_base_url,
+                        shop_id=active_shop_id,
+                        api_token=active_token,
+                        agent_id=default_agent_id,
+                        payload=agent_payload,
+                    )
+                    agent_id = str(updated.get("id", default_agent_id))
+                except APIError as exc:
+                    typer.echo(render_api_error(exc, action=f"update agent {idx}"), err=True)
+                    raise typer.Exit(code=1) from exc
+            else:
+                # For additional agents, match by modality before creating a new one
+                spec_modality = agent_spec["modality"].lower()
+                matched_existing = next(
+                    (
+                        ea for ea in existing_agents[1:]
+                        if str(ea.get("modality", "")).lower() == spec_modality
+                    ),
+                    None,
+                )
+                if matched_existing:
+                    try:
+                        updated = update_agent(
+                            base_url=resolved_base_url,
+                            shop_id=active_shop_id,
+                            api_token=active_token,
+                            agent_id=str(matched_existing["id"]),
+                            payload=agent_payload,
+                        )
+                        agent_id = str(updated.get("id", matched_existing["id"]))
+                    except APIError as exc:
+                        typer.echo(render_api_error(exc, action=f"update agent {idx}"), err=True)
+                        raise typer.Exit(code=1) from exc
+                else:
+                    # No existing agent with this modality — create a new one
+                    try:
+                        created_agent = create_agent(
+                            base_url=resolved_base_url,
+                            shop_id=active_shop_id,
+                            api_token=active_token,
+                            payload=agent_payload,
+                        )
+                        agent_id = str(created_agent.get("id", ""))
+                    except APIError as exc:
+                        typer.echo(render_api_error(exc, action=f"create agent {idx}"), err=True)
+                        raise typer.Exit(code=1) from exc
+            # Upsert responses
+            if responses_spec:
+                try:
+                    response_summary = upsert_inline_responses(
+                        base_url=resolved_base_url,
+                        shop_id=active_shop_id,
+                        api_token=active_token,
+                        agent_id=agent_id,
+                        response_rows=responses_spec,
+                        dry_run=False,
+                    )
+                except (APIError, Exception) as exc:
+                    typer.echo(f"Warning: failed to upsert responses for agent {idx}: {exc}", err=True)
+                    response_summary = {"created": 0, "updated": 0, "unchanged": 0}
+            else:
+                response_summary = {"created": 0, "updated": 0, "unchanged": 0}
+        # Create silent escalation flow for email agents
+        escalation_flow_id: str | None = None
+        if agent_spec["modality"].lower() == "email" and not dry_run:
+            try:
+                flow_data = create_escalation_flow(
+                    base_url=resolved_base_url,
+                    shop_id=active_shop_id,
+                    api_token=active_token,
+                    agent_id=agent_id,
+                )
+                escalation_flow_id = str(flow_data.get("id", ""))
+                _emit(
+                    {
+                        "step": "escalation_flow_created",
+                        "agent_id": agent_id,
+                        "flow_id": escalation_flow_id,
+                    },
+                    output_json=output_json,
+                )
+            except APIError as exc:
+                typer.echo(
+                    f"Warning: failed to create escalation flow for email agent: {exc}",
+                    err=True,
+                )
+        configured_agents.append({"id": agent_id, "modality": agent_spec["modality"]})
+        _emit(
+            {
+                "step": "agent_configured",
+                "index": idx,
+                "agent_id": agent_id,
+                "modality": agent_spec["modality"],
+                "name": agent_spec["name"],
+                "responses_created": response_summary["created"],
+                "responses_updated": response_summary["updated"],
+                "dry_run": dry_run,
+            },
+            output_json=output_json,
+        )
+    # Determine the "primary" agent (chat > email > first) for conversation import
+    def _pick_agent_for_csv() -> str:
+        target_modality = (csv_spec or {}).get("agent_modality")
+        if target_modality:
+            for a in configured_agents:
+                if a["modality"].lower() == target_modality.lower():
+                    return a["id"]
+        for a in configured_agents:
+            if a["modality"].lower() == "chat":
+                return a["id"]
+        return configured_agents[0]["id"] if configured_agents else ""
+    # --- 4. Conversations CSV upload ---
+    csv_imported = False  # tracks actual upload success, not just spec presence
+    if not has_csv:
+        _emit(
+            {"step": "conversations_imported", "skipped": True, "reason": "no_csv"},
+            output_json=output_json,
+        )
+    else:
+        assert csv_spec is not None
+        csv_file_path = csv_spec.get("file_path")
+        csv_url = csv_spec.get("url")
+        process_labels = csv_spec.get("process_labels", True)
+        csv_agent_id = _pick_agent_for_csv()
+        # Validate file exists before making any API calls
+        if csv_file_path and not Path(csv_file_path).exists():
+            typer.echo(
+                f"Error: conversations_csv.file_path '{csv_file_path}' not found.", err=True
+            )
+            raise typer.Exit(code=1)
+        # Apply column_map: rename CSV columns to match bulk-upload format
+        column_map = csv_spec.get("column_map") or {}
+        upload_file_path = csv_file_path
+        if csv_file_path and column_map:
+            upload_file_path = _apply_column_map(csv_file_path, column_map)
+        if dry_run:
+            csv_imported = True  # dry-run counts as success for downstream steps
+            _emit(
+                {
+                    "step": "conversations_imported",
+                    "agent_id": csv_agent_id,
+                    "source": csv_file_path or csv_url,
+                    "dry_run": True,
+                },
+                output_json=output_json,
+            )
+        else:
+            try:
+                import_result = import_conversations_bulk(
+                    base_url=resolved_base_url,
+                    shop_id=active_shop_id,
+                    api_token=active_token,
+                    agent_id=csv_agent_id,
+                    file_path=upload_file_path,
+                    url=csv_url,
+                    process_labels=process_labels,
+                )
+                csv_imported = True
+                _emit(
+                    {
+                        "step": "conversations_imported",
+                        "agent_id": csv_agent_id,
+                        "status": import_result.get("status", "processing"),
+                        "queued": import_result.get("queued") or import_result.get("count"),
+                    },
+                    output_json=output_json,
+                )
+            except APIError as exc:
+                typer.echo(render_api_error(exc, action="import conversations CSV"), err=True)
+                typer.echo("Warning: conversation import failed — continuing setup.", err=True)
+                _emit(
+                    {
+                        "step": "conversations_imported",
+                        "skipped": True,
+                        "reason": "import_error",
+                        "error": str(exc),
+                    },
+                    output_json=output_json,
+                )
+    # --- 5. Taxonomy ---
+    if not has_taxonomy:
+        _emit(
+            {"step": "taxonomy_uploaded", "skipped": True, "reason": "not_in_spec"},
+            output_json=output_json,
+        )
+        if not output_json:
+            typer.echo("  ⤼ taxonomy upload skipped (not in spec)")
+    else:
+        assert taxonomy_spec is not None
+        taxonomy_file = taxonomy_spec["file_path"]
+        if dry_run:
+            _emit(
+                {"step": "taxonomy_uploaded", "file": taxonomy_file, "dry_run": True},
+                output_json=output_json,
+            )
+        else:
+            try:
+                topics_list, intents_list = _parse_taxonomy_py(taxonomy_file)
+                if not output_json:
+                    typer.echo(
+                        f"  Uploading taxonomy: {len(topics_list)} topics, "
+                        f"{len(intents_list)} intents..."
+                    )
+                # Create topics first, collect name → id map
+                topic_id_map: dict[str, str] = {}
+                for topic in topics_list:
+                    t_result = create_property_choice(
+                        base_url=resolved_base_url,
+                        shop_id=active_shop_id,
+                        api_token=active_token,
+                        name=topic["name"],
+                        description=topic.get("description", ""),
+                    )
+                    topic_id_map[topic["name"]] = str(t_result["id"])
+                # Create intents under their parent topics
+                intents_created = 0
+                for intent in intents_list:
+                    parent_id = topic_id_map.get(intent.get("topic", ""))
+                    if not parent_id:
+                        continue  # skip orphaned intents
+                    create_property_choice(
+                        base_url=resolved_base_url,
+                        shop_id=active_shop_id,
+                        api_token=active_token,
+                        name=intent["name"],
+                        description=intent.get("description", ""),
+                        parent_choice_id=parent_id,
+                    )
+                    intents_created += 1
+                _emit(
+                    {
+                        "step": "taxonomy_uploaded",
+                        "topics": len(topic_id_map),
+                        "intents": intents_created,
+                    },
+                    output_json=output_json,
+                )
+                if not output_json:
+                    typer.echo(
+                        f"  ✓ taxonomy uploaded ({len(topic_id_map)} topics, "
+                        f"{intents_created} intents)"
+                    )
+            except Exception as exc:
+                typer.echo(f"Warning: taxonomy upload failed — {exc}", err=True)
+                _emit(
+                    {
+                        "step": "taxonomy_uploaded",
+                        "skipped": True,
+                        "reason": "taxonomy_error",
+                        "error": str(exc),
+                    },
+                    output_json=output_json,
+                )
+    # --- 6. Insights (formerly 5) ---
+    if not csv_imported:
+        _emit(
+            {
+                "step": "insights_triggered",
+                "skipped": True,
+                "reason": "no_csv" if not has_csv else "import_failed",
+            },
+            output_json=output_json,
+        )
+    else:
+        if dry_run:
+            _emit(
+                {"step": "insights_triggered", "dry_run": True},
+                output_json=output_json,
+            )
+        else:
+            try:
+                insights_result = insights_generate(
+                    base_url=resolved_base_url,
+                    shop_id=active_shop_id,
+                    api_token=active_token,
+                    instruction="Summarize top topics, intents, and trends from uploaded conversations.",
+                )
+                report_id = (
+                    insights_result.get("reportId")
+                    or insights_result.get("id")
+                    or insights_result.get("report_id")
+                    or insights_result.get("task_id")
+                )
+                _emit(
+                    {"step": "insights_triggered", "report_id": report_id},
+                    output_json=output_json,
+                )
+            except APIError as exc:
+                typer.echo(render_api_error(exc, action="trigger insights"), err=True)
+                _emit(
+                    {
+                        "step": "insights_triggered",
+                        "skipped": True,
+                        "reason": "insights_error",
+                        "error": str(exc),
+                    },
+                    output_json=output_json,
+                )
+    # --- 6. Simulation ---
+    if not has_sim:
+        _emit(
+            {"step": "simulation_started", "skipped": True, "reason": "not_in_spec"},
+            output_json=output_json,
+        )
+    else:
+        # Auto-detect superuser status using the original (admin) credentials.
+        # No separate --admin-token needed — if the logged-in user is a superuser,
+        # their existing API token works for populate_demo_shop.
+        is_superuser = False
+        if not dry_run:
+            try:
+                is_superuser = check_superuser(
+                    base_url=resolved_base_url,
+                    shop_id=admin_shop_id,
+                    api_token=resolved_admin_token,
+                )
+            except APIError:
+                pass  # treat as not superuser if check fails
+        if not dry_run and not is_superuser:
+            _emit(
+                {
+                    "step": "simulation_started",
+                    "skipped": True,
+                    "reason": "not_superuser",
+                    "hint": "Simulation requires a superuser account. Log in with a superuser.",
+                },
+                output_json=output_json,
+            )
+        else:
+            assert sim_spec is not None
+            if dry_run:
+                _emit(
+                    {"step": "simulation_started", "dry_run": True},
+                    output_json=output_json,
+                )
+            else:
+                try:
+                    sim_result = populate_demo_shop(
+                        base_url=resolved_base_url,
+                        shop_id=admin_shop_id,
+                        api_token=resolved_admin_token,
+                        target_shop_id=active_shop_id,
+                        distribution=sim_spec["distribution"],
+                        date_from=sim_spec["date_from"],
+                        date_to=sim_spec["date_to"],
+                        num_conversations=sim_spec["num_conversations"],
+                        delete_previous=sim_spec.get("delete_previous", False),
+                    )
+                    _emit(
+                        {
+                            "step": "simulation_started",
+                            "enqueued": sim_result.get("enqueued"),
+                            "agent_count": sim_result.get("agent_count"),
+                            "deleted": sim_result.get("deleted", 0),
+                        },
+                        output_json=output_json,
+                    )
+                except APIError as exc:
+                    typer.echo(render_api_error(exc, action="populate demo shop"), err=True)
+                    _emit(
+                        {
+                            "step": "simulation_started",
+                            "skipped": True,
+                            "reason": "simulation_error",
+                            "error": str(exc),
+                        },
+                        output_json=output_json,
+                    )
+    # --- 7. Knowledge base ---
+    if not has_kb:
+        _emit(
+            {"step": "knowledge_base_synced", "skipped": True, "reason": "not_in_spec"},
+            output_json=output_json,
+        )
+    else:
+        assert kb_spec is not None
+        kb_url = kb_spec["url"]
+        kb_title = kb_spec.get("title")
+        if dry_run:
+            _emit(
+                {"step": "knowledge_base_synced", "url": kb_url, "dry_run": True},
+                output_json=output_json,
+            )
+        else:
+            try:
+                cs_result = create_content_source(
+                    base_url=resolved_base_url,
+                    shop_id=active_shop_id,
+                    api_token=active_token,
+                    url=kb_url,
+                    title=kb_title,
+                )
+                _emit(
+                    {
+                        "step": "knowledge_base_synced",
+                        "content_source_id": cs_result.get("id"),
+                        "url": kb_url,
+                    },
+                    output_json=output_json,
+                )
+            except APIError as exc:
+                typer.echo(render_api_error(exc, action="create content source"), err=True)
+                _emit(
+                    {
+                        "step": "knowledge_base_synced",
+                        "skipped": True,
+                        "reason": "kb_error",
+                        "error": str(exc),
+                    },
+                    output_json=output_json,
+                )
+    # --- 8. Complete ---
+    complete_payload: dict[str, Any] = {
+        "step": "complete",
+        "shop_id": active_shop_id,
+        "agents": configured_agents,
+        "dry_run": dry_run,
+    }
+    if not shop_id:
+        complete_payload["profile"] = _slugify(shop_name)
+    _emit(complete_payload, output_json=output_json)
+    if not output_json:
+        typer.echo(f"\n✓ Setup complete for shop: {shop_name}")
+        typer.echo(f"  Shop ID: {active_shop_id}")
+        for a in configured_agents:
+            typer.echo(f"  Agent ({a['modality']}): {a['id']}")
+        if not dry_run and not shop_id:
+            typer.echo(
+                f"\nCredentials saved as profile '{_slugify(shop_name)}'.\n"
+                f"Run `applied-cli auth status` to confirm."
+            )
+# ---------------------------------------------------------------------------
+# CSV helpers
+# ---------------------------------------------------------------------------
+def _apply_column_map(file_path: str, column_map: dict[str, str]) -> str:
+    """Rename CSV header columns per column_map and return the path to use.
+    Matching is case-insensitive on the original column name. Only the header
+    row is rewritten; data rows are copied positionally, so duplicate column
+    names and rows carrying extra trailing values are written through intact.
+    Rows shorter than the header are padded with empty strings and blank
+    lines are dropped.
+    Args:
+        file_path: Path of the CSV file to read.
+        column_map: Mapping of original column name -> new column name.
+    Returns:
+        The path of a new temporary CSV file when at least one column was
+        renamed (created with delete=False, so the caller is responsible for
+        removing it). Otherwise file_path unchanged: when column_map is
+        empty, the file cannot be read or parsed, it has no data rows, or no
+        header matches.
+    """
+    import csv as csv_module
+    import tempfile
+    if not column_map:
+        return file_path
+    # Build a case-insensitive lookup: lower(original) → target
+    rename: dict[str, str] = {k.lower(): v for k, v in column_map.items()}
+    try:
+        # utf-8-sig strips a leading BOM that would otherwise be glued onto
+        # the first column name and stop it from matching; files without a
+        # BOM decode exactly as they do with plain utf-8.
+        with open(file_path, newline="", encoding="utf-8-sig") as f:
+            # csv.reader yields [] for blank lines; those carry no data.
+            rows = [row for row in csv_module.reader(f) if row]
+    except (OSError, ValueError, csv_module.Error):
+        # Unreadable path, undecodable bytes (UnicodeDecodeError is a
+        # ValueError) or malformed CSV.
+        return file_path  # fallback: leave unchanged
+    if len(rows) < 2:
+        return file_path  # empty file or header only
+    header, data_rows = rows[0], rows[1:]
+    new_header = [rename.get(col.lower(), col) for col in header]
+    if new_header == header:
+        return file_path  # nothing to rename
+    width = len(header)
+    # newline="" keeps the text layer from translating the writer's "\r\n"
+    # row terminators a second time ("\r\r\n" on Windows).
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".csv", delete=False, encoding="utf-8", newline=""
+    ) as tmp:
+        writer = csv_module.writer(tmp)
+        writer.writerow(new_header)
+        for row in data_rows:
+            # A negative pad count multiplies to [], so long rows pass through.
+            writer.writerow(row + [""] * (width - len(row)))
+    return tmp.name
+# ---------------------------------------------------------------------------
+# shop test helpers
+# ---------------------------------------------------------------------------
+# Regex for parsing JSON objects out of the SSE completion stream.
+# Same pattern used by chat.py (per the original note; chat.py is not shown here).
+# NOTE: the nesting is spelled out to a fixed depth — nine levels of braces as
+# written below — so an object nested more deeply will not match as a whole.
+# Brace characters are counted wherever they occur, including inside JSON
+# string values, because the pattern has no notion of quoted strings.
+_COMPLETION_RE = re.compile(
+    r"(\{(?:(\{(?:(\{(?:(\{(?:(\{(?:(\{(?:(\{(?:(\{(?:(\{[^}{]*\})"
+    r"|[^}{])*\})|[^}{])*\})|[^}{])*\})|[^}{])*\})|[^}{])*\})|[^}{])*\})"
+    r"|[^}{])*\})|[^}{])*\})"
+)
+def _create_test_conversation(
+    client: httpx.Client,
+    *,
+    base_url: str,
+    agent_id: str,
+    channel: str,
+) -> str:
+    """Create a test conversation and return its id.
+    POSTs to ``{base_url}/v1/c/`` with the conversation flagged as a test.
+    Only a Content-Type header is sent by this function.
+    Args:
+        client: HTTP client used to send the request.
+        base_url: API base URL that the ``/v1/c/`` path is appended to.
+        agent_id: Agent the conversation is created for.
+        channel: "email" and "sms" are forwarded as the payload ``type``;
+            any other value sends no ``type`` field.
+    Returns:
+        The new conversation id as a string.
+    Raises:
+        APIError: On transport failure (marked retryable), an HTTP status
+            >= 400, a response body that is not valid JSON, or a body
+            without an ``id``.
+    """
+    payload: dict[str, Any] = {
+        "agent_id": agent_id,
+        "is_test": True,
+        "metadata": {"isTest": True, "source": "applied-cli-shop-test"},
+    }
+    if channel in ("email", "sms"):
+        payload["type"] = channel
+    try:
+        response = client.post(
+            f"{base_url}/v1/c/",
+            json=payload,
+            headers={"Content-Type": "application/json"},
+            timeout=15.0,
+        )
+    except httpx.HTTPError as exc:
+        raise APIError(
+            f"Test conversation creation failed: {exc}",
+            code="NETWORK_ERROR",
+            retryable=True,
+        ) from exc
+    if response.status_code >= 400:
+        raise APIError(
+            f"Test conversation creation failed ({response.status_code}).",
+            status_code=response.status_code,
+            code="CONVERSATION_CREATE_FAILED",
+        )
+    try:
+        body = response.json()
+    except ValueError as exc:
+        # A non-JSON success body would otherwise escape as a raw decode
+        # error instead of the APIError this function documents.
+        raise APIError(
+            "Test conversation created but the response was not valid JSON.",
+            status_code=response.status_code,
+            code="CONVERSATION_CREATE_FAILED",
+        ) from exc
+    # Guard against a JSON array/scalar body, which has no .get().
+    conv_id = body.get("id") if isinstance(body, dict) else None
+    if not conv_id:
+        raise APIError("Test conversation created but no id returned.")
+    return str(conv_id)
+def _stream_completion_silent(
+    client: httpx.Client,
+    *,
+    base_url: str,
+    shop_id: str,
+    api_token: str,
+    agent_id: str,
+    payload: dict[str, Any],
+) -> str:
+    """Stream a completion request without printing and return the reply text.
+    The response is read chunk by chunk; every JSON object that
+    _COMPLETION_RE finds in the accumulated buffer and that parses cleanly
+    contributes its non-empty string ``content``. Returns "" for an HTTP
+    status >= 400, and whatever text was gathered so far if any exception
+    interrupts the stream.
+    """
+    request_headers = {
+        "Authorization": f"Bearer {api_token}",
+        "X-Shop-Id": shop_id,
+        "Content-Type": "application/json",
+    }
+    pieces: list[str] = []
+    pending = ""
+    saw_complete = False
+    grace_chunk_taken = False
+    try:
+        with client.stream(
+            "POST",
+            f"{base_url}/v1/agents/{agent_id}/complete/",
+            headers=request_headers,
+            json=payload,
+            timeout=60.0,
+        ) as response:
+            if response.status_code >= 400:
+                # Return empty string — escalation may produce 200 with no body or a 4xx
+                return ""
+            for chunk in response.iter_text():
+                if not chunk:
+                    continue
+                pending += chunk
+                consumed_upto = 0
+                for found in _COMPLETION_RE.finditer(pending):
+                    candidate = found.group(1)
+                    if not candidate:
+                        continue
+                    try:
+                        event = json.loads(candidate)
+                    except json.JSONDecodeError:
+                        continue
+                    # Only successfully parsed objects advance the cut point.
+                    consumed_upto = found.end()
+                    text = event.get("content")
+                    if isinstance(text, str) and text:
+                        pieces.append(text)
+                    if bool(event.get("content_complete")):
+                        saw_complete = True
+                if consumed_upto > 0:
+                    pending = pending[consumed_upto:]
+                if saw_complete:
+                    # Read exactly one further chunk after completion, then stop.
+                    if grace_chunk_taken:
+                        break
+                    grace_chunk_taken = True
+    except Exception:
+        pass  # Network errors return whatever was accumulated
+    return "".join(pieces)
+def _run_test_conversation(
+    client: httpx.Client,
+    *,
+    base_url: str,
+    shop_id: str,
+    api_token: str,
+    agent_id: str,
+    channel: str,
+    opening_message: str,
+    max_turns: int = 3,
+) -> str:
+    """Drive a multi-turn test conversation and return its conversation id.
+    The opening message is sent first. While the agent's reply ends with
+    '?', a canned follow-up is appended and another completion requested,
+    until the agent answers without a trailing question, returns no text,
+    or max_turns completions have been made.
+    """
+    def _entry(role: str, text: str, entity_type: str) -> dict[str, Any]:
+        # One transcript message; ``content`` and ``text`` carry the same string.
+        return {
+            "id": str(uuid.uuid4()),
+            "role": role,
+            "content": text,
+            "text": text,
+            "format": "TEXT",
+            "entity": {"type": entity_type},
+        }
+    conversation_id = _create_test_conversation(
+        client,
+        base_url=base_url,
+        agent_id=agent_id,
+        channel=channel,
+    )
+    history: list[dict[str, Any]] = [_entry("user", opening_message, "user")]
+    final_turn = max_turns - 1
+    for turn_index in range(max_turns):
+        request_body: dict[str, Any] = {
+            "conversation_id": conversation_id,
+            "context": "EVALUATE",
+            "transcript": history,
+            "metadata": {"source": "applied-cli-shop-test", "isTest": True},
+            "draft": False,
+        }
+        reply = _stream_completion_silent(
+            client,
+            base_url=base_url,
+            shop_id=shop_id,
+            api_token=api_token,
+            agent_id=agent_id,
+            payload=request_body,
+        )
+        if not reply.strip():
+            break  # No response — likely escalated or an error
+        history.append(_entry("assistant", reply, "agent"))
+        # Finished when the agent did not ask anything, or no turns remain.
+        asked_question = reply.strip().endswith("?")
+        if not asked_question or turn_index >= final_turn:
+            break
+        # Agent asked a follow-up — send a generic clarifying reply to push forward
+        nudge = "I don't have that specific information available. Can you provide a general answer?"
+        history.append(_entry("user", nudge, "user"))
+    return conversation_id
+def _rate_response_test(
+    *,
+    messages: list[dict[str, Any]],
+    references: list[dict[str, Any]],
+    response_type: str,
+    expected_answer: str = "",
+) -> dict[str, Any]:
+    """Rate a test conversation for a specific response type.
+    Starts from the generic _auto_rate result of the rate command, then
+    adjusts it for escalation and Q&A tests:
+    - "escalate": no assistant message at all is a pass (5.0); a reply
+      containing an escalation keyword is a pass (4.0); any other reply
+      keeps the generic verdict and only gets a feedback prefix.
+    - "qa" with an expected answer and at least one assistant message: the
+      share of expected-answer keywords found in the latest reply is
+      reported; below 30% the test fails and the score is capped at 2.0.
+    Other response types return the generic rating untouched.
+    Args:
+        messages: Conversation messages; entries with role "assistant" are
+            treated as agent replies.
+        references: Passed through to _auto_rate.
+        response_type: Test type, e.g. "escalate" or "qa".
+        expected_answer: Reference answer from the spec (Q&A tests only).
+    Returns:
+        The rating dict from _auto_rate, updated in place.
+    """
+    from applied_cli.commands.rate import _auto_rate
+    def _latest_reply_text(replies: list[dict[str, Any]]) -> str:
+        # Lower-cased text of the most recent assistant message.
+        last = replies[-1]
+        return str(last.get("text") or last.get("content") or "").lower()
+    rating = _auto_rate(messages=messages, references=references)
+    if response_type == "escalate":
+        # For escalation tests the expected behaviour is that the agent does NOT
+        # send a standard reply — it routes the conversation to a human instead.
+        assistant_messages = [m for m in messages if m.get("role") == "assistant"]
+        if not assistant_messages:
+            # Perfectly silent escalation
+            rating["pass_status"] = "pass"
+            rating["csat_score"] = 5.0
+            rating["feedback"] = (
+                "Escalation trigger processed: no auto-reply generated (expected behaviour)."
+            )
+        else:
+            # Agent replied — check whether the reply acknowledges the escalation.
+            # Matching is by substring, so e.g. "agents" also counts.
+            latest_text = _latest_reply_text(assistant_messages)
+            escalation_keywords = (
+                "human", "agent", "team", "specialist", "representative",
+                "connect", "transfer", "route", "support",
+            )
+            if any(kw in latest_text for kw in escalation_keywords):
+                rating["pass_status"] = "pass"
+                rating["csat_score"] = 4.0
+                rating["feedback"] = (
+                    "Escalation handled: agent acknowledged escalation request."
+                )
+            else:
+                # pass_status / csat_score stay as _auto_rate set them; only
+                # the feedback is annotated.
+                rating["feedback"] = (
+                    "Escalation trigger sent; agent replied without escalation keywords. "
+                    + rating["feedback"]
+                )
+    elif response_type == "qa" and expected_answer:
+        # Check keyword overlap between the spec's expected answer and the actual reply
+        assistant_messages = [m for m in messages if m.get("role") == "assistant"]
+        if assistant_messages:
+            latest_text = _latest_reply_text(assistant_messages)
+            # Collect significant words (>4 chars) from the expected answer.
+            # Punctuation is stripped BEFORE the length test: otherwise a
+            # token made only of punctuation collapses to "", which is "in"
+            # every string and would inflate the match rate.
+            stripped_words = (
+                w.lower().strip(".,!?;:\"'()") for w in expected_answer.split()
+            )
+            keywords = list(
+                dict.fromkeys(w for w in stripped_words if len(w) > 4)
+            )[:15]
+            if keywords:
+                matches = sum(1 for kw in keywords if kw in latest_text)
+                match_pct = matches / len(keywords)
+                if match_pct >= 0.3:
+                    rating["feedback"] = (
+                        f"Response matches ~{match_pct:.0%} of expected answer keywords. "
+                        + rating["feedback"]
+                    )
+                else:
+                    rating["pass_status"] = "fail"
+                    rating["csat_score"] = float(
+                        min(rating.get("csat_score") or 2.0, 2.0)
+                    )
+                    rating["feedback"] = (
+                        f"Low keyword match ({match_pct:.0%}) with expected Q&A answer. "
+                        + rating["feedback"]
+                    )
+    return rating
+def _get_or_create_test_scenario_and_run(
+    *,
+    base_url: str,
+    shop_id: str,
+    api_token: str,
+    agent_id: str,
+    benchmark_id: Optional[str],
+    scenario_name: str,
+    conversation_id: str,
+) -> tuple[str, str]:
+    """Find-or-create a named scenario and always create a fresh run.
+    Scenarios are keyed by name so repeated `shop test` runs accumulate as
+    benchmark runs under the same scenario definition. benchmark_id is only
+    used when a new scenario has to be created.
+    Returns:
+        (scenario_id, run_id), both non-empty strings.
+    Raises:
+        APIError: If no scenario id could be found or created, or the newly
+            created run came back without an id.
+    """
+    # Look for an existing scenario with this exact name
+    existing = list_conversation_scenarios(
+        base_url=base_url,
+        shop_id=shop_id,
+        api_token=api_token,
+        agent_id=agent_id,
+        name=scenario_name,
+    )
+    # The listing is re-checked for an exact (stripped) name match; the
+    # first one wins.
+    matched = next(
+        (sc for sc in existing if str(sc.get("name", "")).strip() == scenario_name),
+        None,
+    )
+    scenario_id = str(matched.get("id") or "") if matched is not None else ""
+    if not scenario_id:
+        scenario = create_conversation_scenario(
+            base_url=base_url,
+            shop_id=shop_id,
+            api_token=api_token,
+            agent_id=agent_id,
+            benchmark_id=benchmark_id,
+            name=scenario_name,
+            input_conversation_id=conversation_id,
+        )
+        scenario_id = str(scenario.get("id") or "")
+    if not scenario_id:
+        raise APIError(
+            "Failed to create or find test scenario.",
+            code="SCENARIO_CREATE_FAILED",
+        )
+    # Always create a new run for this execution
+    run = create_scenario_run(
+        base_url=base_url,
+        shop_id=shop_id,
+        api_token=api_token,
+        scenario_id=scenario_id,
+        output_conversation_id=conversation_id,
+    )
+    run_id = str(run.get("id") or "")
+    if not run_id:
+        # Same treatment as a missing scenario id: an empty run id would only
+        # fail later, less clearly, when the caller tries to use it.
+        raise APIError(
+            "Scenario run created but no id returned.",
+            code="SCENARIO_RUN_CREATE_FAILED",
+        )
+    return scenario_id, run_id
+# ---------------------------------------------------------------------------
+# taxonomy helpers
+# ---------------------------------------------------------------------------
+def _parse_taxonomy_py(file_path: str) -> tuple[list[dict], list[dict]]:
+    """Execute a generated_taxonomy.py file and extract TOPICS_LIST / INTENTS_LIST.
+    Returns (topics, intents) where each item is a dict with at minimum a
+    ``name`` key and optionally ``description`` / ``topic`` (for intents).
+    A name that is missing from the file, or explicitly set to None, yields
+    an empty list; a tuple is converted to a list. Item contents are not
+    validated here.
+    Raises:
+        OSError: If the file cannot be read.
+        Exception: Anything raised while compiling or running the file
+            (e.g. SyntaxError) propagates unchanged.
+    """
+    content = Path(file_path).read_text(encoding="utf-8")
+    namespace: dict = {}
+    # SECURITY NOTE(review): exec() runs the file with the full privileges of
+    # the CLI process. Only point this at taxonomy files you generated or
+    # trust; a parser that does not execute code would be needed for
+    # untrusted input.
+    exec(compile(content, file_path, "exec"), namespace)  # noqa: S102
+    extracted = []
+    for key in ("TOPICS_LIST", "INTENTS_LIST"):
+        value = namespace.get(key)
+        if value is None:
+            value = []  # absent or explicitly None -> nothing to upload
+        elif isinstance(value, tuple):
+            value = list(value)
+        extracted.append(value)
+    topics: list[dict] = extracted[0]
+    intents: list[dict] = extracted[1]
+    return topics, intents
+# ---------------------------------------------------------------------------
+# template
+# ---------------------------------------------------------------------------
+@app.command(
+    "template",
+    help=(
+        "Print the demo spec template to stdout.\n\n"
+        "Pipe to a file, fill in the placeholders, then run `shop setup --spec <file>`.\n\n"
+        "Example: applied-cli shop template > my_brand.yaml"
+    ),
+)
+def template() -> None:
+    # The preset is looked up relative to this module: ../presets/demo.yaml.
+    preset_file = Path(__file__).parent.parent / "presets" / "demo.yaml"
+    if preset_file.exists():
+        # nl=False: emit the file text as-is, without an extra trailing newline.
+        typer.echo(preset_file.read_text(encoding="utf-8"), nl=False)
+        return
+    typer.echo("Error: demo template not found in package.", err=True)
+    raise typer.Exit(code=1)
+# ---------------------------------------------------------------------------
+# shop test
+# ---------------------------------------------------------------------------
+@app.command(
+    "test",
+    help=(
+        "Run smoke tests for configured agents and persist results as a benchmark.\n\n"
+        "Generates one test conversation per Q&A response and per escalation trigger "
+        "defined in the spec file. Each conversation is rated automatically — checking "
+        "that the agent responds, uses knowledge-base references, and matches the "
+        "expected answer. Results are saved to a named benchmark in Test Coverage.\n\n"
+        "Multi-turn: if the agent asks a follow-up question the test automatically "
+        "sends a clarifying reply and continues, up to --max-turns.\n\n"
+        "Example: applied-cli shop test --spec ridge_demo.yaml --json"
+    ),
+)
+def test(
+    spec_path: str = typer.Option(
+        ..., "--spec", help="Path to shop spec YAML/JSON file (same file used with shop setup)."
+    ),
+    shop_id: Optional[str] = typer.Option(
+        None,
+        "--shop-id",
+        help="Target shop UUID. Defaults to the active profile shop.",
+    ),
+    max_turns: int = typer.Option(
+        3,
+        "--max-turns",
+        help="Maximum conversation turns per test (1–5). Extra turns are used when the agent asks follow-up questions.",
+    ),
+    benchmark_name: str = typer.Option(
+        "Demo Shop Smoke Test",
+        "--benchmark-name",
+        help="Benchmark collection name for persisted scenarios.",
+    ),
+    dry_run: bool = typer.Option(
+        False, "--dry-run", help="Print the test plan without running any conversations."
+    ),
+    output_json: bool = typer.Option(False, "--json", help="Emit JSONL progress output."),
+    base_url: Optional[str] = typer.Option(None, help="Applied base URL."),
+    api_token: Optional[str] = typer.Option(None, help="Applied API token."),
+) -> None:
+    if not (1 <= max_turns <= 5):
+        raise typer.BadParameter("max-turns must be between 1 and 5.")
+    # --- Load spec ---
+    try:
+        spec = load_and_validate_shop_spec(spec_path)
+    except ValueError as exc:
+        typer.echo(f"Spec error: {exc}", err=True)
+        raise typer.Exit(code=1) from exc
+    shop_name = spec["name"]
+    agents_spec = spec["agents"]
+    # --- Resolve runtime ---
+    try:
+        resolved_base_url, resolved_shop_id, resolved_token = resolve_runtime(
+            base_url=base_url,
+            shop_id=shop_id,
+            api_token=api_token,
+        )
+    except APIError as exc:
+        typer.echo(render_api_error(exc, action="resolve runtime for shop test"), err=True)
+        raise typer.Exit(code=1) from exc
+    active_shop_id = shop_id or resolved_shop_id
+    active_token = resolved_token
+    # --- List live agents → build modality → agent map ---
+    try:
+        existing_agents = list_agents(
+            base_url=resolved_base_url,
+            shop_id=active_shop_id,
+            api_token=active_token,
+        )
+    except APIError as exc:
+        typer.echo(render_api_error(exc, action="list agents for shop test"), err=True)
+        raise typer.Exit(code=1) from exc
+    modality_to_agent: dict[str, dict[str, Any]] = {}
+    for ag in existing_agents:
+        mod = str(ag.get("modality") or "").lower()
+        if mod and mod not in modality_to_agent:
+            modality_to_agent[mod] = ag
+    # --- Build test cases from spec responses ---
+    test_cases: list[dict[str, Any]] = []
+    for agent_spec in agents_spec:
+        modality = str(agent_spec.get("modality") or "").lower()
+        channel = modality  # "chat", "email", "sms"
+        live_agent = modality_to_agent.get(modality)
+        if not live_agent:
+            if not output_json:
+                typer.echo(
+                    f"  Warning: no {modality} agent found in shop — skipping.", err=True
+                )
+            continue
+        agent_id = str(live_agent.get("id") or "")
+        for resp in agent_spec.get("responses", []):
+            rtype = resp.get("type", "")
+            question = resp.get("question", "")
+            expected_answer = resp.get("answer", "")
+            if rtype == "greeting":
+                # Test that the agent sends a greeting on first contact
+                test_cases.append(
+                    {
+                        "agent_id": agent_id,
+                        "channel": channel,
+                        "modality": modality,
+                        "type": "greeting",
+                        "opening": "Hello",
+                        "expected_answer": expected_answer,
+                        "name": f"[{modality}] greeting",
+                    }
+                )
+            elif rtype == "qa" and question:
+                test_cases.append(
+                    {
+                        "agent_id": agent_id,
+                        "channel": channel,
+                        "modality": modality,
+                        "type": "qa",
+                        "opening": question,
+                        "expected_answer": expected_answer,
+                        "name": f"[{modality}] qa: {question[:60]}",
+                    }
+                )
+            elif rtype == "escalate" and question:
+                test_cases.append(
+                    {
+                        "agent_id": agent_id,
+                        "channel": channel,
+                        "modality": modality,
+                        "type": "escalate",
+                        "opening": question,
+                        "expected_answer": "",
+                        "name": f"[{modality}] escalation: {question[:60]}",
+                    }
+                )
+            # signature — not a conversational test, skip
+    if not test_cases:
+        _emit(
+            {
+                "step": "test_complete",
+                "total": 0,
+                "pass": 0,
+                "fail": 0,
+                "reason": "no_test_cases",
+            },
+            output_json=output_json,
+        )
+        if not output_json:
+            typer.echo("No testable responses found in spec (qa/escalation/greeting).")
+        return
+    if not output_json:
+        typer.echo(f"\nRunning {len(test_cases)} test(s) for: {shop_name}")
+        typer.echo(f"  Shop ID : {active_shop_id}")
+        typer.echo(f"  Benchmark: {benchmark_name}\n")
+    # --- Dry run: print plan and exit ---
+    if dry_run:
+        for tc in test_cases:
+            _emit(
+                {
+                    "step": "test_case",
+                    "name": tc["name"],
+                    "type": tc["type"],
+                    "opening": tc["opening"][:80],
+                    "agent_id": tc["agent_id"],
+                    "dry_run": True,
+                },
+                output_json=output_json,
+            )
+        _emit(
+            {
+                "step": "test_complete",
+                "total": len(test_cases),
+                "pass": 0,
+                "fail": 0,
+                "dry_run": True,
+            },
+            output_json=output_json,
+        )
+        return
+    # --- Find-or-create one benchmark per agent ---
+    from applied_cli.commands.rate import _find_or_create_benchmark
+    benchmark_ids: dict[str, Optional[str]] = {}
+    for tc in test_cases:
+        aid = tc["agent_id"]
+        if aid not in benchmark_ids:
+            try:
+                bm = _find_or_create_benchmark(
+                    base_url=resolved_base_url,
+                    shop_id=active_shop_id,
+                    api_token=active_token,
+                    agent_id=aid,
+                    benchmark_name=benchmark_name,
+                )
+                benchmark_ids[aid] = str(bm["id"]) if bm and bm.get("id") else None
+            except APIError:
+                benchmark_ids[aid] = None
+    # --- Execute tests ---
+    results: list[dict[str, Any]] = []
+    with httpx.Client() as client:
+        for tc in test_cases:
+            test_name = tc["name"]
+            agent_id = tc["agent_id"]
+            bm_id = benchmark_ids.get(agent_id)
+            if not output_json:
+                typer.echo(f"  Testing: {test_name}")
+            try:
+                # Run multi-turn conversation
+                conv_id = _run_test_conversation(
+                    client,
+                    base_url=resolved_base_url,
+                    shop_id=active_shop_id,
+                    api_token=active_token,
+                    agent_id=agent_id,
+                    channel=tc["channel"],
+                    opening_message=tc["opening"],
+                    max_turns=max_turns,
+                )
+                # Fetch messages + references for rating
+                messages = list_conversation_messages(
+                    base_url=resolved_base_url,
+                    shop_id=active_shop_id,
+                    api_token=active_token,
+                    conversation_id=conv_id,
+                )
+                references = list_conversation_references(
+                    base_url=resolved_base_url,
+                    shop_id=active_shop_id,
+                    api_token=active_token,
+                    conversation_id=conv_id,
+                )
+                # Rate this conversation
+                rating = _rate_response_test(
+                    messages=messages,
+                    references=references,
+                    response_type=tc["type"],
+                    expected_answer=tc.get("expected_answer", ""),
+                )
+                # Persist to benchmark (find-or-create scenario, always new run)
+                scenario_id, run_id = _get_or_create_test_scenario_and_run(
+                    base_url=resolved_base_url,
+                    shop_id=active_shop_id,
+                    api_token=active_token,
+                    agent_id=agent_id,
+                    benchmark_id=bm_id,
+                    scenario_name=test_name,
+                    conversation_id=conv_id,
+                )
+                # Write ratings to run + scenario
+                patch_scenario_run(
+                    base_url=resolved_base_url,
+                    shop_id=active_shop_id,
+                    api_token=active_token,
+                    run_id=run_id,
+                    payload={
+                        "pass_status": rating["pass_status"],
+                        "csat_score": rating["csat_score"],
+                        "feedback": rating["feedback"],
+                        "reference_score": rating.get("reference_score"),
+                        "reference_notes": rating.get("reference_notes"),
+                    },
+                )
+                patch_conversation_scenario(
+                    base_url=resolved_base_url,
+                    shop_id=active_shop_id,
+                    api_token=active_token,
+                    scenario_id=scenario_id,
+                    payload={
+                        "pass_status": rating["pass_status"],
+                        "csat_score": rating["csat_score"],
+                        "feedback": rating["feedback"],
+                    },
+                )
+                result: dict[str, Any] = {
+                    "name": test_name,
+                    "type": tc["type"],
+                    "pass_status": rating["pass_status"],
+                    "csat_score": rating["csat_score"],
+                    "feedback": rating["feedback"],
+                    "reference_score": rating.get("reference_score"),
+                    "conversation_id": conv_id,
+                    "scenario_id": scenario_id,
+                    "run_id": run_id,
+                }
+                results.append(result)
+                if output_json:
+                    _emit(
+                        {
+                            "step": "test_case",
+                            "name": test_name,
+                            "type": tc["type"],
+                            "pass_status": rating["pass_status"],
+                            "csat_score": rating["csat_score"],
+                            "feedback": rating["feedback"],
+                            "conversation_id": conv_id,
+                            "scenario_id": scenario_id,
+                            "run_id": run_id,
+                        },
+                        output_json=True,
+                    )
+                else:
+                    icon = "✓" if rating["pass_status"] == "pass" else "✗"
+                    typer.echo(f"    {icon} {rating['pass_status']} (csat={rating['csat_score']})")
+                    typer.echo(f"      {rating['feedback']}")
+            except APIError as exc:
+                err_msg = render_api_error(exc, action=f"test case '{test_name}'")
+                result = {
+                    "name": test_name,
+                    "type": tc["type"],
+                    "pass_status": "error",
+                    "error": err_msg,
+                }
+                results.append(result)
+                if output_json:
+                    _emit(
+                        {
+                            "step": "test_case",
+                            "name": test_name,
+                            "type": tc["type"],
+                            "pass_status": "error",
+                            "error": err_msg,
+                        },
+                        output_json=True,
+                    )
+                else:
+                    typer.echo(f"    ✗ error: {err_msg}")
+    # --- Summary ---
+    pass_count = sum(1 for r in results if r.get("pass_status") == "pass")
+    fail_count = sum(1 for r in results if r.get("pass_status") == "fail")
+    error_count = sum(1 for r in results if r.get("pass_status") == "error")
+    summary: dict[str, Any] = {
+        "step": "test_complete",
+        "shop_name": shop_name,
+        "total": len(results),
+        "pass": pass_count,
+        "fail": fail_count,
+        "error": error_count,
+        "benchmark_name": benchmark_name,
+    }
+    if output_json:
+        summary["results"] = results
+    _emit(summary, output_json=output_json)
+    if not output_json:
+        status_icon = "✓" if fail_count + error_count == 0 else "✗"
+        typer.echo(
+            f"\n{status_icon} Test complete: {pass_count}/{len(results)} passed"
+            f"  (fail={fail_count}, error={error_count})"
+        )
+        typer.echo(f"  Benchmark: {benchmark_name}")
+        typer.echo("  View results in Applied > Test Coverage > Benchmarks.")