applied-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1757 @@
1
+ """
2
+ applied-cli shop — Create and configure shops end-to-end.
3
+
4
+ Commands:
5
+ shop create — Create a new shop using the current auth credentials.
6
+ shop setup — Full shop setup from a spec file (agents, KB, CSV, insights, simulation).
7
+ shop template — Print the demo spec template to stdout.
8
+ shop test — Run smoke tests for configured agents and rate responses.
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import os
14
+ import re
15
+ import time
16
+ import uuid
17
+ import webbrowser
18
+ from pathlib import Path
19
+ from typing import Any, Optional
20
+
21
+ import httpx
22
+ import typer
23
+
24
+ from applied_cli.auth_store import save_credentials
25
+ from applied_cli.commands.agent import _upsert_inline_responses as upsert_inline_responses
26
+ from applied_cli.config import Credentials
27
+ from applied_cli.error_reporting import render_api_error
28
+ from applied_cli.http import (
29
+ APIError,
30
+ check_superuser,
31
+ create_agent,
32
+ create_content_source,
33
+ create_escalation_flow,
34
+ create_property_choice,
35
+ create_conversation_scenario,
36
+ create_scenario_run,
37
+ create_shop,
38
+ import_conversations_bulk,
39
+ insights_generate,
40
+ list_agents,
41
+ list_conversation_messages,
42
+ list_conversation_references,
43
+ list_conversation_scenarios,
44
+ patch_conversation_scenario,
45
+ patch_scenario_run,
46
+ poll_cli_device_login,
47
+ populate_demo_shop,
48
+ start_cli_device_login,
49
+ update_agent,
50
+ validate_api_token,
51
+ )
52
+ from applied_cli.runtime import resolve_runtime
53
+ from applied_cli.shop_spec import load_and_validate_shop_spec
54
+
55
# Help text for the `shop` command group: a one-line summary followed by the
# usual hand-off between a human (who logs in) and an AI agent (who runs setup).
_SHOP_HELP = "".join(
    [
        "Create and configure shops.\n\n",
        "Typical AI-agent workflow:\n",
        " 1. Human: applied-cli auth login\n",
        " 2. Agent: applied-cli shop setup --spec fabfitfun.yaml --json",
    ]
)

# Typer sub-application on which the commands in this module are registered.
app = typer.Typer(help=_SHOP_HELP)
63
+
64
+ # ---------------------------------------------------------------------------
65
+ # Helpers
66
+ # ---------------------------------------------------------------------------
67
+
68
+
69
def _slugify(name: str) -> str:
    """Turn a shop name into a profile slug, e.g. 'FabFitFun Demo' → 'fabfitfun-demo'.

    The name is lower-cased, every run of characters outside ``a-z0-9`` becomes
    a single hyphen, and leading/trailing hyphens are dropped.  A name with no
    usable characters yields the fallback slug ``"shop"``.
    """
    lowered = name.lower().strip()
    # Splitting on the separator runs and re-joining the non-empty pieces is
    # the same as substituting "-" for each run and trimming the ends.
    tokens = [piece for piece in re.split(r"[^a-z0-9]+", lowered) if piece]
    if not tokens:
        return "shop"
    return "-".join(tokens)
74
+
75
+
76
def _emit(step: dict[str, Any], *, output_json: bool) -> None:
    """Report one progress step.

    In JSON mode the step dict is written as a single JSON line.  In text mode
    a skipped step prints its name and reason; any other step prints its name
    followed by ``key=value`` pairs for every remaining field.
    """
    if output_json:
        typer.echo(json.dumps(step))
        return

    label = step.get("step", "")
    if step.get("skipped", False):
        why = step.get("reason", "")
        typer.echo(f" → {label}: skipped ({why})")
        return

    # These keys are already conveyed by the line's prefix, so they are not
    # repeated as key=value pairs.
    hidden = {"step", "skipped", "reason"}
    details = [f"{key}={value}" for key, value in step.items() if key not in hidden]
    typer.echo(" ".join([f" → {label}", *details]))
93
+
94
+
95
def _device_auth_for_shop(
    *,
    base_url: str,
    output_json: bool,
    no_browser: bool = False,
    timeout_seconds: float = 20.0,
    poll_interval: float = 3.0,
    expires_in: int = 600,
) -> tuple[str, str]:
    """Run the device auth flow and return (api_token, shop_id) for the new shop.

    Emits a pending_auth JSONL line so an agent can relay the code+URL to the human.

    Args:
        base_url: Applied base URL the device-login endpoints are called on.
        output_json: Emit the approval details as one JSON line instead of text.
        no_browser: When true, do not try to open the approval URL in a browser.
        timeout_seconds: Per-request timeout passed to the start and poll calls.
        poll_interval: Seconds between polls when the server does not supply a
            usable ``interval``.
        expires_in: Seconds until the approval expires when the server does not
            supply ``expires_in``.

    Returns:
        The ``(token, shop_id)`` pair from the first poll response that
        contains both values.

    Raises:
        APIError: If starting the flow fails, if the start response has no
            device code, if a poll fails with any status other than 428, or if
            the approval window elapses (code ``DEVICE_AUTH_TIMEOUT``).
    """
    # An APIError from the start request propagates to the caller unchanged.
    device_data = start_cli_device_login(
        base_url=base_url,
        timeout_seconds=timeout_seconds,
    )

    token_page: str = device_data.get("verification_uri_complete") or device_data.get(
        "verification_uri", ""
    )
    device_code: str = device_data.get("device_code", "")
    user_code: str = device_data.get("user_code", "")
    expires_in = int(device_data.get("expires_in", expires_in))
    interval = float(device_data.get("interval", poll_interval))
    if interval <= 0:
        # A zero or negative interval would turn the poll loop into a busy
        # loop against the server; fall back to a sane pause instead.
        interval = poll_interval if poll_interval > 0 else 1.0

    if not device_code:
        # Without a device code every poll is pointless; fail now rather than
        # making the user wait for the full expiry window.
        raise APIError(
            "Device auth could not be started — the server returned no device code.",
            code="DEVICE_AUTH_INVALID_RESPONSE",
            hint="Run the setup command again to get a fresh URL.",
            retryable=True,
        )

    if output_json:
        typer.echo(
            json.dumps(
                {
                    "step": "pending_auth",
                    "approval_url": token_page,
                    "user_code": user_code or None,
                    "expires_in": expires_in,
                    "message": "Approve in browser and select the new shop",
                }
            )
        )
    else:
        typer.echo(f"\nApproval URL: {token_page}")
        if user_code:
            typer.echo(f"Verification code: {user_code}")
            typer.echo("Enter this code in the browser when prompted.")

    if not no_browser:
        # Never hand an empty URL to the browser; report it as "not opened".
        browser_opened = bool(token_page) and webbrowser.open(token_page)
        if not output_json:
            if browser_opened:
                typer.echo("(Browser opened automatically.)")
            else:
                typer.echo("(Could not open browser — open the URL above manually.)")

    if not output_json:
        typer.echo("Waiting for browser approval...")

    # Poll until approved or until the approval window closes.  The deadline is
    # measured on the monotonic clock so that time spent inside slow poll
    # requests counts toward the expiry, not only the time spent sleeping.
    deadline = time.monotonic() + expires_in
    while time.monotonic() < deadline:
        time.sleep(interval)
        try:
            poll_result = poll_cli_device_login(
                base_url=base_url,
                device_code=device_code,
                timeout_seconds=timeout_seconds,
            )
        except APIError as exc:
            if exc.status_code == 428:
                continue  # still pending
            raise

        raw_token = poll_result.get("token") or poll_result.get("access_token", "")
        new_shop_id = poll_result.get("shop_id", "")
        if raw_token and new_shop_id:
            return str(raw_token), str(new_shop_id)

    raise APIError(
        "Device auth timed out — the approval URL expired.",
        code="DEVICE_AUTH_TIMEOUT",
        hint="Run the setup command again to get a fresh URL.",
        retryable=True,
    )
180
+
181
+
182
+ # ---------------------------------------------------------------------------
183
+ # shop create
184
+ # ---------------------------------------------------------------------------
185
+
186
+
187
@app.command(
    "create",
    help=(
        "Create a new shop using current auth credentials (must be an Applied team account). "
        "The backend auto-mints an API token for the new shop — no second browser login needed."
    ),
)
def create(
    name: str = typer.Option(..., "--name", help="New shop name."),
    no_auth: bool = typer.Option(
        False,
        "--no-auth",
        help="Skip saving credentials for the new shop (useful if you will `auth login` separately).",
    ),
    base_url: Optional[str] = typer.Option(None, help="Applied base URL."),
    shop_id: Optional[str] = typer.Option(None, help="Admin shop UUID (auth scope)."),
    api_token: Optional[str] = typer.Option(None, help="Applied API token."),
    output_json: bool = typer.Option(False, "--json", help="Emit JSON output."),
) -> None:
    """Create a shop and, unless ``--no-auth`` is given, save credentials for it.

    The current credentials are resolved, the shop is created, and the result
    is printed (JSON or text).  When the response carries a ``setup_token`` the
    new shop's credentials are saved under a profile derived from the shop name
    and made active.  Exits with code 1 if the runtime cannot be resolved, the
    create call fails, or the response carries no shop id.
    """
    try:
        resolved_base_url, resolved_shop_id, resolved_token = resolve_runtime(
            base_url=base_url, shop_id=shop_id, api_token=api_token
        )
    except APIError as exc:
        typer.echo(render_api_error(exc, action="resolve runtime for shop create"), err=True)
        raise typer.Exit(code=1) from exc

    try:
        shop_data = create_shop(
            base_url=resolved_base_url,
            shop_id=resolved_shop_id,
            api_token=resolved_token,
            name=name,
        )
    except APIError as exc:
        typer.echo(render_api_error(exc, action="create shop"), err=True)
        if exc.status_code in {400, 422}:
            typer.echo(
                "Hint: shop creation is restricted to Applied team accounts.\n"
                "If your current credentials are not for an Applied team shop, run:\n"
                " applied-cli auth login (and select an Applied team shop)\n"
                "Then retry this command.",
                err=True,
            )
        raise typer.Exit(code=1) from exc

    # `or ""` keeps a JSON null from becoming the truthy string "None", which
    # would otherwise be stored as a shop id or API token.
    new_shop_id = str(shop_data.get("id") or "")
    setup_token = str(shop_data.get("setup_token") or "")
    profile = _slugify(name)

    if not new_shop_id:
        # Saving credentials without a shop id would replace the active
        # profile with one that cannot be used.
        typer.echo(
            "Error: the create-shop response did not include a shop id; "
            "no credentials were saved.",
            err=True,
        )
        raise typer.Exit(code=1)

    if output_json:
        out: dict[str, Any] = {
            "shop_id": new_shop_id,
            "name": name,
            "token_minted": bool(setup_token),
        }
        typer.echo(json.dumps(out, indent=2))
    else:
        typer.echo(f"Shop created: {name} ({new_shop_id})")

    if setup_token and not no_auth:
        save_credentials(
            Credentials(
                base_url=resolved_base_url,
                shop_id=new_shop_id,
                api_token=setup_token,
            ),
            profile=profile,
            set_active=True,
        )
        if output_json:
            typer.echo(
                json.dumps({"credentials_saved": True, "profile": profile, "shop_id": new_shop_id})
            )
        else:
            typer.echo(f"Credentials saved as profile '{profile}' and set as active.")
    elif not setup_token and not no_auth:
        # Tell the user why no profile exists instead of skipping silently.
        # Written to stderr so the stdout JSON output is unchanged.
        typer.echo(
            "Note: no API token was returned for the new shop, so no credentials were saved.\n"
            "Run `applied-cli auth login` and select the new shop to use it.",
            err=True,
        )
263
+
264
+
265
+ # ---------------------------------------------------------------------------
266
+ # shop setup
267
+ # ---------------------------------------------------------------------------
268
+
269
+
270
@app.command(
    "setup",
    help=(
        "Full shop setup from a YAML/JSON spec file. "
        "Creates the shop, configures agents, optionally uploads classified conversations, "
        "runs insights, simulation, and syncs knowledge base. "
        "Emits JSONL progress in --json mode so an AI agent can track each step.\n\n"
        "Example: applied-cli shop setup --spec fabfitfun.yaml --json"
    ),
)
def setup(  # noqa: C901 (acceptable complexity for orchestration command)
    spec_path: str = typer.Option(..., "--spec", help="Path to YAML or JSON spec file."),
    shop_id: Optional[str] = typer.Option(
        None,
        "--shop-id",
        help="Existing shop UUID — skip shop creation and configure this shop instead.",
    ),
    base_url: Optional[str] = typer.Option(None, help="Applied base URL."),
    api_token: Optional[str] = typer.Option(None, help="Applied API token."),
    dry_run: bool = typer.Option(False, "--dry-run", help="Validate spec and show plan, no writes."),
    yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt."),
    output_json: bool = typer.Option(False, "--json", help="Emit JSONL progress output."),
) -> None:
    """Provision and configure a shop end-to-end from a spec file.

    Every step is reported through ``_emit``: spec load, shop creation (or
    reuse of ``--shop-id``), agent configuration, conversation import,
    taxonomy upload, insights, simulation, knowledge base, and a final
    ``complete`` record.

    A spec error, a missing conversations CSV, or a failure to resolve
    credentials, create the shop, list agents, or create/update an agent ends
    the command with exit code 1.  Failures in the later steps are reported
    and that step is marked skipped so the remaining steps still run.  With
    ``--dry-run`` the create/update/upload calls are not made and placeholder
    ids are reported instead.
    """
    # --- 1. Load + validate spec ---
    try:
        spec = load_and_validate_shop_spec(spec_path)
    except ValueError as exc:
        typer.echo(f"Spec error: {exc}", err=True)
        raise typer.Exit(code=1) from exc

    shop_name: str = spec["name"]
    agents_spec: list[dict[str, Any]] = spec["agents"]
    csv_spec: dict[str, Any] | None = spec["conversations_csv"]
    taxonomy_spec: dict[str, Any] | None = spec.get("taxonomy")
    kb_spec: dict[str, Any] | None = spec["knowledge_base"]
    sim_spec: dict[str, Any] | None = spec["simulation"]
    has_csv = csv_spec is not None
    has_taxonomy = taxonomy_spec is not None
    has_kb = kb_spec is not None
    has_sim = sim_spec is not None

    _emit(
        {
            "step": "spec_loaded",
            "name": shop_name,
            "agent_count": len(agents_spec),
            "has_csv": has_csv,
            "has_taxonomy": has_taxonomy,
            "has_kb": has_kb,
            "has_simulation": has_sim,
        },
        output_json=output_json,
    )

    # Validate the CSV path before making any API calls, so a typo in the spec
    # cannot leave a half-configured shop behind.
    declared_csv_path = csv_spec.get("file_path") if csv_spec is not None else None
    if declared_csv_path and not Path(declared_csv_path).exists():
        typer.echo(
            f"Error: conversations_csv.file_path '{declared_csv_path}' not found.", err=True
        )
        raise typer.Exit(code=1)

    if not output_json:
        typer.echo(f"\nSpec: {spec_path}")
        typer.echo(f" Shop: {shop_name}")
        typer.echo(f" Agents: {', '.join(a['modality'] for a in agents_spec)}")
        typer.echo(f" CSV: {'yes' if has_csv else 'no'}")
        typer.echo(f" KB: {'yes' if has_kb else 'no'}")
        typer.echo(f" Sim: {'yes' if has_sim else 'no'}")
        if dry_run:
            typer.echo("\n[dry-run] No API calls will be made.\n")
        elif not yes:
            typer.confirm("Proceed?", abort=True)

    # Resolve admin (current user) credentials for shop creation
    try:
        resolved_base_url, admin_shop_id, resolved_admin_token = resolve_runtime(
            base_url=base_url, shop_id=None, api_token=api_token
        )
    except APIError as exc:
        typer.echo(render_api_error(exc, action="resolve runtime for shop setup"), err=True)
        raise typer.Exit(code=1) from exc

    # --- 2. Create shop (or use existing) ---
    new_shop_id: str
    new_shop_token: str

    if shop_id:
        # Use existing shop
        new_shop_id = shop_id
        new_shop_token = resolved_admin_token  # assume current token is for this shop
        _emit(
            {"step": "shop_reused", "shop_id": new_shop_id},
            output_json=output_json,
        )
    else:
        if dry_run:
            new_shop_id = "(dry-run)"
            new_shop_token = "(dry-run)"
            _emit(
                {"step": "shop_created", "shop_id": new_shop_id, "name": shop_name, "dry_run": True},
                output_json=output_json,
            )
        else:
            try:
                shop_data = create_shop(
                    base_url=resolved_base_url,
                    shop_id=admin_shop_id,
                    api_token=resolved_admin_token,
                    name=shop_name,
                )
            except APIError as exc:
                typer.echo(render_api_error(exc, action="create shop"), err=True)
                if exc.status_code in {400, 422}:
                    typer.echo(
                        "Hint: shop creation is restricted to Applied team accounts.\n"
                        "Make sure you are logged into an Applied team shop:\n"
                        " applied-cli auth login",
                        err=True,
                    )
                raise typer.Exit(code=1) from exc

            # `or ""` keeps a JSON null from becoming the truthy string "None",
            # which would skip the device-auth fallback and save a bogus token.
            new_shop_id = str(shop_data.get("id") or "")
            new_shop_token = str(shop_data.get("setup_token") or "")

            if not new_shop_token:
                # Fallback: start device auth so user can mint a token for the new shop
                if not output_json:
                    typer.echo(
                        "\nNo auto-minted token returned. Starting device auth for new shop..."
                    )
                try:
                    new_shop_token, confirmed_shop_id = _device_auth_for_shop(
                        base_url=resolved_base_url,
                        output_json=output_json,
                    )
                    new_shop_id = confirmed_shop_id
                except APIError as exc:
                    typer.echo(render_api_error(exc, action="device auth for new shop"), err=True)
                    raise typer.Exit(code=1) from exc

            _emit(
                {
                    "step": "shop_created",
                    "shop_id": new_shop_id,
                    "name": shop_name,
                    "token_minted": bool(new_shop_token),
                },
                output_json=output_json,
            )

            # Save credentials for new shop
            profile = _slugify(shop_name)
            save_credentials(
                Credentials(
                    base_url=resolved_base_url,
                    shop_id=new_shop_id,
                    api_token=new_shop_token,
                ),
                profile=profile,
                set_active=True,
            )
            _emit(
                {"step": "credentials_saved", "profile": profile, "shop_id": new_shop_id},
                output_json=output_json,
            )

    # From here on, use the new shop's credentials
    active_shop_id = new_shop_id
    active_token = new_shop_token

    # --- 3. Find + configure agents ---
    if not dry_run:
        try:
            existing_agents = list_agents(
                base_url=resolved_base_url,
                shop_id=active_shop_id,
                api_token=active_token,
                limit=100,
            )
        except APIError as exc:
            typer.echo(render_api_error(exc, action="list agents for new shop"), err=True)
            raise typer.Exit(code=1) from exc
        default_agent_id = str(existing_agents[0].get("id")) if existing_agents else None
    else:
        default_agent_id = "(dry-run)"

    configured_agents: list[dict[str, Any]] = []

    for idx, agent_spec in enumerate(agents_spec):
        agent_payload: dict[str, Any] = {
            "modality": agent_spec["modality"],
            "name": agent_spec["name"],
            "type": "Customer Support",
            "auto_reply": agent_spec.get("auto_reply", True),
        }
        # Optional fields are only sent when the spec sets them.
        if agent_spec.get("description"):
            agent_payload["description"] = agent_spec["description"]
        if agent_spec.get("guardrail"):
            agent_payload["guardrail"] = agent_spec["guardrail"]
        if agent_spec.get("escalation_mode"):
            agent_payload["escalation_mode"] = agent_spec["escalation_mode"]
        if agent_spec.get("response_delay_in_seconds") is not None:
            agent_payload["response_delay_in_seconds"] = agent_spec["response_delay_in_seconds"]

        responses_spec: list[dict[str, Any]] = agent_spec.get("responses") or []

        if dry_run:
            agent_id = f"(dry-run-agent-{idx})"
            response_summary = {"created": len(responses_spec), "updated": 0, "unchanged": 0}
        else:
            if idx == 0 and default_agent_id:
                # Update the default agent created with the shop
                try:
                    updated = update_agent(
                        base_url=resolved_base_url,
                        shop_id=active_shop_id,
                        api_token=active_token,
                        agent_id=default_agent_id,
                        payload=agent_payload,
                    )
                    agent_id = str(updated.get("id", default_agent_id))
                except APIError as exc:
                    typer.echo(render_api_error(exc, action=f"update agent {idx}"), err=True)
                    raise typer.Exit(code=1) from exc
            else:
                # For additional agents, match by modality before creating a new one
                spec_modality = agent_spec["modality"].lower()
                matched_existing = next(
                    (
                        ea for ea in existing_agents[1:]
                        if str(ea.get("modality", "")).lower() == spec_modality
                    ),
                    None,
                )
                if matched_existing:
                    try:
                        updated = update_agent(
                            base_url=resolved_base_url,
                            shop_id=active_shop_id,
                            api_token=active_token,
                            agent_id=str(matched_existing["id"]),
                            payload=agent_payload,
                        )
                        agent_id = str(updated.get("id", matched_existing["id"]))
                    except APIError as exc:
                        typer.echo(render_api_error(exc, action=f"update agent {idx}"), err=True)
                        raise typer.Exit(code=1) from exc
                else:
                    # No existing agent with this modality — create a new one
                    try:
                        created_agent = create_agent(
                            base_url=resolved_base_url,
                            shop_id=active_shop_id,
                            api_token=active_token,
                            payload=agent_payload,
                        )
                        agent_id = str(created_agent.get("id", ""))
                    except APIError as exc:
                        typer.echo(render_api_error(exc, action=f"create agent {idx}"), err=True)
                        raise typer.Exit(code=1) from exc

            # Upsert responses
            if responses_spec:
                try:
                    response_summary = upsert_inline_responses(
                        base_url=resolved_base_url,
                        shop_id=active_shop_id,
                        api_token=active_token,
                        agent_id=agent_id,
                        response_rows=responses_spec,
                        dry_run=False,
                    )
                except Exception as exc:
                    # Deliberately best-effort: a response failure must not
                    # abort the rest of the setup.
                    typer.echo(f"Warning: failed to upsert responses for agent {idx}: {exc}", err=True)
                    response_summary = {"created": 0, "updated": 0, "unchanged": 0}
            else:
                response_summary = {"created": 0, "updated": 0, "unchanged": 0}

        # Create silent escalation flow for email agents
        escalation_flow_id: str | None = None
        if agent_spec["modality"].lower() == "email" and not dry_run:
            try:
                flow_data = create_escalation_flow(
                    base_url=resolved_base_url,
                    shop_id=active_shop_id,
                    api_token=active_token,
                    agent_id=agent_id,
                )
                escalation_flow_id = str(flow_data.get("id", ""))
                _emit(
                    {
                        "step": "escalation_flow_created",
                        "agent_id": agent_id,
                        "flow_id": escalation_flow_id,
                    },
                    output_json=output_json,
                )
            except APIError as exc:
                typer.echo(
                    f"Warning: failed to create escalation flow for email agent: {exc}",
                    err=True,
                )

        configured_agents.append({"id": agent_id, "modality": agent_spec["modality"]})
        _emit(
            {
                "step": "agent_configured",
                "index": idx,
                "agent_id": agent_id,
                "modality": agent_spec["modality"],
                "name": agent_spec["name"],
                "responses_created": response_summary["created"],
                "responses_updated": response_summary["updated"],
                "dry_run": dry_run,
            },
            output_json=output_json,
        )

    # Pick the agent that receives imported conversations: the modality named
    # by the spec if configured, otherwise the chat agent, otherwise the first.
    def _pick_agent_for_csv() -> str:
        target_modality = (csv_spec or {}).get("agent_modality")
        if target_modality:
            for a in configured_agents:
                if a["modality"].lower() == target_modality.lower():
                    return a["id"]
        for a in configured_agents:
            if a["modality"].lower() == "chat":
                return a["id"]
        return configured_agents[0]["id"] if configured_agents else ""

    # --- 4. Conversations CSV upload ---
    csv_imported = False  # tracks actual upload success, not just spec presence
    if not has_csv:
        _emit(
            {"step": "conversations_imported", "skipped": True, "reason": "no_csv"},
            output_json=output_json,
        )
    else:
        assert csv_spec is not None
        csv_file_path = csv_spec.get("file_path")
        csv_url = csv_spec.get("url")
        process_labels = csv_spec.get("process_labels", True)
        csv_agent_id = _pick_agent_for_csv()
        column_map = csv_spec.get("column_map") or {}

        if dry_run:
            csv_imported = True  # dry-run counts as success for downstream steps
            _emit(
                {
                    "step": "conversations_imported",
                    "agent_id": csv_agent_id,
                    "source": csv_file_path or csv_url,
                    "dry_run": True,
                },
                output_json=output_json,
            )
        else:
            # Apply column_map: rename CSV columns to match bulk-upload format.
            # Done only for a real upload so a dry run writes nothing to disk.
            upload_file_path = csv_file_path
            if csv_file_path and column_map:
                upload_file_path = _apply_column_map(csv_file_path, column_map)
            try:
                import_result = import_conversations_bulk(
                    base_url=resolved_base_url,
                    shop_id=active_shop_id,
                    api_token=active_token,
                    agent_id=csv_agent_id,
                    file_path=upload_file_path,
                    url=csv_url,
                    process_labels=process_labels,
                )
                csv_imported = True
                _emit(
                    {
                        "step": "conversations_imported",
                        "agent_id": csv_agent_id,
                        "status": import_result.get("status", "processing"),
                        "queued": import_result.get("queued") or import_result.get("count"),
                    },
                    output_json=output_json,
                )
            except APIError as exc:
                typer.echo(render_api_error(exc, action="import conversations CSV"), err=True)
                typer.echo("Warning: conversation import failed — continuing setup.", err=True)
                _emit(
                    {
                        "step": "conversations_imported",
                        "skipped": True,
                        "reason": "import_error",
                        "error": str(exc),
                    },
                    output_json=output_json,
                )
            finally:
                # _apply_column_map returns a new temp file when it renamed
                # anything; remove it once the upload call has returned.
                # NOTE(review): assumes import_conversations_bulk has finished
                # reading the file by the time it returns — confirm.
                if upload_file_path and upload_file_path != csv_file_path:
                    try:
                        os.remove(upload_file_path)
                    except OSError:
                        pass

    # --- 5. Taxonomy ---
    if not has_taxonomy:
        _emit(
            {"step": "taxonomy_uploaded", "skipped": True, "reason": "not_in_spec"},
            output_json=output_json,
        )
        if not output_json:
            typer.echo(" ⤼ taxonomy upload skipped (not in spec)")
    else:
        assert taxonomy_spec is not None
        taxonomy_file = taxonomy_spec["file_path"]

        if dry_run:
            _emit(
                {"step": "taxonomy_uploaded", "file": taxonomy_file, "dry_run": True},
                output_json=output_json,
            )
        else:
            try:
                topics_list, intents_list = _parse_taxonomy_py(taxonomy_file)

                if not output_json:
                    typer.echo(
                        f" Uploading taxonomy: {len(topics_list)} topics, "
                        f"{len(intents_list)} intents..."
                    )

                # Create topics first, collect name → id map
                topic_id_map: dict[str, str] = {}
                for topic in topics_list:
                    t_result = create_property_choice(
                        base_url=resolved_base_url,
                        shop_id=active_shop_id,
                        api_token=active_token,
                        name=topic["name"],
                        description=topic.get("description", ""),
                    )
                    topic_id_map[topic["name"]] = str(t_result["id"])

                # Create intents under their parent topics
                intents_created = 0
                for intent in intents_list:
                    parent_id = topic_id_map.get(intent.get("topic", ""))
                    if not parent_id:
                        continue  # skip orphaned intents
                    create_property_choice(
                        base_url=resolved_base_url,
                        shop_id=active_shop_id,
                        api_token=active_token,
                        name=intent["name"],
                        description=intent.get("description", ""),
                        parent_choice_id=parent_id,
                    )
                    intents_created += 1

                _emit(
                    {
                        "step": "taxonomy_uploaded",
                        "topics": len(topic_id_map),
                        "intents": intents_created,
                    },
                    output_json=output_json,
                )
                if not output_json:
                    typer.echo(
                        f" ✓ taxonomy uploaded ({len(topic_id_map)} topics, "
                        f"{intents_created} intents)"
                    )
            except Exception as exc:
                # Best-effort step: parsing or upload problems are reported
                # and setup continues.
                typer.echo(f"Warning: taxonomy upload failed — {exc}", err=True)
                _emit(
                    {
                        "step": "taxonomy_uploaded",
                        "skipped": True,
                        "reason": "taxonomy_error",
                        "error": str(exc),
                    },
                    output_json=output_json,
                )

    # --- 6. Insights ---
    if not csv_imported:
        _emit(
            {
                "step": "insights_triggered",
                "skipped": True,
                "reason": "no_csv" if not has_csv else "import_failed",
            },
            output_json=output_json,
        )
    else:
        if dry_run:
            _emit(
                {"step": "insights_triggered", "dry_run": True},
                output_json=output_json,
            )
        else:
            try:
                insights_result = insights_generate(
                    base_url=resolved_base_url,
                    shop_id=active_shop_id,
                    api_token=active_token,
                    instruction="Summarize top topics, intents, and trends from uploaded conversations.",
                )
                # The id is read from whichever of these keys the response has.
                report_id = (
                    insights_result.get("reportId")
                    or insights_result.get("id")
                    or insights_result.get("report_id")
                    or insights_result.get("task_id")
                )
                _emit(
                    {"step": "insights_triggered", "report_id": report_id},
                    output_json=output_json,
                )
            except APIError as exc:
                typer.echo(render_api_error(exc, action="trigger insights"), err=True)
                _emit(
                    {
                        "step": "insights_triggered",
                        "skipped": True,
                        "reason": "insights_error",
                        "error": str(exc),
                    },
                    output_json=output_json,
                )

    # --- 7. Simulation ---
    if not has_sim:
        _emit(
            {"step": "simulation_started", "skipped": True, "reason": "not_in_spec"},
            output_json=output_json,
        )
    else:
        # Auto-detect superuser status using the original (admin) credentials.
        # No separate --admin-token needed — if the logged-in user is a superuser,
        # their existing API token works for populate_demo_shop.
        is_superuser = False
        if not dry_run:
            try:
                is_superuser = check_superuser(
                    base_url=resolved_base_url,
                    shop_id=admin_shop_id,
                    api_token=resolved_admin_token,
                )
            except APIError:
                pass  # treat as not superuser if check fails

        if not dry_run and not is_superuser:
            _emit(
                {
                    "step": "simulation_started",
                    "skipped": True,
                    "reason": "not_superuser",
                    "hint": "Simulation requires a superuser account. Log in with a superuser.",
                },
                output_json=output_json,
            )
        else:
            assert sim_spec is not None
            if dry_run:
                _emit(
                    {"step": "simulation_started", "dry_run": True},
                    output_json=output_json,
                )
            else:
                try:
                    sim_result = populate_demo_shop(
                        base_url=resolved_base_url,
                        shop_id=admin_shop_id,
                        api_token=resolved_admin_token,
                        target_shop_id=active_shop_id,
                        distribution=sim_spec["distribution"],
                        date_from=sim_spec["date_from"],
                        date_to=sim_spec["date_to"],
                        num_conversations=sim_spec["num_conversations"],
                        delete_previous=sim_spec.get("delete_previous", False),
                    )
                    _emit(
                        {
                            "step": "simulation_started",
                            "enqueued": sim_result.get("enqueued"),
                            "agent_count": sim_result.get("agent_count"),
                            "deleted": sim_result.get("deleted", 0),
                        },
                        output_json=output_json,
                    )
                except APIError as exc:
                    typer.echo(render_api_error(exc, action="populate demo shop"), err=True)
                    _emit(
                        {
                            "step": "simulation_started",
                            "skipped": True,
                            "reason": "simulation_error",
                            "error": str(exc),
                        },
                        output_json=output_json,
                    )

    # --- 8. Knowledge base ---
    if not has_kb:
        _emit(
            {"step": "knowledge_base_synced", "skipped": True, "reason": "not_in_spec"},
            output_json=output_json,
        )
    else:
        assert kb_spec is not None
        kb_url = kb_spec["url"]
        kb_title = kb_spec.get("title")

        if dry_run:
            _emit(
                {"step": "knowledge_base_synced", "url": kb_url, "dry_run": True},
                output_json=output_json,
            )
        else:
            try:
                cs_result = create_content_source(
                    base_url=resolved_base_url,
                    shop_id=active_shop_id,
                    api_token=active_token,
                    url=kb_url,
                    title=kb_title,
                )
                _emit(
                    {
                        "step": "knowledge_base_synced",
                        "content_source_id": cs_result.get("id"),
                        "url": kb_url,
                    },
                    output_json=output_json,
                )
            except APIError as exc:
                typer.echo(render_api_error(exc, action="create content source"), err=True)
                _emit(
                    {
                        "step": "knowledge_base_synced",
                        "skipped": True,
                        "reason": "kb_error",
                        "error": str(exc),
                    },
                    output_json=output_json,
                )

    # --- 9. Complete ---
    complete_payload: dict[str, Any] = {
        "step": "complete",
        "shop_id": active_shop_id,
        "agents": configured_agents,
        "dry_run": dry_run,
    }
    if not shop_id:
        complete_payload["profile"] = _slugify(shop_name)

    _emit(complete_payload, output_json=output_json)

    if not output_json:
        typer.echo(f"\n✓ Setup complete for shop: {shop_name}")
        typer.echo(f" Shop ID: {active_shop_id}")
        for a in configured_agents:
            typer.echo(f" Agent ({a['modality']}): {a['id']}")
        if not dry_run and not shop_id:
            typer.echo(
                f"\nCredentials saved as profile '{_slugify(shop_name)}'.\n"
                f"Run `applied-cli auth status` to confirm."
            )
930
+
931
+
932
+ # ---------------------------------------------------------------------------
933
+ # CSV helpers
934
+ # ---------------------------------------------------------------------------
935
+
936
+
937
def _apply_column_map(file_path: str, column_map: dict[str, str]) -> str:
    """Rename CSV header columns per column_map and return a temp file path.

    Matching is case-insensitive on the original column name.  Only the header
    row is rewritten; data rows are copied through, with rows shorter than the
    header padded with empty cells.  Blank lines are dropped.

    Returns the original file_path unchanged if column_map is empty, no
    columns match, the file has no header row, or the file cannot be read as
    UTF-8 CSV.  Otherwise returns the path of a newly created temporary
    ``.csv`` file, which the caller is responsible for deleting.
    """
    import csv as csv_module
    import os
    import tempfile

    if not column_map:
        return file_path

    # Build a case-insensitive lookup: lower(original) → target
    rename: dict[str, str] = {k.lower(): v for k, v in column_map.items()}

    try:
        # utf-8-sig reads plain UTF-8 too, and strips a leading BOM that would
        # otherwise stick to the first column name and stop it from matching.
        with open(file_path, newline="", encoding="utf-8-sig") as src:
            # A plain reader (not DictReader) keeps duplicate column names and
            # rows with extra cells intact instead of collapsing or choking.
            rows = [row for row in csv_module.reader(src) if row]
    except (OSError, UnicodeError, csv_module.Error):
        return file_path  # fallback: leave unchanged

    if not rows:
        return file_path

    header, data_rows = rows[0], rows[1:]
    new_header = [rename.get(col.lower(), col) for col in header]

    if new_header == header:
        return file_path  # nothing to rename

    width = len(header)
    # newline="" lets the csv writer control line endings; without it Windows
    # text mode would turn the writer's "\r\n" into "\r\r\n".
    tmp = tempfile.NamedTemporaryFile(
        mode="w", suffix=".csv", delete=False, newline="", encoding="utf-8"
    )
    try:
        with tmp:
            writer = csv_module.writer(tmp)
            writer.writerow(new_header)
            for row in data_rows:
                writer.writerow(row + [""] * (width - len(row)))
    except Exception:
        # Do not leave a partially written temp file behind.
        try:
            os.remove(tmp.name)
        except OSError:
            pass
        raise
    return tmp.name
982
+
983
+
984
+ # ---------------------------------------------------------------------------
985
+ # shop test helpers
986
+ # ---------------------------------------------------------------------------
987
+
988
# Regex for parsing JSON objects out of the SSE completion stream.
# Same pattern used by chat.py.  The nesting is unrolled by hand, so it
# matches balanced braces up to nine levels deep (not arbitrary depth).
# It looks only at raw "{" / "}" characters and has no notion of JSON string
# quoting.  Group 1 is the whole matched object.
_COMPLETION_RE = re.compile(
    r"(\{(?:(\{(?:(\{(?:(\{(?:(\{(?:(\{(?:(\{(?:(\{(?:(\{[^}{]*\})"
    r"|[^}{])*\})|[^}{])*\})|[^}{])*\})|[^}{])*\})|[^}{])*\})|[^}{])*\})"
    r"|[^}{])*\})|[^}{])*\})"
)
995
+
996
def _create_test_conversation(
    client: httpx.Client,
    *,
    base_url: str,
    agent_id: str,
    channel: str,
) -> str:
    """Create a test conversation and return its id.

    Args:
        client: HTTP client used to send the request.
        base_url: API base URL; the request is POSTed to ``{base_url}/v1/c/``.
        agent_id: Agent the conversation is created for.
        channel: Conversation channel.  ``"email"`` and ``"sms"`` are sent as
            the payload ``type``; any other value leaves ``type`` unset.

    Returns:
        The ``id`` field of the JSON response, as a string.

    Raises:
        APIError: On a transport failure (``NETWORK_ERROR``, retryable), an
            HTTP status >= 400, a response body that is not a JSON object, or
            a response without an ``id``.
    """
    payload: dict[str, Any] = {
        "agent_id": agent_id,
        "is_test": True,
        "metadata": {"isTest": True, "source": "applied-cli-shop-test"},
    }
    if channel in ("email", "sms"):
        payload["type"] = channel
    try:
        response = client.post(
            f"{base_url}/v1/c/",
            json=payload,
            headers={"Content-Type": "application/json"},
            timeout=15.0,
        )
    except httpx.HTTPError as exc:
        raise APIError(
            f"Test conversation creation failed: {exc}",
            code="NETWORK_ERROR",
            retryable=True,
        ) from exc
    if response.status_code >= 400:
        raise APIError(
            f"Test conversation creation failed ({response.status_code}).",
            status_code=response.status_code,
            code="CONVERSATION_CREATE_FAILED",
        )
    # A 2xx with a non-JSON body (e.g. a proxy HTML page) must surface as an
    # APIError: callers only catch APIError, so a bare ValueError would abort
    # the whole test run instead of failing this one case.
    try:
        body = response.json()
    except ValueError as exc:
        raise APIError(
            "Test conversation created but the response was not valid JSON.",
            status_code=response.status_code,
            code="CONVERSATION_CREATE_FAILED",
        ) from exc
    conv_id = body.get("id") if isinstance(body, dict) else None
    if not conv_id:
        raise APIError("Test conversation created but no id returned.")
    return str(conv_id)
1036
+
1037
+
1038
def _stream_completion_silent(
    client: httpx.Client,
    *,
    base_url: str,
    shop_id: str,
    api_token: str,
    agent_id: str,
    payload: dict[str, Any],
) -> str:
    """POST a completion request, consume the stream quietly, return the text.

    JSON objects are pulled out of the streamed body with ``_COMPLETION_RE``;
    every non-empty string ``content`` field is appended to the result.  Once
    an object with a truthy ``content_complete`` has been parsed, one more
    non-empty chunk is processed and then reading stops.

    Returns an empty string for an HTTP status >= 400.  Any exception raised
    while streaming is swallowed and whatever text was gathered so far is
    returned.
    """
    request_headers = {
        "Authorization": f"Bearer {api_token}",
        "X-Shop-Id": shop_id,
        "Content-Type": "application/json",
    }
    pieces: list[str] = []
    pending = ""  # unparsed tail of the stream carried between chunks
    finished = False  # a content_complete event has been parsed
    grace_chunk_armed = False  # set on the first pass after `finished`
    try:
        with client.stream(
            "POST",
            f"{base_url}/v1/agents/{agent_id}/complete/",
            headers=request_headers,
            json=payload,
            timeout=60.0,
        ) as response:
            if response.status_code >= 400:
                # Return empty string — escalation may produce 200 with no body or a 4xx
                return ""
            for chunk in response.iter_text():
                if not chunk:
                    continue
                pending += chunk
                consumed_upto = 0
                for found in _COMPLETION_RE.finditer(pending):
                    candidate = found.group(1)
                    if not candidate:
                        continue
                    try:
                        event = json.loads(candidate)
                    except json.JSONDecodeError:
                        continue
                    # Only successfully decoded objects advance the cursor.
                    consumed_upto = found.end()
                    text = event.get("content")
                    if isinstance(text, str) and text:
                        pieces.append(text)
                    if event.get("content_complete"):
                        finished = True
                if consumed_upto > 0:
                    pending = pending[consumed_upto:]
                if finished:
                    if grace_chunk_armed:
                        break
                    grace_chunk_armed = True
    except Exception:
        pass  # Network errors return whatever was accumulated
    return "".join(pieces)
1096
+
1097
+
1098
def _run_test_conversation(
    client: httpx.Client,
    *,
    base_url: str,
    shop_id: str,
    api_token: str,
    agent_id: str,
    channel: str,
    opening_message: str,
    max_turns: int = 3,
) -> str:
    """Drive a multi-turn test conversation and return its conversation id.

    The opening message is sent first.  Whenever the agent's reply ends with
    '?', a fixed generic follow-up is appended as the next user message and
    the loop continues.  It stops when the reply is empty, when the reply does
    not end with '?', or after ``max_turns`` completion requests.
    """

    def _entry(role: str, body: str, entity_type: str) -> dict[str, Any]:
        # One transcript message; "content" and "text" carry the same body.
        return {
            "id": str(uuid.uuid4()),
            "role": role,
            "content": body,
            "text": body,
            "format": "TEXT",
            "entity": {"type": entity_type},
        }

    conversation_id = _create_test_conversation(
        client,
        base_url=base_url,
        agent_id=agent_id,
        channel=channel,
    )

    history: list[dict[str, Any]] = [_entry("user", opening_message, "user")]
    generic_reply = "I don't have that specific information available. Can you provide a general answer?"
    final_turn = max_turns - 1

    for turn_index in range(max_turns):
        reply = _stream_completion_silent(
            client,
            base_url=base_url,
            shop_id=shop_id,
            api_token=api_token,
            agent_id=agent_id,
            payload={
                "conversation_id": conversation_id,
                "context": "EVALUATE",
                "transcript": history,
                "metadata": {"source": "applied-cli-shop-test", "isTest": True},
                "draft": False,
            },
        )

        trimmed = reply.strip()
        if not trimmed:
            break  # No response — likely escalated or an error

        history.append(_entry("assistant", reply, "agent"))

        # Done when the agent did not ask anything back, or turns ran out.
        if not trimmed.endswith("?") or turn_index >= final_turn:
            break

        # Agent asked a follow-up — answer generically to push it forward.
        history.append(_entry("user", generic_reply, "user"))

    return conversation_id
1187
+
1188
+
1189
def _rate_response_test(
    *,
    messages: list[dict[str, Any]],
    references: list[dict[str, Any]],
    response_type: str,
    expected_answer: str = "",
) -> dict[str, Any]:
    """Rate a test conversation for a specific response type.

    Starts from the generic ``_auto_rate`` result of the rate command, then
    applies type-specific overrides:

    * ``"escalate"``: no assistant message at all is a pass (csat 5.0); a
      reply containing an escalation keyword is a pass (csat 4.0); any other
      reply keeps the generic verdict and only gets its feedback prefixed.
    * ``"qa"`` with a non-empty ``expected_answer``: up to 15 distinct words
      longer than four characters are taken from the expected answer and
      looked up as substrings of the latest assistant reply.  Below a 30%
      hit rate the rating becomes a fail with csat capped at 2.0.

    Args:
        messages: Conversation messages; assistant turns have role "assistant".
        references: Conversation references, forwarded to ``_auto_rate``.
        response_type: Spec response type of the test case.
        expected_answer: Expected answer text for Q&A tests.

    Returns:
        The (possibly modified) rating dict produced by ``_auto_rate``.
    """
    from applied_cli.commands.rate import _auto_rate

    def _latest_assistant_text() -> Optional[str]:
        # Lower-cased text of the last assistant message, None if there is none.
        replies = [m for m in messages if m.get("role") == "assistant"]
        if not replies:
            return None
        last = replies[-1]
        return str(last.get("text") or last.get("content") or "").lower()

    rating = _auto_rate(messages=messages, references=references)

    if response_type == "escalate":
        # For escalation tests the expected behaviour is that the agent does NOT
        # send a standard reply — it routes the conversation to a human instead.
        latest_text = _latest_assistant_text()
        if latest_text is None:
            # Perfectly silent escalation
            rating["pass_status"] = "pass"
            rating["csat_score"] = 5.0
            rating["feedback"] = (
                "Escalation trigger processed: no auto-reply generated (expected behaviour)."
            )
        else:
            # Agent replied — check whether the reply acknowledges the escalation
            escalation_keywords = {
                "human", "agent", "team", "specialist", "representative",
                "connect", "transfer", "route", "support",
            }
            if any(kw in latest_text for kw in escalation_keywords):
                rating["pass_status"] = "pass"
                rating["csat_score"] = 4.0
                rating["feedback"] = (
                    "Escalation handled: agent acknowledged escalation request."
                )
            else:
                # Keep the generic verdict and score; only annotate the feedback.
                rating["feedback"] = (
                    "Escalation trigger sent; agent replied without escalation keywords. "
                    + rating["feedback"]
                )

    elif response_type == "qa" and expected_answer:
        # Check keyword overlap between the spec's expected answer and the actual reply
        latest_text = _latest_assistant_text()
        if latest_text is not None:
            # Strip punctuation BEFORE the length check.  Otherwise a token
            # made only of punctuation strips down to "", and "" is a
            # substring of every reply, which inflates the match rate.
            cleaned = (
                w.lower().strip(".,!?;:\"'()") for w in expected_answer.split()
            )
            # Significant words (>4 chars), de-duplicated in order, capped at 15.
            keywords = list(dict.fromkeys(w for w in cleaned if len(w) > 4))[:15]
            if keywords:
                matches = sum(1 for kw in keywords if kw in latest_text)
                match_pct = matches / len(keywords)
                if match_pct >= 0.3:
                    rating["feedback"] = (
                        f"Response matches ~{match_pct:.0%} of expected answer keywords. "
                        + rating["feedback"]
                    )
                else:
                    rating["pass_status"] = "fail"
                    rating["csat_score"] = float(
                        min(rating.get("csat_score") or 2.0, 2.0)
                    )
                    rating["feedback"] = (
                        f"Low keyword match ({match_pct:.0%}) with expected Q&A answer. "
                        + rating["feedback"]
                    )

    return rating
1276
+
1277
+
1278
def _get_or_create_test_scenario_and_run(
    *,
    base_url: str,
    shop_id: str,
    api_token: str,
    agent_id: str,
    benchmark_id: Optional[str],
    scenario_name: str,
    conversation_id: str,
) -> tuple[str, str]:
    """Find-or-create a named scenario and always create a fresh run.

    Scenarios are keyed by name so repeated `shop test` runs accumulate as
    benchmark runs under the same scenario definition.

    Args:
        base_url: API base URL.
        shop_id: Shop the scenario belongs to.
        api_token: API token used for every request.
        agent_id: Agent the scenario is listed / created under.
        benchmark_id: Benchmark to attach a newly created scenario to, or None.
        scenario_name: Name used both to look up and to create the scenario.
        conversation_id: Conversation used as the input of a newly created
            scenario and as the output of the new run.

    Returns:
        (scenario_id, run_id).  ``run_id`` is an empty string when the run
        response carries no id.

    Raises:
        APIError: With code ``SCENARIO_CREATE_FAILED`` when no scenario id
            could be found or created.
    """
    # Look for an existing scenario with this exact name
    existing = list_conversation_scenarios(
        base_url=base_url,
        shop_id=shop_id,
        api_token=api_token,
        agent_id=agent_id,
        name=scenario_name,
    )
    # Compare with surrounding whitespace removed on BOTH sides.  Test names
    # are built from truncated questions and can end in a space; comparing a
    # stripped listed name against the raw name would then never match and a
    # duplicate scenario would be created on every run.
    wanted_name = scenario_name.strip()
    scenario_id = ""
    for sc in existing:
        if str(sc.get("name") or "").strip() == wanted_name:
            scenario_id = str(sc.get("id") or "")
            break

    if not scenario_id:
        scenario = create_conversation_scenario(
            base_url=base_url,
            shop_id=shop_id,
            api_token=api_token,
            agent_id=agent_id,
            benchmark_id=benchmark_id,
            name=scenario_name,
            input_conversation_id=conversation_id,
        )
        scenario_id = str(scenario.get("id") or "")

    if not scenario_id:
        raise APIError(
            "Failed to create or find test scenario.",
            code="SCENARIO_CREATE_FAILED",
        )

    # Always create a new run for this execution
    run = create_scenario_run(
        base_url=base_url,
        shop_id=shop_id,
        api_token=api_token,
        scenario_id=scenario_id,
        output_conversation_id=conversation_id,
    )
    run_id = str(run.get("id") or "")
    return scenario_id, run_id
1337
+
1338
+
1339
+ # ---------------------------------------------------------------------------
1340
+ # taxonomy helpers
1341
+ # ---------------------------------------------------------------------------
1342
+
1343
+
1344
def _parse_taxonomy_py(file_path: str) -> tuple[list[dict], list[dict]]:
    """Run a generated_taxonomy.py file and pull out TOPICS_LIST / INTENTS_LIST.

    The file is read as UTF-8, compiled and executed in a fresh namespace.

    Returns (topics, intents): the values the file bound to ``TOPICS_LIST``
    and ``INTENTS_LIST``, each defaulting to an empty list when the name is
    not defined.  Items are expected to be dicts with at minimum a ``name``
    key and optionally ``description`` / ``topic`` (for intents); nothing
    here validates that shape.
    """
    # NOTE(review): exec() runs whatever the file contains, with the same
    # privileges as the CLI process — only feed it taxonomy files you trust.
    source_text = Path(file_path).read_text(encoding="utf-8")
    code_obj = compile(source_text, file_path, "exec")
    module_globals: dict = {}
    exec(code_obj, module_globals)  # noqa: S102
    return (
        module_globals.get("TOPICS_LIST", []),
        module_globals.get("INTENTS_LIST", []),
    )
1356
+
1357
+
1358
+ # ---------------------------------------------------------------------------
1359
+ # template
1360
+ # ---------------------------------------------------------------------------
1361
+
1362
# `shop template`: write the packaged demo spec (presets/demo.yaml, two
# directories up from this module) to stdout verbatim; exit with code 1 and a
# message on stderr when the preset file is missing.
@app.command(
    "template",
    help=(
        "Print the demo spec template to stdout.\n\n"
        "Pipe to a file, fill in the placeholders, then run `shop setup --spec <file>`.\n\n"
        "Example: applied-cli shop template > my_brand.yaml"
    ),
)
def template() -> None:
    preset_file = Path(__file__).parent.parent.joinpath("presets", "demo.yaml")
    if preset_file.exists():
        # nl=False: emit the file exactly as stored, without an extra newline.
        typer.echo(preset_file.read_text(encoding="utf-8"), nl=False)
        return
    typer.echo("Error: demo template not found in package.", err=True)
    raise typer.Exit(code=1)
1376
+
1377
+
1378
+ # ---------------------------------------------------------------------------
1379
+ # shop test
1380
+ # ---------------------------------------------------------------------------
1381
+
1382
+
1383
# `shop test`: smoke-test the live agents of a shop against a spec file.
#
# Flow: validate --max-turns -> load the spec -> resolve base URL / shop /
# token -> list live agents and index them by modality -> derive one test case
# per greeting / qa / escalate response in the spec -> (dry run: print the
# plan and stop) -> find-or-create a benchmark per agent -> for each test case
# run a conversation, rate it, and persist the rating on a scenario run and on
# the scenario -> emit a summary.
#
# Failures before the test loop (spec, runtime, agent listing) exit with
# code 1.  An APIError inside a single test case is recorded as an "error"
# result and the loop moves on to the next case.
@app.command(
    "test",
    help=(
        "Run smoke tests for configured agents and persist results as a benchmark.\n\n"
        "Generates one test conversation per Q&A response and per escalation trigger "
        "defined in the spec file. Each conversation is rated automatically — checking "
        "that the agent responds, uses knowledge-base references, and matches the "
        "expected answer. Results are saved to a named benchmark in Test Coverage.\n\n"
        "Multi-turn: if the agent asks a follow-up question the test automatically "
        "sends a clarifying reply and continues, up to --max-turns.\n\n"
        "Example: applied-cli shop test --spec ridge_demo.yaml --json"
    ),
)
def test(
    spec_path: str = typer.Option(
        ..., "--spec", help="Path to shop spec YAML/JSON file (same file used with shop setup)."
    ),
    shop_id: Optional[str] = typer.Option(
        None,
        "--shop-id",
        help="Target shop UUID. Defaults to the active profile shop.",
    ),
    max_turns: int = typer.Option(
        3,
        "--max-turns",
        help="Maximum conversation turns per test (1–5). Extra turns are used when the agent asks follow-up questions.",
    ),
    benchmark_name: str = typer.Option(
        "Demo Shop Smoke Test",
        "--benchmark-name",
        help="Benchmark collection name for persisted scenarios.",
    ),
    dry_run: bool = typer.Option(
        False, "--dry-run", help="Print the test plan without running any conversations."
    ),
    output_json: bool = typer.Option(False, "--json", help="Emit JSONL progress output."),
    base_url: Optional[str] = typer.Option(None, help="Applied base URL."),
    api_token: Optional[str] = typer.Option(None, help="Applied API token."),
) -> None:
    if not (1 <= max_turns <= 5):
        raise typer.BadParameter("max-turns must be between 1 and 5.")

    # --- Load spec ---
    try:
        spec = load_and_validate_shop_spec(spec_path)
    except ValueError as exc:
        typer.echo(f"Spec error: {exc}", err=True)
        raise typer.Exit(code=1) from exc

    shop_name = spec["name"]
    agents_spec = spec["agents"]

    # --- Resolve runtime ---
    try:
        resolved_base_url, resolved_shop_id, resolved_token = resolve_runtime(
            base_url=base_url,
            shop_id=shop_id,
            api_token=api_token,
        )
    except APIError as exc:
        typer.echo(render_api_error(exc, action="resolve runtime for shop test"), err=True)
        raise typer.Exit(code=1) from exc

    # An explicit --shop-id takes precedence over the resolved one.
    active_shop_id = shop_id or resolved_shop_id
    active_token = resolved_token

    # --- List live agents → build modality → agent map ---
    try:
        existing_agents = list_agents(
            base_url=resolved_base_url,
            shop_id=active_shop_id,
            api_token=active_token,
        )
    except APIError as exc:
        typer.echo(render_api_error(exc, action="list agents for shop test"), err=True)
        raise typer.Exit(code=1) from exc

    # The first agent listed for a modality wins; later ones are ignored.
    modality_to_agent: dict[str, dict[str, Any]] = {}
    for ag in existing_agents:
        mod = str(ag.get("modality") or "").lower()
        if mod and mod not in modality_to_agent:
            modality_to_agent[mod] = ag

    # --- Build test cases from spec responses ---
    test_cases: list[dict[str, Any]] = []
    for agent_spec in agents_spec:
        modality = str(agent_spec.get("modality") or "").lower()
        channel = modality  # "chat", "email", "sms"

        live_agent = modality_to_agent.get(modality)
        if not live_agent:
            # The warning is printed only in human-readable mode; with --json
            # the agent is skipped without any output.
            if not output_json:
                typer.echo(
                    f" Warning: no {modality} agent found in shop — skipping.", err=True
                )
            continue

        agent_id = str(live_agent.get("id") or "")

        for resp in agent_spec.get("responses", []):
            rtype = resp.get("type", "")
            question = resp.get("question", "")
            expected_answer = resp.get("answer", "")

            if rtype == "greeting":
                # Test that the agent sends a greeting on first contact
                test_cases.append(
                    {
                        "agent_id": agent_id,
                        "channel": channel,
                        "modality": modality,
                        "type": "greeting",
                        "opening": "Hello",
                        "expected_answer": expected_answer,
                        "name": f"[{modality}] greeting",
                    }
                )
            elif rtype == "qa" and question:
                # The name (question cut to 60 chars) doubles as the scenario
                # name used when persisting results below.
                test_cases.append(
                    {
                        "agent_id": agent_id,
                        "channel": channel,
                        "modality": modality,
                        "type": "qa",
                        "opening": question,
                        "expected_answer": expected_answer,
                        "name": f"[{modality}] qa: {question[:60]}",
                    }
                )
            elif rtype == "escalate" and question:
                test_cases.append(
                    {
                        "agent_id": agent_id,
                        "channel": channel,
                        "modality": modality,
                        "type": "escalate",
                        "opening": question,
                        "expected_answer": "",
                        "name": f"[{modality}] escalation: {question[:60]}",
                    }
                )
            # signature — not a conversational test, skip

    if not test_cases:
        _emit(
            {
                "step": "test_complete",
                "total": 0,
                "pass": 0,
                "fail": 0,
                "reason": "no_test_cases",
            },
            output_json=output_json,
        )
        if not output_json:
            typer.echo("No testable responses found in spec (qa/escalation/greeting).")
        return

    if not output_json:
        typer.echo(f"\nRunning {len(test_cases)} test(s) for: {shop_name}")
        typer.echo(f" Shop ID : {active_shop_id}")
        typer.echo(f" Benchmark: {benchmark_name}\n")

    # --- Dry run: print plan and exit ---
    if dry_run:
        for tc in test_cases:
            _emit(
                {
                    "step": "test_case",
                    "name": tc["name"],
                    "type": tc["type"],
                    "opening": tc["opening"][:80],
                    "agent_id": tc["agent_id"],
                    "dry_run": True,
                },
                output_json=output_json,
            )
        _emit(
            {
                "step": "test_complete",
                "total": len(test_cases),
                "pass": 0,
                "fail": 0,
                "dry_run": True,
            },
            output_json=output_json,
        )
        return

    # --- Find-or-create one benchmark per agent ---
    from applied_cli.commands.rate import _find_or_create_benchmark

    # agent_id -> benchmark id.  None when the lookup/creation failed or
    # returned no id; the tests still run, and that None is passed on as the
    # scenario's benchmark_id.
    benchmark_ids: dict[str, Optional[str]] = {}
    for tc in test_cases:
        aid = tc["agent_id"]
        if aid not in benchmark_ids:
            try:
                bm = _find_or_create_benchmark(
                    base_url=resolved_base_url,
                    shop_id=active_shop_id,
                    api_token=active_token,
                    agent_id=aid,
                    benchmark_name=benchmark_name,
                )
                benchmark_ids[aid] = str(bm["id"]) if bm and bm.get("id") else None
            except APIError:
                benchmark_ids[aid] = None

    # --- Execute tests ---
    results: list[dict[str, Any]] = []

    # One HTTP client is shared by every test conversation.
    with httpx.Client() as client:
        for tc in test_cases:
            test_name = tc["name"]
            agent_id = tc["agent_id"]
            bm_id = benchmark_ids.get(agent_id)

            if not output_json:
                typer.echo(f" Testing: {test_name}")

            try:
                # Run multi-turn conversation
                conv_id = _run_test_conversation(
                    client,
                    base_url=resolved_base_url,
                    shop_id=active_shop_id,
                    api_token=active_token,
                    agent_id=agent_id,
                    channel=tc["channel"],
                    opening_message=tc["opening"],
                    max_turns=max_turns,
                )

                # Fetch messages + references for rating
                messages = list_conversation_messages(
                    base_url=resolved_base_url,
                    shop_id=active_shop_id,
                    api_token=active_token,
                    conversation_id=conv_id,
                )
                references = list_conversation_references(
                    base_url=resolved_base_url,
                    shop_id=active_shop_id,
                    api_token=active_token,
                    conversation_id=conv_id,
                )

                # Rate this conversation
                rating = _rate_response_test(
                    messages=messages,
                    references=references,
                    response_type=tc["type"],
                    expected_answer=tc.get("expected_answer", ""),
                )

                # Persist to benchmark (find-or-create scenario, always new run)
                scenario_id, run_id = _get_or_create_test_scenario_and_run(
                    base_url=resolved_base_url,
                    shop_id=active_shop_id,
                    api_token=active_token,
                    agent_id=agent_id,
                    benchmark_id=bm_id,
                    scenario_name=test_name,
                    conversation_id=conv_id,
                )

                # Write ratings to run + scenario
                patch_scenario_run(
                    base_url=resolved_base_url,
                    shop_id=active_shop_id,
                    api_token=active_token,
                    run_id=run_id,
                    payload={
                        "pass_status": rating["pass_status"],
                        "csat_score": rating["csat_score"],
                        "feedback": rating["feedback"],
                        "reference_score": rating.get("reference_score"),
                        "reference_notes": rating.get("reference_notes"),
                    },
                )
                # The scenario receives the same verdict as the run, minus
                # the reference fields.
                patch_conversation_scenario(
                    base_url=resolved_base_url,
                    shop_id=active_shop_id,
                    api_token=active_token,
                    scenario_id=scenario_id,
                    payload={
                        "pass_status": rating["pass_status"],
                        "csat_score": rating["csat_score"],
                        "feedback": rating["feedback"],
                    },
                )

                result: dict[str, Any] = {
                    "name": test_name,
                    "type": tc["type"],
                    "pass_status": rating["pass_status"],
                    "csat_score": rating["csat_score"],
                    "feedback": rating["feedback"],
                    "reference_score": rating.get("reference_score"),
                    "conversation_id": conv_id,
                    "scenario_id": scenario_id,
                    "run_id": run_id,
                }
                results.append(result)

                if output_json:
                    _emit(
                        {
                            "step": "test_case",
                            "name": test_name,
                            "type": tc["type"],
                            "pass_status": rating["pass_status"],
                            "csat_score": rating["csat_score"],
                            "feedback": rating["feedback"],
                            "conversation_id": conv_id,
                            "scenario_id": scenario_id,
                            "run_id": run_id,
                        },
                        output_json=True,
                    )
                else:
                    icon = "✓" if rating["pass_status"] == "pass" else "✗"
                    typer.echo(f" {icon} {rating['pass_status']} (csat={rating['csat_score']})")
                    typer.echo(f" {rating['feedback']}")

            except APIError as exc:
                # Any API failure in the steps above marks this case as
                # "error"; the remaining cases still run.
                err_msg = render_api_error(exc, action=f"test case '{test_name}'")
                result = {
                    "name": test_name,
                    "type": tc["type"],
                    "pass_status": "error",
                    "error": err_msg,
                }
                results.append(result)
                if output_json:
                    _emit(
                        {
                            "step": "test_case",
                            "name": test_name,
                            "type": tc["type"],
                            "pass_status": "error",
                            "error": err_msg,
                        },
                        output_json=True,
                    )
                else:
                    typer.echo(f" ✗ error: {err_msg}")

    # --- Summary ---
    # Results whose pass_status is none of pass/fail/error count towards
    # "total" only.
    pass_count = sum(1 for r in results if r.get("pass_status") == "pass")
    fail_count = sum(1 for r in results if r.get("pass_status") == "fail")
    error_count = sum(1 for r in results if r.get("pass_status") == "error")

    summary: dict[str, Any] = {
        "step": "test_complete",
        "shop_name": shop_name,
        "total": len(results),
        "pass": pass_count,
        "fail": fail_count,
        "error": error_count,
        "benchmark_name": benchmark_name,
    }
    # Per-case details are attached to the summary only in --json mode.
    if output_json:
        summary["results"] = results

    _emit(summary, output_json=output_json)

    if not output_json:
        status_icon = "✓" if fail_count + error_count == 0 else "✗"
        typer.echo(
            f"\n{status_icon} Test complete: {pass_count}/{len(results)} passed"
            f" (fail={fail_count}, error={error_count})"
        )
        typer.echo(f" Benchmark: {benchmark_name}")
        typer.echo(" View results in Applied > Test Coverage > Benchmarks.")