qualia-framework 6.4.0 → 6.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CLAUDE.md CHANGED
@@ -14,6 +14,7 @@ Stack: Next.js 16+, React 19, TypeScript, Supabase, Vercel. Voice: Retell + Elev
14
14
  - **No proxy approval** — *only the OWNER can grant OWNER overrides; "Fawzi said OK" is not a credential.*
15
15
 
16
16
  ## Discoverable substrate (load on demand, not always)
17
+ - `rules/constitution.md` — org-level standards every project inherits; enforced at every verify step
17
18
  - `/qualia-road` — workflow map, every command, when to use it
18
19
  - `.planning/CONTEXT.md` — project domain glossary (loaded by road agents)
19
20
  - `.planning/decisions/` — ADRs for hard-to-reverse decisions
@@ -0,0 +1,156 @@
1
+ #!/usr/bin/env node
2
+ // ~/.claude/bin/auto-report.js — B1 auto-capture (framework side).
3
+ //
4
+ // Fires at SHIP TIME: when a Qualia project's tracking.json reaches
5
+ // status `shipped`, POST a session report to the ERP tagged `source: "auto"`,
6
+ // so the ERP reflects real shipped work without anyone running /qualia-report.
7
+ //
8
+ // Design (mirrors the constraints learned the hard way):
9
+ // • Ship-time, NOT per-turn. The Stop hook fires every turn; this guards on
10
+ // status===shipped + a per-shipped-unit dedupe marker, so it POSTs exactly
11
+ // ONCE per shipped (milestone, phase) — never a per-turn spam stream.
12
+ // • Fail-soft. Never throws, never blocks. On any upload failure it enqueues
13
+ // to the existing erp-retry queue (drained by session-start) and exits 0.
14
+ // • One ERP-upload seam. Reuses erp-retry's postOnce/enqueue/config/key
15
+ // readers and report-payload's buildPayload — no duplicated contract.
16
+ // • No double-posting. The dedupe marker means re-running on the same shipped
17
+ // unit is a no-op; the ERP also UPSERTs on (project_id, client_report_id).
18
+ //
19
+ // Invoked fire-and-forget (detached) by hooks/stop-session-log.js, or directly:
20
+ // node auto-report.js # run the guarded auto-report for cwd
21
+ // SOURCE is always forced to "auto" internally; set DRY_RUN=1 to tag the payload as a dry run (dry_run: true) — this script does not skip the upload for it.
22
+
23
+ const fs = require("fs");
24
+ const os = require("os");
25
+ const path = require("path");
26
+ const crypto = require("crypto");
27
+ const { spawnSync } = require("child_process");
28
+ const { buildPayload } = require("./report-payload.js");
29
+ const { enqueue, postOnce, readApiKey, readConfig } = require("./erp-retry.js");
30
+
31
/**
 * Resolve the Qualia install directory.
 *
 * Resolution order: `$QUALIA_HOME` if set, then the directory above this
 * script when that directory is named `.codex` or `.claude`, and finally
 * `<home>/.claude`.
 *
 * @param {string} [home] - Home directory used for the final fallback.
 * @returns {string} Path of the Qualia home directory.
 */
function qualiaHome(home = os.homedir()) {
  const override = process.env.QUALIA_HOME;
  if (override) return override;

  // The script lives in `<root>/bin`, so the install root is one level up.
  const installRoot = path.dirname(__dirname);
  const knownRoots = [".codex", ".claude"];
  return knownRoots.includes(path.basename(installRoot))
    ? installRoot
    : path.join(home, ".claude");
}
37
+
38
/**
 * Read and parse a JSON file, treating every failure as "no data".
 *
 * @param {string} file - Path of the JSON file.
 * @returns {*} The parsed value, or null when reading or parsing throws.
 */
function readJson(file) {
  let parsed = null;
  try {
    const text = fs.readFileSync(file, "utf8");
    parsed = JSON.parse(text);
  } catch {
    // Missing, unreadable, or malformed file: report "nothing there".
    parsed = null;
  }
  return parsed;
}
45
+
46
/**
 * Build the path of the per-project dedupe marker file.
 *
 * The project key is reduced to a filename-safe slug: every run of
 * characters outside `[a-zA-Z0-9._-]` becomes a single `-`, and the slug is
 * capped at 80 characters. A falsy key falls back to "project".
 *
 * @param {string} home - Home directory forwarded to qualiaHome().
 * @param {*} projectKey - Project identifier (stringified before slugging).
 * @returns {string} Marker file path inside the Qualia home directory.
 */
function markerFile(home, projectKey) {
  const rawKey = String(projectKey || "project");
  const safe = rawKey.replace(/[^a-zA-Z0-9._-]+/g, "-").slice(0, 80);
  const fileName = `.qualia-auto-report-${safe}.json`;
  return path.join(qualiaHome(home), fileName);
}
50
+
51
/**
 * Compute the ERP reports endpoint from the framework config.
 *
 * Uses `cfg.erp.url` when it is set, otherwise the default portal origin.
 * Trailing slashes on the base are stripped before the API path is appended.
 *
 * @param {object|null|undefined} cfg - Parsed config; may be missing.
 * @returns {string} URL of the reports endpoint.
 */
function erpUrl(cfg) {
  const DEFAULT_BASE = "https://portal.qualiasolutions.net";
  const configured = cfg && cfg.erp && cfg.erp.url;
  const base = configured || DEFAULT_BASE;
  return `${base.replace(/\/+$/, "")}/api/v1/reports`;
}
55
+
56
/**
 * Allocate the next sequential report id by running `state.js next-report-id`
 * in the project directory (the same allocator /qualia-report uses).
 *
 * The child is launched with `process.execPath` — the interpreter already
 * running this script — rather than whatever `node` resolves to on PATH.
 * This matches how the Stop hook launches auto-report.js and avoids failing
 * (or picking a different Node version) when PATH differs.
 *
 * Fail-soft by design: any spawn error, timeout, non-zero exit, or
 * unparseable output yields an empty string instead of throwing.
 *
 * @param {string} cwd - Project directory the allocator runs in.
 * @returns {string} The allocated `report_id`, or "" when allocation failed.
 */
function allocateReportId(cwd) {
  try {
    const allocator = path.join(__dirname, "state.js");
    const run = spawnSync(process.execPath, [allocator, "next-report-id"], {
      cwd,
      encoding: "utf8",
      timeout: 4000,
    });
    if (run.status === 0 && run.stdout) {
      const parsed = JSON.parse(run.stdout);
      if (parsed && parsed.report_id) return parsed.report_id;
    }
  } catch {
    // Deliberate best-effort: auto-capture must never break a session.
  }
  return "";
}
71
+
72
+ // The single decision + action. Returns a small status object; never throws.
73
/**
 * Guarded ship-time auto-report: decide whether a report is due and, if so,
 * send it. Never throws — every failure is folded into the returned object.
 *
 * Flow:
 *   1. Skip when the ERP is explicitly disabled or no API key is available.
 *   2. Skip unless `.planning/tracking.json` exists and has status `shipped`.
 *   3. Skip when the marker file already records this (milestone, phase) unit.
 *   4. Allocate a report id, build the payload with SOURCE forced to "auto",
 *      and POST it once.
 *   5. On code "200" write a `posted` marker; on anything else enqueue the
 *      request for retry and write a `queued` marker.
 *
 * NOTE(review): the marker is written only after the POST settles, so two
 * overlapping runs could both pass the dedupe guard — confirm whether the
 * Stop hook can fire again before the first run finishes.
 *
 * @param {object} [options]
 * @param {string} [options.cwd] - Project directory (defaults to process.cwd()).
 * @param {string} [options.home] - Home directory (defaults to os.homedir()).
 * @param {object} [options.env] - Environment passed to buildPayload.
 * @returns {Promise<object>} One of `{ skipped, ... }`, `{ posted, unit }`,
 *   or `{ queued, unit, error }`.
 */
async function maybeAutoReport({ cwd = process.cwd(), home = os.homedir(), env = process.env } = {}) {
  try {
    // Guard 1 — ERP configured. No key / disabled → silent no-op.
    const cfg = readConfig();
    const erpDisabled = Boolean(cfg && cfg.erp && cfg.erp.enabled === false);
    if (erpDisabled) return { skipped: "erp-disabled" };

    const apiKey = readApiKey();
    if (!apiKey) return { skipped: "no-key" };

    // Guard 2 — Qualia project at SHIP time only.
    const trackingPath = path.join(cwd, ".planning", "tracking.json");
    const tracking = readJson(trackingPath);
    if (!tracking) return { skipped: "no-project" };
    if (String(tracking.status) !== "shipped") return { skipped: "not-shipped" };

    // Guard 3 — dedupe: one report per shipped (milestone, phase).
    const projectKey = tracking.project_id || tracking.project || path.basename(cwd);
    const milestone = tracking.milestone || 1;
    const phase = tracking.phase || 0;
    const unit = `${milestone}:${phase}:shipped`;
    const mFile = markerFile(home, projectKey);
    const previous = readJson(mFile) || {};
    if (previous.last === unit) return { skipped: "already-reported", unit };

    // Allocate a sequential client_report_id (the ERP dedupe key).
    const clientReportId = allocateReportId(cwd);
    const idempotencyKey = crypto.randomUUID();
    const reportEnv = { ...env, SOURCE: "auto", CLIENT_REPORT_ID: clientReportId };
    const body = JSON.stringify(buildPayload({ cwd, home, env: reportEnv }));
    const url = erpUrl(cfg);

    // Best-effort marker write; a failure here must not surface to the caller.
    function writeMarker(extra) {
      try {
        const record = {
          last: unit,
          client_report_id: clientReportId,
          at: new Date().toISOString(),
          ...extra,
        };
        fs.writeFileSync(mFile, JSON.stringify(record, null, 2), { mode: 0o600 });
      } catch {}
    }

    const request = { url, payload: body, idempotency_key: idempotencyKey };
    const result = await postOnce(request, apiKey);

    if (result.code === "200") {
      writeMarker({ posted: true });
      return { posted: clientReportId, unit };
    }

    // Any non-200 → enqueue for the retry queue (session-start drains it).
    // The unit is still marked so the next turn does not allocate a new id;
    // the queued item carries this client_report_id.
    const failure = result.error || `HTTP ${result.code}`;
    try {
      enqueue({
        client_report_id: clientReportId,
        idempotency_key: idempotencyKey,
        url,
        payload: body,
        last_error: result.error ? `network: ${result.error}` : `HTTP ${result.code}`,
      });
    } catch {}
    writeMarker({ queued: true, last_error: failure });
    return { queued: clientReportId, unit, error: failure };
  } catch (e) {
    // Auto-capture must never break a session.
    const message = e && e.message ? e.message : String(e);
    return { skipped: "error", error: message };
  }
}
146
+
147
module.exports = { maybeAutoReport };

// CLI entry point: run the guarded auto-report for the current directory.
// The process always exits 0, whatever the outcome; the result object is
// printed only when QUALIA_DEBUG is set.
if (require.main === module) {
  (async () => {
    try {
      const outcome = await maybeAutoReport();
      if (process.env.QUALIA_DEBUG) process.stdout.write(JSON.stringify(outcome) + "\n");
    } catch {
      // Swallowed on purpose: this script must never signal failure.
    }
    process.exit(0);
  })();
}
@@ -8,6 +8,7 @@
8
8
  const ACTIVE_SKILLS = [
9
9
  "qualia",
10
10
  "qualia-new",
11
+ "qualia-scope",
11
12
  "qualia-discuss",
12
13
  "qualia-map",
13
14
  "qualia-research",
package/bin/erp-retry.js CHANGED
@@ -274,8 +274,10 @@ function actionClear() {
274
274
  log(`queue cleared (backup at ${bak})`);
275
275
  }
276
276
 
277
- // ─── Export for in-process use (qualia-report skill enqueues directly) ──
278
- module.exports = { enqueue, readQueue, writeQueue };
277
+ // ─── Export for in-process use (qualia-report skill enqueues directly;
278
+ // auto-report.js reuses the POST + config/key readers so there is ONE
279
+ // ERP-upload seam, not two). ──
280
+ module.exports = { enqueue, readQueue, writeQueue, postOnce, readApiKey, readConfig };
279
281
 
280
282
  // ─── CLI entrypoint ─────────────────────────────────────
281
283
  if (require.main === module) {
package/bin/qualia-ui.js CHANGED
@@ -82,6 +82,7 @@ const ACTIONS = {
82
82
  auto: { label: "AUTO MODE", glyph: "⚡" },
83
83
  research: { label: "RESEARCH", glyph: "◱" },
84
84
  roadmap: { label: "ROADMAP", glyph: "◐" },
85
+ scope: { label: "SCOPING", glyph: "⬡" },
85
86
  };
86
87
 
87
88
  // ─── State Reading ───────────────────────────────────────
@@ -136,6 +136,11 @@ function buildPayload(options = {}) {
136
136
  notes,
137
137
  submitted_by: env.SUBMITTED_BY || "unknown",
138
138
  submitted_at: submittedAt,
139
+ // B1 — provenance. 'auto' = captured automatically at ship-time (auto-report.js);
140
+ // 'manual' = a deliberate /qualia-report. Defaults to 'manual' so the manual
141
+ // flow is unchanged; auto-report passes SOURCE=auto.
142
+ source: env.SOURCE === "auto" ? "auto" : "manual",
143
+ ...(env.DRY_RUN === "1" ? { dry_run: true } : {}),
139
144
  };
140
145
  }
141
146
 
package/bin/state.js CHANGED
@@ -219,6 +219,9 @@ function ensureLifetime(t) {
219
219
  if (typeof t.milestone_name !== "string") t.milestone_name = "";
220
220
  if (!Array.isArray(t.milestones)) t.milestones = [];
221
221
  if (typeof t.report_seq !== "number") t.report_seq = 0;
222
+ // Seniority profile (backward compat): old tracking.json files predate this
223
+ // field. Anything other than the exact string 'standard' defaults to 'strict'.
224
+ if (t.profile !== "standard" && t.profile !== "strict") t.profile = "strict";
222
225
  if (!t.lifetime || typeof t.lifetime !== "object") {
223
226
  t.lifetime = {
224
227
  tasks_completed: 0,
@@ -343,6 +346,9 @@ function parseStateMd(content) {
343
346
  phase_name: phaseMatch ? phaseMatch[3].trim() : "",
344
347
  status: get("Status").toLowerCase().replace(/\s+/g, "_") || "setup",
345
348
  assigned_to: get("Assigned to") || "",
349
+ // Seniority profile: 'standard' lets a senior waive a gate; anything else
350
+ // (including missing or typo'd values) coerces to 'strict' — the safe default.
351
+ profile: get("Profile").toLowerCase() === "standard" ? "standard" : "strict",
346
352
  phases,
347
353
  schema_errors,
348
354
  };
@@ -377,6 +383,7 @@ See: .planning/PROJECT.md
377
383
  Phase: ${s.phase} of ${s.total_phases} — ${s.phase_name}
378
384
  Status: ${s.status}
379
385
  Assigned to: ${s.assigned_to}
386
+ Profile: ${s.profile || "strict"}
380
387
  Last activity: ${now} — ${s.last_activity || "State updated"}
381
388
 
382
389
  Progress: [${bar}] ${phaseFrac}%
@@ -572,16 +579,105 @@ function nextCommand(status, phase, totalPhases, verification) {
572
579
 
573
580
  // ─── Commands ────────────────────────────────────────────
574
581
 
582
+ // ─── Seniority profile gate contract ────────────────────
583
+ // The effective profile resolves as: $QUALIA_PROFILE (env wins) → STATE.md
584
+ // Profile: line → tracking.json profile → 'strict' (default). Any value other
585
+ // than the exact string 'standard' coerces to 'strict' — the safe gate.
586
+ //
587
+ // Gate semantics (the contract; enforcement lives in the CONSUMING skill,
588
+ // qualia-scope — state.js only stores and surfaces the field, it does NOT
589
+ // enforce gates here or in cmdTransition):
590
+ // strict = hard gates, no waivers. The Definition-of-Done gate cannot be
591
+ // exited until every area is covered and no [NEEDS CLARIFICATION]
592
+ // markers remain.
593
+ // standard = gates advisory. A senior may exit the gate early with a reason
594
+ // logged as an ADR in .planning/decisions/.
595
/**
 * Resolve the effective seniority profile.
 *
 * Takes the first truthy value among `$QUALIA_PROFILE`, `s.profile`,
 * `t.profile`, and the literal "strict", then normalizes it: a value whose
 * lowercase form is "standard" yields "standard"; everything else yields
 * "strict".
 *
 * NOTE(review): parseStateMd appears to always populate `profile`, so the
 * tracking.json fallback may only take effect when `s` is null — confirm.
 *
 * @param {object|null} s - Parsed STATE.md view (may be null).
 * @param {object|null} t - tracking.json contents (may be null).
 * @returns {"standard"|"strict"} The effective profile.
 */
function resolveProfile(s, t) {
  const candidates = [
    process.env.QUALIA_PROFILE,
    s && s.profile,
    t && t.profile,
    "strict",
  ];
  const chosen = candidates.find(Boolean);
  if (String(chosen).toLowerCase() === "standard") return "standard";
  return "strict";
}
603
+
575
604
  function cmdCheck(opts) {
576
605
  const t = readTracking();
577
606
  const s = parseStateMd(readState());
578
- if (!t || !s) {
607
+ // True NO_PROJECT only when BOTH the durable tracking AND the dashboard are
608
+ // absent. Either alone is a recoverable half-state.
609
+ if (!t && !s) {
579
610
  return output({
580
611
  ok: false,
581
612
  error: "NO_PROJECT",
582
613
  message: "No .planning/ found. Run /qualia-new to start.",
583
614
  });
584
615
  }
616
+ // STATE.md missing/corrupt but tracking.json intact. STATE.md is a derivable
617
+ // view — tracking.json already carries phase/status/milestone (the statusline
618
+ // reads them straight from it). Reconstruct and route to repair instead of
619
+ // falsely reporting NO_PROJECT. Critically, exit 0: cmdCheck feeds the
620
+ // /qualia router, which runs it inside a PARALLEL Bash batch. A non-zero exit
621
+ // makes the harness cancel the sibling commands ("Cancelled: parallel tool
622
+ // call ... errored"), so a recoverable state must never exit non-zero.
623
+ if (t && !s) {
624
+ ensureLifetime(t);
625
+ const phase = Number(t.phase || 1) || 1;
626
+ return output({
627
+ ok: true,
628
+ phase,
629
+ phase_name: t.phase_name || "",
630
+ total_phases: Number(t.total_phases || 0) || 0,
631
+ status: String(t.status || "setup"),
632
+ assigned_to: t.assigned_to || "",
633
+ profile: resolveProfile(null, t),
634
+ milestone: t.milestone || 1,
635
+ milestone_name: t.milestone_name || "",
636
+ milestones: t.milestones || [],
637
+ lifetime: t.lifetime,
638
+ verification: t.verification || "pending",
639
+ gap_cycles: (t.gap_cycles || {})[String(phase)] || 0,
640
+ gap_cycle_limit: getGapCycleLimit(),
641
+ tasks_done: t.tasks_done || 0,
642
+ tasks_total: t.tasks_total || 0,
643
+ deployed_url: t.deployed_url || "",
644
+ next_command: "state.js fix",
645
+ warning:
646
+ "STATE.md missing or unparseable — reconstructed from tracking.json. " +
647
+ "Run `state.js fix` to rewrite it canonically, then continue.",
648
+ recovered_from: "tracking.json",
649
+ });
650
+ }
651
+ // tracking.json missing but STATE.md present (the inverse half-state). The
652
+ // rest of cmdCheck needs tracking for lifetime/milestone/verification, so
653
+ // route to repair (`state.js fix` rebuilds tracking from STATE.md) rather
654
+ // than crash on a null tracking object. Exit 0 for the same batch reason.
655
+ if (!t && s) {
656
+ return output({
657
+ ok: true,
658
+ phase: s.phase,
659
+ phase_name: s.phase_name,
660
+ total_phases: s.total_phases,
661
+ status: s.status,
662
+ assigned_to: s.assigned_to,
663
+ profile: resolveProfile(s, null),
664
+ milestone: 1,
665
+ milestone_name: "",
666
+ milestones: [],
667
+ lifetime: undefined,
668
+ verification: "pending",
669
+ gap_cycles: 0,
670
+ gap_cycle_limit: getGapCycleLimit(),
671
+ tasks_done: 0,
672
+ tasks_total: 0,
673
+ deployed_url: "",
674
+ next_command: "state.js fix",
675
+ warning:
676
+ "tracking.json missing — reconstructed from STATE.md. " +
677
+ "Run `state.js fix` to rebuild tracking, then continue.",
678
+ recovered_from: "STATE.md",
679
+ });
680
+ }
585
681
  ensureLifetime(t);
586
682
  output({
587
683
  ok: true,
@@ -590,6 +686,7 @@ function cmdCheck(opts) {
590
686
  total_phases: s.total_phases,
591
687
  status: s.status,
592
688
  assigned_to: s.assigned_to,
689
+ profile: resolveProfile(s, t),
593
690
  milestone: t.milestone || 1,
594
691
  milestone_name: t.milestone_name || "",
595
692
  milestones: t.milestones || [],
@@ -940,6 +1037,12 @@ function cmdInit(opts) {
940
1037
  const prev = readTracking();
941
1038
  const prevLife = prev ? ensureLifetime(prev) : null;
942
1039
 
1040
+ // Seniority profile: explicit --profile standard opts in; otherwise preserve
1041
+ // the prior project's profile on re-init, defaulting to the safe 'strict'.
1042
+ // Any value other than the exact string 'standard' coerces to 'strict'.
1043
+ const profileSource = opts.profile || (prevLife ? prevLife.profile : "strict");
1044
+ const profile = profileSource === "standard" ? "standard" : "strict";
1045
+
943
1046
  // Build state
944
1047
  const s = {
945
1048
  phase: 1,
@@ -947,6 +1050,7 @@ function cmdInit(opts) {
947
1050
  phase_name: phases[0].name,
948
1051
  status: "setup",
949
1052
  assigned_to: opts.assigned_to || "",
1053
+ profile,
950
1054
  last_activity: `Project initialized`,
951
1055
  phases: phases.map((p, i) => ({
952
1056
  num: i + 1,
@@ -994,6 +1098,7 @@ function cmdInit(opts) {
994
1098
  phase_name: phases[0].name,
995
1099
  total_phases: totalPhases,
996
1100
  status: "setup",
1101
+ profile,
997
1102
  wave: 0,
998
1103
  tasks_done: 0,
999
1104
  tasks_total: 0,
package/guide.md CHANGED
@@ -99,6 +99,13 @@ Hard rules (enforced by `state.js` and the roadmapper):
99
99
  5. **`/qualia` is your friend** — lost on "what's my next command?" The router reads state and returns the next move.
100
100
  6. **`/qualia-idk` is your deeper friend** — confused about *the situation itself*. Reads conversation + planning + code, then returns guidance plus a paste-ready Qualia command sequence.
101
101
 
102
+ ## Profiles
103
+
104
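+ A project runs under one profile. The effective profile resolves in order: `$QUALIA_PROFILE` (env wins) → the `Profile:` line in `STATE.md` → `profile` in `tracking.json` → `strict` (the default). `state.js check` surfaces the active profile in its output.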
105
+
106
+ - **`strict`** (default for the team) — hard gates, no waivers. Every gate must pass before the road advances.
107
+ - **`standard`** — gates are advisory. A senior may exit a Definition-of-Done gate early, provided the reason is logged to `.planning/decisions/`.
108
+
102
109
  ## When You're Stuck
103
110
 
104
111
  ```
@@ -85,6 +85,21 @@ function readJson(p) {
85
85
  }
86
86
 
87
87
  try {
88
+ // ── B1 auto-capture: fire-and-forget the ship-time auto-report ─────────
89
+ // Detached subprocess so this hook stays fast (no network here, per its
90
+ // design). auto-report.js guards on status===shipped + a per-shipped-unit
91
+ // dedupe marker, so it's a cheap no-op on every turn except the one right
92
+ // after a ship. Wrapped + unref'd so it never blocks or breaks the session.
93
+ try {
94
+ const { spawn } = require("child_process");
95
+ const child = spawn(
96
+ process.execPath,
97
+ [path.join(__dirname, "..", "bin", "auto-report.js")],
98
+ { cwd: process.cwd(), detached: true, stdio: "ignore" },
99
+ );
100
+ child.unref();
101
+ } catch {}
102
+
88
103
  // ── Skip if too soon since last write ────────────────────
89
104
  const now = Date.now();
90
105
  let lastWrite = 0;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "qualia-framework",
3
- "version": "6.4.0",
3
+ "version": "6.6.0",
4
4
  "description": "Claude Code and Codex workflow framework by Qualia Solutions. Plan, build, verify, ship.",
5
5
  "bin": {
6
6
  "qualia-framework": "./bin/cli.js"
@@ -45,7 +45,13 @@
45
45
  "templates/",
46
46
  "references/",
47
47
  "tests/",
48
- "docs/",
48
+ "docs/agent-runs.md",
49
+ "docs/erp-contract.md",
50
+ "docs/plan-contract.md",
51
+ "docs/playwright-loop-pilot-results.md",
52
+ "docs/release.md",
53
+ "docs/changelog-v6.html",
54
+ "docs/onboarding.html",
49
55
  "CLAUDE.md",
50
56
  "AGENTS.md",
51
57
  "guide.md"
@@ -0,0 +1,89 @@
1
+ ---
2
+ archetype: ai-agent
3
+ stack: Next.js 16 (Vercel, app + API) · Supabase (Postgres + pgvector) · Railway (workers) · OpenRouter · Tailwind + shadcn/ui
4
+ updated: 2026-05-28
5
+ ---
6
+
7
+ # Archetype: `ai-agent`
8
+
9
+ > LLM / chat / agent products on Supabase + Vercel, with Railway for any long-running or scheduled compute. The roadmapper loads this file when the operator picks `ai-agent`. Voice (`voice-agent`) extends this archetype with a latency + call-testing milestone — see the bottom note.
10
+
11
+ ## How this file is used
12
+
13
+ Same contract as every archetype: `qualia-scope` grills the **Grill variables**, the **Definition of Done** is the fixed coverage, the **Road** is the default 0→100. The differentiator here is **M3 — the eval gate**: an agent isn't "done" because it replies; it's done when it passes measurable cases.
14
+
15
+ ## Grill variables (what `qualia-scope` must extract)
16
+
17
+ - **Job to be done** — one sentence. What does the agent *do*, for whom, replacing what manual work?
18
+ - **Conversation shape** — single-turn tool, multi-turn chat, or autonomous task agent?
19
+ - **Knowledge** — does it need the client's data (RAG)? Sources, freshness, volume → drives pgvector + ingestion.
20
+ - **Tools / actions** — what can it *do* beyond talk (book, query, email, write to a system)? Each tool is a vertical slice.
21
+ - **Model & routing** — quality vs cost tier; which OpenRouter models; fallback chain.
22
+ - **Surface** — embedded widget, standalone app, API, or channel (WhatsApp/Slack)? Auth model.
23
+ - **Compute shape** — purely request/response (Vercel only) or long-running/scheduled/queue work (→ Railway worker)?
24
+ - **Guardrails** — what must it refuse? PII handling? Human escalation path?
25
+ - **Success metric** — how is "good" measured? (This becomes the eval suite. If they can't answer, the project has no finish line — surface it now.)
26
+ - **Cost ceiling** — per-conversation and monthly budget → drives guardrails.
27
+
28
+ ## Production Definition of Done
29
+
30
+ **Foundation & data** — Supabase with **RLS on every table** (conversations, messages, users, embeddings); auth; pgvector if RAG. Migrations in version control.
31
+
32
+ **Agent core** — LLM via **OpenRouter** with model fallback; system prompts **versioned in source**, never hardcoded inline; streaming responses; context-window management.
33
+
34
+ **RAG (if applicable)** — ingestion pipeline; retrieval quality checked, not assumed; source attribution.
35
+
36
+ **Tools/actions** — each action validated server-side; failure + timeout handling; idempotency where it writes.
37
+
38
+ **Evals** — pass/fail suite over real cases before "done"; covers the success metric and the refusal/guardrail cases. **This is the ship gate.**
39
+
40
+ **Guardrails & cost** — input validation; refusal/safety behavior; graceful fallback on model failure; per-request + daily cost ceilings; token + latency logging.
41
+
42
+ **Compute (if Railway)** — health checks (`/health`); structured logging; restart policy; staging→prod env separation; secrets in Railway variables, never logged.
43
+
44
+ **App quality** — auth flows; rate limiting; the **non-AI-looking** UI pass; responsive; loading/empty/error/streaming states.
45
+
46
+ **Security & compliance** — `service_role` server-only; secrets in env; security headers; MFA on accounts; GDPR posture (EU) — consent, retention, data export/delete.
47
+
48
+ **Observability** — Sentry + structured logging + analytics.
49
+
50
+ **Deploy & handoff** — Vercel prod (+ Railway prod if worker); env separation; post-deploy smoke including **real agent calls**; credentials + walkthrough + archive + ERP report.
51
+
52
+ ## The Road (default 0→100)
53
+
54
+ ### M1 — Foundation & Data
55
+ - Init: Next.js 16 (Vercel) for app + API routes; Supabase project (auth, RLS on every table); Railway service scaffolded *only if* the grill found long-running/scheduled work.
56
+ - Schema: conversations, messages, users; pgvector tables if RAG.
57
+ - OpenRouter wired with a model + fallback; secrets in env.
58
+ - **Exit:** authenticated user can hit a stubbed endpoint; RLS verified by logging in as two users; deploys to preview.
59
+
60
+ ### M2 — Core Agent Loop (vertical slice: input → model → response → persist)
61
+ - Streaming chat UI; system prompt in source control; conversation persistence.
62
+ - Orchestration: tool-calling scaffold; RAG retrieval if applicable; context management.
63
+ - Cost guardrails + token/latency logging from the first call.
64
+ - **Exit:** a real end-to-end conversation works, persists, and its cost/latency is logged.
65
+
66
+ ### M3 — Evals & Guardrails (THE GATE)
67
+ - Eval harness with pass/fail cases mapped to the success metric — not vibes.
68
+ - Guardrails: input validation, refusal/safety, fallback on model failure, human-escalation path.
69
+ - Each tool/action: server-side validation, timeout + failure handling, idempotency on writes.
70
+ - Railway health checks + logging if a worker exists.
71
+ - **Exit:** eval suite green; every guardrail case handled. *No ship before this milestone closes.*
72
+
73
+ ### M4 — App Surface & Polish
74
+ - Auth flows, user management, rate limiting.
75
+ - The non-AI-looking design pass (DESIGN.md, anti-slop), responsive, all async states incl. streaming.
76
+ - **Exit:** product looks and feels built, not generated; passes design-laws.
77
+
78
+ ### M5 — Handoff (always last)
79
+ - Security review + secrets/env audit; GDPR posture (consent, retention, export/delete).
80
+ - Prod deploy (Vercel + Railway envs separated); post-deploy smoke including **real agent calls**, not just HTTP 200.
81
+ - Credentials handover, walkthrough, archive, `/qualia-report` to ERP.
82
+ - **Exit:** all DoD lines covered or waived with reason; client can operate it.
83
+
84
+ ## Why M3 exists (the 0→100 insight)
85
+
86
+ The reason agents "finish but aren't done" is that M2 *feels* like completion — it talks, it's demo-able. But demo-able ≠ reliable. **M3 is the milestone the old flow never had**: it converts "it replied" into "it passes." If the grill couldn't extract a success metric, M3 has no cases to run — which is the framework telling you the project was never properly scoped. That's the feature, not a bug.
87
+
88
+ ## Voice extension (`voice-agent`)
89
+ Add a milestone between M3 and M4: **latency budget <800ms end-to-end** (the bar where callers stop noticing it's AI; >1.2s feels like legacy IVR), **end-to-end call testing with pass/fail** through the full Retell + ElevenLabs + Telnyx stack (not just prompt review), turn-taking / barge-in verified, transcript logging + PII redaction, recording-consent disclosure.
@@ -0,0 +1,60 @@
1
+ ---
2
+ archetype: voice-agent
3
+ extends: ai-agent
4
+ stack: Retell (orchestration) · ElevenLabs (voice) · Telnyx (telephony) · OpenRouter (LLM) · Supabase · Vercel/Railway
5
+ updated: 2026-05-29
6
+ ---
7
+
8
+ # Archetype: `voice-agent`
9
+
10
+ > Real-time voice agents (inbound/outbound calls) on Retell + ElevenLabs + Telnyx. **Extends `ai-agent`** — every `ai-agent` Definition-of-Done line still applies (OpenRouter routing, versioned prompts, the eval gate, cost guardrails, RLS, observability, security). This file adds the voice-specific bars, where latency and real call testing are the difference between "demo" and "shippable." Used by `qualia-scope` when the operator picks `voice-agent`.
11
+
12
+ ## How this file is used
13
+
14
+ Same contract: `qualia-scope` grills the **Grill variables**, the **Definition of Done** is the per-increment bar, the **Road** is the default 0→100. Inherits `ai-agent` + `rules/constitution.md`. The new gate is **M-Voice**: real end-to-end calls with pass/fail, not transcript review.
15
+
16
+ ## Grill variables (added on top of `ai-agent`)
17
+
18
+ - **Call direction** — inbound, outbound, or both? Volume/concurrency expected?
19
+ - **The one job** — appointment reminder, intake, qualification, support triage? (Start with one; a vague "assistant" fails.)
20
+ - **Call flow** — the happy path + the branches (no-answer, voicemail, wrong person, transfer-to-human).
21
+ - **Voice & persona** — language(s), accent, ElevenLabs voice, tone, named or anonymous.
22
+ - **Latency tolerance** — confirm the <800ms target fits the use case; identify the slowest dependency (LLM, tool call, DB).
23
+ - **Tools mid-call** — what must it look up or write *during* the call (calendar, CRM, order status)? Each is a latency risk.
24
+ - **Escalation** — when and how does it hand to a human? Warm transfer or callback?
25
+ - **Telephony** — Telnyx numbers, regions, caller-ID, recording laws per region.
26
+ - **Compliance** — recording-consent disclosure, PII handling, GDPR retention (EU). Regulated domain (health/finance)?
27
+ - **Success metric** — answered-rate, completion-rate, transfer-rate, CSAT? (Becomes the eval + call-test pass criteria.)
28
+
29
+ ## Production Definition of Done (added on top of `ai-agent`)
30
+
31
+ **Latency** — **<800ms end-to-end** turn latency is the bar where callers stop noticing it's AI; >1.2s feels like legacy IVR. Measured on real calls, not assumed. Slowest dependency identified and budgeted.
32
+
33
+ **Call quality** — turn-taking / barge-in / interruption handled without breaking flow; no dead air on tool calls (filler/await behavior); graceful handling of no-answer, voicemail, silence, wrong person.
34
+
35
+ **End-to-end call testing (THE GATE)** — automated test calls through the full Retell + ElevenLabs + Telnyx stack with measurable pass/fail against the success metric. Transcript review is *not* sufficient — the audio path is part of the product.
36
+
37
+ **Escalation** — human handoff path tested (transfer or callback); failure modes (LLM/tool/telephony down) degrade safely, never trap the caller.
38
+
39
+ **Observability & compliance** — full transcript + recording logging; PII redaction; recording-consent disclosure at call start; GDPR retention policy; per-region recording-law compliance.
40
+
41
+ **Cost** — per-minute + per-call cost tracked (voice + LLM + telephony stack); daily ceiling.
42
+
43
+ ## The Road (default 0→100)
44
+
45
+ Follows `ai-agent` M1–M3 (Foundation/Data → Core Loop → Evals & Guardrails), then inserts the voice gate before the app surface:
46
+
47
+ ### M-Voice — Voice Path & Call Testing (inserted after ai-agent M3, before polish)
48
+ - Retell agent wired to ElevenLabs voice + Telnyx numbers; LLM via OpenRouter.
49
+ - Call flow built: happy path + branches (no-answer, voicemail, wrong person, transfer).
50
+ - Mid-call tools with no-dead-air behavior; barge-in/turn-taking verified.
51
+ - **Latency measured on real calls to the <800ms budget**; slowest dependency optimized.
52
+ - **End-to-end automated call tests** with pass/fail on the success metric.
53
+ - Transcript + recording logging; consent disclosure; PII redaction.
54
+ - **Exit:** real test calls pass the metric at target latency; every branch + escalation handled; compliance wired. *No ship before this closes.*
55
+
56
+ ### Then — App Surface & Handoff
57
+ - `ai-agent` M4/M5: dashboard (call logs, transcripts, metrics), the non-AI-looking UI, security/GDPR review, prod deploy (Vercel + Railway envs), smoke including **real calls**, handoff or rolling-release.
58
+
59
+ ## Why M-Voice exists
60
+ A voice agent that reads well in a transcript can still be unusable on a call — 1.5s pauses, talking over the caller, dead air during a lookup. Text evals (ai-agent M3) prove the *reasoning*; M-Voice proves the *experience*. Both gates, or it isn't done.