qualia-framework 6.3.0 → 6.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/AGENTS.md +8 -8
  2. package/CLAUDE.md +6 -5
  3. package/README.md +17 -39
  4. package/bin/cli.js +64 -16
  5. package/bin/command-surface.js +6 -1
  6. package/bin/install.js +26 -11
  7. package/bin/learning-candidates.js +217 -0
  8. package/bin/prune-deprecated.js +64 -0
  9. package/bin/qualia-ui.js +1 -0
  10. package/bin/runtime-manifest.js +4 -0
  11. package/bin/security-scan.js +409 -0
  12. package/bin/state.js +106 -1
  13. package/bin/status-snapshot.js +363 -0
  14. package/guide.md +18 -33
  15. package/hooks/pre-compact.js +232 -0
  16. package/package.json +8 -2
  17. package/references/archetypes/ai-agent.md +89 -0
  18. package/references/archetypes/voice-agent.md +60 -0
  19. package/references/archetypes/web-app.md +67 -0
  20. package/references/archetypes/website.md +78 -0
  21. package/rules/constitution.md +42 -0
  22. package/skills/qualia/SKILL.md +3 -1
  23. package/skills/qualia-build/SKILL.md +1 -1
  24. package/skills/qualia-discuss/SKILL.md +1 -1
  25. package/skills/qualia-doctor/SKILL.md +1 -1
  26. package/skills/qualia-feature/SKILL.md +1 -1
  27. package/skills/qualia-fix/SKILL.md +1 -1
  28. package/skills/qualia-idk/SKILL.md +245 -0
  29. package/skills/qualia-learn/SKILL.md +1 -1
  30. package/skills/qualia-map/SKILL.md +1 -1
  31. package/skills/qualia-milestone/SKILL.md +1 -1
  32. package/skills/qualia-new/SKILL.md +1 -1
  33. package/skills/qualia-optimize/SKILL.md +1 -1
  34. package/skills/qualia-plan/SKILL.md +1 -1
  35. package/skills/qualia-polish/SKILL.md +1 -1
  36. package/skills/qualia-postmortem/SKILL.md +1 -1
  37. package/skills/qualia-report/SKILL.md +1 -1
  38. package/skills/qualia-research/SKILL.md +1 -1
  39. package/skills/qualia-review/SKILL.md +1 -1
  40. package/skills/qualia-road/SKILL.md +1 -1
  41. package/skills/qualia-scope/SKILL.md +123 -0
  42. package/skills/qualia-secure/SKILL.md +105 -0
  43. package/skills/qualia-test/SKILL.md +1 -1
  44. package/skills/qualia-verify/SKILL.md +1 -1
  45. package/skills/zoho-workflow/SKILL.md +1 -1
  46. package/tests/bin.test.sh +9 -9
  47. package/tests/install-smoke.test.sh +3 -3
  48. package/tests/lib.test.sh +17 -10
  49. package/tests/published-install-smoke.test.sh +3 -3
  50. package/tests/refs.test.sh +29 -22
  51. package/tests/runner.js +3 -3
  52. package/tests/state.test.sh +38 -7
  53. package/docs/archive/CHANGELOG-pre-v4.md +0 -855
  54. package/docs/archive/v4.0.0-review.md +0 -288
  55. package/docs/ecosystem-operating-model.md +0 -121
  56. package/docs/research/2026-04-21-command-quality-deep-research.md +0 -128
  57. package/docs/research/2026-04-21-industry-best-practices.md +0 -255
  58. package/docs/research/2026-05-11-deep-research.md +0 -189
  59. package/docs/reviews/matt-pocock-skills-analysis.md +0 -300
  60. package/docs/reviews/v4.1.0-audit.html +0 -1488
  61. package/docs/reviews/v4.1.0-audit.md +0 -263
  62. package/docs/reviews/v6.2.1-revival-audit.md +0 -53
  63. package/docs/reviews/v6.2.2-memory-erp-audit.md +0 -41
  64. package/docs/reviews/v6.2.3-erp-id-guard.md +0 -15
@@ -0,0 +1,232 @@
1
+ #!/usr/bin/env node
2
+ // ~/.claude/hooks/pre-compact.js — snapshot in-flight session context to
3
+ // .planning/.compaction-snapshot.md so /qualia-resume (or the next session)
4
+ // can surface what was active when compaction wiped the conversation.
5
+ //
6
+ // PreCompact hook (fires before Claude Code compacts the context window).
7
+ //
8
+ // Why this hook returned in v6.3.2:
9
+ // v6.2.0 removed the original pre-compact.js because it created BOT COMMITS
10
+ // to stamp STATE.md / tracking.json. That was the wrong mechanism —
11
+ // state.js's atomic write + journal already gives crash safety. This v2
12
+ // hook does NOT touch git, does NOT touch tracking.json, does NOT write
13
+ // through state.js. It only writes to a sidecar file that survives
14
+ // compaction and tells the next session "here's what was in flight".
15
+ //
16
+ // Design constraints (same shape as stop-session-log.js):
17
+ // • NEVER block — exit 0 always, even on internal failure.
18
+ // • Fast — under 200ms, no network, no LLM call.
19
+ // • No PII / secrets — only file paths, commit subjects, phase/status lines, and FAIL / INSUFFICIENT EVIDENCE lines copied from the current phase's verification file.
20
+ // • Idempotent — overwrites the sidecar on every fire; we only care
21
+ // about the LATEST pre-compaction state.
22
+ // • No git commits, no state.js mutations.
23
+ //
24
+ // Sidecar format (.planning/.compaction-snapshot.md):
25
+ // # Pre-compaction snapshot — {ISO timestamp}
26
+ // ## State
27
+ // Phase: 2 of 4 — built
28
+ // ## Work in flight
29
+ // 3 file(s):
30
+ // - foo.ts (one bullet per file, at most 10 listed)
31
+ // ## Recent commits
32
+ // - abc1234 feat: implement signin
33
+ // - def5678 test: signin happy path
34
+ // ## Active planning artifacts (modified in last 24h)
35
+ // - phase-2-plan.md — 14m ago
36
+ // - phase-2-verification.md — 3h ago
37
+
38
+ const fs = require("fs");
39
+ const path = require("path");
40
+ const { spawnSync } = require("child_process");
41
+
42
+ const _traceStart = Date.now();
43
+
44
/**
 * Resolve the directory that holds the framework's runtime files.
 *
 * Resolution order:
 *   1. The QUALIA_HOME environment variable, when set to a non-empty value.
 *   2. The parent directory of this script's directory, when that parent is
 *      named ".claude" or ".codex".
 *   3. "<user home>/.claude" as the fallback.
 *
 * @returns {string} Path of the resolved home directory.
 */
function qualiaHome() {
  const override = process.env.QUALIA_HOME;
  if (override) return override;

  const installRoot = path.dirname(__dirname);
  const rootName = path.basename(installRoot);
  const isRuntimeDir = rootName === ".claude" || rootName === ".codex";

  return isRuntimeDir
    ? installRoot
    : path.join(require("os").homedir(), ".claude");
}
50
+
51
/**
 * Append one JSON line describing this hook run to the day's trace file,
 * `<qualiaHome()>/.qualia-traces/<YYYY-MM-DD>.jsonl`, creating the trace
 * directory if it does not exist yet.
 *
 * Best-effort by design: every failure is swallowed so that tracing can
 * never change the hook's outcome.
 *
 * @param {string} result - Outcome label written to the record.
 * @param {object} [extra] - Additional fields merged into the record; keys
 *   here override the built-in ones because they are spread last.
 */
function _trace(result, extra) {
  try {
    const dir = path.join(qualiaHome(), ".qualia-traces");
    if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });

    const dayStamp = new Date().toISOString().split("T")[0];
    const logFile = path.join(dir, `${dayStamp}.jsonl`);
    const record = {
      hook: "pre-compact",
      result,
      timestamp: new Date().toISOString(),
      duration_ms: Date.now() - _traceStart,
      ...extra,
    };

    fs.appendFileSync(logFile, JSON.stringify(record) + "\n");
  } catch {
    // Intentionally ignored: a tracing failure must not affect the hook.
  }
}
67
+
68
/**
 * Run a git command synchronously and return its trimmed stdout.
 *
 * The command is spawned directly, never through a shell. When the `shell`
 * option is enabled Node joins the argument array into one command string
 * without escaping, so an argument containing a space (such as the
 * "--pretty=%h %s" log format) would be split into two arguments by the
 * shell. Spawning `git` directly passes every argument through verbatim on
 * all platforms.
 *
 * @param {string[]} args - Arguments passed to `git`.
 * @param {object} [opts] - Extra `spawnSync` options (for example `cwd`);
 *   these override the defaults below.
 * @returns {string} Trimmed stdout, or "" when git exits non-zero, cannot be
 *   started, times out (2 s by default), or the spawn throws.
 */
function git(args, opts = {}) {
  try {
    const r = spawnSync("git", args, {
      encoding: "utf8",
      timeout: 2000,
      ...opts,
    });
    // status is null when the process could not start or was killed by the
    // timeout, so a single comparison covers every failure mode.
    if (r.status !== 0) return "";
    return (r.stdout || "").trim();
  } catch {
    return "";
  }
}
82
+
83
/**
 * Read a file as UTF-8 and parse it as JSON.
 *
 * @param {string} p - Path of the file to read.
 * @returns {*} The parsed value, or null when the file cannot be read or
 *   its content is not valid JSON.
 */
function readJson(p) {
  let parsed = null;
  try {
    const raw = fs.readFileSync(p, "utf8");
    parsed = JSON.parse(raw);
  } catch {
    parsed = null;
  }
  return parsed;
}
90
+
91
/**
 * Return a path's modification time without ever throwing.
 *
 * @param {string} p - Path to stat.
 * @returns {number} `mtimeMs` of the path, or 0 when it cannot be stat'ed.
 */
function safeStatM(p) {
  let modifiedAt = 0;
  try {
    modifiedAt = fs.statSync(p).mtimeMs;
  } catch {
    modifiedAt = 0;
  }
  return modifiedAt;
}
94
+
95
/**
 * Format an age in milliseconds as a short relative label such as
 * "42s ago", "14m ago", "3h ago" or "2d ago".
 *
 * The value is floored, not rounded, so a label never overshoots its own
 * unit: 59.6 s reads "59s ago" rather than "60s ago", and 59m59s reads
 * "59m ago" rather than "60m ago". Negative ages (a modification time that
 * lies in the future, e.g. after clock skew) are clamped to zero.
 *
 * @param {number} ms - Age in milliseconds.
 * @returns {string} Relative age label.
 */
function humanAge(ms) {
  const age = Math.max(0, ms);
  if (age < 60_000) return `${Math.floor(age / 1000)}s ago`;
  if (age < 3_600_000) return `${Math.floor(age / 60_000)}m ago`;
  if (age < 86_400_000) return `${Math.floor(age / 3_600_000)}h ago`;
  return `${Math.floor(age / 86_400_000)}d ago`;
}
101
+
102
// Main hook body: gather a read-only picture of the working tree and the
// .planning/ directory, then overwrite .planning/.compaction-snapshot.md.
// Every exit path calls process.exit(0), including the catch handler, so
// the hook never blocks compaction.
try {
  const cwd = process.cwd();
  // Fall back to the current directory when not inside a git work tree.
  const repoRoot = git(["rev-parse", "--show-toplevel"], { cwd }) || cwd;
  const planningDir = path.join(repoRoot, ".planning");

  // Skip if not a Qualia project (no .planning/ dir) — nothing to snapshot.
  if (!fs.existsSync(planningDir)) {
    _trace("skip", { reason: "no-planning-dir" });
    process.exit(0);
  }

  // Run a git command at the repo root and return its non-empty output lines.
  const gitLines = (args) => {
    const out = git(args, { cwd: repoRoot });
    return out ? out.split(/\r?\n/).filter(Boolean) : [];
  };

  const now = Date.now();
  const tracking = readJson(path.join(planningDir, "tracking.json")) || {};
  const stateText = (() => {
    try { return fs.readFileSync(path.join(planningDir, "STATE.md"), "utf8"); }
    catch { return ""; }
  })();

  // State header line (one line from STATE.md, or fall back to tracking.json).
  const stateLine = (() => {
    // Split on CRLF as well as LF so a STATE.md saved with Windows line
    // endings does not leak a stray "\r" into the middle of the joined line.
    const stateLines = stateText.split(/\r?\n/);
    const phaseLine = stateLines.find((l) => /^Phase:/i.test(l));
    const statusLine = stateLines.find((l) => /^Status:/i.test(l));
    if (phaseLine || statusLine) return [phaseLine, statusLine].filter(Boolean).join(" — ");
    const p = tracking.phase || "?";
    const total = tracking.total_phases || tracking.phase_total || "?";
    const status = tracking.status || "?";
    return `Phase: ${p} of ${total} — ${status}`;
  })();

  // Work in flight: tracked files that differ from HEAD, plus staged files.
  const MAX_IN_FLIGHT = 10;
  const modified = gitLines(["diff", "--name-only", "HEAD"]);
  const staged = gitLines(["diff", "--name-only", "--cached"]);
  // Keep the full de-duplicated list so the reported count is the real
  // total; only the listing itself is capped.
  const allInFlight = [...new Set([...modified, ...staged])];
  const inFlight = allInFlight.slice(0, MAX_IN_FLIGHT);

  // Recent commits — last 5, single-line.
  const recentCommits = gitLines(["log", "--pretty=%h %s", "-5"]);

  // Active planning artifacts: phase-* markdown files modified in last 24h.
  const activePlanning = (() => {
    try {
      const files = fs.readdirSync(planningDir).filter((f) => /^phase-\d+/.test(f) && f.endsWith(".md"));
      const day = 24 * 3600_000;
      return files
        // safeStatM returns 0 on failure, which yields a huge age that the
        // 24h filter below discards.
        .map((f) => ({ name: f, age: now - safeStatM(path.join(planningDir, f)) }))
        .filter((x) => x.age < day)
        .sort((a, b) => a.age - b.age)
        .slice(0, 5);
    } catch { return []; }
  })();

  // Open deviations / verification FAILs in current phase.
  const phaseNum = tracking.phase || 0;
  const verifyFails = (() => {
    if (!phaseNum) return [];
    const vPath = path.join(planningDir, `phase-${phaseNum}-verification.md`);
    if (!fs.existsSync(vPath)) return [];
    try {
      const v = fs.readFileSync(vPath, "utf8");
      const fails = (v.match(/^[^\n]*\bFAIL\b[^\n]*$/gm) || []).slice(0, 5);
      const insufficient = (v.match(/^[^\n]*INSUFFICIENT EVIDENCE[^\n]*$/gm) || []).slice(0, 3);
      return [...fails, ...insufficient];
    } catch { return []; }
  })();

  // Compose snapshot. Markdown so the next session can render it as-is.
  const ts = new Date().toISOString();
  const lines = [
    `# Pre-compaction snapshot — ${ts}`,
    "",
    "_Written by `hooks/pre-compact.js` immediately before Claude Code compacted the context window. `/qualia-resume` and session-start surface this so the next session sees what was active._",
    "",
    "## State",
    stateLine || "(no state info)",
    "",
  ];

  lines.push("## Work in flight");
  if (allInFlight.length === 0) {
    lines.push("(no uncommitted changes)");
  } else {
    // Only mention truncation when it happened, so the common case keeps
    // the plain "N file(s):" line.
    const shown = inFlight.length < allInFlight.length
      ? ` (showing first ${inFlight.length})`
      : "";
    lines.push(`${allInFlight.length} file(s)${shown}:`);
    for (const f of inFlight) lines.push(`- ${f}`);
  }
  lines.push("");

  lines.push("## Recent commits");
  if (recentCommits.length === 0) {
    lines.push("(no commit history)");
  } else {
    for (const c of recentCommits) lines.push(`- ${c}`);
  }
  lines.push("");

  lines.push("## Active planning artifacts (modified in last 24h)");
  if (activePlanning.length === 0) {
    lines.push("(none)");
  } else {
    for (const p of activePlanning) lines.push(`- ${p.name} — ${humanAge(p.age)}`);
  }
  lines.push("");

  if (verifyFails.length > 0) {
    lines.push("## Open verification issues");
    for (const f of verifyFails) lines.push(`- ${f.trim()}`);
    lines.push("");
  }

  lines.push("---");
  lines.push("_Sidecar file. Not committed to git. Read by `/qualia-resume` and `session-start` to restore in-flight context after compaction._");

  // Overwrites any previous snapshot: only the latest state matters.
  fs.writeFileSync(path.join(planningDir, ".compaction-snapshot.md"), lines.join("\n") + "\n");
  _trace("snapshot", {
    in_flight: allInFlight.length,
    commits: recentCommits.length,
    active_planning: activePlanning.length,
    verify_fails: verifyFails.length,
  });
  process.exit(0);
} catch (err) {
  // Never block compaction: record the failure and still exit 0.
  _trace("error", { error: err && err.message ? err.message : String(err) });
  process.exit(0);
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "qualia-framework",
3
- "version": "6.3.0",
3
+ "version": "6.5.0",
4
4
  "description": "Claude Code and Codex workflow framework by Qualia Solutions. Plan, build, verify, ship.",
5
5
  "bin": {
6
6
  "qualia-framework": "./bin/cli.js"
@@ -45,7 +45,13 @@
45
45
  "templates/",
46
46
  "references/",
47
47
  "tests/",
48
- "docs/",
48
+ "docs/agent-runs.md",
49
+ "docs/erp-contract.md",
50
+ "docs/plan-contract.md",
51
+ "docs/playwright-loop-pilot-results.md",
52
+ "docs/release.md",
53
+ "docs/changelog-v6.html",
54
+ "docs/onboarding.html",
49
55
  "CLAUDE.md",
50
56
  "AGENTS.md",
51
57
  "guide.md"
@@ -0,0 +1,89 @@
1
+ ---
2
+ archetype: ai-agent
3
+ stack: Next.js 16 (Vercel, app + API) · Supabase (Postgres + pgvector) · Railway (workers) · OpenRouter · Tailwind + shadcn/ui
4
+ updated: 2026-05-28
5
+ ---
6
+
7
+ # Archetype: `ai-agent`
8
+
9
+ > LLM / chat / agent products on Supabase + Vercel, with Railway for any long-running or scheduled compute. The roadmapper loads this file when the operator picks `ai-agent`. Voice (`voice-agent`) extends this archetype with a latency + call-testing milestone — see the bottom note.
10
+
11
+ ## How this file is used
12
+
13
+ Same contract as every archetype: `qualia-scope` grills the **Grill variables**, the **Definition of Done** is the fixed coverage, the **Road** is the default 0→100. The differentiator here is **M3 — the eval gate**: an agent isn't "done" because it replies; it's done when it passes measurable cases.
14
+
15
+ ## Grill variables (what `qualia-scope` must extract)
16
+
17
+ - **Job to be done** — one sentence. What does the agent *do*, for whom, replacing what manual work?
18
+ - **Conversation shape** — single-turn tool, multi-turn chat, or autonomous task agent?
19
+ - **Knowledge** — does it need the client's data (RAG)? Sources, freshness, volume → drives pgvector + ingestion.
20
+ - **Tools / actions** — what can it *do* beyond talk (book, query, email, write to a system)? Each tool is a vertical slice.
21
+ - **Model & routing** — quality vs cost tier; which OpenRouter models; fallback chain.
22
+ - **Surface** — embedded widget, standalone app, API, or channel (WhatsApp/Slack)? Auth model.
23
+ - **Compute shape** — purely request/response (Vercel only) or long-running/scheduled/queue work (→ Railway worker)?
24
+ - **Guardrails** — what must it refuse? PII handling? Human escalation path?
25
+ - **Success metric** — how is "good" measured? (This becomes the eval suite. If they can't answer, the project has no finish line — surface it now.)
26
+ - **Cost ceiling** — per-conversation and monthly budget → drives guardrails.
27
+
28
+ ## Production Definition of Done
29
+
30
+ **Foundation & data** — Supabase with **RLS on every table** (conversations, messages, users, embeddings); auth; pgvector if RAG. Migrations in version control.
31
+
32
+ **Agent core** — LLM via **OpenRouter** with model fallback; system prompts **versioned in source**, never hardcoded inline; streaming responses; context-window management.
33
+
34
+ **RAG (if applicable)** — ingestion pipeline; retrieval quality checked, not assumed; source attribution.
35
+
36
+ **Tools/actions** — each action validated server-side; failure + timeout handling; idempotency where it writes.
37
+
38
+ **Evals** — pass/fail suite over real cases before "done"; covers the success metric and the refusal/guardrail cases. **This is the ship gate.**
39
+
40
+ **Guardrails & cost** — input validation; refusal/safety behavior; graceful fallback on model failure; per-request + daily cost ceilings; token + latency logging.
41
+
42
+ **Compute (if Railway)** — health checks (`/health`); structured logging; restart policy; staging→prod env separation; secrets in Railway variables, never logged.
43
+
44
+ **App quality** — auth flows; rate limiting; the **non-AI-looking** UI pass; responsive; loading/empty/error/streaming states.
45
+
46
+ **Security & compliance** — `service_role` server-only; secrets in env; security headers; MFA on accounts; GDPR posture (EU) — consent, retention, data export/delete.
47
+
48
+ **Observability** — Sentry + structured logging + analytics.
49
+
50
+ **Deploy & handoff** — Vercel prod (+ Railway prod if worker); env separation; post-deploy smoke including **real agent calls**; credentials + walkthrough + archive + ERP report.
51
+
52
+ ## The Road (default 0→100)
53
+
54
+ ### M1 — Foundation & Data
55
+ - Init: Next.js 16 (Vercel) for app + API routes; Supabase project (auth, RLS on every table); Railway service scaffolded *only if* the grill found long-running/scheduled work.
56
+ - Schema: conversations, messages, users; pgvector tables if RAG.
57
+ - OpenRouter wired with a model + fallback; secrets in env.
58
+ - **Exit:** authenticated user can hit a stubbed endpoint; RLS verified by logging in as two users; deploys to preview.
59
+
60
+ ### M2 — Core Agent Loop (vertical slice: input → model → response → persist)
61
+ - Streaming chat UI; system prompt in source control; conversation persistence.
62
+ - Orchestration: tool-calling scaffold; RAG retrieval if applicable; context management.
63
+ - Cost guardrails + token/latency logging from the first call.
64
+ - **Exit:** a real end-to-end conversation works, persists, and its cost/latency is logged.
65
+
66
+ ### M3 — Evals & Guardrails (THE GATE)
67
+ - Eval harness with pass/fail cases mapped to the success metric — not vibes.
68
+ - Guardrails: input validation, refusal/safety, fallback on model failure, human-escalation path.
69
+ - Each tool/action: server-side validation, timeout + failure handling, idempotency on writes.
70
+ - Railway health checks + logging if a worker exists.
71
+ - **Exit:** eval suite green; every guardrail case handled. *No ship before this milestone closes.*
72
+
73
+ ### M4 — App Surface & Polish
74
+ - Auth flows, user management, rate limiting.
75
+ - The non-AI-looking design pass (DESIGN.md, anti-slop), responsive, all async states incl. streaming.
76
+ - **Exit:** product looks and feels built, not generated; passes design-laws.
77
+
78
+ ### M5 — Handoff (always last)
79
+ - Security review + secrets/env audit; GDPR posture (consent, retention, export/delete).
80
+ - Prod deploy (Vercel + Railway envs separated); post-deploy smoke including **real agent calls**, not just HTTP 200.
81
+ - Credentials handover, walkthrough, archive, `/qualia-report` to ERP.
82
+ - **Exit:** all DoD lines covered or waived with reason; client can operate it.
83
+
84
+ ## Why M3 exists (the 0→100 insight)
85
+
86
+ The reason agents "finish but aren't done" is that M2 *feels* like completion — it talks, it's demo-able. But demo-able ≠ reliable. **M3 is the milestone the old flow never had**: it converts "it replied" into "it passes." If the grill couldn't extract a success metric, M3 has no cases to run — which is the framework telling you the project was never properly scoped. That's the feature, not a bug.
87
+
88
+ ## Voice extension (`voice-agent`)
89
+ Add a milestone between M3 and M4: **latency budget <800ms end-to-end** (the bar where callers stop noticing it's AI; >1.2s feels like legacy IVR), **end-to-end call testing with pass/fail** through the full Retell + ElevenLabs + Telnyx stack (not just prompt review), turn-taking / barge-in verified, transcript logging + PII redaction, recording-consent disclosure.
@@ -0,0 +1,60 @@
1
+ ---
2
+ archetype: voice-agent
3
+ extends: ai-agent
4
+ stack: Retell (orchestration) · ElevenLabs (voice) · Telnyx (telephony) · OpenRouter (LLM) · Supabase · Vercel/Railway
5
+ updated: 2026-05-29
6
+ ---
7
+
8
+ # Archetype: `voice-agent`
9
+
10
+ > Real-time voice agents (inbound/outbound calls) on Retell + ElevenLabs + Telnyx. **Extends `ai-agent`** — every `ai-agent` Definition-of-Done line still applies (OpenRouter routing, versioned prompts, the eval gate, cost guardrails, RLS, observability, security). This file adds the voice-specific bars, where latency and real call testing are the difference between "demo" and "shippable." Used by `qualia-scope` when the operator picks `voice-agent`.
11
+
12
+ ## How this file is used
13
+
14
+ Same contract: `qualia-scope` grills the **Grill variables**, the **Definition of Done** is the per-increment bar, the **Road** is the default 0→100. Inherits `ai-agent` + `rules/constitution.md`. The new gate is **M-Voice**: real end-to-end calls with pass/fail, not transcript review.
15
+
16
+ ## Grill variables (added on top of `ai-agent`)
17
+
18
+ - **Call direction** — inbound, outbound, or both? Volume/concurrency expected?
19
+ - **The one job** — appointment reminder, intake, qualification, support triage? (Start with one; a vague "assistant" fails.)
20
+ - **Call flow** — the happy path + the branches (no-answer, voicemail, wrong person, transfer-to-human).
21
+ - **Voice & persona** — language(s), accent, ElevenLabs voice, tone, named or anonymous.
22
+ - **Latency tolerance** — confirm the <800ms target fits the use case; identify the slowest dependency (LLM, tool call, DB).
23
+ - **Tools mid-call** — what must it look up or write *during* the call (calendar, CRM, order status)? Each is a latency risk.
24
+ - **Escalation** — when and how does it hand to a human? Warm transfer or callback?
25
+ - **Telephony** — Telnyx numbers, regions, caller-ID, recording laws per region.
26
+ - **Compliance** — recording-consent disclosure, PII handling, GDPR retention (EU). Regulated domain (health/finance)?
27
+ - **Success metric** — answered-rate, completion-rate, transfer-rate, CSAT? (Becomes the eval + call-test pass criteria.)
28
+
29
+ ## Production Definition of Done (added on top of `ai-agent`)
30
+
31
+ **Latency** — **<800ms end-to-end** turn latency is the bar where callers stop noticing it's AI; >1.2s feels like legacy IVR. Measured on real calls, not assumed. Slowest dependency identified and budgeted.
32
+
33
+ **Call quality** — turn-taking / barge-in / interruption handled without breaking flow; no dead air on tool calls (filler/await behavior); graceful handling of no-answer, voicemail, silence, wrong person.
34
+
35
+ **End-to-end call testing (THE GATE)** — automated test calls through the full Retell + ElevenLabs + Telnyx stack with measurable pass/fail against the success metric. Transcript review is *not* sufficient — the audio path is part of the product.
36
+
37
+ **Escalation** — human handoff path tested (transfer or callback); failure modes (LLM/tool/telephony down) degrade safely, never trap the caller.
38
+
39
+ **Observability & compliance** — full transcript + recording logging; PII redaction; recording-consent disclosure at call start; GDPR retention policy; per-region recording-law compliance.
40
+
41
+ **Cost** — per-minute + per-call cost tracked (voice + LLM + telephony stack); daily ceiling.
42
+
43
+ ## The Road (default 0→100)
44
+
45
+ Follows `ai-agent` M1–M3 (Foundation/Data → Core Loop → Evals & Guardrails), then inserts the voice gate before the app surface:
46
+
47
+ ### M-Voice — Voice Path & Call Testing (inserted after ai-agent M3, before polish)
48
+ - Retell agent wired to ElevenLabs voice + Telnyx numbers; LLM via OpenRouter.
49
+ - Call flow built: happy path + branches (no-answer, voicemail, wrong person, transfer).
50
+ - Mid-call tools with no-dead-air behavior; barge-in/turn-taking verified.
51
+ - **Latency measured on real calls to the <800ms budget**; slowest dependency optimized.
52
+ - **End-to-end automated call tests** with pass/fail on the success metric.
53
+ - Transcript + recording logging; consent disclosure; PII redaction.
54
+ - **Exit:** real test calls pass the metric at target latency; every branch + escalation handled; compliance wired. *No ship before this closes.*
55
+
56
+ ### Then — App Surface & Handoff
57
+ - `ai-agent` M4/M5: dashboard (call logs, transcripts, metrics), the non-AI-looking UI, security/GDPR review, prod deploy (Vercel + Railway envs), smoke including **real calls**, handoff or rolling-release.
58
+
59
+ ## Why M-Voice exists
60
+ A voice agent that reads well in a transcript can still be unusable on a call — 1.5s pauses, talking over the caller, dead air during a lookup. Text evals (ai-agent M3) prove the *reasoning*; M-Voice proves the *experience*. Both gates, or it isn't done.
@@ -0,0 +1,67 @@
1
+ ---
2
+ archetype: web-app
3
+ extends: website
4
+ stack: Next.js 16 (App Router) · Tailwind + shadcn/ui · Supabase (auth + Postgres + RLS) · Vercel
5
+ updated: 2026-05-29
6
+ ---
7
+
8
+ # Archetype: `web-app`
9
+
10
+ > Authenticated products with user accounts, roles, and a dashboard on Vercel + Supabase. **Extends `website`** — every `website` Definition-of-Done line still applies (design, performance, SEO where relevant, a11y, observability, deploy, handoff). This file adds the auth, data, and app-quality bars. Used by `qualia-scope` when the operator picks `web-app`, or when a `website` grows gated content / accounts.
11
+
12
+ ## How this file is used
13
+
14
+ Same contract as every archetype: `qualia-scope` grills the **Grill variables**, the **Definition of Done** is the per-increment bar, the **Road** is the default 0→100. Inherits `website` + `rules/constitution.md`; relaxes nothing.
15
+
16
+ ## Grill variables (added on top of `website`)
17
+
18
+ - **Who are the users?** — roles (admin / staff / client / public) and what each can see and do. Drives the RLS model.
19
+ - **Auth model** — email/password, magic link, OAuth providers, SSO? Email verification required? Password reset flow?
20
+ - **Authorization source** — what claims gate access? (Must live in `app_metadata`, never `user_metadata` — constitution.)
21
+ - **Tenancy** — single-tenant, per-user, or multi-tenant/workspace? (Multi-tenant changes every RLS policy — surface it now.)
22
+ - **Core entities & relationships** — the domain model. Each entity → a CONTEXT.md glossary term.
23
+ - **Write surfaces** — what users create/edit/delete; which writes need confirmation, soft-delete, or audit.
24
+ - **Real-time / collaboration** — does anything need live updates (presence, notifications)?
25
+ - **Billing** — free, one-off, subscription? Provider? (If yes, billing is its own increment set.)
26
+ - **Notifications** — email (Resend), in-app, both? Triggered by what events?
27
+
28
+ ## Production Definition of Done (added on top of `website`)
29
+
30
+ **Auth & access** — Supabase auth with the chosen model; email verification + password reset wired; **RLS enabled on every table** with policies derived from `app_metadata` claims; role-based routing enforced in middleware *and* at the data layer (never trust the client). Verified by logging in as each role and confirming isolation.
31
+
32
+ **RLS correctness (constitution)** — every UPDATE policy has a matching SELECT; views use `security_invoker = true`; storage upsert grants INSERT+SELECT+UPDATE; sessions revoked before user delete.
33
+
34
+ **Data** — domain schema in `supabase/migrations/`; FK relationships normalized; soft-delete + audit where the grill flagged it; no N+1 on list views.
35
+
36
+ **App quality** — every async surface has loading / empty / error states; forms validate client *and* server (Zod or equivalent); destructive actions confirm; optimistic UI where latency matters; rate limiting on mutating + public endpoints.
37
+
38
+ **Security** — `service_role` server-only; secrets in env; security headers (HSTS); MFA on Supabase/Vercel accounts; CSRF/permission checks on every mutation.
39
+
40
+ **Billing (if applicable)** — provider integrated; webhooks idempotent; plan/entitlement state authoritative server-side; failed-payment + cancellation flows handled.
41
+
42
+ ## The Road (default 0→100)
43
+
44
+ ### M1 — Foundation, Auth & Data
45
+ - Init stack + Vercel link + CI; Supabase project; **RLS on every table from the first migration** (not retrofitted).
46
+ - Auth model: signup, login, verification, reset; role claims in `app_metadata`; role-based middleware.
47
+ - Domain schema + relationships; seed data.
48
+ - **Exit:** each role logs in and sees only what it should — verified as two+ users; deploys to preview.
49
+
50
+ ### M2 — Core Capabilities (one vertical slice per capability)
51
+ - Each capability cuts through UI + server action + RLS + validation + states + test. Independently shippable.
52
+ - **Exit:** the primary user job works end-to-end with all async states; writes validated both sides.
53
+
54
+ ### M3 — App Hardening
55
+ - Rate limiting, audit/soft-delete, notifications, real-time (if scoped); billing increments (if scoped).
56
+ - Performance pass (list virtualization, query aggregation, no N+1); error/empty states audited.
57
+ - **Exit:** mutation paths secured + rate-limited; perf budget met; billing flows (if any) handle failure.
58
+
59
+ ### M4 — Polish, SEO-where-relevant & Handoff
60
+ - Design pass to `website` anti-slop bar; a11y WCAG 2.2 AA; SEO on public routes only (`noindex` the app).
61
+ - Legal pages; analytics + Sentry; security pass (RLS, headers, env, MFA); custom domain; prod deploy + smoke.
62
+ - Credentials, walkthrough, archive, ERP report (or rolling-release for an internal product).
63
+ - **Exit:** all DoD lines covered or waived with reason.
64
+
65
+ ## Notes
66
+ - Internal/living products (like the ERP) run as **rolling releases** — no terminal Handoff. Each shipped increment still clears this DoD. Handoff applies only to client-delivered web-apps.
67
+ - LLM features → escalate to `ai-agent` (adds OpenRouter routing, evals, cost guardrails on top of this).
@@ -0,0 +1,78 @@
1
+ ---
2
+ archetype: website
3
+ stack: Next.js 16 (App Router) · Tailwind + shadcn/ui · Supabase · Vercel
4
+ updated: 2026-05-28
5
+ ---
6
+
7
+ # Archetype: `website`
8
+
9
+ > Marketing / brochure / content sites on Vercel + Supabase. The roadmapper loads this file when the operator picks `website`. The grill (`qualia-scope`) reads the **Grill variables** below; the **Definition of Done** is the fixed coverage the roadmap must satisfy; the **Road** is the default 0→100 shape.
10
+
11
+ ## How this file is used
12
+
13
+ 1. `qualia-scope` grills the operator through the **Grill variables** — depth on each, recommended-answer-with-why, writing answers to the spec and terms to `.planning/CONTEXT.md`.
14
+ 2. The grill is **DoD-aware**: it raises every Definition-of-Done area even if the operator never mentioned it (auth? legal pages? CMS or static?).
15
+ 3. The roadmapper maps the filled spec onto the **Road**, dropping inapplicable DoD lines *with a logged reason* and expanding applicable ones into vertical slices.
16
+ 4. `qualia-verify` at each milestone close checks DoD coverage, not just per-task acceptance.
17
+
18
+ ## Grill variables (what `qualia-scope` must extract)
19
+
20
+ - **Purpose & primary CTA** — what one action defines success (book, buy, subscribe, contact)?
21
+ - **Page inventory** — exact routes. Static or dynamic?
22
+ - **Content source** — hardcoded, Markdown/MDX, or Supabase-backed CMS? Who edits it after handoff?
23
+ - **Brand direction** — reference sites, typography intent, color, tone. (Drives DESIGN.md — the anti-slop bar.)
24
+ - **Auth?** — most brochure sites: none. If gated content/portal → escalate to `web-app` archetype.
25
+ - **Forms & data capture** — contact, lead, newsletter. Where does the data go? Notifications?
26
+ - **Integrations** — analytics, CRM, payment, booking, email (Resend), maps.
27
+ - **Languages / i18n**, **legal jurisdiction** (drives which legal pages), **domain** status.
28
+
29
+ ## Production Definition of Done
30
+
31
+ **Design (anti-slop)** — chosen typeface with personality (not default Inter); deliberate spacing/radius scale; passes `qualia-design/design-laws.md`; responsive across breakpoints; dark mode if brand calls for it; real content, no lorem.
32
+
33
+ **Performance** — LCP ≤2.5s · INP ≤200ms · CLS ≤0.1 at field-data p75; image optimization (next/image); JS/page-weight budget agreed up front; Lighthouse in CI.
34
+
35
+ **SEO** — Metadata API per route; `metadataBase` in root layout; JSON-LD; sitemap.xml; robots.txt; canonicals; OG images; `X-Robots-Tag: noindex` on preview hosts.
36
+
37
+ **Accessibility** — WCAG 2.2 AA (EU default).
38
+
39
+ **Data (only if forms/CMS)** — Supabase table(s); **RLS enabled** with insert-only public policy on form tables, read policy on published content; rate limit on public POST.
40
+
41
+ **Security** — SSL enforced; secrets in env (`vercel env pull`), never client; `service_role` server-only; security headers (HSTS); MFA on Vercel/Supabase accounts.
42
+
43
+ **Observability** — analytics + Sentry + structured logging from day one.
44
+
45
+ **Content & legal** — real copy; privacy, terms, cookie notice (GDPR).
46
+
47
+ **Deploy & handoff** — Vercel production; custom domain + DNS; post-deploy smoke (HTTP 200, console clean, API <500ms); credentials + walkthrough + archive + ERP report.
48
+
49
+ ## The Road (default 0→100)
50
+
51
+ ### M1 — Foundation & Design System
52
+ Vertical slices establishing the visual language before any page is "real".
53
+ - Init: Next.js 16 App Router + TS + Tailwind + shadcn; repo + CI; Vercel project linked; preview deploys on.
54
+ - DESIGN.md from brand grill: real typography, color scale, spacing/radius, motion rules, explicit anti-slop negative rules.
55
+ - Layout shell: nav, footer, responsive grid, dark mode, base components.
56
+ - **Exit:** design system renders on a preview URL; passes design-laws baseline; tokens documented.
57
+
58
+ ### M2 — Pages & Content (one vertical slice per page-type)
59
+ - Each page-type as a slice: layout + real content + loading/empty/error states.
60
+ - CMS path (if chosen): Supabase schema + RLS read policies + editor wiring.
61
+ - Forms: UI + validation (client + server) → Supabase table (RLS insert + rate limit) → notification (Resend).
62
+ - **Exit:** every route has real content and states; forms persist and notify; no lorem anywhere.
63
+
64
+ ### M3 — Performance, SEO & Accessibility
65
+ - Perf pass to budget (LCP/INP/CLS, image optimization, bundle); Lighthouse in CI.
66
+ - SEO: metadata per route, metadataBase, JSON-LD, sitemap, robots, OG images, canonicals.
67
+ - A11y: WCAG 2.2 AA audit + fixes; responsive QA across breakpoints.
68
+ - **Exit:** budgets met on field-like data; SEO + a11y checklists green.
69
+
70
+ ### M4 — Handoff (always last)
71
+ - Legal pages (privacy/terms/cookie); analytics + Sentry live; security pass (RLS, headers, env, MFA).
72
+ - Custom domain + DNS; production deploy; post-deploy smoke.
73
+ - Credentials handover, client walkthrough, repo archive, `/qualia-report` to ERP.
74
+ - **Exit:** all DoD lines covered or explicitly waived with reason; client can operate it.
75
+
76
+ ## Notes
77
+ - Gated content, user accounts, or a dashboard → this is no longer a `website`. Use `web-app` (adds auth + RLS-everywhere + app-quality DoD).
78
+ - The Road is a default, not a cage. The roadmapper may merge M2/M3 for a 3-page site or split M2 for a 30-page one — but every DoD line still needs an owner.
@@ -0,0 +1,42 @@
1
+ ---
2
+ title: Qualia Constitution
3
+ scope: org-level — inherited by every Qualia project
4
+ updated: 2026-05-29
5
+ ---
6
+
7
+ # Qualia Constitution
8
+
9
+ > The top of the standards hierarchy. **Every Qualia project inherits these standards**, and they are **enforced at every increment's verify step** (`qualia-verify`, milestone close). Archetype Definitions of Done in `references/archetypes/*.md` *extend* this file — they add archetype-specific bars, never relax these. A senior should be able to read this in two minutes.
10
+
11
+ ## Supabase security (non-negotiable)
12
+
13
+ - [ ] **RLS on every table**, with explicit policies. Verify by querying the table as two different users — each must see only their own rows.
14
+ - [ ] **Authorize on `app_metadata`, never `user_metadata`.** `user_metadata` is user-editable and must never gate access.
15
+ - [ ] **`service_role` key is server-only.** Never prefixed `NEXT_PUBLIC_`, never imported into a client component.
16
+ - [ ] **Postgres views set `security_invoker = true`** — otherwise the view runs as its owner and bypasses the caller's RLS.
17
+ - [ ] **Every `UPDATE` policy has a matching `SELECT` policy.** Without it, updates fail silently.
18
+ - [ ] **Storage upsert needs `INSERT` + `SELECT` + `UPDATE`** policies on the bucket.
19
+ - [ ] **Revoke a user's sessions before deleting the user** — deletion alone leaves issued JWTs valid until expiry.
20
+
21
+ ## Schema flow
22
+
23
+ - [ ] **Local container → staging branch → production.** No manual schema edits on remote DBs.
24
+ - [ ] **All schema changes are migrations** in `supabase/migrations/`, applied through CI — never hand-applied to a remote.
25
+
26
+ ## Gates over prompts
27
+
28
+ Dangerous-command and architectural rules are enforced as **deterministic hooks**, not prose the model may forget. The framework already ships:
29
+
30
+ - [ ] **`migration-guard`** — blocks schema edits that bypass `supabase/migrations/`.
31
+ - [ ] **`supabase-destructive-guard`** — blocks destructive operations on remote DBs.
32
+ - [ ] **`branch-guard`** — enforces feature-branch-only; main stays deployable.
33
+
34
+ A rule worth enforcing is worth a hook. Add one rather than relying on instructions alone.
35
+
36
+ ## Context grounding
37
+
38
+ - [ ] **Bundled, version-matched docs are the source of truth for stack APIs** — over model memory. When in doubt about a Supabase / Next.js / vendor API, read the pinned docs, don't recall from weights.
39
+
40
+ ---
41
+
42
+ *This file contains only verified org standards. Archetype DoDs extend it; they do not override it.*
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: qualia
3
- description: "Smart router — reads project state (state.js), classifies the situation mechanically, and either returns the exact next command or performs the lightweight diagnosis previously split across helper commands. Use whenever you type /qualia, 'what next', 'next', 'what now', 'what should I do next', 'what command now', 'resume', 'pause', or 'I don't know what is going on'."
3
+ description: "Mechanical state-driven router — reads state.js, returns the exact next command. Cheap and instant. Triggers: '/qualia', 'what next', 'what now'. For deeper situational confusion use /qualia-idk."
4
4
  allowed-tools:
5
5
  - Bash
6
6
  - Read
@@ -20,6 +20,8 @@ Read project state. Classify your situation. Tell you the exact next command.
20
20
  node ${QUALIA_BIN}/state.js check 2>/dev/null
21
21
  ```
22
22
 
23
+ The JSON carries a `profile` field (`strict` or `standard`; the `$QUALIA_PROFILE` environment variable takes precedence when set). `strict` = hard gates, no waivers; `standard` = gates are advisory, and a senior may waive one with a reason logged to `.planning/decisions/`. Surface the profile whenever a gate is involved.
24
+
23
25
  Also gather context:
24
26
  ```bash
25
27
  test -f .continue-here.md && echo "HANDOFF_EXISTS" && head -20 .continue-here.md || echo "NO_HANDOFF"
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: qualia-build
3
- description: "Executes a planned phase by spawning fresh builder subagents per task with wave-based parallelization. Each task gets isolated context, commits atomically, and runs its own validation. Use when the user says 'build this phase', 'execute the plan', 'start building', 'run the build', 'qualia-build', or after /qualia-plan approves a phase plan."
3
+ description: "Execute a planned phase — fresh builder subagents per task, wave-based parallelization, atomic commits, per-task validation. Triggers: 'build this phase', 'execute the plan', 'start building', 'qualia-build'."
4
4
  allowed-tools:
5
5
  - Bash
6
6
  - Read
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: qualia-discuss
3
- description: "Alignment interview. Two modes. PROJECT MODE (default, no args) is the non-technical kickoff before /qualia-new — 8 questions for demo projects, 14 for full projects, output is .planning/project-discovery.md. PHASE MODE (with N arg, e.g. /qualia-discuss 2) is the technical aggressive grilling before /qualia-plan N — one question at a time with recommended answer, output is .planning/phase-{N}-context.md. Trigger phrases: 'discuss', 'kickoff interview', 'grill me', 'stress test this plan', 'wait let's think about this one', 'I'm not sure how to approach this'."
3
+ description: "Alignment interview — PROJECT MODE before /qualia-new, PHASE MODE before /qualia-plan N. Triggers: 'discuss', 'kickoff interview', 'grill me', 'stress test this plan', 'I'm not sure how to approach this'."
4
4
  allowed-tools:
5
5
  - Bash
6
6
  - Read
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: qualia-doctor
3
- description: "Employee-facing framework health check. Wraps `qualia-framework doctor`, checks install targets, project state, contract coverage, planning-folder hygiene, hooks, memory, ERP queue health, and gives safe repair commands. Trigger on 'doctor', 'health check', 'framework broken', 'is Qualia installed correctly', 'Codex not picking up Qualia', 'hooks not running', 'memory broken', 'ERP queue stuck', '.planning messy'."
3
+ description: "Framework health check — install, project state, contracts, hooks, memory, ERP queue. Suggests safe repair commands. Triggers: 'doctor', 'health check', 'framework broken', 'hooks not running', 'ERP queue stuck'."
4
4
  allowed-tools:
5
5
  - Bash
6
6
  - Read