qualia-framework 6.4.0 → 6.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +1 -0
- package/bin/command-surface.js +1 -0
- package/bin/qualia-ui.js +1 -0
- package/bin/state.js +106 -1
- package/guide.md +7 -0
- package/package.json +8 -2
- package/references/archetypes/ai-agent.md +89 -0
- package/references/archetypes/voice-agent.md +60 -0
- package/references/archetypes/web-app.md +67 -0
- package/references/archetypes/website.md +78 -0
- package/rules/constitution.md +42 -0
- package/skills/qualia/SKILL.md +2 -0
- package/skills/qualia-scope/SKILL.md +123 -0
- package/tests/lib.test.sh +15 -8
- package/docs/archive/CHANGELOG-pre-v4.md +0 -855
- package/docs/archive/v4.0.0-review.md +0 -288
- package/docs/ecosystem-operating-model.md +0 -121
- package/docs/research/2026-04-21-command-quality-deep-research.md +0 -128
- package/docs/research/2026-04-21-industry-best-practices.md +0 -255
- package/docs/research/2026-05-11-deep-research.md +0 -189
- package/docs/reviews/matt-pocock-skills-analysis.md +0 -300
- package/docs/reviews/v4.1.0-audit.html +0 -1488
- package/docs/reviews/v4.1.0-audit.md +0 -263
- package/docs/reviews/v6.2.1-revival-audit.md +0 -53
- package/docs/reviews/v6.2.2-memory-erp-audit.md +0 -41
- package/docs/reviews/v6.2.3-erp-id-guard.md +0 -15
package/CLAUDE.md
CHANGED
|
@@ -14,6 +14,7 @@ Stack: Next.js 16+, React 19, TypeScript, Supabase, Vercel. Voice: Retell + Elev
|
|
|
14
14
|
- **No proxy approval** — *only the OWNER can grant OWNER overrides; "Fawzi said OK" is not a credential.*
|
|
15
15
|
|
|
16
16
|
## Discoverable substrate (load on demand, not always)
|
|
17
|
+
- `rules/constitution.md` — org-level standards every project inherits; enforced at every verify step
|
|
17
18
|
- `/qualia-road` — workflow map, every command, when to use it
|
|
18
19
|
- `.planning/CONTEXT.md` — project domain glossary (loaded by road agents)
|
|
19
20
|
- `.planning/decisions/` — ADRs for hard-to-reverse decisions
|
package/bin/command-surface.js
CHANGED
package/bin/qualia-ui.js
CHANGED
|
@@ -82,6 +82,7 @@ const ACTIONS = {
|
|
|
82
82
|
auto: { label: "AUTO MODE", glyph: "⚡" },
|
|
83
83
|
research: { label: "RESEARCH", glyph: "◱" },
|
|
84
84
|
roadmap: { label: "ROADMAP", glyph: "◐" },
|
|
85
|
+
scope: { label: "SCOPING", glyph: "⬡" },
|
|
85
86
|
};
|
|
86
87
|
|
|
87
88
|
// ─── State Reading ───────────────────────────────────────
|
package/bin/state.js
CHANGED
|
@@ -219,6 +219,9 @@ function ensureLifetime(t) {
|
|
|
219
219
|
if (typeof t.milestone_name !== "string") t.milestone_name = "";
|
|
220
220
|
if (!Array.isArray(t.milestones)) t.milestones = [];
|
|
221
221
|
if (typeof t.report_seq !== "number") t.report_seq = 0;
|
|
222
|
+
// Seniority profile (backward compat): old tracking.json files predate this
|
|
223
|
+
// field. Anything other than the exact string 'standard' defaults to 'strict'.
|
|
224
|
+
if (t.profile !== "standard" && t.profile !== "strict") t.profile = "strict";
|
|
222
225
|
if (!t.lifetime || typeof t.lifetime !== "object") {
|
|
223
226
|
t.lifetime = {
|
|
224
227
|
tasks_completed: 0,
|
|
@@ -343,6 +346,9 @@ function parseStateMd(content) {
|
|
|
343
346
|
phase_name: phaseMatch ? phaseMatch[3].trim() : "",
|
|
344
347
|
status: get("Status").toLowerCase().replace(/\s+/g, "_") || "setup",
|
|
345
348
|
assigned_to: get("Assigned to") || "",
|
|
349
|
+
// Seniority profile: 'standard' lets a senior waive a gate; anything else
|
|
350
|
+
// (including missing or typo'd values) coerces to 'strict' — the safe default.
|
|
351
|
+
profile: get("Profile").toLowerCase() === "standard" ? "standard" : "strict",
|
|
346
352
|
phases,
|
|
347
353
|
schema_errors,
|
|
348
354
|
};
|
|
@@ -377,6 +383,7 @@ See: .planning/PROJECT.md
|
|
|
377
383
|
Phase: ${s.phase} of ${s.total_phases} — ${s.phase_name}
|
|
378
384
|
Status: ${s.status}
|
|
379
385
|
Assigned to: ${s.assigned_to}
|
|
386
|
+
Profile: ${s.profile || "strict"}
|
|
380
387
|
Last activity: ${now} — ${s.last_activity || "State updated"}
|
|
381
388
|
|
|
382
389
|
Progress: [${bar}] ${phaseFrac}%
|
|
@@ -572,16 +579,105 @@ function nextCommand(status, phase, totalPhases, verification) {
|
|
|
572
579
|
|
|
573
580
|
// ─── Commands ────────────────────────────────────────────
|
|
574
581
|
|
|
582
|
+
// ─── Seniority profile gate contract ────────────────────
|
|
583
|
+
// The effective profile resolves as: $QUALIA_PROFILE (env wins) → STATE.md
|
|
584
|
+
// Profile: line → tracking.json profile → 'strict' (default). Any value other
|
|
585
|
+
// than 'standard' (compared case-insensitively) coerces to 'strict' — the safe gate.
|
|
586
|
+
//
|
|
587
|
+
// Gate semantics (the contract; enforcement lives in the CONSUMING skill,
|
|
588
|
+
// qualia-scope — state.js only stores and surfaces the field, it does NOT
|
|
589
|
+
// enforce gates here or in cmdTransition):
|
|
590
|
+
// strict = hard gates, no waivers. The Definition-of-Done gate cannot be
|
|
591
|
+
// exited until every area is covered and no [NEEDS CLARIFICATION]
|
|
592
|
+
// markers remain.
|
|
593
|
+
// standard = gates advisory. A senior may exit the gate early with a reason
|
|
594
|
+
// logged as an ADR in .planning/decisions/.
|
|
595
|
+
function resolveProfile(s, t) {
|
|
596
|
+
const raw =
|
|
597
|
+
process.env.QUALIA_PROFILE ||
|
|
598
|
+
(s && s.profile) ||
|
|
599
|
+
(t && t.profile) ||
|
|
600
|
+
"strict";
|
|
601
|
+
return String(raw).toLowerCase() === "standard" ? "standard" : "strict";
|
|
602
|
+
}
|
|
603
|
+
|
|
575
604
|
function cmdCheck(opts) {
|
|
576
605
|
const t = readTracking();
|
|
577
606
|
const s = parseStateMd(readState());
|
|
578
|
-
|
|
607
|
+
// True NO_PROJECT only when BOTH the durable tracking AND the dashboard are
|
|
608
|
+
// absent. Either alone is a recoverable half-state.
|
|
609
|
+
if (!t && !s) {
|
|
579
610
|
return output({
|
|
580
611
|
ok: false,
|
|
581
612
|
error: "NO_PROJECT",
|
|
582
613
|
message: "No .planning/ found. Run /qualia-new to start.",
|
|
583
614
|
});
|
|
584
615
|
}
|
|
616
|
+
// STATE.md missing/corrupt but tracking.json intact. STATE.md is a derivable
|
|
617
|
+
// view — tracking.json already carries phase/status/milestone (the statusline
|
|
618
|
+
// reads them straight from it). Reconstruct and route to repair instead of
|
|
619
|
+
// falsely reporting NO_PROJECT. Critically, exit 0: cmdCheck feeds the
|
|
620
|
+
// /qualia router, which runs it inside a PARALLEL Bash batch. A non-zero exit
|
|
621
|
+
// makes the harness cancel the sibling commands ("Cancelled: parallel tool
|
|
622
|
+
// call ... errored"), so a recoverable state must never exit non-zero.
|
|
623
|
+
if (t && !s) {
|
|
624
|
+
ensureLifetime(t);
|
|
625
|
+
const phase = Number(t.phase || 1) || 1;
|
|
626
|
+
return output({
|
|
627
|
+
ok: true,
|
|
628
|
+
phase,
|
|
629
|
+
phase_name: t.phase_name || "",
|
|
630
|
+
total_phases: Number(t.total_phases || 0) || 0,
|
|
631
|
+
status: String(t.status || "setup"),
|
|
632
|
+
assigned_to: t.assigned_to || "",
|
|
633
|
+
profile: resolveProfile(null, t),
|
|
634
|
+
milestone: t.milestone || 1,
|
|
635
|
+
milestone_name: t.milestone_name || "",
|
|
636
|
+
milestones: t.milestones || [],
|
|
637
|
+
lifetime: t.lifetime,
|
|
638
|
+
verification: t.verification || "pending",
|
|
639
|
+
gap_cycles: (t.gap_cycles || {})[String(phase)] || 0,
|
|
640
|
+
gap_cycle_limit: getGapCycleLimit(),
|
|
641
|
+
tasks_done: t.tasks_done || 0,
|
|
642
|
+
tasks_total: t.tasks_total || 0,
|
|
643
|
+
deployed_url: t.deployed_url || "",
|
|
644
|
+
next_command: "state.js fix",
|
|
645
|
+
warning:
|
|
646
|
+
"STATE.md missing or unparseable — reconstructed from tracking.json. " +
|
|
647
|
+
"Run `state.js fix` to rewrite it canonically, then continue.",
|
|
648
|
+
recovered_from: "tracking.json",
|
|
649
|
+
});
|
|
650
|
+
}
|
|
651
|
+
// tracking.json missing but STATE.md present (the inverse half-state). The
|
|
652
|
+
// rest of cmdCheck needs tracking for lifetime/milestone/verification, so
|
|
653
|
+
// route to repair (`state.js fix` rebuilds tracking from STATE.md) rather
|
|
654
|
+
// than crash on a null tracking object. Exit 0 for the same batch reason.
|
|
655
|
+
if (!t && s) {
|
|
656
|
+
return output({
|
|
657
|
+
ok: true,
|
|
658
|
+
phase: s.phase,
|
|
659
|
+
phase_name: s.phase_name,
|
|
660
|
+
total_phases: s.total_phases,
|
|
661
|
+
status: s.status,
|
|
662
|
+
assigned_to: s.assigned_to,
|
|
663
|
+
profile: resolveProfile(s, null),
|
|
664
|
+
milestone: 1,
|
|
665
|
+
milestone_name: "",
|
|
666
|
+
milestones: [],
|
|
667
|
+
lifetime: undefined,
|
|
668
|
+
verification: "pending",
|
|
669
|
+
gap_cycles: 0,
|
|
670
|
+
gap_cycle_limit: getGapCycleLimit(),
|
|
671
|
+
tasks_done: 0,
|
|
672
|
+
tasks_total: 0,
|
|
673
|
+
deployed_url: "",
|
|
674
|
+
next_command: "state.js fix",
|
|
675
|
+
warning:
|
|
676
|
+
"tracking.json missing — reconstructed from STATE.md. " +
|
|
677
|
+
"Run `state.js fix` to rebuild tracking, then continue.",
|
|
678
|
+
recovered_from: "STATE.md",
|
|
679
|
+
});
|
|
680
|
+
}
|
|
585
681
|
ensureLifetime(t);
|
|
586
682
|
output({
|
|
587
683
|
ok: true,
|
|
@@ -590,6 +686,7 @@ function cmdCheck(opts) {
|
|
|
590
686
|
total_phases: s.total_phases,
|
|
591
687
|
status: s.status,
|
|
592
688
|
assigned_to: s.assigned_to,
|
|
689
|
+
profile: resolveProfile(s, t),
|
|
593
690
|
milestone: t.milestone || 1,
|
|
594
691
|
milestone_name: t.milestone_name || "",
|
|
595
692
|
milestones: t.milestones || [],
|
|
@@ -940,6 +1037,12 @@ function cmdInit(opts) {
|
|
|
940
1037
|
const prev = readTracking();
|
|
941
1038
|
const prevLife = prev ? ensureLifetime(prev) : null;
|
|
942
1039
|
|
|
1040
|
+
// Seniority profile: explicit --profile standard opts in; otherwise preserve
|
|
1041
|
+
// the prior project's profile on re-init, defaulting to the safe 'strict'.
|
|
1042
|
+
// Any value other than the exact string 'standard' coerces to 'strict'.
|
|
1043
|
+
const profileSource = opts.profile || (prevLife ? prevLife.profile : "strict");
|
|
1044
|
+
const profile = profileSource === "standard" ? "standard" : "strict";
|
|
1045
|
+
|
|
943
1046
|
// Build state
|
|
944
1047
|
const s = {
|
|
945
1048
|
phase: 1,
|
|
@@ -947,6 +1050,7 @@ function cmdInit(opts) {
|
|
|
947
1050
|
phase_name: phases[0].name,
|
|
948
1051
|
status: "setup",
|
|
949
1052
|
assigned_to: opts.assigned_to || "",
|
|
1053
|
+
profile,
|
|
950
1054
|
last_activity: `Project initialized`,
|
|
951
1055
|
phases: phases.map((p, i) => ({
|
|
952
1056
|
num: i + 1,
|
|
@@ -994,6 +1098,7 @@ function cmdInit(opts) {
|
|
|
994
1098
|
phase_name: phases[0].name,
|
|
995
1099
|
total_phases: totalPhases,
|
|
996
1100
|
status: "setup",
|
|
1101
|
+
profile,
|
|
997
1102
|
wave: 0,
|
|
998
1103
|
tasks_done: 0,
|
|
999
1104
|
tasks_total: 0,
|
package/guide.md
CHANGED
|
@@ -99,6 +99,13 @@ Hard rules (enforced by `state.js` and the roadmapper):
|
|
|
99
99
|
5. **`/qualia` is your friend** — lost on "what's my next command?" The router reads state and returns the next move.
|
|
100
100
|
6. **`/qualia-idk` is your deeper friend** — confused about *the situation itself*. Reads conversation + planning + code, then returns guidance plus a paste-ready Qualia command sequence.
|
|
101
101
|
|
|
102
|
+
## Profiles
|
|
103
|
+
|
|
104
|
+
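A project runs under one profile. The active profile resolves in this order: `$QUALIA_PROFILE` (the environment variable wins) → the `Profile:` line in `STATE.md` → the `profile` field in `tracking.json` → `strict` (the default). Any value other than `standard` is treated as `strict`. `state.js check` surfaces the active profile in its output.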
|
|
105
|
+
|
|
106
|
+
- **`strict`** (default for the team) — hard gates, no waivers. Every gate must pass before the road advances.
|
|
107
|
+
- **`standard`** — gates are advisory. A senior may exit a Definition-of-Done gate early, provided the reason is logged to `.planning/decisions/`.
|
|
108
|
+
|
|
102
109
|
## When You're Stuck
|
|
103
110
|
|
|
104
111
|
```
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "qualia-framework",
|
|
3
|
-
"version": "6.4.0",
|
|
3
|
+
"version": "6.5.0",
|
|
4
4
|
"description": "Claude Code and Codex workflow framework by Qualia Solutions. Plan, build, verify, ship.",
|
|
5
5
|
"bin": {
|
|
6
6
|
"qualia-framework": "./bin/cli.js"
|
|
@@ -45,7 +45,13 @@
|
|
|
45
45
|
"templates/",
|
|
46
46
|
"references/",
|
|
47
47
|
"tests/",
|
|
48
|
-
"docs/",
|
|
48
|
+
"docs/agent-runs.md",
|
|
49
|
+
"docs/erp-contract.md",
|
|
50
|
+
"docs/plan-contract.md",
|
|
51
|
+
"docs/playwright-loop-pilot-results.md",
|
|
52
|
+
"docs/release.md",
|
|
53
|
+
"docs/changelog-v6.html",
|
|
54
|
+
"docs/onboarding.html",
|
|
49
55
|
"CLAUDE.md",
|
|
50
56
|
"AGENTS.md",
|
|
51
57
|
"guide.md"
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
---
|
|
2
|
+
archetype: ai-agent
|
|
3
|
+
stack: Next.js 16 (Vercel, app + API) · Supabase (Postgres + pgvector) · Railway (workers) · OpenRouter · Tailwind + shadcn/ui
|
|
4
|
+
updated: 2026-05-28
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Archetype: `ai-agent`
|
|
8
|
+
|
|
9
|
+
> LLM / chat / agent products on Supabase + Vercel, with Railway for any long-running or scheduled compute. The roadmapper loads this file when the operator picks `ai-agent`. Voice (`voice-agent`) extends this archetype with a latency + call-testing milestone — see the bottom note.
|
|
10
|
+
|
|
11
|
+
## How this file is used
|
|
12
|
+
|
|
13
|
+
Same contract as every archetype: `qualia-scope` grills the **Grill variables**, the **Definition of Done** is the fixed coverage, the **Road** is the default 0→100. The differentiator here is **M3 — the eval gate**: an agent isn't "done" because it replies; it's done when it passes measurable cases.
|
|
14
|
+
|
|
15
|
+
## Grill variables (what `qualia-scope` must extract)
|
|
16
|
+
|
|
17
|
+
- **Job to be done** — one sentence. What does the agent *do*, for whom, replacing what manual work?
|
|
18
|
+
- **Conversation shape** — single-turn tool, multi-turn chat, or autonomous task agent?
|
|
19
|
+
- **Knowledge** — does it need the client's data (RAG)? Sources, freshness, volume → drives pgvector + ingestion.
|
|
20
|
+
- **Tools / actions** — what can it *do* beyond talk (book, query, email, write to a system)? Each tool is a vertical slice.
|
|
21
|
+
- **Model & routing** — quality vs cost tier; which OpenRouter models; fallback chain.
|
|
22
|
+
- **Surface** — embedded widget, standalone app, API, or channel (WhatsApp/Slack)? Auth model.
|
|
23
|
+
- **Compute shape** — purely request/response (Vercel only) or long-running/scheduled/queue work (→ Railway worker)?
|
|
24
|
+
- **Guardrails** — what must it refuse? PII handling? Human escalation path?
|
|
25
|
+
- **Success metric** — how is "good" measured? (This becomes the eval suite. If they can't answer, the project has no finish line — surface it now.)
|
|
26
|
+
- **Cost ceiling** — per-conversation and monthly budget → drives guardrails.
|
|
27
|
+
|
|
28
|
+
## Production Definition of Done
|
|
29
|
+
|
|
30
|
+
**Foundation & data** — Supabase with **RLS on every table** (conversations, messages, users, embeddings); auth; pgvector if RAG. Migrations in version control.
|
|
31
|
+
|
|
32
|
+
**Agent core** — LLM via **OpenRouter** with model fallback; system prompts **versioned in source**, never hardcoded inline; streaming responses; context-window management.
|
|
33
|
+
|
|
34
|
+
**RAG (if applicable)** — ingestion pipeline; retrieval quality checked, not assumed; source attribution.
|
|
35
|
+
|
|
36
|
+
**Tools/actions** — each action validated server-side; failure + timeout handling; idempotency where it writes.
|
|
37
|
+
|
|
38
|
+
**Evals** — pass/fail suite over real cases before "done"; covers the success metric and the refusal/guardrail cases. **This is the ship gate.**
|
|
39
|
+
|
|
40
|
+
**Guardrails & cost** — input validation; refusal/safety behavior; graceful fallback on model failure; per-request + daily cost ceilings; token + latency logging.
|
|
41
|
+
|
|
42
|
+
**Compute (if Railway)** — health checks (`/health`); structured logging; restart policy; staging→prod env separation; secrets in Railway variables, never logged.
|
|
43
|
+
|
|
44
|
+
**App quality** — auth flows; rate limiting; the **non-AI-looking** UI pass; responsive; loading/empty/error/streaming states.
|
|
45
|
+
|
|
46
|
+
**Security & compliance** — `service_role` server-only; secrets in env; security headers; MFA on accounts; GDPR posture (EU) — consent, retention, data export/delete.
|
|
47
|
+
|
|
48
|
+
**Observability** — Sentry + structured logging + analytics.
|
|
49
|
+
|
|
50
|
+
**Deploy & handoff** — Vercel prod (+ Railway prod if worker); env separation; post-deploy smoke including **real agent calls**; credentials + walkthrough + archive + ERP report.
|
|
51
|
+
|
|
52
|
+
## The Road (default 0→100)
|
|
53
|
+
|
|
54
|
+
### M1 — Foundation & Data
|
|
55
|
+
- Init: Next.js 16 (Vercel) for app + API routes; Supabase project (auth, RLS on every table); Railway service scaffolded *only if* the grill found long-running/scheduled work.
|
|
56
|
+
- Schema: conversations, messages, users; pgvector tables if RAG.
|
|
57
|
+
- OpenRouter wired with a model + fallback; secrets in env.
|
|
58
|
+
- **Exit:** authenticated user can hit a stubbed endpoint; RLS verified by logging in as two users; deploys to preview.
|
|
59
|
+
|
|
60
|
+
### M2 — Core Agent Loop (vertical slice: input → model → response → persist)
|
|
61
|
+
- Streaming chat UI; system prompt in source control; conversation persistence.
|
|
62
|
+
- Orchestration: tool-calling scaffold; RAG retrieval if applicable; context management.
|
|
63
|
+
- Cost guardrails + token/latency logging from the first call.
|
|
64
|
+
- **Exit:** a real end-to-end conversation works, persists, and its cost/latency is logged.
|
|
65
|
+
|
|
66
|
+
### M3 — Evals & Guardrails (THE GATE)
|
|
67
|
+
- Eval harness with pass/fail cases mapped to the success metric — not vibes.
|
|
68
|
+
- Guardrails: input validation, refusal/safety, fallback on model failure, human-escalation path.
|
|
69
|
+
- Each tool/action: server-side validation, timeout + failure handling, idempotency on writes.
|
|
70
|
+
- Railway health checks + logging if a worker exists.
|
|
71
|
+
- **Exit:** eval suite green; every guardrail case handled. *No ship before this milestone closes.*
|
|
72
|
+
|
|
73
|
+
### M4 — App Surface & Polish
|
|
74
|
+
- Auth flows, user management, rate limiting.
|
|
75
|
+
- The non-AI-looking design pass (DESIGN.md, anti-slop), responsive, all async states incl. streaming.
|
|
76
|
+
- **Exit:** product looks and feels built, not generated; passes design-laws.
|
|
77
|
+
|
|
78
|
+
### M5 — Handoff (always last)
|
|
79
|
+
- Security review + secrets/env audit; GDPR posture (consent, retention, export/delete).
|
|
80
|
+
- Prod deploy (Vercel + Railway envs separated); post-deploy smoke including **real agent calls**, not just HTTP 200.
|
|
81
|
+
- Credentials handover, walkthrough, archive, `/qualia-report` to ERP.
|
|
82
|
+
- **Exit:** all DoD lines covered or waived with reason; client can operate it.
|
|
83
|
+
|
|
84
|
+
## Why M3 exists (the 0→100 insight)
|
|
85
|
+
|
|
86
|
+
The reason agents "finish but aren't done" is that M2 *feels* like completion — it talks, it's demo-able. But demo-able ≠ reliable. **M3 is the milestone the old flow never had**: it converts "it replied" into "it passes." If the grill couldn't extract a success metric, M3 has no cases to run — which is the framework telling you the project was never properly scoped. That's the feature, not a bug.
|
|
87
|
+
|
|
88
|
+
## Voice extension (`voice-agent`)
|
|
89
|
+
Add a milestone between M3 and M4: **latency budget <800ms end-to-end** (the bar where callers stop noticing it's AI; >1.2s feels like legacy IVR), **end-to-end call testing with pass/fail** through the full Retell + ElevenLabs + Telnyx stack (not just prompt review), turn-taking / barge-in verified, transcript logging + PII redaction, recording-consent disclosure.
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
---
|
|
2
|
+
archetype: voice-agent
|
|
3
|
+
extends: ai-agent
|
|
4
|
+
stack: Retell (orchestration) · ElevenLabs (voice) · Telnyx (telephony) · OpenRouter (LLM) · Supabase · Vercel/Railway
|
|
5
|
+
updated: 2026-05-29
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# Archetype: `voice-agent`
|
|
9
|
+
|
|
10
|
+
> Real-time voice agents (inbound/outbound calls) on Retell + ElevenLabs + Telnyx. **Extends `ai-agent`** — every `ai-agent` Definition-of-Done line still applies (OpenRouter routing, versioned prompts, the eval gate, cost guardrails, RLS, observability, security). This file adds the voice-specific bars, where latency and real call testing are the difference between "demo" and "shippable." Used by `qualia-scope` when the operator picks `voice-agent`.
|
|
11
|
+
|
|
12
|
+
## How this file is used
|
|
13
|
+
|
|
14
|
+
Same contract: `qualia-scope` grills the **Grill variables**, the **Definition of Done** is the per-increment bar, the **Road** is the default 0→100. Inherits `ai-agent` + `rules/constitution.md`. The new gate is **M-Voice**: real end-to-end calls with pass/fail, not transcript review.
|
|
15
|
+
|
|
16
|
+
## Grill variables (added on top of `ai-agent`)
|
|
17
|
+
|
|
18
|
+
- **Call direction** — inbound, outbound, or both? Volume/concurrency expected?
|
|
19
|
+
- **The one job** — appointment reminder, intake, qualification, support triage? (Start with one; a vague "assistant" fails.)
|
|
20
|
+
- **Call flow** — the happy path + the branches (no-answer, voicemail, wrong person, transfer-to-human).
|
|
21
|
+
- **Voice & persona** — language(s), accent, ElevenLabs voice, tone, named or anonymous.
|
|
22
|
+
- **Latency tolerance** — confirm the <800ms target fits the use case; identify the slowest dependency (LLM, tool call, DB).
|
|
23
|
+
- **Tools mid-call** — what must it look up or write *during* the call (calendar, CRM, order status)? Each is a latency risk.
|
|
24
|
+
- **Escalation** — when and how does it hand to a human? Warm transfer or callback?
|
|
25
|
+
- **Telephony** — Telnyx numbers, regions, caller-ID, recording laws per region.
|
|
26
|
+
- **Compliance** — recording-consent disclosure, PII handling, GDPR retention (EU). Regulated domain (health/finance)?
|
|
27
|
+
- **Success metric** — answered-rate, completion-rate, transfer-rate, CSAT? (Becomes the eval + call-test pass criteria.)
|
|
28
|
+
|
|
29
|
+
## Production Definition of Done (added on top of `ai-agent`)
|
|
30
|
+
|
|
31
|
+
**Latency** — **<800ms end-to-end** turn latency is the bar where callers stop noticing it's AI; >1.2s feels like legacy IVR. Measured on real calls, not assumed. Slowest dependency identified and budgeted.
|
|
32
|
+
|
|
33
|
+
**Call quality** — turn-taking / barge-in / interruption handled without breaking flow; no dead air on tool calls (filler/await behavior); graceful handling of no-answer, voicemail, silence, wrong person.
|
|
34
|
+
|
|
35
|
+
**End-to-end call testing (THE GATE)** — automated test calls through the full Retell + ElevenLabs + Telnyx stack with measurable pass/fail against the success metric. Transcript review is *not* sufficient — the audio path is part of the product.
|
|
36
|
+
|
|
37
|
+
**Escalation** — human handoff path tested (transfer or callback); failure modes (LLM/tool/telephony down) degrade safely, never trap the caller.
|
|
38
|
+
|
|
39
|
+
**Observability & compliance** — full transcript + recording logging; PII redaction; recording-consent disclosure at call start; GDPR retention policy; per-region recording-law compliance.
|
|
40
|
+
|
|
41
|
+
**Cost** — per-minute + per-call cost tracked (voice + LLM + telephony stack); daily ceiling.
|
|
42
|
+
|
|
43
|
+
## The Road (default 0→100)
|
|
44
|
+
|
|
45
|
+
Follows `ai-agent` M1–M3 (Foundation/Data → Core Loop → Evals & Guardrails), then inserts the voice gate before the app surface:
|
|
46
|
+
|
|
47
|
+
### M-Voice — Voice Path & Call Testing (inserted after ai-agent M3, before polish)
|
|
48
|
+
- Retell agent wired to ElevenLabs voice + Telnyx numbers; LLM via OpenRouter.
|
|
49
|
+
- Call flow built: happy path + branches (no-answer, voicemail, wrong person, transfer).
|
|
50
|
+
- Mid-call tools with no-dead-air behavior; barge-in/turn-taking verified.
|
|
51
|
+
- **Latency measured on real calls to the <800ms budget**; slowest dependency optimized.
|
|
52
|
+
- **End-to-end automated call tests** with pass/fail on the success metric.
|
|
53
|
+
- Transcript + recording logging; consent disclosure; PII redaction.
|
|
54
|
+
- **Exit:** real test calls pass the metric at target latency; every branch + escalation handled; compliance wired. *No ship before this closes.*
|
|
55
|
+
|
|
56
|
+
### Then — App Surface & Handoff
|
|
57
|
+
- `ai-agent` M4/M5: dashboard (call logs, transcripts, metrics), the non-AI-looking UI, security/GDPR review, prod deploy (Vercel + Railway envs), smoke including **real calls**, handoff or rolling-release.
|
|
58
|
+
|
|
59
|
+
## Why M-Voice exists
|
|
60
|
+
A voice agent that reads well in a transcript can still be unusable on a call — 1.5s pauses, talking over the caller, dead air during a lookup. Text evals (ai-agent M3) prove the *reasoning*; M-Voice proves the *experience*. Both gates, or it isn't done.
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
---
|
|
2
|
+
archetype: web-app
|
|
3
|
+
extends: website
|
|
4
|
+
stack: Next.js 16 (App Router) · Tailwind + shadcn/ui · Supabase (auth + Postgres + RLS) · Vercel
|
|
5
|
+
updated: 2026-05-29
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# Archetype: `web-app`
|
|
9
|
+
|
|
10
|
+
> Authenticated products with user accounts, roles, and a dashboard on Vercel + Supabase. **Extends `website`** — every `website` Definition-of-Done line still applies (design, performance, SEO where relevant, a11y, observability, deploy, handoff). This file adds the auth, data, and app-quality bars. Used by `qualia-scope` when the operator picks `web-app`, or when a `website` grows gated content / accounts.
|
|
11
|
+
|
|
12
|
+
## How this file is used
|
|
13
|
+
|
|
14
|
+
Same contract as every archetype: `qualia-scope` grills the **Grill variables**, the **Definition of Done** is the per-increment bar, the **Road** is the default 0→100. Inherits `website` + `rules/constitution.md`; relaxes nothing.
|
|
15
|
+
|
|
16
|
+
## Grill variables (added on top of `website`)
|
|
17
|
+
|
|
18
|
+
- **Who are the users?** — roles (admin / staff / client / public) and what each can see and do. Drives the RLS model.
|
|
19
|
+
- **Auth model** — email/password, magic link, OAuth providers, SSO? Email verification required? Password reset flow?
|
|
20
|
+
- **Authorization source** — what claims gate access? (Must live in `app_metadata`, never `user_metadata` — constitution.)
|
|
21
|
+
- **Tenancy** — single-tenant, per-user, or multi-tenant/workspace? (Multi-tenant changes every RLS policy — surface it now.)
|
|
22
|
+
- **Core entities & relationships** — the domain model. Each entity → a CONTEXT.md glossary term.
|
|
23
|
+
- **Write surfaces** — what users create/edit/delete; which writes need confirmation, soft-delete, or audit.
|
|
24
|
+
- **Real-time / collaboration** — does anything need live updates (presence, notifications)?
|
|
25
|
+
- **Billing** — free, one-off, subscription? Provider? (If yes, billing is its own increment set.)
|
|
26
|
+
- **Notifications** — email (Resend), in-app, both? Triggered by what events?
|
|
27
|
+
|
|
28
|
+
## Production Definition of Done (added on top of `website`)
|
|
29
|
+
|
|
30
|
+
**Auth & access** — Supabase auth with the chosen model; email verification + password reset wired; **RLS enabled on every table** with policies derived from `app_metadata` claims; role-based routing enforced in middleware *and* at the data layer (never trust the client). Verified by logging in as each role and confirming isolation.
|
|
31
|
+
|
|
32
|
+
**RLS correctness (constitution)** — every UPDATE policy has a matching SELECT; views use `security_invoker = true`; storage upsert grants INSERT+SELECT+UPDATE; sessions revoked before user delete.
|
|
33
|
+
|
|
34
|
+
**Data** — domain schema in `supabase/migrations/`; FK relationships normalized; soft-delete + audit where the grill flagged it; no N+1 on list views.
|
|
35
|
+
|
|
36
|
+
**App quality** — every async surface has loading / empty / error states; forms validate client *and* server (Zod or equivalent); destructive actions confirm; optimistic UI where latency matters; rate limiting on mutating + public endpoints.
|
|
37
|
+
|
|
38
|
+
**Security** — `service_role` server-only; secrets in env; security headers (HSTS); MFA on Supabase/Vercel accounts; CSRF/permission checks on every mutation.
|
|
39
|
+
|
|
40
|
+
**Billing (if applicable)** — provider integrated; webhooks idempotent; plan/entitlement state authoritative server-side; failed-payment + cancellation flows handled.
|
|
41
|
+
|
|
42
|
+
## The Road (default 0→100)
|
|
43
|
+
|
|
44
|
+
### M1 — Foundation, Auth & Data
|
|
45
|
+
- Init stack + Vercel link + CI; Supabase project; **RLS on every table from the first migration** (not retrofitted).
|
|
46
|
+
- Auth model: signup, login, verification, reset; role claims in `app_metadata`; role-based middleware.
|
|
47
|
+
- Domain schema + relationships; seed data.
|
|
48
|
+
- **Exit:** each role logs in and sees only what it should — verified as two+ users; deploys to preview.
|
|
49
|
+
|
|
50
|
+
### M2 — Core Capabilities (one vertical slice per capability)
|
|
51
|
+
- Each capability cuts through UI + server action + RLS + validation + states + test. Independently shippable.
|
|
52
|
+
- **Exit:** the primary user job works end-to-end with all async states; writes validated both sides.
|
|
53
|
+
|
|
54
|
+
### M3 — App Hardening
|
|
55
|
+
- Rate limiting, audit/soft-delete, notifications, real-time (if scoped); billing increments (if scoped).
|
|
56
|
+
- Performance pass (list virtualization, query aggregation, no N+1); error/empty states audited.
|
|
57
|
+
- **Exit:** mutation paths secured + rate-limited; perf budget met; billing flows (if any) handle failure.
|
|
58
|
+
|
|
59
|
+
### M4 — Polish, SEO-where-relevant & Handoff
|
|
60
|
+
- Design pass to `website` anti-slop bar; a11y WCAG 2.2 AA; SEO on public routes only (`noindex` the app).
|
|
61
|
+
- Legal pages; analytics + Sentry; security pass (RLS, headers, env, MFA); custom domain; prod deploy + smoke.
|
|
62
|
+
- Credentials, walkthrough, archive, ERP report (or rolling-release for an internal product).
|
|
63
|
+
- **Exit:** all DoD lines covered or waived with reason.
|
|
64
|
+
|
|
65
|
+
## Notes
|
|
66
|
+
- Internal/living products (like the ERP) run as **rolling releases** — no terminal Handoff. Each shipped increment still clears this DoD. Handoff applies only to client-delivered web-apps.
|
|
67
|
+
- LLM features → escalate to `ai-agent` (adds OpenRouter routing, evals, cost guardrails on top of this).
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
---
|
|
2
|
+
archetype: website
|
|
3
|
+
stack: Next.js 16 (App Router) · Tailwind + shadcn/ui · Supabase · Vercel
|
|
4
|
+
updated: 2026-05-28
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Archetype: `website`
|
|
8
|
+
|
|
9
|
+
> Marketing / brochure / content sites on Vercel + Supabase. The roadmapper loads this file when the operator picks `website`. The grill (`qualia-scope`) reads the **Grill variables** below; the **Definition of Done** is the fixed coverage the roadmap must satisfy; the **Road** is the default 0→100 shape.
|
|
10
|
+
|
|
11
|
+
## How this file is used
|
|
12
|
+
|
|
13
|
+
1. `qualia-scope` grills the operator through the **Grill variables** — depth on each, recommended-answer-with-why, writing answers to the spec and terms to `.planning/CONTEXT.md`.
|
|
14
|
+
2. The grill is **DoD-aware**: it raises every Definition-of-Done area even if the operator never mentioned it (auth? legal pages? CMS or static?).
|
|
15
|
+
3. The roadmapper maps the filled spec onto the **Road**, dropping inapplicable DoD lines *with a logged reason* and expanding applicable ones into vertical slices.
|
|
16
|
+
4. `qualia-verify` at each milestone close checks DoD coverage, not just per-task acceptance.
|
|
17
|
+
|
|
18
|
+
## Grill variables (what `qualia-scope` must extract)
|
|
19
|
+
|
|
20
|
+
- **Purpose & primary CTA** — what one action defines success (book, buy, subscribe, contact)?
|
|
21
|
+
- **Page inventory** — exact routes. Static or dynamic?
|
|
22
|
+
- **Content source** — hardcoded, Markdown/MDX, or Supabase-backed CMS? Who edits it after handoff?
|
|
23
|
+
- **Brand direction** — reference sites, typography intent, color, tone. (Drives DESIGN.md — the anti-slop bar.)
|
|
24
|
+
- **Auth?** — most brochure sites: none. If gated content/portal → escalate to `web-app` archetype.
|
|
25
|
+
- **Forms & data capture** — contact, lead, newsletter. Where does the data go? Notifications?
|
|
26
|
+
- **Integrations** — analytics, CRM, payment, booking, email (Resend), maps.
|
|
27
|
+
- **Languages / i18n**, **legal jurisdiction** (drives which legal pages), **domain** status.
|
|
28
|
+
|
|
29
|
+
## Production Definition of Done
|
|
30
|
+
|
|
31
|
+
**Design (anti-slop)** — chosen typeface with personality (not default Inter); deliberate spacing/radius scale; passes `qualia-design/design-laws.md`; responsive across breakpoints; dark mode if brand calls for it; real content, no lorem.
|
|
32
|
+
|
|
33
|
+
**Performance** — LCP ≤2.5s · INP ≤200ms · CLS ≤0.1 at field-data p75; image optimization (next/image); JS/page-weight budget agreed up front; Lighthouse in CI.
|
|
34
|
+
|
|
35
|
+
**SEO** — Metadata API per route; `metadataBase` in root layout; JSON-LD; sitemap.xml; robots.txt; canonicals; OG images; `X-Robots-Tag: noindex` on preview hosts.
|
|
36
|
+
|
|
37
|
+
**Accessibility** — WCAG 2.2 AA (EU default).
|
|
38
|
+
|
|
39
|
+
**Data (only if forms/CMS)** — Supabase table(s); **RLS enabled** with insert-only public policy on form tables, read policy on published content; rate limit on public POST.
|
|
40
|
+
|
|
41
|
+
**Security** — SSL enforced; secrets in env (`vercel env pull`), never client; `service_role` server-only; security headers (HSTS); MFA on Vercel/Supabase accounts.
|
|
42
|
+
|
|
43
|
+
**Observability** — analytics + Sentry + structured logging from day one.
|
|
44
|
+
|
|
45
|
+
**Content & legal** — real copy; privacy, terms, cookie notice (GDPR).
|
|
46
|
+
|
|
47
|
+
**Deploy & handoff** — Vercel production; custom domain + DNS; post-deploy smoke (HTTP 200, console clean, API responses <500ms); credentials + walkthrough + archive + ERP report.
|
|
48
|
+
|
|
49
|
+
## The Road (default 0→100)
|
|
50
|
+
|
|
51
|
+
### M1 — Foundation & Design System
|
|
52
|
+
Vertical slices establishing the visual language before any page is "real".
|
|
53
|
+
- Init: Next.js 16 App Router + TS + Tailwind + shadcn; repo + CI; Vercel project linked; preview deploys on.
|
|
54
|
+
- DESIGN.md from brand grill: real typography, color scale, spacing/radius, motion rules, explicit anti-slop negative rules.
|
|
55
|
+
- Layout shell: nav, footer, responsive grid, dark mode, base components.
|
|
56
|
+
- **Exit:** design system renders on a preview URL; passes design-laws baseline; tokens documented.
|
|
57
|
+
|
|
58
|
+
### M2 — Pages & Content (one vertical slice per page-type)
|
|
59
|
+
- Each page-type as a slice: layout + real content + loading/empty/error states.
|
|
60
|
+
- CMS path (if chosen): Supabase schema + RLS read policies + editor wiring.
|
|
61
|
+
- Forms: UI + validation (client + server) → Supabase table (RLS insert + rate limit) → notification (Resend).
|
|
62
|
+
- **Exit:** every route has real content and states; forms persist and notify; no lorem anywhere.
|
|
63
|
+
|
|
64
|
+
### M3 — Performance, SEO & Accessibility
|
|
65
|
+
- Perf pass to budget (LCP/INP/CLS, image optimization, bundle); Lighthouse in CI.
|
|
66
|
+
- SEO: metadata per route, metadataBase, JSON-LD, sitemap, robots, OG images, canonicals.
|
|
67
|
+
- A11y: WCAG 2.2 AA audit + fixes; responsive QA across breakpoints.
|
|
68
|
+
- **Exit:** budgets met on field-like data; SEO + a11y checklists green.
|
|
69
|
+
|
|
70
|
+
### M4 — Handoff (always last)
|
|
71
|
+
- Legal pages (privacy/terms/cookie); analytics + Sentry live; security pass (RLS, headers, env, MFA).
|
|
72
|
+
- Custom domain + DNS; production deploy; post-deploy smoke.
|
|
73
|
+
- Credentials handover, client walkthrough, repo archive, `/qualia-report` to ERP.
|
|
74
|
+
- **Exit:** all DoD lines covered or explicitly waived with reason; client can operate it.
|
|
75
|
+
|
|
76
|
+
## Notes
|
|
77
|
+
- Gated content, user accounts, or a dashboard → this is no longer a `website`. Use `web-app` (adds auth + RLS-everywhere + app-quality DoD).
|
|
78
|
+
- The Road is a default, not a cage. The roadmapper may merge M2/M3 for a 3-page site or split M2 for a 30-page one — but every DoD line still needs an owner.
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: Qualia Constitution
|
|
3
|
+
scope: org-level — inherited by every Qualia project
|
|
4
|
+
updated: 2026-05-29
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Qualia Constitution
|
|
8
|
+
|
|
9
|
+
> The top of the standards hierarchy. **Every Qualia project inherits these standards**, and they are **enforced at every increment's verify step** (`qualia-verify`, milestone close). Archetype Definitions of Done in `references/archetypes/*.md` *extend* this file — they add archetype-specific bars, never relax these. A senior should be able to read this in two minutes.
|
|
10
|
+
|
|
11
|
+
## Supabase security (non-negotiable)
|
|
12
|
+
|
|
13
|
+
- [ ] **RLS on every table**, with explicit policies. Verify by querying the table as two different users — each must see only their own rows.
|
|
14
|
+
- [ ] **Authorize on `app_metadata`, never `user_metadata`.** `user_metadata` is user-editable and must never gate access.
|
|
15
|
+
- [ ] **`service_role` key is server-only.** Never prefixed `NEXT_PUBLIC_`, never imported into a client component.
|
|
16
|
+
- [ ] **Postgres views set `security_invoker = true`** — otherwise the view runs as its owner and bypasses the caller's RLS.
|
|
17
|
+
- [ ] **Every `UPDATE` policy has a matching `SELECT` policy.** Without it, updates fail silently.
|
|
18
|
+
- [ ] **Storage upsert needs `INSERT` + `SELECT` + `UPDATE`** policies on the bucket.
|
|
19
|
+
- [ ] **Revoke a user's sessions before deleting the user** — deletion alone leaves issued JWTs valid until expiry.
|
|
20
|
+
|
|
21
|
+
## Schema flow
|
|
22
|
+
|
|
23
|
+
- [ ] **Local container → staging branch → production.** No manual schema edits on remote DBs.
|
|
24
|
+
- [ ] **All schema changes are migrations** in `supabase/migrations/`, applied through CI — never hand-applied to a remote.
|
|
25
|
+
|
|
26
|
+
## Gates over prompts
|
|
27
|
+
|
|
28
|
+
Dangerous-command and architectural rules are enforced as **deterministic hooks**, not prose the model may forget. The framework already ships:
|
|
29
|
+
|
|
30
|
+
- [ ] **`migration-guard`** — blocks schema edits that bypass `supabase/migrations/`.
|
|
31
|
+
- [ ] **`supabase-destructive-guard`** — blocks destructive operations on remote DBs.
|
|
32
|
+
- [ ] **`branch-guard`** — enforces feature-branch-only; main stays deployable.
|
|
33
|
+
|
|
34
|
+
A rule worth enforcing is worth a hook. Add one rather than relying on instructions alone.
|
|
35
|
+
|
|
36
|
+
## Context grounding
|
|
37
|
+
|
|
38
|
+
- [ ] **Bundled, version-matched docs are the source of truth for stack APIs** — over model memory. When in doubt about a Supabase / Next.js / vendor API, read the pinned docs, don't recall from weights.
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
*This file contains only verified org standards. Archetype DoDs extend it; they do not override it.*
|
package/skills/qualia/SKILL.md
CHANGED
|
@@ -20,6 +20,8 @@ Read project state. Classify your situation. Tell you the exact next command.
|
|
|
20
20
|
node ${QUALIA_BIN}/state.js check 2>/dev/null
|
|
21
21
|
```
|
|
22
22
|
|
|
23
|
+
The JSON carries a `profile` field (`strict` or `standard`; the `$QUALIA_PROFILE` environment variable takes precedence). `strict` = hard gates, no waivers; `standard` = gates are advisory, and a senior may waive one with a reason logged to `.planning/decisions/`. Surface the active profile whenever a gate is involved.
|
|
24
|
+
|
|
23
25
|
Also gather context:
|
|
24
26
|
```bash
|
|
25
27
|
test -f .continue-here.md && echo "HANDOFF_EXISTS" && head -20 .continue-here.md || echo "NO_HANDOFF"
|