qualia-framework 6.9.2 → 6.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/AGENTS.md +8 -5
  2. package/CHANGELOG.md +208 -0
  3. package/CLAUDE.md +3 -1
  4. package/agents/roadmapper.md +16 -14
  5. package/agents/verifier.md +1 -1
  6. package/bin/agent-status.js +264 -0
  7. package/bin/analyze-gate.js +318 -0
  8. package/bin/branch-hygiene.js +135 -0
  9. package/bin/command-surface.js +2 -0
  10. package/bin/compile-instructions.js +82 -0
  11. package/bin/eval-runner.js +218 -0
  12. package/bin/host-adapters.js +72 -12
  13. package/bin/install.js +27 -17
  14. package/bin/last-report.js +207 -0
  15. package/bin/project-sync.js +315 -0
  16. package/bin/report-payload.js +7 -0
  17. package/bin/runtime-manifest.js +8 -0
  18. package/bin/state.js +257 -12
  19. package/bin/verify-panel.js +294 -0
  20. package/bin/wave-plan.js +211 -0
  21. package/docs/EMPLOYEE-QUICKSTART.md +3 -3
  22. package/docs/erp-contract.md +168 -0
  23. package/docs/qualia-manual.html +5 -5
  24. package/hooks/branch-guard.js +133 -63
  25. package/hooks/pre-deploy-gate.js +38 -0
  26. package/hooks/task-write-guard.js +165 -0
  27. package/package.json +3 -2
  28. package/rules/codex-goal.md +28 -26
  29. package/rules/infrastructure.md +1 -1
  30. package/skills/qualia/SKILL.md +6 -0
  31. package/skills/qualia-build/SKILL.md +39 -7
  32. package/skills/qualia-eval/SKILL.md +83 -0
  33. package/skills/qualia-feature/SKILL.md +20 -4
  34. package/skills/qualia-fix/SKILL.md +13 -1
  35. package/skills/qualia-milestone/SKILL.md +12 -6
  36. package/skills/qualia-new/REFERENCE.md +6 -4
  37. package/skills/qualia-new/SKILL.md +27 -15
  38. package/skills/qualia-plan/SKILL.md +2 -2
  39. package/skills/qualia-report/SKILL.md +10 -0
  40. package/skills/qualia-scope/SKILL.md +3 -3
  41. package/skills/qualia-ship/SKILL.md +37 -4
  42. package/skills/qualia-update/SKILL.md +100 -0
  43. package/skills/qualia-verify/SKILL.md +51 -24
  44. package/templates/instructions.md +32 -0
  45. package/templates/journey.md +2 -2
  46. package/templates/project-discovery.md +30 -23
  47. package/templates/requirements.md +7 -7
  48. package/tests/agent-status.test.sh +153 -0
  49. package/tests/analyze-gate.test.sh +170 -0
  50. package/tests/bin.test.sh +5 -4
  51. package/tests/branch-hygiene.test.sh +93 -0
  52. package/tests/eval-runner.test.sh +147 -0
  53. package/tests/hooks.test.sh +218 -17
  54. package/tests/install-smoke.test.sh +4 -3
  55. package/tests/instructions.test.sh +109 -0
  56. package/tests/last-report.test.sh +156 -0
  57. package/tests/lib.test.sh +2 -2
  58. package/tests/project-sync.test.sh +175 -0
  59. package/tests/run-all.sh +9 -0
  60. package/tests/runner.js +3 -2
  61. package/tests/state.test.sh +187 -0
  62. package/tests/verify-panel.test.sh +162 -0
  63. package/tests/wave-plan.test.sh +153 -0
  64. package/skills/qualia-discuss/SKILL.md +0 -222
@@ -0,0 +1,165 @@
1
+ #!/usr/bin/env node
2
+ // ~/.claude/hooks/task-write-guard.js — runtime enforcement of the plan
3
+ // contract's declared file sets. PreToolUse hook on Edit/Write.
4
+ // Exits 2 to BLOCK. Exits 0 to allow. Cross-platform (Windows/macOS/Linux).
5
+ //
6
+ // WHY: plan-contract.js proves file-disjointness across parallel tasks at PLAN
7
+ // time, but nothing stops a builder writing outside its declared set at RUN
8
+ // time — the documented #1 cause of cross-wave merge conflicts and AI entropy
9
+ // (files nobody planned). This turns the static check into a deterministic
10
+ // guardrail ("a rule worth enforcing is worth a hook" — constitution).
11
+ //
12
+ // SCOPE & HONEST LIMITATION: Claude Code fires the same stateless hook for
13
+ // every subagent and gives it no task identity, so this hook cannot attribute a
14
+ // write to a *specific* task. What it CAN enforce — and does — is that, while a
15
+ // build is in flight, every Edit/Write targets a path DECLARED by SOME task in
16
+ // the active phase contract (files_modify ∪ files_create). Plan-time
17
+ // disjointness already guarantees no two tasks share a path, and the builder's
18
+ // <wave_context> prompt tells it which set is its own; so the residual gap
19
+ // ("T3 edits T4's declared file") is prompt-guarded while the high-frequency
20
+ // vector ("builder invents/edits a file nobody planned") is hard-blocked.
21
+ //
22
+ // The guard is SCOPED: it is a no-op unless a build is active (≥1 RUNNING entry
23
+ // in .agent-status/). Outside a build it never fires, so it can't interfere with
24
+ // the orchestrator, the verifier, or ordinary editing. Fails OPEN on any error.
25
+
26
+ const fs = require("fs");
27
+ const path = require("path");
28
+
29
+ const _traceStart = Date.now();
30
+
31
+ // ── stdin reader (same robust pattern as the other guards) ──────────────
// Block the current thread for roughly `ms` milliseconds without spinning the
// CPU. Used by the stdin reader to back off while a non-blocking stdin has no
// data yet.
//
// ms: delay in milliseconds. Non-finite values (NaN, undefined, Infinity) and
//     values <= 0 return immediately. Atomics.wait interprets a NaN/undefined
//     timeout as "wait forever", which would hang the hook, so such values are
//     refused up front instead of being passed through.
// Returns undefined. Never throws: if Atomics.wait is unavailable or rejects
// the call, the sleep is skipped (best effort).
function sleepSync(ms) {
  const delay = Number(ms);
  if (!Number.isFinite(delay) || delay <= 0) return;
  try {
    // Waiting on a private, never-notified cell is a plain timed block.
    Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, delay);
  } catch {}
}
// Read the hook payload (JSON) synchronously and return it as an object.
//
// fd:        file descriptor to read from (default 0 = stdin).
// timeoutMs: overall budget in milliseconds for collecting input (default 1000).
//
// Returns the parsed JSON value when it is an object (or array); returns {} when
// nothing was read, the payload is not valid JSON, the payload is a JSON
// primitive/null, or any read error other than EAGAIN/EWOULDBLOCK occurs.
// Never throws, so callers can dereference the result safely.
function readInput(fd = 0, timeoutMs = 1000) {
  const deadline = Date.now() + timeoutMs;
  const scratch = Buffer.alloc(65536);
  // Raw chunks are collected and decoded ONCE at the end: decoding chunk by
  // chunk would corrupt a multi-byte UTF-8 character that straddles a 64 KiB
  // boundary.
  const chunks = [];
  try {
    while (Date.now() < deadline) {
      let n = 0;
      try {
        n = fs.readSync(fd, scratch, 0, scratch.length, null);
      } catch (e) {
        // Non-blocking stdin with no data yet: back off briefly and retry.
        if (e && (e.code === "EAGAIN" || e.code === "EWOULDBLOCK")) { sleepSync(1); continue; }
        break;
      }
      if (n === 0) break; // EOF
      // Copy out of the scratch buffer, which is overwritten by the next read.
      chunks.push(Buffer.from(scratch.subarray(0, n)));
    }
    if (chunks.length === 0) return {};
    const parsed = JSON.parse(Buffer.concat(chunks).toString("utf8"));
    // A JSON primitive or null is not a usable payload; treat it as empty.
    return parsed !== null && typeof parsed === "object" ? parsed : {};
  } catch {
    return {};
  }
}
57
+
// Append one JSON line describing this hook run to the daily trace file
// <qualiaHome>/.qualia-traces/<YYYY-MM-DD>.jsonl (date taken from the UTC ISO
// timestamp).
//
// result: outcome label stored in the entry (this file passes "allow"/"block").
// extra:  optional object whose properties are merged into the entry.
//
// qualiaHome is QUALIA_HOME when set; otherwise the directory containing this
// hook's folder when that directory is named ".codex" or ".claude"; otherwise
// ~/.claude. Tracing is best effort: every failure is swallowed so the hook's
// decision is never affected.
function _trace(result, extra) {
  try {
    const os = require("os");
    const installRoot = path.dirname(__dirname);
    const installName = path.basename(installRoot);

    let qualiaHome = process.env.QUALIA_HOME;
    if (!qualiaHome) {
      const isKnownHost = installName === ".codex" || installName === ".claude";
      qualiaHome = isKnownHost ? installRoot : path.join(os.homedir(), ".claude");
    }

    const traceDir = path.join(qualiaHome, ".qualia-traces");
    if (!fs.existsSync(traceDir)) {
      fs.mkdirSync(traceDir, { recursive: true });
    }

    const entry = {
      hook: "task-write-guard",
      result,
      timestamp: new Date().toISOString(),
      duration_ms: Date.now() - _traceStart,
      ...extra,
    };
    const day = new Date().toISOString().split("T")[0];
    const traceFile = path.join(traceDir, `${day}.jsonl`);
    fs.appendFileSync(traceFile, JSON.stringify(entry) + "\n");
  } catch {}
}
70
+
// Record an "allow" trace entry carrying `reason` (plus any `extra` fields) and
// terminate the hook with exit code 0, which lets the write proceed.
// Does not return.
function allow(reason, extra) {
  const detail = { reason, ...extra };
  _trace("allow", detail);
  process.exit(0);
}
72
+
// OWNER / debugging escape hatch (same idea as git-guardrails' QUALIA_ALLOW_*):
// when the variable is exactly "1" the guard steps aside before reading input.
if (process.env.QUALIA_ALLOW_OUTSIDE_CONTRACT === "1") {
  allow("escape-hatch");
}

// Pull the target path out of the hook payload; a payload without a
// tool_input.file_path is not something this guard can judge, so it is allowed.
const input = readInput();
const ti = input.tool_input ? input.tool_input : {};
const rawPath = String(ti.file_path ? ti.file_path : "");
if (rawPath === "") {
  allow("no file_path");
}

// The project root is taken to be the directory the hook was started in.
const root = process.cwd();
82
+
// The status and contract helpers live in bin/, a sibling of this hooks/
// directory. If either module cannot be loaded (older or partial install) the
// guard fails open.
let agentStatus;
let planContract;
try {
  const binDir = path.join(__dirname, "..", "bin");
  agentStatus = require(path.join(binDir, "agent-status.js"));
  planContract = require(path.join(binDir, "plan-contract.js"));
} catch {
  allow("libs unavailable");
}

// SCOPE: enforcement applies only while a build is active, i.e. at least one
// agent-status entry is RUNNING. An unreadable status store also fails open.
let running;
try {
  const statuses = agentStatus.listStatuses(root);
  running = statuses.filter((entry) => entry.status === "RUNNING");
} catch {
  allow("status unreadable");
}
if (!running || running.length === 0) {
  allow("no active build");
}
102
+
// Find the contract file for the build in flight.
//
// First choice: .planning/phase-<p>-contract.json for a phase reported by a
// RUNNING status entry (phases tried in first-seen order, first existing file
// wins). Fallback: the only phase-<digits>-contract.json in .planning/, when
// exactly one exists. Returns the path, or null when nothing unambiguous is
// found or .planning/ cannot be listed.
function findContractPath() {
  const planningDir = path.join(root, ".planning");

  const livePhases = new Set();
  for (const entry of running) {
    if (entry.phase != null) livePhases.add(entry.phase);
  }
  for (const phase of livePhases) {
    const candidate = path.join(planningDir, `phase-${phase}-contract.json`);
    if (fs.existsSync(candidate)) return candidate;
  }

  let contractFiles;
  try {
    contractFiles = fs
      .readdirSync(planningDir)
      .filter((name) => /^phase-\d+-contract\.json$/.test(name));
  } catch {
    return null;
  }
  return contractFiles.length === 1 ? path.join(planningDir, contractFiles[0]) : null;
}
118
+
// Without a contract there is nothing to enforce against, so the write is allowed.
const contractPath = findContractPath();
if (!contractPath) {
  allow("no active contract");
}

// Load the contract through the shared reader. A result flagged not-ok, or an
// exception while reading, fails open.
let contract;
try {
  const result = planContract.readContractFile(contractPath);
  if (!result.ok) {
    allow("contract unreadable");
  }
  contract = result.contract;
} catch {
  allow("contract parse error");
}
130
+
131
+ // Build the union of writable declared paths across all tasks.
132
+ // Edit/Write create or modify; deletes are out of band for this tool family.
// Canonicalise a path for comparison between contract entries and the write
// target: backslashes become "/", redundant segments ("//", "/./", "dir/../")
// are collapsed, and a leading "./" is dropped. Leading "../" segments are
// preserved so callers can still detect paths that escape the project root.
//
// p: any value; it is stringified first.
// Returns the canonical forward-slash path; "" stays "".
function norm(p) {
  const unified = String(p).replace(/\\/g, "/");
  // path.posix.normalize("") yields ".", so keep the empty string as-is:
  // callers compare against "" to detect the project root itself.
  if (unified === "") return "";
  return path.posix.normalize(unified).replace(/^\.\//, "");
}
// Every path any task may modify or create, in normalised form.
const declared = new Set();
for (const task of contract.tasks || []) {
  const writable = [...(task.files_modify || []), ...(task.files_create || [])];
  for (const file of writable) {
    declared.add(norm(file));
  }
}
141
+
// Express the write target relative to the project root.
const abs = path.isAbsolute(rawPath) ? rawPath : path.resolve(root, rawPath);
const rel = norm(path.relative(root, abs));

// Targets outside the project root (or the root itself) are not this guard's
// concern.
const escapesRoot = rel.startsWith("../") || rel === "" || path.isAbsolute(rel);
if (escapesRoot) {
  allow("outside project root", { rel });
}

// Framework scratch and planning artifacts stay writable during a build.
if ([".agent-status/", ".planning/"].some((prefix) => rel.startsWith(prefix))) {
  allow("framework path", { rel });
}

// Declared by some task in the contract: allowed.
if (declared.has(rel)) {
  allow("declared", { rel });
}

// Anything else was planned by no task: explain on stderr, trace, and exit 2
// to block the write.
{
  const contractRel = path.relative(root, contractPath);
  const message = [
    "⬢ task-write-guard — write outside the plan contract:",
    ` ✗ ${rel}`,
    "",
    ` No task in ${contractRel} declares this file`,
    " (files_modify / files_create). Builders may only write files",
    " their task planned. If this file is genuinely needed, add it to",
    " the contract via the locked-decision channel, or re-plan the phase.",
    " OWNER override: QUALIA_ALLOW_OUTSIDE_CONTRACT=1",
  ];
  for (const line of message) {
    console.error(line);
  }
  _trace("block", { rel, contract: path.relative(root, contractPath) });
  process.exit(2);
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "qualia-framework",
3
- "version": "6.9.2",
3
+ "version": "6.22.0",
4
4
  "description": "Claude Code and Codex workflow framework by Qualia Solutions. Plan, build, verify, ship.",
5
5
  "bin": {
6
6
  "qualia-framework": "./bin/cli.js"
@@ -33,7 +33,8 @@
33
33
  "test:statusline": "bash tests/statusline.test.sh",
34
34
  "test:refs": "bash tests/refs.test.sh",
35
35
  "test:published-install": "bash tests/published-install-smoke.test.sh",
36
- "test:shell": "bash tests/run-all.sh"
36
+ "test:shell": "bash tests/run-all.sh",
37
+ "compile:instructions": "node bin/compile-instructions.js"
37
38
  },
38
39
  "files": [
39
40
  "bin/",
@@ -1,46 +1,48 @@
1
- # Codex /goal integration
1
+ # Work-unit goal (both runtimes)
2
2
 
3
- When this skill spawns a unit of work on **Codex** (not Claude Code), set the thread goal at the start so Codex's native token-budget + status tracking takes over.
3
+ When a skill begins a defined **unit of work** (a phase build, a feature, a milestone, a fix), set an explicit goal — an objective + a token budget — so the session tracks burn-vs-budget and stays anchored to one outcome. Both runtimes get this; the *surface* differs.
4
4
 
5
- ## Runtime detection
6
-
7
- You are on Codex when `~/.codex/` exists and `~/.claude/` is absent or stale. The simplest probe:
5
+ The objective + budget come from one shared helper, regardless of runtime:
8
6
 
9
7
  ```bash
10
- test -f ~/.codex/AGENTS.md && echo codex || echo claude
8
+ node ${QUALIA_BIN}/codex-goal.js {scope} # scope: phase · task · feature · quick
11
9
  ```
12
10
 
13
- If the answer is `claude`, **skip this entire rule** — Claude Code has no equivalent surface and emitting `/goal` text would just be noise.
11
+ It prints two lines from `.planning/STATE.md` + `ROADMAP.md`:
12
+
13
+ ```
14
+ /goal {objective text}
15
+ # token_budget suggestion: {N}
16
+ ```
14
17
 
15
- ## How to set the goal
18
+ ## Runtime detection
19
+
20
+ ```bash
21
+ test -f ~/.codex/AGENTS.md && [ ! -d ~/.claude ] && echo codex || echo claude
22
+ ```
16
23
 
17
- 1. Run the helper to produce the objective string + suggested token budget:
24
+ ## Codex native `/goal`
18
25
 
19
- ```bash
20
- node ~/.codex/bin/codex-goal.js {scope}
21
- ```
26
+ Codex has a first-class goal surface (`thread_goals`: objective, token_budget, tokens_used, status).
22
27
 
23
- `{scope}` is one of: `phase` · `task` · `feature` · `quick`. Use the scope of the current skill.
28
+ 1. **If the `update_goal` tool is available** (Codex exposes it as a model-callable tool), call it with `objective` = the text after `/goal ` and `token_budget` = the integer suggestion.
29
+ 2. **Otherwise** surface the `/goal` line for the user to paste. Don't silently skip — it's a one-second set and the only way Codex's budget telemetry knows what to track.
24
30
 
25
- 2. The output is two lines:
31
+ ## Claude Code equivalent — via the harness work-list + budget
26
32
 
27
- ```
28
- /goal {objective text from STATE.md + ROADMAP.md}
29
- # token_budget suggestion: {N}
30
- ```
33
+ Claude Code has no `/goal` table, but it has a native equivalent: the **session task-list** (the model's todo/task tool) and the turn **token budget**. Use them so the work unit is just as anchored and visible:
31
34
 
32
- 3. **If the `update_goal` tool is available** to you (Codex exposes it as a model-callable tool), call it directly with:
33
- - `objective` = the text after `/goal ` on line 1
34
- - `token_budget` = the integer suggestion on line 2
35
+ 1. **Create a tracked task** for the unit with the objective as its title (e.g. *"Phase 3 — checkout + Stripe webhook"*). Mark it `in_progress` at start, `completed` at end. This is the Claude-side "active goal" — it shows in the UI and survives compaction.
36
+ 2. **Treat `token_budget` as the unit's context budget.** State it in the opening line (banner) — *"Goal: {objective} · budget ~{N} tok"* — so the operator and the model both see how much room the unit has. If a `+Nk` turn directive is set, prefer that.
37
+ 3. For a multi-wave phase, the per-task `.agent-status/` entries (see `/qualia-build`) are the sub-goals under this one.
35
38
 
36
- 4. **If `update_goal` is not available**, surface the `/goal` line to the user in your next message and let them paste it. Do not silently skip — the goal-set takes 1 second and is the only way Codex's budget telemetry knows what to track.
39
+ Either way the rule is the same: **one named objective + one budget per work unit, surfaced, not silent.**
37
40
 
38
41
  ## When NOT to set a goal
39
42
 
40
- - The user is on Claude Code (no `/goal` surface).
41
- - A goal is already active for this thread (Codex rejects `update_goal` when one exists — call `thread/goal/get` first if you're using the tool API directly).
42
- - The work is open-ended exploration with no clear objective (e.g. `/qualia`, `/qualia-scope`). Goals are for executing a defined scope.
43
+ - A goal/task is already active for this unit (don't double-set; Codex rejects `update_goal` when one exists — check first).
44
+ - Open-ended exploration with no defined scope (`/qualia`, `/qualia-scope` PROJECT MODE, `/qualia-idk`). Goals are for *executing* a defined scope, not discovering one.
43
45
 
44
46
  ## Why
45
47
 
46
- Codex's `thread_goals` table tracks `objective`, `token_budget`, `tokens_used`, and a `status` enum (`active | paused | blocked | usage_limited | budget_limited | complete`). Setting the goal lets the user see burn-vs-budget in the TUI without the framework reinventing it. The token-budget number also makes the model self-aware of how much context it has left for the current unit of work.
48
+ A named objective + budget keeps a unit of work from sprawling: the model stays self-aware of how much context remains, the operator sees burn-vs-budget, and the unit has a single definition of done. On Codex this rides `thread_goals`; on Claude Code it rides the task-list + turn budget. Same discipline, native surface on each.
@@ -49,7 +49,7 @@ Standard services across all Qualia projects. Use these unless the project expli
49
49
  - **QualiasolutionsCY** — primary org for all Qualia Solutions projects
50
50
  - **SakaniQualia** — org for Sakani-related projects (real estate platform)
51
51
  - All repos are private by default
52
- - Branch protection: main/master require PR reviews (enforced by framework guards)
52
+ - Main integration: feature branches integrate to `main` at **`/qualia-ship`** (ship is the single merge point — it fast-forwards the branch into `main`, deploys from `main`, and deletes the branch). Pushes to `main` are **allowed and recorded** by `branch-guard` (per-employee tally → ERP) — accountability, not a hard block. `/qualia-report` sweeps for branches with unshipped commits + stale PRs at clock-out so nothing lingers. Keep GitHub branch protection on `main` OFF (or with the team allowed to push) for this model; if you re-enable required reviews, switch ship to an auto-merged PR instead.
53
53
 
54
54
  ## Vercel Teams (admin knowledge)
55
55
  - Qualia operates across **3 Vercel teams** — projects are distributed across them
@@ -33,6 +33,12 @@ ls .planning/phase-*-plan.md 2>/dev/null || echo "NO_PLANS"
33
33
  ls .planning/phase-*-verification.md 2>/dev/null || echo "NO_VERIFICATIONS"
34
34
  ```
35
35
 
36
+ And surface where work was left off last time — the richest "where we left off" signal lives in `.planning/reports/`:
37
+ ```bash
38
+ node ${QUALIA_BIN}/last-report.js 2>/dev/null
39
+ ```
40
+ Exit 0 → it prints a one-line digest of the newest session report (`Last session ({date}, {age}d ago): {summary} → next: {next}`). Exit 1 → no reports yet (nothing to surface). When a project is loaded and a digest exists, print that line **at the very TOP of your output**, before the banner — so the first thing the operator (or a teammate picking the project up) sees is exactly where the last session ended.
41
+
36
42
  Read conversation context — what has the user been doing, what errors occurred.
37
43
 
38
44
  ### 2. Classify and Route
@@ -21,12 +21,13 @@ Execute phase plan. Each task = fresh subagent. Independent tasks run parallel.
21
21
  `/qualia-build` — build current planned phase
22
22
  `/qualia-build {N}` — build specific phase
23
23
  `/qualia-build {N} --auto` — build + chain into `/qualia-verify {N} --auto` (no human gate)
24
+ `/qualia-build {N} --parallel K` — cap concurrent builders at K (default auto: sequential under 3 tasks, else up to 5)
24
25
 
25
26
  ## Process
26
27
 
27
- ### 0. Codex goal (Codex runtime only)
28
+ ### 0. Set the work-unit goal
28
29
 
29
- Per `rules/codex-goal.md` — set the thread goal at phase start with scope `phase`.
30
+ Per `rules/codex-goal.md` — set the work-unit goal at phase start with scope `phase` (Codex `/goal`; on Claude Code, a tracked task + budget in the banner). One named objective + budget for the whole build.
30
31
 
31
32
  ### 1. Load Plan
32
33
 
@@ -38,6 +39,20 @@ node ${QUALIA_BIN}/plan-contract.js validate .planning/phase-{N}-contract.json
38
39
 
39
40
  Parse tasks, waves, file refs. Prefer the JSON contract for task ids, dependencies, file lists, and verification checks; use the Markdown plan as the human-readable context.
40
41
 
42
+ ### 1a. Analyze Gate (scope ↔ plan, before any build)
43
+
44
+ `plan-contract.js` proves the contract is internally well-formed; this gate diffs it **against intent** — scope acceptance criteria (`phase-{N}-context.md`) + the CONTEXT.md glossary — to catch requirements the plan silently dropped or contradicted. This is the plan→build seam Spec-Kit calls `/analyze`.
45
+
46
+ ```bash
47
+ node ${QUALIA_BIN}/analyze-gate.js {N}
48
+ ```
49
+
50
+ Exit 0 → consistent, proceed. Non-zero → it lists under-covered scope criteria, orphan success criteria, glossary violations, and scope-reduction language. **Profile-aware** (the `profile` field from `state.js check`):
51
+ - **strict** → a HIGH finding is a stop. Route to `/qualia-plan {N} --gaps` (plan dropped a requirement) or `/qualia-scope {N}` (scope itself is wrong). Do not build.
52
+ - **standard** → surface findings to the operator and proceed only with an explicit ack; log the waiver reason to `.planning/decisions/` if you proceed past a HIGH.
53
+
54
+ (No scope file = scope-coverage check is skipped, not a failure — `/qualia-feature` trivia and scope-less phases still build.)
55
+
41
56
  ### 1b. Recovery Reference
42
57
 
43
58
  Tag HEAD before executing. Reference only, no auto-rollback.
@@ -62,13 +77,15 @@ git diff --stat
62
77
  node ${QUALIA_BIN}/qualia-ui.js banner build {N} "{phase name}"
63
78
  ```
64
79
 
65
- **For each wave (sequential):**
80
+ **Derive the build schedule from the dependency graph (don't trust hand-numbered waves, don't over-spawn):**
66
81
 
67
82
  ```bash
68
- node ${QUALIA_BIN}/qualia-ui.js wave {W} {total_waves} {tasks_in_wave}
83
+ node ${QUALIA_BIN}/wave-plan.js .planning/phase-{N}-contract.json {--parallel K if set} --json
69
84
  ```
70
85
 
71
- **Per task in wave: spawn ALL as separate `Agent()` calls in SAME turn (concurrent). Do NOT await one before spawning next.**
86
+ `wave-plan.js` recomputes minimal-depth waves from `depends_on` (maximal safe parallelism) and splits each into **batches capped at `max_concurrency`** (auto: 1 if <3 tasks, else 5; `--parallel K` overrides). Spawn **one batch at a time, in order** — every task in a batch is dependency-free of its batch-mates, so they run concurrently; the next batch waits for the fan-in barrier (§ after each wave). Follow the emitted `batches[]`, not the raw contract `wave` numbers.
87
+
88
+ **Per batch: spawn ALL its tasks as separate `Agent()` calls in the SAME turn (concurrent). Do NOT await one before spawning the next.**
72
89
 
73
90
  ```bash
74
91
  node ${QUALIA_BIN}/qualia-ui.js task {task_num} "{task title}"
@@ -117,7 +134,13 @@ Parallel tasks Wave {W} (do NOT touch their files):
117
134
  </task_contract>
118
135
 
119
136
  Context tags already loaded. Only Read project code you modify.
120
- Execute. Commit. Return DONE/BLOCKED/PARTIAL.
137
+
138
+ Status protocol (machine-readable fan-in — do this, do not skip):
139
+ - First action: `node ${QUALIA_BIN}/agent-status.js write {task_id} RUNNING --phase {N} --wave {W}`
140
+ - Last action, after committing: `node ${QUALIA_BIN}/agent-status.js write {task_id} DONE --commit $(git rev-parse --short HEAD)`
141
+ (use BLOCKED or PARTIAL with `--note \"why\"` instead of DONE if you could not finish)
142
+
143
+ Execute. Commit. Write your DONE/BLOCKED/PARTIAL status. Return DONE/BLOCKED/PARTIAL.
121
144
  ", subagent_type="qualia-builder", description="Task {N}: {title}")
122
145
  ```
123
146
 
@@ -130,7 +153,15 @@ Execute. Commit. Return DONE/BLOCKED/PARTIAL.
130
153
  node ${QUALIA_BIN}/qualia-ui.js done {task_num} "{title}" {commit_hash}
131
154
  ```
132
155
 
133
- **After each wave:** move to next, show summary.
156
+ **After each batch — fan-in barrier (deterministic, not "did the model notice"):**
157
+
158
+ ```bash
159
+ node ${QUALIA_BIN}/agent-status.js barrier --tasks {comma-separated task ids in this batch}
160
+ ```
161
+
162
+ Exit 0 ⇔ every task in the batch wrote `DONE`. Non-zero → the barrier lists which tasks are RUNNING/BLOCKED/PARTIAL/MISSING. Do NOT spawn the next batch until the barrier passes; a BLOCKED/PARTIAL task is a wave failure (§4). `agent-status.js list` shows the live view. (Gating per batch — not per contract wave — keeps the barrier aligned with the `wave-plan.js` schedule, whose derived waves needn't match the contract's declared wave numbers.)
163
+
164
+ **After each batch:** move to the next batch in the schedule, show summary.
134
165
 
135
166
  ### 3. Wave Completion
136
167
 
@@ -141,6 +172,7 @@ node ${QUALIA_BIN}/qualia-ui.js divider
141
172
  node ${QUALIA_BIN}/qualia-ui.js ok "Tasks: {done}/{total}"
142
173
  node ${QUALIA_BIN}/qualia-ui.js ok "Commits: {count}"
143
174
  node ${QUALIA_BIN}/qualia-ui.js ok "Waves: {count}"
175
+ node ${QUALIA_BIN}/agent-status.js clear # drop ephemeral .agent-status/ scratch
144
176
  ```
145
177
 
146
178
  ### 4. Handle Failures
@@ -0,0 +1,83 @@
1
+ ---
2
+ name: qualia-eval
3
+ description: "Evaluate an AI feature (chat / RAG / voice / agent) against a layered eval suite — deterministic assertions first, then llm-rubric judges — and gate on the result. Qualia gates UI and code; this is the equivalent gate for the AI artifacts a project builds. Triggers: 'eval this agent', 'test the chatbot', 'evaluate the AI feature', 'rag eval', 'does the assistant answer correctly', 'judge the model output', 'qualia-eval'."
4
+ allowed-tools:
5
+ - Bash
6
+ - Read
7
+ - Write
8
+ - Edit
9
+ - Grep
10
+ - Glob
11
+ - Agent
12
+ ---
13
+
14
+ # /qualia-eval — Evaluate an AI Feature
15
+
16
+ `contract-runner` proves the code exists; `verify-panel` proves the code is correct. Neither can tell you whether the **chatbot actually answers the refund question**. This lane closes that gap with a layered eval suite — cheap deterministic checks first, model judgment only where a model is required — mirroring the contract-runner evidence model.
17
+
18
+ ## Usage
19
+ `/qualia-eval {suite.json}` — run an eval suite for one AI feature
20
+ `/qualia-eval {N}` — run every `.planning/evals/*-suite.json` for phase N (verify-step gate)
21
+
22
+ ## The suite (JSON)
23
+
24
+ One suite per AI feature. Each case carries a captured `output` (or `output_file`) plus optional `latency_ms` / `cost_usd`, and a list of assertions:
25
+
26
+ ```json
27
+ {
28
+ "feature": "support-chat",
29
+ "cases": [
30
+ { "name": "refund window", "input": "what's your refund policy?",
31
+ "output": "We refund within 30 days of purchase.",
32
+ "latency_ms": 1200, "cost_usd": 0.008,
33
+ "assert": [
34
+ { "type": "contains", "value": "30 days" },
35
+ { "type": "not_contains", "value": "I cannot help" },
36
+ { "type": "max_latency_ms", "value": 2000 },
37
+ { "type": "llm_rubric", "rubric": "answer is grounded in the policy, no hallucinated terms" }
38
+ ] } ]
39
+ }
40
+ ```
41
+
42
+ Deterministic assertion types (settled with no model): `contains`, `not_contains`, `equals`, `regex`, `not_regex`, `min_length`, `max_length`, `json_valid`, `json_path` (`equals`/`contains`), `max_latency_ms`, `max_cost_usd`. The model-only type is `llm_rubric`.
43
+
44
+ ## Process
45
+
46
+ ### 1. Capture outputs
47
+
48
+ For each case, run the AI feature on `input` and record the real `output` (+ `latency_ms`/`cost_usd` if measurable) back into the suite. Use the project's own entrypoint — an API route, a script, or the agent SDK. If outputs are already captured (replay fixtures), skip to step 2.
49
+
50
+ ### 2. Judge the rubrics (one judge per llm_rubric, fresh context)
51
+
52
+ Deterministic assertions need no model — `eval-runner.js` settles them. For each `llm_rubric` assertion, spawn a judge to return a verdict, then write `"verdict": "pass"|"fail"` onto that assertion in the suite. This mirrors how `verify-panel` consumes skeptic votes: the model judges, the runner aggregates.
53
+
54
+ ```
55
+ Agent(prompt="
56
+ Role: @${QUALIA_AGENTS}/verifier.md
57
+
58
+ JUDGE one rubric against one output. No code to grep — judge the text only.
59
+ Rubric: {rubric}
60
+ Input: {input}
61
+ Output to judge: {output}
62
+
63
+ Return exactly one line: PASS — {reason} OR FAIL — {reason}. Default FAIL if the output does not clearly satisfy the rubric.
64
+ ", subagent_type="qualia-verifier", description="Judge rubric — {case name}")
65
+ ```
66
+
67
+ An `llm_rubric` with no verdict is PENDING and **fails** the suite — never silently pass an unjudged rubric.
68
+
69
+ ### 3. Run the deterministic verdict
70
+
71
+ ```bash
72
+ node ${QUALIA_BIN}/eval-runner.js {suite.json} --write
73
+ ```
74
+
75
+ `eval-runner.js` runs every deterministic assertion itself, folds in the rubric verdicts, and exits **0 = all cases pass / 1 = any failure or unjudged rubric**. Artifact: `.planning/evals/eval-{feature}.json`.
76
+
77
+ ### 4. Gate
78
+
79
+ Exit 0 → the AI feature meets its bar; report PASS with the per-case summary. Exit 1 → list the failing cases + assertions and route to `/qualia-fix` (behavior wrong) or back to the prompt/RAG config. When run as a phase verify-step gate (`/qualia-eval {N}`), a FAIL is a phase FAIL — same standing as a failing contract.
80
+
81
+ ```bash
82
+ node ${QUALIA_BIN}/qualia-ui.js end "EVAL COMPLETE" "/qualia-verify {N}"
83
+ ```
@@ -40,9 +40,9 @@ One command for adding a small new capability outside the planned Road. Auto-det
40
40
 
41
41
  ## Process
42
42
 
43
- ### 0. Codex goal (Codex runtime only)
43
+ ### 0. Set the work-unit goal
44
44
 
45
- Per `rules/codex-goal.md` — set the thread goal with scope matching the auto-detected bucket (`quick` for inline, `feature` for spawn). Do this AFTER Step 2 (auto-detect scope) so the budget matches the actual work shape.
45
+ Per `rules/codex-goal.md` — set the work-unit goal (Codex `/goal`; on Claude Code, a tracked task + budget) with scope matching the auto-detected bucket (`quick` for inline, `feature` for spawn). Do this AFTER Step 2 (auto-detect scope) so the budget matches the actual work shape.
46
46
 
47
47
  ### 1. Capture description
48
48
 
@@ -50,6 +50,22 @@ If invoked without args, ask: **"What do you want to build?"**
50
50
 
51
51
  Wait for free-text answer. Don't paraphrase back. Capture the user's exact phrasing — it feeds both the auto-scope classifier and the eventual commit message.
52
52
 
53
+ ### 1b. Scope gate (anti-drift — keep work on the milestone arc)
54
+
55
+ Before building, check whether this work belongs to the active milestone. This is what stops feature/fix from drifting off-plan.
56
+
57
+ ```bash
58
+ node ${QUALIA_BIN}/state.js check 2>/dev/null # → milestone, profile; JOURNEY.md = the arc
59
+ node ${QUALIA_BIN}/state.js reqs-check 2>/dev/null # current milestone's open REQ-IDs
60
+ ```
61
+
62
+ - **No active project / no milestone** (`.planning/` absent) → not governed; proceed normally (skip to Step 2).
63
+ - **Active milestone**: decide if this work serves it.
64
+ - **In-scope** (it advances the current milestone's goal or an open REQ-ID) → proceed. Record it tagged to scope in Steps 4/5: add `--scope in --ref {REQ-ID or phase}` to the `state.js transition --to note` call.
65
+ - **Off-road** (a new capability/feature that isn't in the current milestone): this is exactly the drift the framework guards against. Resolve by profile (`state.js check` → `profile`):
66
+ - **strict** → STOP. Do not build off-road. Route to `/qualia-scope` to fold it into the arc (a phase/REQ in the current or a future milestone) or `/qualia-milestone` if it's a new milestone. Off-road building is blocked.
67
+ - **standard** → allowed, but **recorded**: build it, then record with `--scope off --ref "{what + why off-road}"` so the OWNER + ERP see the off-road tally (it is never silent).
68
+
53
69
  ### 2. Auto-detect scope
54
70
 
55
71
  Classify the description into one of three buckets:
@@ -116,7 +132,7 @@ git commit -m "fix: {description}"
116
132
  5. Record in state:
117
133
 
118
134
  ```bash
119
- node ${QUALIA_BIN}/state.js transition --to note --notes "{brief description}" --tasks-done 1
135
+ node ${QUALIA_BIN}/state.js transition --to note --notes "{brief description}" --tasks-done 1 {--scope in --ref {REQ/phase} | --scope off --ref "{why off-road}"} # scope flags come from the §1b scope gate
120
136
  ```
121
137
 
122
138
  6. End with:
@@ -184,7 +200,7 @@ node ${QUALIA_BIN}/qualia-ui.js end "FEATURE SHIPPED (spawn)"
184
200
  5. Record in state:
185
201
 
186
202
  ```bash
187
- node ${QUALIA_BIN}/state.js transition --to note --notes "{description}" --tasks-done 1
203
+ node ${QUALIA_BIN}/state.js transition --to note --notes "{description}" --tasks-done 1 {--scope in --ref {REQ/phase} | --scope off --ref "{why off-road}"} # scope flags come from the §1b scope gate
188
204
  ```
189
205
 
190
206
  ### 6. Execute the refuse path
@@ -48,6 +48,10 @@ Fix is the practical lane for "this used to work, or should work, and now it doe
48
48
  node ${QUALIA_BIN}/qualia-ui.js banner fix
49
49
  ```
50
50
 
51
+ ### 0. Set the work-unit goal
52
+
53
+ Per `rules/codex-goal.md` — set the work-unit goal (Codex `/goal`; on Claude Code, a tracked task + budget) with scope `quick` for `--quick`, else `feature`. Anchors the fix to one objective + budget so root-cause work doesn't sprawl.
54
+
51
55
  ### 1. Classify The Request
52
56
 
53
57
  Parse `$ARGUMENTS` into:
@@ -70,6 +74,14 @@ If the request is phase-sized, stop and route:
70
74
  node ${QUALIA_BIN}/qualia-ui.js end "ROUTED" "/qualia-plan"
71
75
  ```
72
76
 
77
+ ### 1b. Scope tag (anti-drift)
78
+
79
+ ```bash
80
+ node ${QUALIA_BIN}/state.js check 2>/dev/null # milestone + profile
81
+ ```
82
+
83
+ Repairing broken behavior in what the current milestone already built is **in-scope** — proceed, and tag the record `--scope in --ref {REQ/phase}` in Step 7. But a "fix" that is really **new off-road behavior** (a capability the milestone never included, dressed as a bug) is drift: in **strict** profile, STOP and route to `/qualia-scope` to fold it into the arc; in **standard**, proceed but record `--scope off --ref "{why off-road}"` so it's counted, never silent. No active milestone → not governed, proceed.
84
+
73
85
  ### 2. Build The Feedback Loop
74
86
 
75
87
  Use the cheapest check that can prove the bug is real and later prove it is fixed.
@@ -175,7 +187,7 @@ git commit -m "fix: {short symptom/root-cause summary}"
175
187
  Record state:
176
188
 
177
189
  ```bash
178
- node ${QUALIA_BIN}/state.js transition --to note --notes "{short fix summary}" --tasks-done 1
190
+ node ${QUALIA_BIN}/state.js transition --to note --notes "{short fix summary}" --tasks-done 1 {--scope in --ref {REQ/phase} | --scope off --ref "{why off-road}"} # scope flags come from the §1b scope tag
179
191
  ```
180
192
 
181
193
  ### 8. Output
@@ -30,13 +30,17 @@ Triggered after `/qualia-verify` passes on the LAST phase of the current milesto
30
30
 
31
31
  ```bash
32
32
  node ${QUALIA_BIN}/state.js check
33
+ node ${QUALIA_BIN}/state.js reqs-check # this milestone's REQ-ID completion
33
34
  ```
34
35
 
35
- `state.js close-milestone` enforces two guards:
36
+ `state.js close-milestone` enforces three guards:
36
37
  - `MILESTONE_NOT_READY` — any phase not verified
37
38
  - `MILESTONE_TOO_SMALL` — milestone has < 2 phases
39
+ - `MILESTONE_REQS_INCOMPLETE` — a REQ-ID mapped to this milestone in REQUIREMENTS.md is not yet `Complete` (strict profile blocks; standard profile proceeds, but the unfinished REQs are surfaced as `warnings` that should be logged). This is what stops "finishing a milestone with scope still open."
38
40
 
39
- If either fires (without `--force`), stop and show the error. The user must verify remaining phases first (or add `--force` for explicit bypass on a preview/demo milestone).
41
+ If any guard fires (without `--force`), stop and show the error. Resolve before closing: verify remaining phases, finish the open requirements, or **explicitly defer** a requirement by moving it to `Out of Scope` in REQUIREMENTS.md (a conscious deferral, not silent). `--force` bypasses all three for retroactive bookkeeping only.
42
+
43
+ Run `reqs-check` first so the user sees exactly which requirements are still open before the close attempt — Step 4 (mark Complete) should already have flipped the finished ones.
40
44
 
41
45
  ### 1b. Demo-Extension Branch
42
46
 
@@ -59,7 +63,7 @@ If `PROJECT_TYPE=demo` AND `MILESTONE_COUNT=1`, the demo's one milestone is clos
59
63
  **If "Client signed — extend to full project":**
60
64
 
61
65
  1. Update `.planning/PROJECT.md` frontmatter: `project_type: full`.
62
- 2. Run a brief discovery top-up — invoke `/qualia-scope` in PROJECT MODE, but only ask §9-§14 (the full-project-only questions). This adds the milestone arc, compliance, integrations, content ownership, handoff team, and budget shape.
66
+ 2. Run a brief discovery top-up — invoke `/qualia-scope` in PROJECT MODE, but only ask §9–§15 (the full-project-only questions). This adds the **capability inventory** (the whole project's scope), the **whole-project definition of done**, shipping order, compliance, integrations, content ownership, handoff team, and budget shape.
63
67
  3. Spawn the roadmapper in `extend-to-full` mode (see prompt below). It reads the existing single milestone (now M1), the updated discovery, and produces a full JOURNEY.md with M2..M{N-1} sketches plus the Handoff milestone.
64
68
  4. Then proceed with the standard close-milestone flow (Steps 2-9) — M1 closes, M2 opens, the user is asked to continue.
65
69
 
@@ -75,11 +79,13 @@ Read your role: @${QUALIA_AGENTS}/roadmapper.md
75
79
 
76
80
  <task>
77
81
  The existing JOURNEY.md has 1 milestone (the demo, now M1 and shipped). Extend it
78
- into a 2-5 milestone arc to Handoff:
82
+ into the FULL milestone arc to Handoff — as many milestones as the agreed scope
83
+ needs (no cap), covering the entire capability inventory:
79
84
 
80
85
  - Keep M1 exactly as-is (it shipped).
81
- - Add M2..M{N-1} based on §9 of project-discovery.md (the milestone-arc question
82
- the user answered when converting from demo).
86
+ - Add M2..M{N-1} covering every capability in §9 of project-discovery.md (the
87
+ capability inventory), ordered per §11 (shipping order). Every §9 capability
88
+ must land in a milestone — nothing agreed is left unplanned.
83
89
  - Append a Handoff milestone (fixed 4 phases: Polish, Content + SEO, Final QA,
84
90
  Handoff).
85
91
  - Update REQUIREMENTS.md to add REQ-IDs for the new milestones.
@@ -59,8 +59,10 @@ Read your role: @${QUALIA_AGENTS}/research-synthesizer.md
59
59
 
60
60
  Merge the 4 research files at .planning/research/ into .planning/research/SUMMARY.md.
61
61
  This is a multi-milestone project -- the SUMMARY must suggest a FULL milestone arc
62
- (2-5 milestones including Handoff), not just a v1 phase list. Include roadmap
63
- implications AND handoff implications (what client takeover requires).
62
+ that covers the ENTIRE capability set to its done-state (as many milestones as the
63
+ scope needs, ending in Handoff for client projects -- no milestone cap), not just a
64
+ v1 phase list. Include roadmap implications AND handoff implications (what client
65
+ takeover requires).
64
66
  ", subagent_type="qualia-research-synthesizer", description="Synthesize research")
65
67
  ```
66
68
 
@@ -74,7 +76,7 @@ Read your role: @${QUALIA_AGENTS}/roadmapper.md
74
76
 
75
77
  <task>
76
78
  Create the FULL JOURNEY for this project:
77
- - .planning/JOURNEY.md -- all milestones (2-5 including Handoff) with exit criteria
79
+ - .planning/JOURNEY.md -- all milestones (at least 2, no upper cap; ending in Handoff for client projects) covering every capability from discovery §9, with exit criteria
78
80
  - .planning/REQUIREMENTS.md -- requirements grouped by milestone
79
81
  - .planning/ROADMAP.md -- Milestone 1's phase detail (and ALL milestones if full_detail=true)
80
82
 
@@ -115,7 +117,7 @@ The branded journey ladder rendered in Step 11. Use `node ${QUALIA_BIN}/qualia-u
115
117
  ```
116
118
  ## Proposed Journey
117
119
 
118
- **{N} milestones to handoff** | **{X} requirements mapped** | All v1 requirements covered
120
+ **{N} milestones to handoff** | **{X}/{X} capabilities mapped** | Full §9 inventory covered (0 unmapped)
119
121
 
120
122
  +-- Milestone 1 . {Name} [CURRENT]
121
123
  | Why now: {one line}