qualia-framework 6.9.2 → 6.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +8 -5
- package/CHANGELOG.md +208 -0
- package/CLAUDE.md +3 -1
- package/agents/roadmapper.md +16 -14
- package/agents/verifier.md +1 -1
- package/bin/agent-status.js +264 -0
- package/bin/analyze-gate.js +318 -0
- package/bin/branch-hygiene.js +135 -0
- package/bin/command-surface.js +2 -0
- package/bin/compile-instructions.js +82 -0
- package/bin/eval-runner.js +218 -0
- package/bin/host-adapters.js +72 -12
- package/bin/install.js +27 -17
- package/bin/last-report.js +207 -0
- package/bin/project-sync.js +315 -0
- package/bin/report-payload.js +7 -0
- package/bin/runtime-manifest.js +8 -0
- package/bin/state.js +257 -12
- package/bin/verify-panel.js +294 -0
- package/bin/wave-plan.js +211 -0
- package/docs/EMPLOYEE-QUICKSTART.md +3 -3
- package/docs/erp-contract.md +168 -0
- package/docs/qualia-manual.html +5 -5
- package/hooks/branch-guard.js +133 -63
- package/hooks/pre-deploy-gate.js +38 -0
- package/hooks/task-write-guard.js +165 -0
- package/package.json +3 -2
- package/rules/codex-goal.md +28 -26
- package/rules/infrastructure.md +1 -1
- package/skills/qualia/SKILL.md +6 -0
- package/skills/qualia-build/SKILL.md +39 -7
- package/skills/qualia-eval/SKILL.md +83 -0
- package/skills/qualia-feature/SKILL.md +20 -4
- package/skills/qualia-fix/SKILL.md +13 -1
- package/skills/qualia-milestone/SKILL.md +12 -6
- package/skills/qualia-new/REFERENCE.md +6 -4
- package/skills/qualia-new/SKILL.md +27 -15
- package/skills/qualia-plan/SKILL.md +2 -2
- package/skills/qualia-report/SKILL.md +10 -0
- package/skills/qualia-scope/SKILL.md +3 -3
- package/skills/qualia-ship/SKILL.md +37 -4
- package/skills/qualia-update/SKILL.md +100 -0
- package/skills/qualia-verify/SKILL.md +51 -24
- package/templates/instructions.md +32 -0
- package/templates/journey.md +2 -2
- package/templates/project-discovery.md +30 -23
- package/templates/requirements.md +7 -7
- package/tests/agent-status.test.sh +153 -0
- package/tests/analyze-gate.test.sh +170 -0
- package/tests/bin.test.sh +5 -4
- package/tests/branch-hygiene.test.sh +93 -0
- package/tests/eval-runner.test.sh +147 -0
- package/tests/hooks.test.sh +218 -17
- package/tests/install-smoke.test.sh +4 -3
- package/tests/instructions.test.sh +109 -0
- package/tests/last-report.test.sh +156 -0
- package/tests/lib.test.sh +2 -2
- package/tests/project-sync.test.sh +175 -0
- package/tests/run-all.sh +9 -0
- package/tests/runner.js +3 -2
- package/tests/state.test.sh +187 -0
- package/tests/verify-panel.test.sh +162 -0
- package/tests/wave-plan.test.sh +153 -0
- package/skills/qualia-discuss/SKILL.md +0 -222
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// ~/.claude/hooks/task-write-guard.js — runtime enforcement of the plan
|
|
3
|
+
// contract's declared file sets. PreToolUse hook on Edit/Write.
|
|
4
|
+
// Exits 2 to BLOCK. Exits 0 to allow. Cross-platform (Windows/macOS/Linux).
|
|
5
|
+
//
|
|
6
|
+
// WHY: plan-contract.js proves file-disjointness across parallel tasks at PLAN
|
|
7
|
+
// time, but nothing stops a builder writing outside its declared set at RUN
|
|
8
|
+
// time — the documented #1 cause of cross-wave merge conflicts and AI entropy
|
|
9
|
+
// (files nobody planned). This turns the static check into a deterministic
|
|
10
|
+
// guardrail ("a rule worth enforcing is worth a hook" — constitution).
|
|
11
|
+
//
|
|
12
|
+
// SCOPE & HONEST LIMITATION: Claude Code fires the same stateless hook for
|
|
13
|
+
// every subagent and gives it no task identity, so this hook cannot attribute a
|
|
14
|
+
// write to a *specific* task. What it CAN enforce — and does — is that, while a
|
|
15
|
+
// build is in flight, every Edit/Write targets a path DECLARED by SOME task in
|
|
16
|
+
// the active phase contract (files_modify ∪ files_create). Plan-time
|
|
17
|
+
// disjointness already guarantees no two tasks share a path, and the builder's
|
|
18
|
+
// <wave_context> prompt tells it which set is its own; so the residual gap
|
|
19
|
+
// ("T3 edits T4's declared file") is prompt-guarded while the high-frequency
|
|
20
|
+
// vector ("builder invents/edits a file nobody planned") is hard-blocked.
|
|
21
|
+
//
|
|
22
|
+
// The guard is SCOPED: it is a no-op unless a build is active (≥1 RUNNING entry
|
|
23
|
+
// in .agent-status/). Outside a build it never fires, so it can't interfere with
|
|
24
|
+
// the orchestrator, the verifier, or ordinary editing. Fails OPEN on any error.
|
|
25
|
+
|
|
26
|
+
const fs = require("fs");
|
|
27
|
+
const path = require("path");
|
|
28
|
+
|
|
29
|
+
const _traceStart = Date.now();
|
|
30
|
+
|
|
31
|
+
// ── stdin reader (same robust pattern as the other guards) ──────────────
|
|
32
|
+
/**
 * Synchronously pause the current thread for roughly `ms` milliseconds.
 *
 * Waits on a freshly allocated shared cell whose value already equals the
 * expected value (0); nothing else holds a reference to it, so the wait can
 * only end by timing out. Any failure is ignored and the function simply
 * returns.
 *
 * @param {number} ms - Timeout handed to `Atomics.wait`, in milliseconds.
 * @returns {void}
 */
function sleepSync(ms) {
  try {
    const cell = new Int32Array(new SharedArrayBuffer(4));
    Atomics.wait(cell, 0, 0, ms);
  } catch {
    // Best effort only: a failed wait just means no pause happened.
  }
}
|
|
35
|
+
/**
 * Read the hook payload from stdin (fd 0) and parse it as JSON.
 *
 * Reads synchronously in 64 KiB chunks for at most one second. EAGAIN /
 * EWOULDBLOCK are retried after a 1 ms sleep; any other read error, or a
 * zero-length read, ends the loop.
 *
 * The raw bytes are accumulated and decoded once at the end, so a multi-byte
 * UTF-8 character that straddles two reads is decoded correctly instead of
 * being turned into replacement characters.
 *
 * @returns {object} The parsed payload, or `{}` when stdin is empty, the JSON
 *   is invalid, or the payload is not a JSON object/array (e.g. `null`, a
 *   number) — callers read properties off the result directly.
 */
function readInput() {
  const deadline = Date.now() + 1000;
  const buf = Buffer.alloc(65536);
  const chunks = [];
  try {
    while (Date.now() < deadline) {
      let n = 0;
      try {
        n = fs.readSync(0, buf, 0, buf.length, null);
      } catch (e) {
        if (e && (e.code === "EAGAIN" || e.code === "EWOULDBLOCK")) { sleepSync(1); continue; }
        break;
      }
      if (n === 0) break;
      // Copy the bytes out: `buf` is reused by the next read.
      chunks.push(Buffer.from(buf.subarray(0, n)));
    }
    if (chunks.length === 0) return {};
    const parsed = JSON.parse(Buffer.concat(chunks).toString("utf8"));
    // Guard against valid-but-useless payloads such as `null` or `42`.
    return parsed !== null && typeof parsed === "object" ? parsed : {};
  } catch {
    return {};
  }
}
|
|
57
|
+
|
|
58
|
+
/**
 * Append one JSON line describing this hook run to the daily trace file.
 *
 * The trace directory is `<qualiaHome>/.qualia-traces`, where `qualiaHome` is
 * `QUALIA_HOME` if set, otherwise the parent of this hook's directory when
 * that parent is named `.codex` or `.claude`, otherwise `~/.claude`.
 *
 * The clock is read once so the entry's `timestamp` and the `YYYY-MM-DD`
 * file name always agree, even when the call straddles midnight (UTC).
 * Tracing is best effort: every failure is swallowed so it can never change
 * the hook's verdict.
 *
 * @param {string} result - Outcome label (this file uses "allow" / "block").
 * @param {object} [extra] - Additional fields spread into the trace entry.
 * @returns {void}
 */
function _trace(result, extra) {
  try {
    const os = require("os");
    const parent = path.basename(path.dirname(__dirname));
    const qualiaHome = process.env.QUALIA_HOME ||
      (parent === ".codex" || parent === ".claude" ? path.dirname(__dirname) : path.join(os.homedir(), ".claude"));
    const traceDir = path.join(qualiaHome, ".qualia-traces");
    // `recursive: true` is a no-op when the directory already exists.
    fs.mkdirSync(traceDir, { recursive: true });
    const stamp = new Date().toISOString();
    const entry = { hook: "task-write-guard", result, timestamp: stamp, duration_ms: Date.now() - _traceStart, ...extra };
    fs.appendFileSync(path.join(traceDir, `${stamp.split("T")[0]}.jsonl`), JSON.stringify(entry) + "\n");
  } catch {}
}
|
|
70
|
+
|
|
71
|
+
/**
 * Record an "allow" trace entry and terminate the hook with exit code 0.
 *
 * @param {string} reason - Why the write is being allowed (stored in the trace).
 * @param {object} [extra] - Extra fields merged into the trace entry.
 */
function allow(reason, extra) {
  const details = { reason, ...extra };
  _trace("allow", details);
  process.exit(0);
}
|
|
72
|
+
|
|
73
|
+
// OWNER / debugging escape hatch, mirroring git-guardrails' QUALIA_ALLOW_*.
|
|
74
|
+
// Escape hatch: with QUALIA_ALLOW_OUTSIDE_CONTRACT=1 the guard allows
// immediately, before stdin is even read.
if (process.env.QUALIA_ALLOW_OUTSIDE_CONTRACT === "1") {
  allow("escape-hatch");
}

// Pull the target path out of the hook payload; with no path there is
// nothing to check, so the call is allowed.
const input = readInput();
const ti = input.tool_input ? input.tool_input : {};
const rawPath = ti.file_path ? String(ti.file_path) : "";
if (rawPath === "") {
  allow("no file_path");
}

// The project root is the directory the hook was launched from.
const root = process.cwd();
|
|
82
|
+
|
|
83
|
+
// Reuse the status + contract libraries that ship alongside this hook (bin/ is a
|
|
84
|
+
// sibling of hooks/ in both the repo and the installed layout). If they're not
|
|
85
|
+
// resolvable (older/partial install), fail open.
|
|
86
|
+
let agentStatus, planContract;
try {
  // Both helper modules live in the bin/ directory next to hooks/.
  const binDir = path.join(__dirname, "..", "bin");
  agentStatus = require(path.join(binDir, "agent-status.js"));
  planContract = require(path.join(binDir, "plan-contract.js"));
} catch {
  allow("libs unavailable");
}

// SCOPE: only enforce during an active build (≥1 RUNNING agent-status entry).
let running;
try {
  const statuses = agentStatus.listStatuses(root);
  running = statuses.filter((entry) => entry.status === "RUNNING");
} catch {
  allow("status unreadable");
}
if (!running || running.length === 0) {
  allow("no active build");
}
|
|
102
|
+
|
|
103
|
+
// Locate the active phase contract. Prefer the phase declared by a RUNNING
|
|
104
|
+
// builder; fall back to the sole phase-*-contract.json if unambiguous.
|
|
105
|
+
/**
 * Resolve the contract file for the build that is currently running.
 *
 * First pass: walk the RUNNING status entries in order and, for each distinct
 * non-null `phase`, return `.planning/phase-<phase>-contract.json` as soon as
 * one exists on disk. Second pass: if none matched, list `.planning/` and
 * return the contract file only when exactly one `phase-<digits>-contract.json`
 * is present.
 *
 * Reads the module-level `running` and `root` values.
 *
 * @returns {string|null} Path to the contract, or null when it cannot be
 *   determined (no match, several candidates, or `.planning/` unreadable).
 */
function findContractPath() {
  const planningDir = path.join(root, ".planning");

  const seen = new Set();
  for (const entry of running) {
    const phase = entry.phase;
    if (phase == null || seen.has(phase)) continue;
    seen.add(phase);
    const candidate = path.join(planningDir, `phase-${phase}-contract.json`);
    if (fs.existsSync(candidate)) return candidate;
  }

  let names;
  try {
    names = fs.readdirSync(planningDir);
  } catch {
    return null;
  }
  const contracts = names.filter((name) => /^phase-\d+-contract\.json$/.test(name));
  return contracts.length === 1 ? path.join(planningDir, contracts[0]) : null;
}
|
|
118
|
+
|
|
119
|
+
// Without a resolvable contract there is nothing to enforce against.
const contractPath = findContractPath();
if (!contractPath) {
  allow("no active contract");
}

// Load the contract through the shared library; any failure allows the write.
let contract;
try {
  const loaded = planContract.readContractFile(contractPath);
  if (!loaded.ok) {
    allow("contract unreadable");
  }
  contract = loaded.contract;
} catch {
  allow("contract parse error");
}
|
|
130
|
+
|
|
131
|
+
// Build the union of writable declared paths across all tasks.
|
|
132
|
+
// Edit/Write create or modify; deletes are out of band for this tool family.
|
|
133
|
+
/**
 * Canonicalise a path string so declared contract paths and the resolved
 * write target can be compared with plain string equality.
 *
 * Backslashes become forward slashes, then the path is collapsed with POSIX
 * rules: leading `./`, duplicate slashes and inner `.` / `..` segments are
 * removed (`./src//lib/../a.js` → `src/a.js`). Leading `../` segments and
 * absolute prefixes are preserved, so callers can still detect paths that
 * escape the project root. An empty path — or one that collapses to the
 * current directory — yields the empty string.
 *
 * @param {*} p - Path-like value; coerced with `String()`.
 * @returns {string} Normalised, forward-slash path.
 */
function norm(p) {
  const slashed = String(p).replace(/\\/g, "/");
  if (slashed === "") return "";
  const collapsed = path.posix.normalize(slashed);
  // `normalize` reports "the current directory" as "." or "./".
  if (collapsed === "." || collapsed === "./") return "";
  return collapsed;
}
|
|
136
|
+
// Union of every path any task declares as modifiable or creatable.
// Built defensively: a missing/non-array `tasks`, a null task, or a
// non-array file list must not throw here, because an uncaught exception
// would end the hook without a traced verdict. A lone string is treated as
// a single path rather than being iterated character by character.
const declared = new Set();
const contractTasks = contract && Array.isArray(contract.tasks) ? contract.tasks : [];
for (const t of contractTasks) {
  if (!t) continue;
  for (const key of ["files_modify", "files_create"]) {
    const value = t[key];
    const files = Array.isArray(value) ? value : (typeof value === "string" ? [value] : []);
    for (const f of files) declared.add(norm(f));
  }
}
|
|
141
|
+
|
|
142
|
+
// Resolve the target to a path relative to the project root.
|
|
143
|
+
// Express the write target relative to the project root.
let abs = rawPath;
if (!path.isAbsolute(rawPath)) {
  abs = path.resolve(root, rawPath);
}
const rel = norm(path.relative(root, abs));

// Out of project root → not this guard's concern (other guards handle secrets).
if (rel === "" || rel.startsWith("../") || path.isAbsolute(rel)) {
  allow("outside project root", { rel });
}

// Framework scratch / planning artifacts are always writable during a build:
// the status protocol, evidence, deviations, plan and contract files.
const frameworkPrefixes = [".agent-status/", ".planning/"];
if (frameworkPrefixes.some((prefix) => rel.startsWith(prefix))) {
  allow("framework path", { rel });
}

if (declared.has(rel)) {
  allow("declared", { rel });
}

// Not declared by any task → block: explain on stderr, trace, exit 2.
const contractRel = path.relative(root, contractPath);
const report = [
  "⬢ task-write-guard — write outside the plan contract:",
  ` ✗ ${rel}`,
  "",
  ` No task in ${contractRel} declares this file`,
  " (files_modify / files_create). Builders may only write files",
  " their task planned. If this file is genuinely needed, add it to",
  " the contract via the locked-decision channel, or re-plan the phase.",
  " OWNER override: QUALIA_ALLOW_OUTSIDE_CONTRACT=1",
];
for (const line of report) {
  console.error(line);
}
_trace("block", { rel, contract: contractRel });
process.exit(2);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "qualia-framework",
|
|
3
|
-
"version": "6.
|
|
3
|
+
"version": "6.22.0",
|
|
4
4
|
"description": "Claude Code and Codex workflow framework by Qualia Solutions. Plan, build, verify, ship.",
|
|
5
5
|
"bin": {
|
|
6
6
|
"qualia-framework": "./bin/cli.js"
|
|
@@ -33,7 +33,8 @@
|
|
|
33
33
|
"test:statusline": "bash tests/statusline.test.sh",
|
|
34
34
|
"test:refs": "bash tests/refs.test.sh",
|
|
35
35
|
"test:published-install": "bash tests/published-install-smoke.test.sh",
|
|
36
|
-
"test:shell": "bash tests/run-all.sh"
|
|
36
|
+
"test:shell": "bash tests/run-all.sh",
|
|
37
|
+
"compile:instructions": "node bin/compile-instructions.js"
|
|
37
38
|
},
|
|
38
39
|
"files": [
|
|
39
40
|
"bin/",
|
package/rules/codex-goal.md
CHANGED
|
@@ -1,46 +1,48 @@
|
|
|
1
|
-
#
|
|
1
|
+
# Work-unit goal (both runtimes)
|
|
2
2
|
|
|
3
|
-
When
|
|
3
|
+
When a skill begins a defined **unit of work** (a phase build, a feature, a milestone, a fix), set an explicit goal — an objective + a token budget — so the session tracks burn-vs-budget and stays anchored to one outcome. Both runtimes get this; the *surface* differs.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
You are on Codex when `~/.codex/` exists and `~/.claude/` is absent or stale. The simplest probe:
|
|
5
|
+
The objective + budget come from one shared helper, regardless of runtime:
|
|
8
6
|
|
|
9
7
|
```bash
|
|
10
|
-
|
|
8
|
+
node ${QUALIA_BIN}/codex-goal.js {scope} # scope ∈ phase · task · feature · quick
|
|
11
9
|
```
|
|
12
10
|
|
|
13
|
-
|
|
11
|
+
It prints two lines from `.planning/STATE.md` + `ROADMAP.md`:
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
/goal {objective text}
|
|
15
|
+
# token_budget suggestion: {N}
|
|
16
|
+
```
|
|
14
17
|
|
|
15
|
-
##
|
|
18
|
+
## Runtime detection
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
test -f ~/.codex/AGENTS.md && [ ! -d ~/.claude ] && echo codex || echo claude
|
|
22
|
+
```
|
|
16
23
|
|
|
17
|
-
|
|
24
|
+
## Codex — native `/goal`
|
|
18
25
|
|
|
19
|
-
|
|
20
|
-
node ~/.codex/bin/codex-goal.js {scope}
|
|
21
|
-
```
|
|
26
|
+
Codex has a first-class goal surface (`thread_goals`: objective, token_budget, tokens_used, status).
|
|
22
27
|
|
|
23
|
-
|
|
28
|
+
1. **If the `update_goal` tool is available** (Codex exposes it as a model-callable tool), call it with `objective` = the text after `/goal ` and `token_budget` = the integer suggestion.
|
|
29
|
+
2. **Otherwise** surface the `/goal` line for the user to paste. Don't silently skip — it's a one-second set and the only way Codex's budget telemetry knows what to track.
|
|
24
30
|
|
|
25
|
-
|
|
31
|
+
## Claude Code — equivalent via the harness work-list + budget
|
|
26
32
|
|
|
27
|
-
|
|
28
|
-
/goal {objective text from STATE.md + ROADMAP.md}
|
|
29
|
-
# token_budget suggestion: {N}
|
|
30
|
-
```
|
|
33
|
+
Claude Code has no `/goal` table, but it has a native equivalent: the **session task-list** (the model's todo/task tool) and the turn **token budget**. Use them so the work unit is just as anchored and visible:
|
|
31
34
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
+
1. **Create a tracked task** for the unit with the objective as its title (e.g. *"Phase 3 — checkout + Stripe webhook"*). Mark it `in_progress` at start, `completed` at end. This is the Claude-side "active goal" — it shows in the UI and survives compaction.
|
|
36
|
+
2. **Treat `token_budget` as the unit's context budget.** State it in the opening line (banner) — *"Goal: {objective} · budget ~{N} tok"* — so the operator and the model both see how much room the unit has. If a `+Nk` turn directive is set, prefer that.
|
|
37
|
+
3. For a multi-wave phase, the per-task `.agent-status/` entries (see `/qualia-build`) are the sub-goals under this one.
|
|
35
38
|
|
|
36
|
-
|
|
39
|
+
Either way the rule is the same: **one named objective + one budget per work unit, surfaced, not silent.**
|
|
37
40
|
|
|
38
41
|
## When NOT to set a goal
|
|
39
42
|
|
|
40
|
-
-
|
|
41
|
-
-
|
|
42
|
-
- The work is open-ended exploration with no clear objective (e.g. `/qualia`, `/qualia-scope`). Goals are for executing a defined scope.
|
|
43
|
+
- A goal/task is already active for this unit (don't double-set; Codex rejects `update_goal` when one exists — check first).
|
|
44
|
+
- Open-ended exploration with no defined scope (`/qualia`, `/qualia-scope` PROJECT MODE, `/qualia-idk`). Goals are for *executing* a defined scope, not discovering one.
|
|
43
45
|
|
|
44
46
|
## Why
|
|
45
47
|
|
|
46
|
-
|
|
48
|
+
A named objective + budget keeps a unit of work from sprawling: the model stays self-aware of how much context remains, the operator sees burn-vs-budget, and the unit has a single definition of done. On Codex this rides `thread_goals`; on Claude Code it rides the task-list + turn budget. Same discipline, native surface on each.
|
package/rules/infrastructure.md
CHANGED
|
@@ -49,7 +49,7 @@ Standard services across all Qualia projects. Use these unless the project expli
|
|
|
49
49
|
- **QualiasolutionsCY** — primary org for all Qualia Solutions projects
|
|
50
50
|
- **SakaniQualia** — org for Sakani-related projects (real estate platform)
|
|
51
51
|
- All repos are private by default
|
|
52
|
-
-
|
|
52
|
+
- Main integration: feature branches integrate to `main` at **`/qualia-ship`** (ship is the single merge point — it fast-forwards the branch into `main`, deploys from `main`, and deletes the branch). Pushes to `main` are **allowed and recorded** by `branch-guard` (per-employee tally → ERP) — accountability, not a hard block. `/qualia-report` sweeps for branches with unshipped commits + stale PRs at clock-out so nothing lingers. Keep GitHub branch protection on `main` OFF (or with the team allowed to push) for this model; if you re-enable required reviews, switch ship to an auto-merged PR instead.
|
|
53
53
|
|
|
54
54
|
## Vercel Teams (admin knowledge)
|
|
55
55
|
- Qualia operates across **3 Vercel teams** — projects are distributed across them
|
package/skills/qualia/SKILL.md
CHANGED
|
@@ -33,6 +33,12 @@ ls .planning/phase-*-plan.md 2>/dev/null || echo "NO_PLANS"
|
|
|
33
33
|
ls .planning/phase-*-verification.md 2>/dev/null || echo "NO_VERIFICATIONS"
|
|
34
34
|
```
|
|
35
35
|
|
|
36
|
+
And surface where work was left off last time — the richest "where we left off" signal lives in `.planning/reports/`:
|
|
37
|
+
```bash
|
|
38
|
+
node ${QUALIA_BIN}/last-report.js 2>/dev/null
|
|
39
|
+
```
|
|
40
|
+
Exit 0 → it prints a one-line digest of the newest session report (`Last session ({date}, {age}d ago): {summary} → next: {next}`). Exit 1 → no reports yet (nothing to surface). When a project is loaded and a digest exists, print that line **at the very TOP of your output**, before the banner — so the first thing the operator (or a teammate picking the project up) sees is exactly where the last session ended.
|
|
41
|
+
|
|
36
42
|
Read conversation context — what has the user been doing, what errors occurred.
|
|
37
43
|
|
|
38
44
|
### 2. Classify and Route
|
|
@@ -21,12 +21,13 @@ Execute phase plan. Each task = fresh subagent. Independent tasks run parallel.
|
|
|
21
21
|
`/qualia-build` — build current planned phase
|
|
22
22
|
`/qualia-build {N}` — build specific phase
|
|
23
23
|
`/qualia-build {N} --auto` — build + chain into `/qualia-verify {N} --auto` (no human gate)
|
|
24
|
+
`/qualia-build {N} --parallel K` — cap concurrent builders at K (default auto: sequential under 3 tasks, else up to 5)
|
|
24
25
|
|
|
25
26
|
## Process
|
|
26
27
|
|
|
27
|
-
### 0.
|
|
28
|
+
### 0. Set the work-unit goal
|
|
28
29
|
|
|
29
|
-
Per `rules/codex-goal.md` — set the
|
|
30
|
+
Per `rules/codex-goal.md` — set the work-unit goal at phase start with scope `phase` (Codex `/goal`; on Claude Code, a tracked task + budget in the banner). One named objective + budget for the whole build.
|
|
30
31
|
|
|
31
32
|
### 1. Load Plan
|
|
32
33
|
|
|
@@ -38,6 +39,20 @@ node ${QUALIA_BIN}/plan-contract.js validate .planning/phase-{N}-contract.json
|
|
|
38
39
|
|
|
39
40
|
Parse tasks, waves, file refs. Prefer the JSON contract for task ids, dependencies, file lists, and verification checks; use the Markdown plan as the human-readable context.
|
|
40
41
|
|
|
42
|
+
### 1a. Analyze Gate (scope ↔ plan, before any build)
|
|
43
|
+
|
|
44
|
+
`plan-contract.js` proves the contract is internally well-formed; this gate diffs it **against intent** — scope acceptance criteria (`phase-{N}-context.md`) + the CONTEXT.md glossary — to catch requirements the plan silently dropped or contradicted. This is the plan→build seam Spec-Kit calls `/analyze`.
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
node ${QUALIA_BIN}/analyze-gate.js {N}
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Exit 0 → consistent, proceed. Non-zero → it lists under-covered scope criteria, orphan success criteria, glossary violations, and scope-reduction language. **Profile-aware** (the `profile` field from `state.js check`):
|
|
51
|
+
- **strict** → a HIGH finding is a stop. Route to `/qualia-plan {N} --gaps` (plan dropped a requirement) or `/qualia-scope {N}` (scope itself is wrong). Do not build.
|
|
52
|
+
- **standard** → surface findings to the operator and proceed only with an explicit ack; log the waiver reason to `.planning/decisions/` if you proceed past a HIGH.
|
|
53
|
+
|
|
54
|
+
(No scope file = scope-coverage check is skipped, not a failure — `/qualia-feature` trivia and scope-less phases still build.)
|
|
55
|
+
|
|
41
56
|
### 1b. Recovery Reference
|
|
42
57
|
|
|
43
58
|
Tag HEAD before executing. Reference only, no auto-rollback.
|
|
@@ -62,13 +77,15 @@ git diff --stat
|
|
|
62
77
|
node ${QUALIA_BIN}/qualia-ui.js banner build {N} "{phase name}"
|
|
63
78
|
```
|
|
64
79
|
|
|
65
|
-
**
|
|
80
|
+
**Derive the build schedule from the dependency graph (don't trust hand-numbered waves, don't over-spawn):**
|
|
66
81
|
|
|
67
82
|
```bash
|
|
68
|
-
node ${QUALIA_BIN}/
|
|
83
|
+
node ${QUALIA_BIN}/wave-plan.js .planning/phase-{N}-contract.json {--parallel K if set} --json
|
|
69
84
|
```
|
|
70
85
|
|
|
71
|
-
**
|
|
86
|
+
`wave-plan.js` recomputes minimal-depth waves from `depends_on` (maximal safe parallelism) and splits each into **batches capped at `max_concurrency`** (auto: 1 if <3 tasks, else 5; `--parallel K` overrides). Spawn **one batch at a time, in order** — every task in a batch is dependency-free of its batch-mates, so they run concurrently; the next batch waits for the fan-in barrier (see "After each batch — fan-in barrier" below). Follow the emitted `batches[]`, not the raw contract `wave` numbers.
|
|
87
|
+
|
|
88
|
+
**Per batch: spawn ALL its tasks as separate `Agent()` calls in the SAME turn (concurrent). Do NOT await one before spawning the next.**
|
|
72
89
|
|
|
73
90
|
```bash
|
|
74
91
|
node ${QUALIA_BIN}/qualia-ui.js task {task_num} "{task title}"
|
|
@@ -117,7 +134,13 @@ Parallel tasks Wave {W} (do NOT touch their files):
|
|
|
117
134
|
</task_contract>
|
|
118
135
|
|
|
119
136
|
Context tags already loaded. Only Read project code you modify.
|
|
120
|
-
|
|
137
|
+
|
|
138
|
+
Status protocol (machine-readable fan-in — do this, do not skip):
|
|
139
|
+
- First action: `node ${QUALIA_BIN}/agent-status.js write {task_id} RUNNING --phase {N} --wave {W}`
|
|
140
|
+
- Last action, after committing: `node ${QUALIA_BIN}/agent-status.js write {task_id} DONE --commit $(git rev-parse --short HEAD)`
|
|
141
|
+
(use BLOCKED or PARTIAL with `--note \"why\"` instead of DONE if you could not finish)
|
|
142
|
+
|
|
143
|
+
Execute. Commit. Write your DONE/BLOCKED/PARTIAL status. Return DONE/BLOCKED/PARTIAL.
|
|
121
144
|
", subagent_type="qualia-builder", description="Task {N}: {title}")
|
|
122
145
|
```
|
|
123
146
|
|
|
@@ -130,7 +153,15 @@ Execute. Commit. Return DONE/BLOCKED/PARTIAL.
|
|
|
130
153
|
node ${QUALIA_BIN}/qualia-ui.js done {task_num} "{title}" {commit_hash}
|
|
131
154
|
```
|
|
132
155
|
|
|
133
|
-
**After each
|
|
156
|
+
**After each batch — fan-in barrier (deterministic, not "did the model notice"):**
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
node ${QUALIA_BIN}/agent-status.js barrier --tasks {comma-separated task ids in this batch}
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
Exit 0 ⇔ every task in the batch wrote `DONE`. Non-zero → the barrier lists which tasks are RUNNING/BLOCKED/PARTIAL/MISSING. Do NOT spawn the next batch until the barrier passes; a BLOCKED/PARTIAL task is a wave failure (§4). `agent-status.js list` shows the live view. (Gating per batch — not per contract wave — keeps the barrier aligned with the `wave-plan.js` schedule, whose derived waves needn't match the contract's declared wave numbers.)
|
|
163
|
+
|
|
164
|
+
**After each batch:** move to the next batch in the schedule, show summary.
|
|
134
165
|
|
|
135
166
|
### 3. Wave Completion
|
|
136
167
|
|
|
@@ -141,6 +172,7 @@ node ${QUALIA_BIN}/qualia-ui.js divider
|
|
|
141
172
|
node ${QUALIA_BIN}/qualia-ui.js ok "Tasks: {done}/{total}"
|
|
142
173
|
node ${QUALIA_BIN}/qualia-ui.js ok "Commits: {count}"
|
|
143
174
|
node ${QUALIA_BIN}/qualia-ui.js ok "Waves: {count}"
|
|
175
|
+
node ${QUALIA_BIN}/agent-status.js clear # drop ephemeral .agent-status/ scratch
|
|
144
176
|
```
|
|
145
177
|
|
|
146
178
|
### 4. Handle Failures
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: qualia-eval
|
|
3
|
+
description: "Evaluate an AI feature (chat / RAG / voice / agent) against a layered eval suite — deterministic assertions first, then llm-rubric judges — and gate on the result. Qualia gates UI and code; this is the equivalent gate for the AI artifacts a project builds. Triggers: 'eval this agent', 'test the chatbot', 'evaluate the AI feature', 'rag eval', 'does the assistant answer correctly', 'judge the model output', 'qualia-eval'."
|
|
4
|
+
allowed-tools:
|
|
5
|
+
- Bash
|
|
6
|
+
- Read
|
|
7
|
+
- Write
|
|
8
|
+
- Edit
|
|
9
|
+
- Grep
|
|
10
|
+
- Glob
|
|
11
|
+
- Agent
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
# /qualia-eval — Evaluate an AI Feature
|
|
15
|
+
|
|
16
|
+
`contract-runner` proves the code exists; `verify-panel` proves the code is correct. Neither can tell you whether the **chatbot actually answers the refund question**. This lane closes that gap with a layered eval suite — cheap deterministic checks first, model judgment only where a model is required — mirroring the contract-runner evidence model.
|
|
17
|
+
|
|
18
|
+
## Usage
|
|
19
|
+
`/qualia-eval {suite.json}` — run an eval suite for one AI feature
|
|
20
|
+
`/qualia-eval {N}` — run every `.planning/evals/*-suite.json` for phase N (verify-step gate)
|
|
21
|
+
|
|
22
|
+
## The suite (JSON)
|
|
23
|
+
|
|
24
|
+
One suite per AI feature. Each case carries a captured `output` (or `output_file`) plus optional `latency_ms` / `cost_usd`, and a list of assertions:
|
|
25
|
+
|
|
26
|
+
```json
|
|
27
|
+
{
|
|
28
|
+
"feature": "support-chat",
|
|
29
|
+
"cases": [
|
|
30
|
+
{ "name": "refund window", "input": "what's your refund policy?",
|
|
31
|
+
"output": "We refund within 30 days of purchase.",
|
|
32
|
+
"latency_ms": 1200, "cost_usd": 0.008,
|
|
33
|
+
"assert": [
|
|
34
|
+
{ "type": "contains", "value": "30 days" },
|
|
35
|
+
{ "type": "not_contains", "value": "I cannot help" },
|
|
36
|
+
{ "type": "max_latency_ms", "value": 2000 },
|
|
37
|
+
{ "type": "llm_rubric", "rubric": "answer is grounded in the policy, no hallucinated terms" }
|
|
38
|
+
] } ]
|
|
39
|
+
}
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Deterministic assertion types (settled with no model): `contains`, `not_contains`, `equals`, `regex`, `not_regex`, `min_length`, `max_length`, `json_valid`, `json_path` (`equals`/`contains`), `max_latency_ms`, `max_cost_usd`. The model-only type is `llm_rubric`.
|
|
43
|
+
|
|
44
|
+
## Process
|
|
45
|
+
|
|
46
|
+
### 1. Capture outputs
|
|
47
|
+
|
|
48
|
+
For each case, run the AI feature on `input` and record the real `output` (+ `latency_ms`/`cost_usd` if measurable) back into the suite. Use the project's own entrypoint — an API route, a script, or the agent SDK. If outputs are already captured (replay fixtures), skip to step 2.
|
|
49
|
+
|
|
50
|
+
### 2. Judge the rubrics (one judge per llm_rubric, fresh context)
|
|
51
|
+
|
|
52
|
+
Deterministic assertions need no model — `eval-runner.js` settles them. For each `llm_rubric` assertion, spawn a judge to return a verdict, then write `"verdict": "pass"|"fail"` onto that assertion in the suite. This mirrors how `verify-panel` consumes skeptic votes: the model judges, the runner aggregates.
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
Agent(prompt="
|
|
56
|
+
Role: @${QUALIA_AGENTS}/verifier.md
|
|
57
|
+
|
|
58
|
+
JUDGE one rubric against one output. No code to grep — judge the text only.
|
|
59
|
+
Rubric: {rubric}
|
|
60
|
+
Input: {input}
|
|
61
|
+
Output to judge: {output}
|
|
62
|
+
|
|
63
|
+
Return exactly one line: PASS — {reason} OR FAIL — {reason}. Default FAIL if the output does not clearly satisfy the rubric.
|
|
64
|
+
", subagent_type="qualia-verifier", description="Judge rubric — {case name}")
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
An `llm_rubric` with no verdict is PENDING and **fails** the suite — never silently pass an unjudged rubric.
|
|
68
|
+
|
|
69
|
+
### 3. Run the deterministic verdict
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
node ${QUALIA_BIN}/eval-runner.js {suite.json} --write
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
`eval-runner.js` runs every deterministic assertion itself, folds in the rubric verdicts, and exits **0 = all cases pass / 1 = any failure or unjudged rubric**. Artifact: `.planning/evals/eval-{feature}.json`.
|
|
76
|
+
|
|
77
|
+
### 4. Gate
|
|
78
|
+
|
|
79
|
+
Exit 0 → the AI feature meets its bar; report PASS with the per-case summary. Exit 1 → list the failing cases + assertions and route to `/qualia-fix` (behavior wrong) or back to the prompt/RAG config. When run as a phase verify-step gate (`/qualia-eval {N}`), a FAIL is a phase FAIL — same standing as a failing contract.
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
node ${QUALIA_BIN}/qualia-ui.js end "EVAL COMPLETE" "/qualia-verify {N}"
|
|
83
|
+
```
|
|
@@ -40,9 +40,9 @@ One command for adding a small new capability outside the planned Road. Auto-det
|
|
|
40
40
|
|
|
41
41
|
## Process
|
|
42
42
|
|
|
43
|
-
### 0.
|
|
43
|
+
### 0. Set the work-unit goal
|
|
44
44
|
|
|
45
|
-
Per `rules/codex-goal.md` — set the
|
|
45
|
+
Per `rules/codex-goal.md` — set the work-unit goal (Codex `/goal`; on Claude Code, a tracked task + budget) with scope matching the auto-detected bucket (`quick` for inline, `feature` for spawn). Do this AFTER Step 2 (auto-detect scope) so the budget matches the actual work shape.
|
|
46
46
|
|
|
47
47
|
### 1. Capture description
|
|
48
48
|
|
|
@@ -50,6 +50,22 @@ If invoked without args, ask: **"What do you want to build?"**
|
|
|
50
50
|
|
|
51
51
|
Wait for free-text answer. Don't paraphrase back. Capture the user's exact phrasing — it feeds both the auto-scope classifier and the eventual commit message.
|
|
52
52
|
|
|
53
|
+
### 1b. Scope gate (anti-drift — keep work on the milestone arc)
|
|
54
|
+
|
|
55
|
+
Before building, check whether this work belongs to the active milestone. This is what stops feature/fix from drifting off-plan.
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
node ${QUALIA_BIN}/state.js check 2>/dev/null # → milestone, profile; JOURNEY.md = the arc
|
|
59
|
+
node ${QUALIA_BIN}/state.js reqs-check 2>/dev/null # current milestone's open REQ-IDs
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
- **No active project / no milestone** (`.planning/` absent) → not governed; proceed normally (skip to Step 2).
|
|
63
|
+
- **Active milestone**: decide if this work serves it.
|
|
64
|
+
- **In-scope** (it advances the current milestone's goal or an open REQ-ID) → proceed. Record it tagged to scope in Steps 4/5: add `--scope in --ref {REQ-ID or phase}` to the `state.js transition --to note` call.
|
|
65
|
+
- **Off-road** (a new capability/feature that isn't in the current milestone): this is exactly the drift the framework guards against. Resolve by profile (`state.js check` → `profile`):
|
|
66
|
+
- **strict** → STOP. Do not build off-road. Route to `/qualia-scope` to fold it into the arc (a phase/REQ in the current or a future milestone) or `/qualia-milestone` if it's a new milestone. Off-road building is blocked.
|
|
67
|
+
- **standard** → allowed, but **recorded**: build it, then record with `--scope off --ref "{what + why off-road}"` so the OWNER + ERP see the off-road tally (it is never silent).
|
|
68
|
+
|
|
53
69
|
### 2. Auto-detect scope
|
|
54
70
|
|
|
55
71
|
Classify the description into one of three buckets:
|
|
@@ -116,7 +132,7 @@ git commit -m "fix: {description}"
|
|
|
116
132
|
5. Record in state:
|
|
117
133
|
|
|
118
134
|
```bash
|
|
119
|
-
node ${QUALIA_BIN}/state.js transition --to note --notes "{brief description}" --tasks-done 1
|
|
135
|
+
node ${QUALIA_BIN}/state.js transition --to note --notes "{brief description}" --tasks-done 1 {--scope in --ref {REQ/phase} | --scope off --ref "{why off-road}" — from the §1b scope gate}
|
|
120
136
|
```
|
|
121
137
|
|
|
122
138
|
6. End with:
|
|
@@ -184,7 +200,7 @@ node ${QUALIA_BIN}/qualia-ui.js end "FEATURE SHIPPED (spawn)"
|
|
|
184
200
|
5. Record in state:
|
|
185
201
|
|
|
186
202
|
```bash
|
|
187
|
-
node ${QUALIA_BIN}/state.js transition --to note --notes "{description}" --tasks-done 1
|
|
203
|
+
node ${QUALIA_BIN}/state.js transition --to note --notes "{description}" --tasks-done 1 {--scope in --ref {REQ/phase} | --scope off --ref "{why off-road}" — from the §1b scope gate}
|
|
188
204
|
```
|
|
189
205
|
|
|
190
206
|
### 6. Execute the refuse path
|
|
@@ -48,6 +48,10 @@ Fix is the practical lane for "this used to work, or should work, and now it doe
|
|
|
48
48
|
node ${QUALIA_BIN}/qualia-ui.js banner fix
|
|
49
49
|
```
|
|
50
50
|
|
|
51
|
+
### 0. Set the work-unit goal
|
|
52
|
+
|
|
53
|
+
Per `rules/codex-goal.md` — set the work-unit goal (Codex `/goal`; on Claude Code, a tracked task + budget) with scope `quick` for `--quick`, else `feature`. Anchors the fix to one objective + budget so root-cause work doesn't sprawl.
|
|
54
|
+
|
|
51
55
|
### 1. Classify The Request
|
|
52
56
|
|
|
53
57
|
Parse `$ARGUMENTS` into:
|
|
@@ -70,6 +74,14 @@ If the request is phase-sized, stop and route:
|
|
|
70
74
|
node ${QUALIA_BIN}/qualia-ui.js end "ROUTED" "/qualia-plan"
|
|
71
75
|
```
|
|
72
76
|
|
|
77
|
+
### 1b. Scope tag (anti-drift)
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
node ${QUALIA_BIN}/state.js check 2>/dev/null # milestone + profile
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Repairing broken behavior in what the current milestone already built is **in-scope** — proceed, and tag the record `--scope in --ref {REQ/phase}` in Step 7. But a "fix" that is really **new off-road behavior** (a capability the milestone never included, dressed as a bug) is drift: in **strict** profile, STOP and route to `/qualia-scope` to fold it into the arc; in **standard**, proceed but record `--scope off --ref "{why off-road}"` so it's counted, never silent. No active milestone → not governed, proceed.
|
|
84
|
+
|
|
73
85
|
### 2. Build The Feedback Loop
|
|
74
86
|
|
|
75
87
|
Use the cheapest check that can prove the bug is real and later prove it is fixed.
|
|
@@ -175,7 +187,7 @@ git commit -m "fix: {short symptom/root-cause summary}"
|
|
|
175
187
|
Record state:
|
|
176
188
|
|
|
177
189
|
```bash
|
|
178
|
-
node ${QUALIA_BIN}/state.js transition --to note --notes "{short fix summary}" --tasks-done 1
|
|
190
|
+
node ${QUALIA_BIN}/state.js transition --to note --notes "{short fix summary}" --tasks-done 1 {--scope in --ref {REQ/phase} | --scope off --ref "{why off-road}" — from the §1b scope tag}
|
|
179
191
|
```
|
|
180
192
|
|
|
181
193
|
### 8. Output
|
|
@@ -30,13 +30,17 @@ Triggered after `/qualia-verify` passes on the LAST phase of the current milesto
|
|
|
30
30
|
|
|
31
31
|
```bash
|
|
32
32
|
node ${QUALIA_BIN}/state.js check
|
|
33
|
+
node ${QUALIA_BIN}/state.js reqs-check # this milestone's REQ-ID completion
|
|
33
34
|
```
|
|
34
35
|
|
|
35
|
-
`state.js close-milestone` enforces
|
|
36
|
+
`state.js close-milestone` enforces three guards:
|
|
36
37
|
- `MILESTONE_NOT_READY` — any phase not verified
|
|
37
38
|
- `MILESTONE_TOO_SMALL` — milestone has < 2 phases
|
|
39
|
+
- `MILESTONE_REQS_INCOMPLETE` — a REQ-ID mapped to this milestone in REQUIREMENTS.md is not yet `Complete` (strict profile blocks the close; standard profile proceeds, but the unfinished REQs are surfaced as `warnings` to be logged). This is what stops "finishing a milestone with scope still open."
|
|
38
40
|
|
|
39
|
-
If
|
|
41
|
+
If any guard fires (without `--force`), stop and show the error. Resolve it before closing: verify the remaining phases, finish the open requirements, or **explicitly defer** a requirement by moving it to `Out of Scope` in REQUIREMENTS.md (a conscious deferral, not a silent one). `--force` bypasses all three guards and is for retroactive bookkeeping only.
|
|
42
|
+
|
|
43
|
+
Run `reqs-check` first so the user sees exactly which requirements are still open before the close attempt — Step 4 (mark Complete) should already have flipped the finished ones.
|
|
40
44
|
|
|
41
45
|
### 1b. Demo-Extension Branch
|
|
42
46
|
|
|
@@ -59,7 +63,7 @@ If `PROJECT_TYPE=demo` AND `MILESTONE_COUNT=1`, the demo's one milestone is clos
|
|
|
59
63
|
**If "Client signed — extend to full project":**
|
|
60
64
|
|
|
61
65
|
1. Update `.planning/PROJECT.md` frontmatter: `project_type: full`.
|
|
62
|
-
2. Run a brief discovery top-up — invoke `/qualia-scope` in PROJECT MODE, but only ask §9
|
|
66
|
+
2. Run a brief discovery top-up — invoke `/qualia-scope` in PROJECT MODE, but only ask §9–§15 (the full-project-only questions). This adds the **capability inventory** (the whole project's scope), the **whole-project definition of done**, shipping order, compliance, integrations, content ownership, handoff team, and budget shape.
|
|
63
67
|
3. Spawn the roadmapper in `extend-to-full` mode (see prompt below). It reads the existing single milestone (now M1), the updated discovery, and produces a full JOURNEY.md with M2..M{N-1} sketches plus the Handoff milestone.
|
|
64
68
|
4. Then proceed with the standard close-milestone flow (Steps 2-9) — M1 closes, M2 opens, the user is asked to continue.
|
|
65
69
|
|
|
@@ -75,11 +79,13 @@ Read your role: @${QUALIA_AGENTS}/roadmapper.md
|
|
|
75
79
|
|
|
76
80
|
<task>
|
|
77
81
|
The existing JOURNEY.md has 1 milestone (the demo, now M1 and shipped). Extend it
|
|
78
|
-
into
|
|
82
|
+
into the FULL milestone arc to Handoff — as many milestones as the agreed scope
|
|
83
|
+
needs (no cap), covering the entire capability inventory:
|
|
79
84
|
|
|
80
85
|
- Keep M1 exactly as-is (it shipped).
|
|
81
|
-
- Add M2..M{N-1}
|
|
82
|
-
|
|
86
|
+
- Add M2..M{N-1} covering every capability in §9 of project-discovery.md (the
|
|
87
|
+
capability inventory), ordered per §11 (shipping order). Every §9 capability
|
|
88
|
+
must land in a milestone — nothing agreed is left unplanned.
|
|
83
89
|
- Append a Handoff milestone (fixed 4 phases: Polish, Content + SEO, Final QA,
|
|
84
90
|
Handoff).
|
|
85
91
|
- Update REQUIREMENTS.md to add REQ-IDs for the new milestones.
|
|
@@ -59,8 +59,10 @@ Read your role: @${QUALIA_AGENTS}/research-synthesizer.md
|
|
|
59
59
|
|
|
60
60
|
Merge the 4 research files at .planning/research/ into .planning/research/SUMMARY.md.
|
|
61
61
|
This is a multi-milestone project -- the SUMMARY must suggest a FULL milestone arc
|
|
62
|
-
|
|
63
|
-
|
|
62
|
+
that covers the ENTIRE capability set to its done-state (as many milestones as the
|
|
63
|
+
scope needs, ending in Handoff for client projects -- no milestone cap), not just a
|
|
64
|
+
v1 phase list. Include roadmap implications AND handoff implications (what client
|
|
65
|
+
takeover requires).
|
|
64
66
|
", subagent_type="qualia-research-synthesizer", description="Synthesize research")
|
|
65
67
|
```
|
|
66
68
|
|
|
@@ -74,7 +76,7 @@ Read your role: @${QUALIA_AGENTS}/roadmapper.md
|
|
|
74
76
|
|
|
75
77
|
<task>
|
|
76
78
|
Create the FULL JOURNEY for this project:
|
|
77
|
-
- .planning/JOURNEY.md -- all milestones (2
|
|
79
|
+
- .planning/JOURNEY.md -- all milestones (≥2, no upper cap; ending in Handoff for client projects) covering every capability from discovery §9, with exit criteria
|
|
78
80
|
- .planning/REQUIREMENTS.md -- requirements grouped by milestone
|
|
79
81
|
- .planning/ROADMAP.md -- Milestone 1's phase detail (and ALL milestones if full_detail=true)
|
|
80
82
|
|
|
@@ -115,7 +117,7 @@ The branded journey ladder rendered in Step 11. Use `node ${QUALIA_BIN}/qualia-u
|
|
|
115
117
|
```
|
|
116
118
|
## Proposed Journey
|
|
117
119
|
|
|
118
|
-
**{N} milestones to handoff** | **{X}
|
|
120
|
+
**{N} milestones to handoff** | **{X}/{X} capabilities mapped** | Full §9 inventory covered (0 unmapped)
|
|
119
121
|
|
|
120
122
|
+-- Milestone 1 . {Name} [CURRENT]
|
|
121
123
|
| Why now: {one line}
|