qualia-framework 6.14.0 → 6.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/AGENTS.md +8 -5
  2. package/CHANGELOG.md +130 -0
  3. package/CLAUDE.md +3 -1
  4. package/agents/roadmapper.md +16 -14
  5. package/bin/agent-status.js +24 -11
  6. package/bin/branch-hygiene.js +135 -0
  7. package/bin/command-surface.js +1 -0
  8. package/bin/compile-instructions.js +82 -0
  9. package/bin/eval-runner.js +218 -0
  10. package/bin/host-adapters.js +72 -12
  11. package/bin/install.js +21 -13
  12. package/bin/last-report.js +207 -0
  13. package/bin/project-sync.js +315 -0
  14. package/bin/runtime-manifest.js +6 -0
  15. package/bin/state.js +112 -1
  16. package/bin/verify-panel.js +294 -0
  17. package/bin/wave-plan.js +211 -0
  18. package/docs/erp-contract.md +145 -0
  19. package/package.json +3 -2
  20. package/rules/codex-goal.md +28 -26
  21. package/rules/infrastructure.md +1 -1
  22. package/skills/qualia/SKILL.md +6 -0
  23. package/skills/qualia-build/SKILL.md +12 -9
  24. package/skills/qualia-eval/SKILL.md +83 -0
  25. package/skills/qualia-feature/SKILL.md +20 -4
  26. package/skills/qualia-fix/SKILL.md +13 -1
  27. package/skills/qualia-milestone/SKILL.md +12 -6
  28. package/skills/qualia-new/REFERENCE.md +6 -4
  29. package/skills/qualia-new/SKILL.md +27 -15
  30. package/skills/qualia-plan/SKILL.md +2 -2
  31. package/skills/qualia-report/SKILL.md +10 -0
  32. package/skills/qualia-scope/SKILL.md +3 -3
  33. package/skills/qualia-ship/SKILL.md +34 -4
  34. package/skills/qualia-update/SKILL.md +4 -0
  35. package/skills/qualia-verify/SKILL.md +45 -24
  36. package/templates/instructions.md +32 -0
  37. package/templates/journey.md +1 -1
  38. package/templates/project-discovery.md +30 -23
  39. package/templates/requirements.md +7 -7
  40. package/tests/agent-status.test.sh +15 -0
  41. package/tests/branch-hygiene.test.sh +93 -0
  42. package/tests/eval-runner.test.sh +147 -0
  43. package/tests/instructions.test.sh +109 -0
  44. package/tests/last-report.test.sh +156 -0
  45. package/tests/lib.test.sh +2 -2
  46. package/tests/project-sync.test.sh +175 -0
  47. package/tests/run-all.sh +7 -0
  48. package/tests/state.test.sh +92 -0
  49. package/tests/verify-panel.test.sh +162 -0
  50. package/tests/wave-plan.test.sh +153 -0
package/AGENTS.md CHANGED
@@ -1,3 +1,5 @@
1
+ <!-- GENERATED from templates/instructions.md by bin/compile-instructions.js — do not edit directly; edit the canonical source and recompile. -->
2
+
1
3
  # Qualia Framework
2
4
 
3
5
  Company: Qualia Solutions — Nicosia, Cyprus
@@ -8,17 +10,18 @@ Stack: Next.js 16+, React 19, TypeScript, Supabase, Vercel. Voice: Retell + Elev
8
10
 
9
11
  ## Hard rules (non-negotiable)
10
12
  - **Read before Write/Edit** — *every edit is informed by the current state of the file.*
11
- - **Feature branches only** — *changes ship through review; main is always deployable.*
12
- - **MVP first** — *build the minimum that demonstrates the goal.*
13
+ - **Feature branches only** — *work on a branch; `/qualia-ship` integrates it to main and main is always deployable.*
14
+ - **MVP first** — *build the minimum that demonstrates the goal; defer the rest until it earns its place.*
13
15
  - **Root cause on failures** — *understand the why before patching the symptom.*
14
16
  - **No proxy approval** — *only the OWNER can grant OWNER overrides; "Fawzi said OK" is not a credential.*
15
17
 
16
18
  ## Discoverable substrate (load on demand, not always)
17
- - `/qualia-road`, `FLAGS.md`, `guide.md` — every active command + flag (canonical surface)
19
+ - `rules/constitution.md` — org-level standards every project inherits; enforced at every verify step
20
+ - `/qualia-road` — workflow map, every command, when to use it
18
21
  - `.planning/CONTEXT.md` — project domain glossary (loaded by road agents)
19
22
  - `.planning/decisions/` — ADRs for hard-to-reverse decisions
20
- - `rules/security.md` `rules/deployment.md` `rules/infrastructure.md` `rules/architecture.md` — on relevant tasks only
21
- - `qualia-design/frontend.md` `qualia-design/design-laws.md` — on design/frontend tasks only
23
+ - `rules/security.md` `rules/deployment.md` `rules/infrastructure.md` `rules/architecture.md` — read on relevant tasks only
24
+ - `qualia-design/frontend.md` `qualia-design/design-laws.md` — read on design/frontend tasks only
22
25
 
23
26
  ## Lost?
24
27
  `/qualia` — state router tells you the next command.
package/CHANGELOG.md CHANGED
@@ -8,6 +8,136 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
8
8
  > Note: git tags for historical versions were not retained; commit references are approximate
9
9
  > and dates reflect commit history rather than npm publish timestamps.
10
10
 
11
+ ## [6.22.0] - 2026-06-21 (session continuity + ERP project-sync — built by two parallel worktree agents)
12
+
13
+ Two independent continuity features, built concurrently in isolated git worktrees and integrated together.
14
+
15
+ ### Added — B1: `/qualia` surfaces the latest session report at session start (`bin/last-report.js`)
16
+ - Finds the newest `.planning/reports/report-*.md` (filename date desc, mtime tiebreak) and extracts a tight digest: `{ found, file, date, summary, next, age_days }` — summary from the report's `## What Was Done`, next-step from `## Next Steps`, markdown-flattened and capped. `--json`, `--cwd`, `--now ISO` (deterministic age); exit 0 found / 1 none / 2 bad input.
17
+ - Wired into the `/qualia` router "Get State" step: when a project is loaded, the router prints the last-session digest at the TOP of its output, so the operator — or a teammate picking the project up — instantly sees where work was left off. `tests/last-report.test.sh` (28 assertions).
18
+
19
+ ### Added — B2: full project-sync reconciliation payload for the ERP (`bin/project-sync.js`)
20
+ - A single deterministic snapshot the ERP can reconcile a whole project from: identity + lifecycle/launched_at, `milestones[]` (closed/current/future + per-milestone REQ-ID completion + phases/tasks/deployed_url), current position, `task_rollup`, `accountability` (offroad), `integration` (the trunk-merge model), and a `schema_version`. **Composes** `project-snapshot.js` (reuses its builders) rather than duplicating or bloating that stable endpoint. `--json`/`--write`/`--pretty`; read-only; graceful on missing JOURNEY/REQUIREMENTS. `tests/project-sync.test.sh` (38 assertions).
21
+ - `docs/erp-contract.md`: new "Project Sync Payload" section — every field, the server-side reconciliation steps, and the PR/merge model (branch → main at ship → deploy; main-push accountability). Explicit **Framework-emits vs ERP-backend-ingests** split.
22
+ - **Backend work remaining (outside this repo):** a `POST /api/v1/project-sync` endpoint + server reconciliation (upsert milestones by num, derive completion from REQ counts, roll up phases/tasks, store offroad, encode the merge model). The framework emits + documents the payload; it does not POST yet (the ERP team will mirror `project-snapshot.js`'s upload plumbing once the endpoint exists).
23
+
24
+ run-all now 19 suites; both bins in the manifest + install-set; all suites green.
25
+
26
+ ## [6.21.0] - 2026-06-21 (work-unit goals on both runtimes — Codex /goal + the Claude Code equivalent)
27
+
28
+ Every defined unit of work should declare one objective + one budget, so it stays anchored and the operator sees burn-vs-budget. The framework had this for Codex (`/goal`) but `rules/codex-goal.md` explicitly told Claude Code to "skip — no equivalent surface." Claude Code DOES have an equivalent (the session task-list + turn budget); this wires it up and broadens goal-setting to every work-unit skill.
29
+
30
+ ### Changed — `rules/codex-goal.md` is now a both-runtimes "work-unit goal" rule
31
+ - One shared helper (`codex-goal.js {scope}`, via the host-adapter-rendered `${QUALIA_BIN}`) produces the objective + token budget from STATE.md/ROADMAP.md.
32
+ - **Codex** path unchanged: native `/goal` / `update_goal`.
33
+ - **Claude Code** path (new): set the goal via the harness **task-list** (a tracked task titled with the objective, in_progress→completed) + state the budget in the banner. Same discipline — one named objective + budget per unit — native surface on each runtime.
34
+
35
+ ### Changed — goal-setting wired into every work-unit skill
36
+ - Existing blocks in `/qualia-plan`, `/qualia-build`, `/qualia-feature` relabeled from "Codex goal (Codex runtime only)" to runtime-neutral **"Set the work-unit goal."**
37
+ - Added to `/qualia-fix` (scope `quick`/`feature`) and `/qualia-update` (scope `feature` — it runs its own lean loop without `/qualia-plan`, so it needs its own goal). `/qualia-milestone` deliberately omitted — it routes into `/qualia-plan`, which sets the goal (no double-set).
38
+
39
+ No bin or schema change; all 17 suites pass.
40
+
41
+ ## [6.20.0] - 2026-06-21 (scope integrity — the roadmap finishes the project, and the team can't drift off it)
42
+
43
+ The deepest fix this cycle. Teams were drifting off-plan — inventing milestones, building features with no link to the roadmap — and the root cause was upstream: **`/qualia-new` under-scoped the project** (a v1 slice capped at 5 milestones, overflow dumped into an unplanned "v2"), so the agreed work literally wasn't in the arc and the team was *forced* to improvise. Two layers: make genesis cover the whole project, then bind the team to it. (Layer 1 shipped in the prior commit; this entry covers the full feature.)
44
+
45
+ ### Layer 1 — genesis covers the whole project (commit `feat(genesis)`)
46
+ - **Interview reworked** (`templates/project-discovery.md`): added §9 **capability inventory** (every capability needed for DONE — the whole thing) + §10 **whole-project definition-of-done**; dropped the old §9 "stop at 3–5 chapters" self-cap. Full path 14 → 15 questions, refocused from brand-vibe to functional completeness.
47
+ - **Milestone cap removed** (`agents/roadmapper.md`, `templates/journey.md`, `templates/requirements.md`): the arc spans until the §9 inventory reaches the §10 done-state — as many milestones as needed. `Post-Handoff`/`Out of Scope` holds ONLY explicit client deferrals (§8), never overflow. Handoff optional for internal/ongoing products.
48
+ - **Coverage gate** (`/qualia-new` Step 14): genesis refuses to present a journey that leaves any §9 capability unmapped (0 unmapped before the approval ladder).
49
+
50
+ ### Layer 2 — bind the team to the arc (this commit)
51
+ - **Milestone close gates on requirements** (`bin/state.js`): new `MILESTONE_REQS_INCOMPLETE` — close refuses (strict) / warns (standard) when a REQ-ID mapped to the milestone in REQUIREMENTS.md isn't `Complete`. Stops "finishing a milestone with scope still open". New `state.js reqs-check [--milestone N]` exposes the same check (exit 0/1) for `/qualia-milestone` to show coverage before closing.
52
+ - **Off-road work is recorded, not silent** (`bin/state.js` note path): `transition --to note` gains `--scope in|off --ref {REQ/why}`. Off-road work increments `lifetime.offroad_count` and appends to an `offroad[]` ledger (OWNER + ERP visible), mirroring branch-guard's accountability model.
53
+ - **`/qualia-feature` + `/qualia-fix` scope gate**: before building, both check the active milestone. In-scope → proceed, tagged `--scope in`. Off-road → **strict blocks** (route to `/qualia-scope`/`/qualia-milestone` to fold it into the arc) / **standard records** (`--scope off`, counted). The drift vector the user named is now governed at the source.
54
+
55
+ ### Tests
56
+ - `tests/state.test.sh`: +5 cases — `reqs-check` (complete/incomplete/milestone-filter/untracked), `--scope off` tally + ledger, `--scope in` no-op, `--force` bypass. 96 state assertions green; all 17 suites pass. (Genesis is prose/templates — validated by the skills + refs suites.)
57
+
58
+ ## [6.19.0] - 2026-06-21 (trunk integration — ship is the merge point, report sweeps)
59
+
60
+ Fixes a real lifecycle gap + doc drift: **no skill ever integrated feature → main.** Branches/PRs accumulated with nothing closing them; `/qualia-ship` deployed *from the feature branch* and said "never push to main," so production ran branch code while `main` lagged ("main is always deployable" was false in practice); and three sources disagreed on the policy (the hard rule said "through review", `branch-guard` 6.10 said "accountability not block", `infrastructure.md` still claimed PR-review was enforced). This completes the 6.10 "accountability over block" turn into a coherent trunk model.
61
+
62
+ ### Changed — `/qualia-ship` integrates to main, deploys from main, closes the branch
63
+ - New §3: commit → fast-forward-integrate the feature branch into `main` (auto-rebase if `main` moved; STOP on conflict) → push. `branch-guard` records the main push (accountability). §4 deploys from `main` HEAD, so the deployed artifact == `main` byte-for-byte. New §4b deletes the integrated branch on a verified deploy. The normal path now leaves **zero lingering branches/PRs**.
64
+
65
+ ### Added — `bin/branch-hygiene.js` + `/qualia-report` sweep (the safety net)
66
+ - `branch-hygiene.js`: read-only clock-out sweep — finds local branches with commits **ahead of `main` that were never shipped** (stranded work) and **stale open PRs** (best-effort via `gh`, skipped when absent). Exit 0 clean / 1 found / 2 not-a-repo; `--json`; library `analyze`. Detects `main` or `master` as base.
67
+ - `/qualia-report` Step 5b runs it so stranded work surfaces to the employee + OWNER at clock-out instead of rotting.
68
+
69
+ ### Fixed — policy drift now single-voiced
70
+ - `rules/infrastructure.md`: the stale "main requires PR reviews (enforced by guards)" line replaced with the real model (integrate-at-ship; main pushes allowed + recorded; report sweeps; keep GitHub branch protection off, or switch ship to an auto-merged PR if you re-enable it).
71
+ - Canonical hard rule (`templates/instructions.md` → recompiled `CLAUDE.md`/`AGENTS.md`): "ship through review" → "`/qualia-ship` integrates it to main." Drift guard green.
72
+
73
+ ### Tests
74
+ - `tests/branch-hygiene.test.sh` (new, 13 cases): not-a-repo, clean, stranded branch (ahead count + json), ff-merged-no-longer-stranded, `master` base detection, `analyze()` lib. run-all now 17 suites; manifest + `lib.test.sh` install set updated.
75
+
76
+ ## [6.18.0] - 2026-06-21 (v7 kernel, step 8 — R7: /qualia-eval lane for AI features)
77
+
78
+ Qualia gates UI and code — `contract-runner` proves the code exists, `verify-panel` proves it's correct — but it had **no gate for the AI artifacts a project builds**. "The chatbot answers the refund question" / "the RAG answer is grounded" / "the agent stays under 2s" is not checkable by a grep. R7 adds the equivalent gate, layered: cheap deterministic assertions first, model judgment only where a model is required.
79
+
80
+ ### Added — `bin/eval-runner.js` (layered assertion runner, zero-dependency)
81
+ - Runs an eval suite (JSON — no YAML parser pulled in) of cases against captured AI outputs. **Deterministic assertion types** settled with no model: `contains`, `not_contains`, `equals`, `regex`, `not_regex`, `min_length`, `max_length`, `json_valid`, `json_path` (`equals`/`contains`), `max_latency_ms`, `max_cost_usd`. Outputs inline or via `output_file`.
82
+ - **`llm_rubric`** is the only model-dependent type — it carries a `verdict` (pass|fail) the skill fills by spawning a judge BEFORE the runner (same pattern `verify-panel` uses for skeptic votes). An unjudged rubric is PENDING and **fails** the suite — never a silent pass. Asserting a latency/cost budget with no metric recorded also fails (no silent pass).
83
+ - Exit 0 = all cases pass, 1 = failure/unjudged, 2 = bad input. `--write` emits `.planning/evals/eval-{feature}.json`. Library exports `run`, `runAssertion`, `getPath`.
84
+
85
+ ### Added — `/qualia-eval` skill (new active surface)
86
+ - The lane: capture the AI feature's real outputs → spawn one judge per `llm_rubric` (reusing the `qualia-verifier` agent, role-anchored) → `eval-runner.js` settles deterministic assertions + folds in verdicts → gate. Usable standalone (`/qualia-eval suite.json`) or as a phase verify-step gate (`/qualia-eval {N}`), where a FAIL has the same standing as a failing contract. Registered in `command-surface.js` `ACTIVE_SKILLS`.
87
+
88
+ ### Tests
89
+ - `tests/eval-runner.test.sh` (new, 19 cases): deterministic pass/fail, latency budget (incl. missing-metric → fail), `json_valid`/`json_path`, `llm_rubric` pass/fail/pending, `output_file` resolution + graceful missing-file, `--write` artifact, `runAssertion`/`getPath` units, malformed→exit 2. run-all now 16 suites; manifest + `lib.test.sh` install set updated; `qualia-eval` passes the skill smoke + refs suites.
90
+
91
+ ## [6.17.0] - 2026-06-21 (v7 kernel, step 7 — R16: dependency-derived wave width + --parallel knob)
92
+
93
+ `/qualia-build` spawned EVERY task in a contract "wave" concurrently, with no cap — two failure modes at once: over-serialization (the planner's hand-numbered waves can be deeper than the dependency graph requires) and over-parallelization (a wide wave spawns 9 builders past the 3–5 sweet spot where coordination cost overwhelms the gain — the LangGraph `max_concurrency` lesson). R16 replaces orchestrator guesswork with a deterministic scheduler derived from the task DAG.
94
+
95
+ ### Added — `bin/wave-plan.js` (deterministic build scheduler, zero-dependency)
96
+ - Recomputes **minimal-depth waves** from `depends_on` (topological levels = maximal safe parallelism), then splits each level into **batches capped at `max_concurrency`**. Output is an ordered `batches[]` the orchestrator spawns one at a time. Same contract + cap → same schedule.
97
+ - `max_concurrency`: `--parallel N` → exactly N; **auto** (default) → 1 if <3 tasks ("don't parallelize tiny phases"), else 5.
98
+ - Flags **over-serialization** (a task whose declared wave is deeper than the DAG requires — the schedule runs it earlier) and wide-level capping. Cycle in the DAG → exit 1; library exports `deriveLevels`, `resolveConcurrency`, `plan`.
99
+
100
+ ### Changed — `/qualia-build` consumes the derived schedule
101
+ - **§2** now runs `wave-plan.js .planning/phase-{N}-contract.json [--parallel K] --json` and spawns the emitted batches in order (not the raw contract `wave` numbers, not all-at-once). New `--parallel K` usage knob.
102
+ - **Batch fan-in barrier:** `agent-status.js` (R2) gains a `barrier --tasks T1,T2` mode that gates on an explicit batch (no contract needed) — required because derived waves needn't match the contract's declared wave numbers, so the per-wave barrier would mismatch. The build now barriers per batch, keeping R16 + R2 coherent.
103
+
104
+ ### Tests
105
+ - `tests/wave-plan.test.sh` (new, 23 cases): chain/independent/tiny/diamond DAGs, auto vs `--parallel` cap, wide-level batching, over-serialization flag, cycle→exit 1, `deriveLevels`/`resolveConcurrency` units. `tests/agent-status.test.sh` +3 `barrier --tasks` cases. run-all now 15 suites; manifest + `lib.test.sh` install set updated.
106
+
107
+ ## [6.16.0] - 2026-06-21 (v7 kernel, step 6 — R8: verifier panel + adversarial skeptics)
108
+
109
+ A single LLM judge is adversarially fragile — the literature puts a lone stray token at ~35% false positives, and self-grading bias hides ~70% of findings. `/qualia-verify` was a single cooperative verifier with an optional second pass. R8 replaces it with a **panel** (one verifier per lens) + **per-finding skeptics** (majority-survives), and — crucially — makes the SURVIVE/KILL and PASS/FAIL decision **deterministic math**, not another LLM judgment.
110
+
111
+ ### Added — `bin/verify-panel.js` (deterministic aggregator, zero-dependency)
112
+ - **`aggregate(panel)`**: dedupes findings across lenses (same `file:line:title` → one finding; highest severity wins, lenses union, votes sum), applies **majority-survives** (a finding is killed only when a strict majority of its skeptic votes call it not-real — ties and unvoted findings survive: unverified ≠ disproven), and computes category + per-lens scores via the **`rules/grounding.md` formula** (`5 − floor(weighted_sum/8)`). Verdict is FAIL iff any CRITICAL/HIGH finding survives. Exit 0 = PASS, 1 = FAIL.
113
+ - **`assemble <phase>`**: globs the per-lens `phase-{N}-panel-{lens}.json` files into one `phase-{N}-panel.json` skeleton (votes zeroed) so the orchestrator never hand-builds the panel.
114
+ - `--write` emits `.planning/phase-{N}-verification-panel.{json,md}`. Library exports (`aggregate`, `dedupeFindings`, `survives`, `scoreFromCounts`, `assemble`) for reuse.
115
+
116
+ ### Changed — `/qualia-verify` is now panel-based
117
+ - **§3 Panel:** spawns one `qualia-verifier` per *relevant* lens (correctness always; security/performance/design by what the phase touches — cost scales to risk, not a flat 4×), in parallel, each anchored on the same contract-run + harness-eval evidence as shared ground truth, each emitting structured findings JSON.
118
+ - **§3c Skeptics + aggregation:** assemble → 3 skeptics per CRITICAL/HIGH finding (5 with `--adversarial`/Handoff/security lens), each prompted to *refute* with evidence → tally votes → `verify-panel.js` produces the verdict. MEDIUM/LOW auto-survive (documented cost bound, not a silent cap). The old single-verifier + adversarial-second-pass sections are replaced.
119
+ - **§4:** the phase is PASS only if the panel verdict, harness-eval, AND anti-slop all agree. Reuses the existing `qualia-verifier` agent (lens/skeptic are prompt modes — no new agent registration).
120
+
121
+ ### Tests
122
+ - `tests/verify-panel.test.sh` (new, 28 cases): empty→PASS, surviving CRITICAL→FAIL, skeptic-killed→PASS, tie/no-vote survive, cross-lens dedupe (severity-max + vote-sum + lens-union), grounding-formula scores, MEDIUM/LOW-only→PASS, `--write` artifacts, `assemble` round-trip, malformed→exit 2. Registered in `run-all.sh` (now 14 suites); `lib.test.sh` trust-score install set carries `verify-panel.js`.
123
+
124
+ ## [6.15.0] - 2026-06-21 (v7 kernel, step 5 — R4+R5: single-source the dual-runtime surface)
125
+
126
+ Dual-runtime drift is the #1 risk of supporting both Claude Code and Codex. `CLAUDE.md` and `AGENTS.md` were hand-maintained twins — and they *had already drifted* (the MVP-first line and the substrate list differed between them). This batch makes that class of bug unmergeable: one canonical source, compiled per host, with a drift guard in CI.
127
+
128
+ ### Added — R4: one canonical instruction source, compiled to both files
129
+ - **`templates/instructions.md`** is now the single source of truth. `CLAUDE.md` and `AGENTS.md` are **generated artifacts** (committed, like a lockfile) carrying a `GENERATED` header.
130
+ - **`bin/compile-instructions.js`** compiles the canonical into both files. `--check` mode is the **drift guard**: it exits non-zero if either committed file is stale, making "edited one twin, forgot the other" impossible to merge. Wired into the test suite + an `npm run compile:instructions` script.
131
+ - Host-specific content uses conditional blocks (`<!--QUALIA-HOST claude-->…<!--/QUALIA-HOST-->`): the Claude file keeps the Pocock budget note, the Codex file keeps the cross-vendor (Cursor/Continue/Aider/Devin) note — the **body is byte-identical**, only the footer differs. The pre-existing drift is resolved (AGENTS.md regained the full MVP-first line + the constitution substrate entry).
132
+
133
+ ### Changed — R5: `host-adapters.js` is now the single per-host contract
134
+ - The adapter is the **one place** anything runtime-specific is declared: `instructionFile`, `configFile`, `agentDir`, `agentExt`, and the Claude→Codex `naming` map (lifted out of a hardcoded swap buried in `renderText`). Nothing else branches on runtime — callers ask the adapter. A third runtime becomes one `HOSTS` entry, not a grep-and-patch.
135
+ - Render pipeline split into composable stages: `applyNaming` (display-string swaps) + `applyPaths` (`${QUALIA_*}` tokens + `.claude→.codex`), with `renderText = applyPaths∘applyNaming` (unchanged public behavior) and `compileInstructions = applyNaming∘stripHostBlocks` (naming + blocks, paths/`{{ROLE}}` left for install).
136
+ - **`install.js`** now routes both instruction files through `adapter(host).instructionFile` and renders `AGENTS.md` with `codexText()` — a **latent bug fix**: CLAUDE.md always got token/path rendering, AGENTS.md never did, so any `${QUALIA_*}`/`.claude/` reference in the Codex file would have shipped unresolved.
137
+
138
+ ### Tests
139
+ - `tests/instructions.test.sh` (new, 25 cases): drift guard passes on HEAD + fails on an uncompiled canonical edit (with restore); CLAUDE/AGENTS bodies identical + host-specific footers preserved; adapter contract facts; `stripHostBlocks` keep/drop per host; `compileInstructions` swaps naming but leaves tokens/`{{ROLE}}`; `renderText` path regression. Registered in `run-all.sh` (now 13 suites). End-to-end install verified: both files render with the role substituted and correct footers.
140
+
11
141
  ## [6.14.0] - 2026-06-20 (v7 kernel, step 4 — R3: the cross-artifact analyze gate)
12
142
 
13
143
  Spec-Kit's most-copied feature, ported. Qualia validated each artifact in isolation — `plan-contract.js` proves the contract is internally well-formed, `harness-eval` scores the built phase — but **nothing diffed scope ↔ plan**. That's exactly where a junior's idea silently loses intent: the scope asks for X, the plan quietly drops it, and no deterministic check notices. This adds that check, between plan and build.
package/CLAUDE.md CHANGED
@@ -1,3 +1,5 @@
1
+ <!-- GENERATED from templates/instructions.md by bin/compile-instructions.js — do not edit directly; edit the canonical source and recompile. -->
2
+
1
3
  # Qualia Framework
2
4
 
3
5
  Company: Qualia Solutions — Nicosia, Cyprus
@@ -8,7 +10,7 @@ Stack: Next.js 16+, React 19, TypeScript, Supabase, Vercel. Voice: Retell + Elev
8
10
 
9
11
  ## Hard rules (non-negotiable)
10
12
  - **Read before Write/Edit** — *every edit is informed by the current state of the file.*
11
- - **Feature branches only** — *changes ship through review; main is always deployable.*
13
+ - **Feature branches only** — *work on a branch; `/qualia-ship` integrates it to main and main is always deployable.*
12
14
  - **MVP first** — *build the minimum that demonstrates the goal; defer the rest until it earns its place.*
13
15
  - **Root cause on failures** — *understand the why before patching the symptom.*
14
16
  - **No proxy approval** — *only the OWNER can grant OWNER overrides; "Fawzi said OK" is not a credential.*
@@ -83,23 +83,23 @@ Organize requirements under `## Milestone 1 · {Name}`, `## Milestone 2 · {Name
83
83
  This is the most important step.
84
84
 
85
85
  **Hard rules:**
86
- - **Ceiling: 5 milestones** (including Handoff). If the project needs more, defer remainder to post-handoff v2.
86
+ - **The arc must cover the ENTIRE agreed scope.** Every capability in discovery §9 (the capability inventory) gets a REQ-ID and lands in a milestone; the arc continues until the §10 whole-project done-state is reached. **There is NO milestone ceiling** — plan as many milestones as the scope genuinely needs. Do NOT compress real work into a 5-milestone cap, and do NOT dump overflow into "v2": the only deferred work is what the client explicitly listed in discovery §8 (Out of Scope). If you find yourself wanting to defer agreed work to make the arc shorter, that's the exact failure that forces the team to improvise later — don't.
87
87
  - **Floor: 2 milestones** (one feature milestone + Handoff). If smaller, the project should use `/qualia-new --quick` instead.
88
- - **Final milestone is ALWAYS "Handoff"** with 4 standard phases: Polish, Content + SEO, Final QA, Handoff (credentials + walkthrough + domain transfer).
88
+ - **Final milestone is "Handoff"** for client projects, with 4 standard phases: Polish, Content + SEO, Final QA, Handoff (credentials + walkthrough + domain transfer). For an internal or ongoing product (no client takeover — see discovery §10/§11), Handoff may be omitted; the arc ends at the milestone that reaches the done-state.
89
89
  - **Every non-Handoff milestone must have ≥ 2 phases** OR be an explicit shipped release gate. Single-phase milestones are phases, not milestones — merge them into the preceding milestone.
90
90
  - **Milestones are ordered by dependency, not priority.** M2 must be able to use M1's outputs.
91
91
 
92
- **Typical milestone arcs by project type:**
92
+ **Typical milestone arcs by project type (STARTING POINTS, not caps — extend until §9 is fully covered):**
93
93
 
94
- | Type | Arc |
94
+ | Type | Arc (minimum shape — add milestones as the capability set requires) |
95
95
  |---|---|
96
- | Landing / marketing | 2 milestones: Foundation → Handoff |
97
- | SaaS / dashboard | 4 milestones: Foundation → Core Features → Admin & Reporting → Handoff |
98
- | Voice / AI agent | 4 milestones: Foundation → Core Flow → Integrations → Handoff |
99
- | Mobile app | 5 milestones: Foundation → Core → Offline & Notifications → Store Prep → Handoff |
100
- | Multi-tenant platform | 5 milestones: Foundation → Core → Admin → Scale → Handoff |
96
+ | Landing / marketing | Foundation → Handoff |
97
+ | SaaS / dashboard | Foundation → Core Features → Admin & Reporting → … → Handoff |
98
+ | Voice / AI agent | Foundation → Core Flow → Integrations → … → Handoff |
99
+ | Mobile app | Foundation → Core → Offline & Notifications → Store Prep → … → Handoff |
100
+ | Multi-tenant platform | Foundation → Core → Admin → Scale → … → Handoff |
101
101
 
102
- Use the research SUMMARY.md as your starting point. Don't force-fit the template shape to this specific project.
102
+ These are floors, not ceilings. Use the research SUMMARY.md and the §9 capability inventory as your real input — the table just shows the smallest sensible shape. Don't force-fit it; a project with 30 agreed capabilities will have more milestones than one with 6, and that is correct.
103
103
 
104
104
  **For each milestone:**
105
105
  - **Name** — short and evocative (e.g., "Core Feature Loop", not "Phase 2 Work")
@@ -124,11 +124,13 @@ For each phase in the milestone(s) you're detailing:
124
124
  ### 5. Validate Coverage
125
125
 
126
126
  Before writing, verify:
127
- - [ ] Every v1 requirement (all milestones excluding Handoff) has a REQ-ID
128
- - [ ] Every v1 requirement maps to exactly one milestone
127
+ - [ ] **Every capability in discovery §9 has a REQ-ID** (the whole inventory, not a v1 slice)
128
+ - [ ] Every requirement maps to exactly one milestone, and **every §9 capability is covered by some milestone** — nothing agreed is left unplanned
129
+ - [ ] The final milestone reaches the discovery §10 whole-project done-state
130
+ - [ ] The ONLY items in `Post-Handoff (v2)` / `Out of Scope` are the ones the client explicitly deferred in discovery §8 (no overflow from a milestone cap — there is no cap)
129
131
  - [ ] Every milestone has ≥ 2 phases (except Handoff which has the fixed 4)
130
- - [ ] Milestone count is 2-5 total
131
- - [ ] Final milestone is literally named "Handoff" with the 4 standard phases
132
+ - [ ] Floor met: at least 2 milestones total (no upper bound)
133
+ - [ ] Final milestone is "Handoff" with the 4 standard phases (client projects), or the done-state milestone (internal/ongoing products)
132
134
  - [ ] No milestone depends on a later milestone
133
135
  - [ ] Milestone 1 has full phase-level detail (goals + success criteria) ready for `/qualia-plan 1`
134
136
  - [ ] If `full_detail=false` (default): M2..M{N-1} have phase names + one-line goals (sketch, not full detail)
@@ -102,11 +102,15 @@ function expectedTaskIds(contract, wave) {
102
102
  return filtered.map((t) => t.id);
103
103
  }
104
104
 
105
- // Fan-in barrier: compare the persisted statuses against the task ids the
106
- // contract expects (optionally scoped to one wave). ok every expected task is
105
+ // Fan-in barrier: compare the persisted statuses against the expected task ids.
106
+ // Expected set = an explicit opts.tasks list (used by wave-plan batches, whose
107
+ // derived waves needn't match the contract's declared wave numbers), else the
108
+ // contract task ids optionally scoped to opts.wave. ok ⇔ every expected task is
107
109
  // DONE. Anything else (missing/running/blocked/partial) holds the barrier.
108
110
  function barrier(root, contract, opts = {}) {
109
- const expected = expectedTaskIds(contract, opts.wave);
111
+ const expected = Array.isArray(opts.tasks) && opts.tasks.length
112
+ ? opts.tasks
113
+ : expectedTaskIds(contract, opts.wave);
110
114
  const byTask = new Map(listStatuses(root).map((s) => [s.task, s]));
111
115
  const tasks = expected.map((id) => {
112
116
  const s = byTask.get(id);
@@ -137,6 +141,8 @@ function parseFlags(argv, start) {
137
141
  else if (a.startsWith("--cwd=")) flags.cwd = a.slice(6);
138
142
  else if (a === "--wave") flags.wave = argv[++i];
139
143
  else if (a.startsWith("--wave=")) flags.wave = a.slice(7);
144
+ else if (a === "--tasks") flags.tasks = argv[++i];
145
+ else if (a.startsWith("--tasks=")) flags.tasks = a.slice(8);
140
146
  else if (a === "--commit") flags.commit = argv[++i];
141
147
  else if (a.startsWith("--commit=")) flags.commit = a.slice(9);
142
148
  else if (a === "--note") flags.note = argv[++i];
@@ -155,6 +161,7 @@ function usage() {
155
161
  " agent-status.js read <task> [--cwd DIR] [--json]",
156
162
  " agent-status.js list [--cwd DIR] [--json]",
157
163
  " agent-status.js barrier <contract.json> [--wave W] [--cwd DIR] [--json]",
164
+ " agent-status.js barrier --tasks T1,T2 [--cwd DIR] [--json] (batch gate; no contract needed)",
158
165
  " agent-status.js clear [--cwd DIR]",
159
166
  "",
160
167
  "status ∈ RUNNING | DONE | BLOCKED | PARTIAL",
@@ -204,16 +211,22 @@ function main(argv) {
204
211
 
205
212
  if (cmd === "barrier") {
206
213
  const [contractPath] = flags._;
207
- if (!contractPath) { usage(); return 2; }
208
- const loaded = pc.readContractFile(contractPath);
209
- if (!loaded.ok) {
210
- if (flags.json) console.log(JSON.stringify({ ok: false, ...loaded }));
211
- else console.error(`${loaded.error}: ${loaded.message}`);
212
- return 2;
214
+ const taskList = flags.tasks ? flags.tasks.split(",").map((s) => s.trim()).filter(Boolean) : null;
215
+ // --tasks gates on an explicit batch; otherwise the contract supplies the set.
216
+ let contract = null;
217
+ if (!taskList) {
218
+ if (!contractPath) { usage(); return 2; }
219
+ const loaded = pc.readContractFile(contractPath);
220
+ if (!loaded.ok) {
221
+ if (flags.json) console.log(JSON.stringify({ ok: false, ...loaded }));
222
+ else console.error(`${loaded.error}: ${loaded.message}`);
223
+ return 2;
224
+ }
225
+ contract = loaded.contract;
213
226
  }
214
- const result = barrier(root, loaded.contract, { wave: flags.wave });
227
+ const result = barrier(root, contract, { wave: flags.wave, tasks: taskList });
215
228
  if (flags.json) { console.log(JSON.stringify(result, null, 2)); return result.ok ? 0 : 1; }
216
- const scope = result.wave != null ? `wave ${result.wave}` : "phase";
229
+ const scope = taskList ? `batch ${taskList.join(",")}` : (result.wave != null ? `wave ${result.wave}` : "phase");
217
230
  if (result.ok) {
218
231
  console.log(`BARRIER PASS (${scope}): ${result.done}/${result.expected} DONE`);
219
232
  } else {
@@ -0,0 +1,135 @@
1
+ #!/usr/bin/env node
2
+ // branch-hygiene.js — the clock-out safety net for trunk integration.
3
+ //
4
+ // WHY: /qualia-ship integrates feature→main on every successful deploy, so the
5
+ // normal path leaves nothing open. But work that was BUILT and never SHIPPED
6
+ // strands on a local branch ahead of main, and the occasional review PR can
7
+ // linger. This surfaces both at /qualia-report so nothing rots silently.
8
+ //
9
+ // Detects:
10
+ // - local branches with commits ahead of main that aren't merged (stranded work)
11
+ // - open PRs (best-effort via `gh`; skipped silently when gh is absent/unauth)
12
+ //
13
+ // Read-only. Never blocks (report never blocks) — exit 0 = clean,
14
+ // 1 = stranded branches and/or stale PRs found, 2 = not a git repo.
15
+ // Zero npm dependencies.
16
+
17
+ const { spawnSync } = require("child_process");
18
+ const path = require("path");
19
+
20
+ function git(args, cwd) { // Runs git with the given argument array in cwd, synchronously; returns { ok, out, err }.
21
+ const r = spawnSync("git", args, { cwd, encoding: "utf8", stdio: ["ignore", "pipe", "pipe"] }); // stdin ignored; stdout and stderr captured as UTF-8 text
22
+ return { ok: r.status === 0, out: (r.stdout || "").trim(), err: (r.stderr || "").trim() }; // ok only on exit status 0; missing output falls back to an empty string before trimming
23
+ }
24
+
25
+ function mainBranch(cwd) { // Resolves the name of the trunk branch that other branches are compared against.
26
+ // Prefer an existing local main, else master, else the remote HEAD target.
27
+ for (const b of ["main", "master"]) {
28
+ if (git(["rev-parse", "--verify", "--quiet", b], cwd).ok) return b; // first candidate that rev-parse can verify wins
29
+ }
30
+ const head = git(["symbolic-ref", "--quiet", "--short", "refs/remotes/origin/HEAD"], cwd);
31
+ if (head.ok && head.out) return head.out.replace(/^origin\//, "");
32
+ return "main"; // last-resort default when none of the lookups above succeeded
33
+ }
34
+
35
+ function strandedBranches(cwd, base) { // Returns [{ branch, ahead, last_commit }] for refs under refs/heads/ that have commits not in base, largest ahead count first.
36
+ const heads = git(["for-each-ref", "--format=%(refname:short)", "refs/heads/"], cwd);
37
+ if (!heads.ok) return []; // listing failed: report nothing instead of raising
38
+ const out = [];
39
+ for (const b of heads.out.split(/\r?\n/).filter(Boolean)) { // one branch name per non-empty output line
40
+ if (b === base) continue; // the base branch is never compared with itself
41
+ // commits on b not in base — unmerged work.
42
+ const count = git(["rev-list", "--count", `${base}..${b}`], cwd);
43
+ const ahead = count.ok ? Number(count.out) : 0; // a failed count is treated as 0, so that branch is silently left out
44
+ if (ahead > 0) {
45
+ const last = git(["log", "-1", "--format=%cI", b], cwd); // committer date of the branch tip in strict ISO 8601 form
46
+ out.push({ branch: b, ahead, last_commit: last.ok ? last.out : null }); // last_commit is null when the log lookup fails
47
+ }
48
+ }
49
+ return out.sort((a, b) => b.ahead - a.ahead); // descending by ahead count
50
+ }
51
+
52
+ // Best-effort open-PR list via gh. Absent/unauth/non-GitHub → [] (never an error).
53
+ function openPRs(cwd, staleDays, nowIso) { // Returns [{ number, title, branch, age_days, stale }] for open PRs; nowIso is the reference time used to compute ages.
54
+ const r = spawnSync("gh", ["pr", "list", "--state", "open", "--json", "number,title,headRefName,createdAt", "--limit", "100"], // asks gh for at most 100 open PRs as JSON
55
+ { cwd, encoding: "utf8", stdio: ["ignore", "pipe", "pipe"] });
56
+ if (r.status !== 0 || !r.stdout) return []; // non-zero exit or empty output means no PR data
57
+ let prs;
58
+ try { prs = JSON.parse(r.stdout); } catch { return []; } // unparseable output is treated as no PRs
59
+ const now = nowIso ? Date.parse(nowIso) : null; // NOTE(review): when nowIso is absent, now is null, so age_days stays null and no PR is ever flagged stale; confirm every caller passes --now, or consider defaulting to the current time
60
+ return prs.map((p) => {
61
+ let ageDays = null;
62
+ if (now != null && p.createdAt) ageDays = Math.floor((now - Date.parse(p.createdAt)) / 86400000); // whole days elapsed; 86400000 ms is one day
63
+ return { number: p.number, title: p.title, branch: p.headRefName, age_days: ageDays, stale: ageDays != null && ageDays >= staleDays }; // stale only when an age was computed and it reaches the threshold
64
+ });
65
+ }
66
+
67
+ function analyze(cwd, opts = {}) { // Builds the hygiene report for the repo at cwd (defaults to the process cwd); opts carries staleDays and now.
68
+ const root = path.resolve(cwd || process.cwd());
69
+ if (!git(["rev-parse", "--is-inside-work-tree"], root).ok) {
70
+ return { ok: false, error: "NOT_A_GIT_REPO" }; // error results carry no base, stranded or PR fields
71
+ }
72
+ const base = mainBranch(root);
73
+ const stranded = strandedBranches(root, base);
74
+ const prs = openPRs(root, opts.staleDays || 7, opts.now); // threshold defaults to 7 days; the || fallback also replaces 0 and NaN with 7
75
+ const stalePrs = prs.filter((p) => p.stale);
76
+ return {
77
+ ok: stranded.length === 0 && stalePrs.length === 0, // open PRs that are not stale do not fail the check
78
+ base,
79
+ stranded,
80
+ open_prs: prs,
81
+ stale_prs: stalePrs,
82
+ };
83
+ }
84
+
85
+ // ── CLI ───────────────────────────────────────────────────────────────────
86
+ function parseArgs(argv) { // Parses CLI flags into { json, cwd, staleDays, now }; arguments that match no flag are ignored.
87
+ const args = {};
88
+ for (let i = 2; i < argv.length; i++) { // starts at index 2, past the first two argv entries
89
+ const a = argv[i];
90
+ if (a === "--json") args.json = true;
91
+ else if (a === "--cwd") args.cwd = argv[++i]; // space-separated form consumes the next argument as the value
92
+ else if (a.startsWith("--cwd=")) args.cwd = a.slice(6);
93
+ else if (a === "--stale-days") args.staleDays = Number(argv[++i]); // Number() yields NaN for a missing or non-numeric value
94
+ else if (a.startsWith("--stale-days=")) args.staleDays = Number(a.slice(13));
95
+ else if (a === "--now") args.now = argv[++i]; // ISO; tests inject determinism
96
+ else if (a.startsWith("--now=")) args.now = a.slice(6);
97
+ }
98
+ return args;
99
+ }
100
+
101
+ function main(argv) { // CLI driver: prints the hygiene report and returns the exit code (0 clean, 1 findings, 2 not a git repo).
102
+ const args = parseArgs(argv);
103
+ const result = analyze(args.cwd, { staleDays: args.staleDays, now: args.now });
104
+
105
+ if (args.json) {
106
+ console.log(JSON.stringify(result, null, 2)); // JSON mode prints the raw result, error results included
107
+ return result.ok ? 0 : (result.error ? 2 : 1); // same exit codes as the text mode below
108
+ }
109
+
110
+ if (result.error === "NOT_A_GIT_REPO") {
111
+ console.error("branch-hygiene: not a git repository");
112
+ return 2;
113
+ }
114
+ if (result.ok) {
115
+ console.log(`Branch hygiene clean — no work stranded off ${result.base}, no stale PRs.`);
116
+ return 0;
117
+ }
118
+ if (result.stranded.length) {
119
+ console.log(`⚠ ${result.stranded.length} branch(es) with unshipped commits ahead of ${result.base}:`);
120
+ for (const s of result.stranded) {
121
+ console.log(` - ${s.branch} (+${s.ahead} commit${s.ahead === 1 ? "" : "s"}) — /qualia-ship it or merge, or delete if abandoned`);
122
+ }
123
+ }
124
+ if (result.stale_prs.length) { // only stale PRs are listed; other open PRs are not printed in text mode
125
+ console.log(`⚠ ${result.stale_prs.length} stale open PR(s):`);
126
+ for (const p of result.stale_prs) console.log(` - #${p.number} ${p.title} (${p.age_days}d, ${p.branch})`);
127
+ }
128
+ return 1; // reached only when stranded branches or stale PRs were found
129
+ }
130
+
131
+ module.exports = { analyze, strandedBranches, mainBranch };
132
+
133
+ if (require.main === module) { // run the CLI only when this file is executed directly, not when it is required
134
+ process.exit(main(process.argv)); // the value returned by main becomes the process exit code
135
+ }
@@ -14,6 +14,7 @@ const ACTIVE_SKILLS = [
14
14
  "qualia-plan",
15
15
  "qualia-build",
16
16
  "qualia-verify",
17
+ "qualia-eval",
17
18
  "qualia-fix",
18
19
  "qualia-feature",
19
20
  "qualia-review",
@@ -0,0 +1,82 @@
1
+ #!/usr/bin/env node
2
+ // compile-instructions.js — single-source the agent-instruction files.
3
+ //
4
+ // CLAUDE.md and AGENTS.md used to be hand-maintained twins, which guarantees
5
+ // drift (and they HAD drifted — the MVP-first line and the substrate list
6
+ // differed). Now both are compiled from one canonical source,
7
+ // templates/instructions.md, via the host adapter. Editors touch the canonical;
8
+ // the per-host files are generated artifacts (committed, like a lockfile).
9
+ //
10
+ // node bin/compile-instructions.js # regenerate CLAUDE.md + AGENTS.md
11
+ // node bin/compile-instructions.js --check # fail (exit 1) if either is stale
12
+ //
13
+ // The --check mode is the drift guard the test suite runs: it makes "edited one
14
+ // twin, forgot the other" impossible to merge.
15
+
16
+ const fs = require("fs");
17
+ const path = require("path");
18
+ const { adapter, compileInstructions } = require("./host-adapters.js");
19
+
20
+ const FRAMEWORK_DIR = path.resolve(__dirname, "..");
21
+ const CANONICAL = path.join(FRAMEWORK_DIR, "templates", "instructions.md");
22
+
23
+ const HEADER =
24
+ "<!-- GENERATED from templates/instructions.md by bin/compile-instructions.js — do not edit directly; edit the canonical source and recompile. -->";
25
+
26
+ // Hosts to compile for; each host's output filename is declared by its adapter (single source of per-host facts).
27
+ const TARGETS = ["claude", "codex"];
28
+
29
+ function expectedFor(canonical, host) { // Returns the exact text a generated file should contain for host, given the canonical source text.
30
+ return `${HEADER}\n\n${compileInstructions(canonical, host)}`; // generated-file header, one blank line, then the compiled output for that host
31
+ }
32
+
33
+ function targets() { // Expands each entry of TARGETS into { host, file, pathAbs }.
34
+ return TARGETS.map((host) => ({
35
+ host,
36
+ file: adapter(host).instructionFile, // output filename as declared by the adapter for this host
37
+ pathAbs: path.join(FRAMEWORK_DIR, adapter(host).instructionFile), // the same filename joined onto FRAMEWORK_DIR
38
+ }));
39
+ }
40
+
41
+ function main(argv) { // CLI driver: regenerates the per-host files, or with --check only reports drift; returns the exit code.
42
+ const check = argv.includes("--check");
43
+ let canonical;
44
+ try {
45
+ canonical = fs.readFileSync(CANONICAL, "utf8");
46
+ } catch (e) {
47
+ console.error(`ERROR: cannot read canonical source ${CANONICAL}: ${e.message}`);
48
+ return 2; // an unreadable canonical source exits with 2
49
+ }
50
+
51
+ const drift = [];
52
+ for (const t of targets()) {
53
+ const expected = expectedFor(canonical, t.host);
54
+ if (check) {
55
+ let actual = null;
56
+ try { actual = fs.readFileSync(t.pathAbs, "utf8"); } catch {} // an unreadable or missing file leaves actual as null, which is reported as drift below
57
+ if (actual !== expected) { // strict string comparison: any difference at all counts as drift
58
+ drift.push(t.file);
59
+ console.error(`DRIFT: ${t.file} is out of sync with templates/instructions.md`);
60
+ }
61
+ } else {
62
+ fs.writeFileSync(t.pathAbs, expected, "utf8"); // write mode overwrites the target file with the expected text
63
+ console.log(`wrote ${t.file} (from templates/instructions.md, host=${t.host})`);
64
+ }
65
+ }
66
+
67
+ if (check) {
68
+ if (drift.length) {
69
+ console.error(`\n${drift.length} file(s) stale. Run: node bin/compile-instructions.js`);
70
+ return 1; // check mode exits with 1 when at least one file is stale
71
+ }
72
+ console.log("CLAUDE.md + AGENTS.md are in sync with templates/instructions.md");
73
+ return 0;
74
+ }
75
+ return 0; // write mode returns 0 after the loop completes
76
+ }
77
+
78
+ module.exports = { expectedFor, targets, HEADER, CANONICAL };
79
+
80
+ if (require.main === module) { // run the CLI only when this file is executed directly, not when it is required
81
+ process.exit(main(process.argv)); // the value returned by main becomes the process exit code
82
+ }