qualia-framework 6.9.2 → 6.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +8 -5
- package/CHANGELOG.md +208 -0
- package/CLAUDE.md +3 -1
- package/agents/roadmapper.md +16 -14
- package/agents/verifier.md +1 -1
- package/bin/agent-status.js +264 -0
- package/bin/analyze-gate.js +318 -0
- package/bin/branch-hygiene.js +135 -0
- package/bin/command-surface.js +2 -0
- package/bin/compile-instructions.js +82 -0
- package/bin/eval-runner.js +218 -0
- package/bin/host-adapters.js +72 -12
- package/bin/install.js +27 -17
- package/bin/last-report.js +207 -0
- package/bin/project-sync.js +315 -0
- package/bin/report-payload.js +7 -0
- package/bin/runtime-manifest.js +8 -0
- package/bin/state.js +257 -12
- package/bin/verify-panel.js +294 -0
- package/bin/wave-plan.js +211 -0
- package/docs/EMPLOYEE-QUICKSTART.md +3 -3
- package/docs/erp-contract.md +168 -0
- package/docs/qualia-manual.html +5 -5
- package/hooks/branch-guard.js +133 -63
- package/hooks/pre-deploy-gate.js +38 -0
- package/hooks/task-write-guard.js +165 -0
- package/package.json +3 -2
- package/rules/codex-goal.md +28 -26
- package/rules/infrastructure.md +1 -1
- package/skills/qualia/SKILL.md +6 -0
- package/skills/qualia-build/SKILL.md +39 -7
- package/skills/qualia-eval/SKILL.md +83 -0
- package/skills/qualia-feature/SKILL.md +20 -4
- package/skills/qualia-fix/SKILL.md +13 -1
- package/skills/qualia-milestone/SKILL.md +12 -6
- package/skills/qualia-new/REFERENCE.md +6 -4
- package/skills/qualia-new/SKILL.md +27 -15
- package/skills/qualia-plan/SKILL.md +2 -2
- package/skills/qualia-report/SKILL.md +10 -0
- package/skills/qualia-scope/SKILL.md +3 -3
- package/skills/qualia-ship/SKILL.md +37 -4
- package/skills/qualia-update/SKILL.md +100 -0
- package/skills/qualia-verify/SKILL.md +51 -24
- package/templates/instructions.md +32 -0
- package/templates/journey.md +2 -2
- package/templates/project-discovery.md +30 -23
- package/templates/requirements.md +7 -7
- package/tests/agent-status.test.sh +153 -0
- package/tests/analyze-gate.test.sh +170 -0
- package/tests/bin.test.sh +5 -4
- package/tests/branch-hygiene.test.sh +93 -0
- package/tests/eval-runner.test.sh +147 -0
- package/tests/hooks.test.sh +218 -17
- package/tests/install-smoke.test.sh +4 -3
- package/tests/instructions.test.sh +109 -0
- package/tests/last-report.test.sh +156 -0
- package/tests/lib.test.sh +2 -2
- package/tests/project-sync.test.sh +175 -0
- package/tests/run-all.sh +9 -0
- package/tests/runner.js +3 -2
- package/tests/state.test.sh +187 -0
- package/tests/verify-panel.test.sh +162 -0
- package/tests/wave-plan.test.sh +153 -0
- package/skills/qualia-discuss/SKILL.md +0 -222
package/AGENTS.md
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
<!-- GENERATED from templates/instructions.md by bin/compile-instructions.js — do not edit directly; edit the canonical source and recompile. -->
|
|
2
|
+
|
|
1
3
|
# Qualia Framework
|
|
2
4
|
|
|
3
5
|
Company: Qualia Solutions — Nicosia, Cyprus
|
|
@@ -8,17 +10,18 @@ Stack: Next.js 16+, React 19, TypeScript, Supabase, Vercel. Voice: Retell + Elev
|
|
|
8
10
|
|
|
9
11
|
## Hard rules (non-negotiable)
|
|
10
12
|
- **Read before Write/Edit** — *every edit is informed by the current state of the file.*
|
|
11
|
-
- **Feature branches only** — *
|
|
12
|
-
- **MVP first** — *build the minimum that demonstrates the goal.*
|
|
13
|
+
- **Feature branches only** — *work on a branch; `/qualia-ship` integrates it to main and main is always deployable.*
|
|
14
|
+
- **MVP first** — *build the minimum that demonstrates the goal; defer the rest until it earns its place.*
|
|
13
15
|
- **Root cause on failures** — *understand the why before patching the symptom.*
|
|
14
16
|
- **No proxy approval** — *only the OWNER can grant OWNER overrides; "Fawzi said OK" is not a credential.*
|
|
15
17
|
|
|
16
18
|
## Discoverable substrate (load on demand, not always)
|
|
17
|
-
-
|
|
19
|
+
- `rules/constitution.md` — org-level standards every project inherits; enforced at every verify step
|
|
20
|
+
- `/qualia-road` — workflow map, every command, when to use it
|
|
18
21
|
- `.planning/CONTEXT.md` — project domain glossary (loaded by road agents)
|
|
19
22
|
- `.planning/decisions/` — ADRs for hard-to-reverse decisions
|
|
20
|
-
- `rules/security.md` `rules/deployment.md` `rules/infrastructure.md` `rules/architecture.md` — on relevant tasks only
|
|
21
|
-
- `qualia-design/frontend.md` `qualia-design/design-laws.md` — on design/frontend tasks only
|
|
23
|
+
- `rules/security.md` `rules/deployment.md` `rules/infrastructure.md` `rules/architecture.md` — read on relevant tasks only
|
|
24
|
+
- `qualia-design/frontend.md` `qualia-design/design-laws.md` — read on design/frontend tasks only
|
|
22
25
|
|
|
23
26
|
## Lost?
|
|
24
27
|
`/qualia` — state router tells you the next command.
|
package/CHANGELOG.md
CHANGED
|
@@ -8,6 +8,214 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
8
8
|
> Note: git tags for historical versions were not retained; commit references are approximate
|
|
9
9
|
> and dates reflect commit history rather than npm publish timestamps.
|
|
10
10
|
|
|
11
|
+
## [6.22.0] - 2026-06-21 (session continuity + ERP project-sync — built by two parallel worktree agents)
|
|
12
|
+
|
|
13
|
+
Two independent continuity features, built concurrently in isolated git worktrees and integrated together.
|
|
14
|
+
|
|
15
|
+
### Added — B1: `/qualia` surfaces the latest session report at session start (`bin/last-report.js`)
|
|
16
|
+
- Finds the newest `.planning/reports/report-*.md` (filename date desc, mtime tiebreak) and extracts a tight digest: `{ found, file, date, summary, next, age_days }` — summary from the report's `## What Was Done`, next-step from `## Next Steps`, markdown-flattened and capped. `--json`, `--cwd`, `--now ISO` (deterministic age); exit 0 found / 1 none / 2 bad input.
|
|
17
|
+
- Wired into the `/qualia` router "Get State" step: when a project is loaded, the router prints the last-session digest at the TOP of its output, so the operator — or a teammate picking the project up — instantly sees where work was left off. `tests/last-report.test.sh` (28 assertions).
|
|
18
|
+
|
|
19
|
+
### Added — B2: full project-sync reconciliation payload for the ERP (`bin/project-sync.js`)
|
|
20
|
+
- A single deterministic snapshot the ERP can reconcile a whole project from: identity + lifecycle/launched_at, `milestones[]` (closed/current/future + per-milestone REQ-ID completion + phases/tasks/deployed_url), current position, `task_rollup`, `accountability` (offroad), `integration` (the trunk-merge model), and a `schema_version`. **Composes** `project-snapshot.js` (reuses its builders) rather than duplicating or bloating that stable endpoint. `--json`/`--write`/`--pretty`; read-only; graceful on missing JOURNEY/REQUIREMENTS. `tests/project-sync.test.sh` (38 assertions).
|
|
21
|
+
- `docs/erp-contract.md`: new "Project Sync Payload" section — every field, the server-side reconciliation steps, and the PR/merge model (branch → main at ship → deploy; main-push accountability). Explicit **Framework-emits vs ERP-backend-ingests** split.
|
|
22
|
+
- **Remaining backend work (outside this repo):** a `POST /api/v1/project-sync` endpoint + server reconciliation (upsert milestones by num, completion from REQ counts, roll up phases/tasks, store offroad, encode the merge model). The framework emits + documents the payload; it does not POST yet (the ERP team mirrors `project-snapshot.js`'s upload plumbing once the endpoint exists).
|
|
23
|
+
|
|
24
|
+
run-all now 19 suites; both bins in the manifest + install-set; all suites green.
|
|
25
|
+
|
|
26
|
+
## [6.21.0] - 2026-06-21 (work-unit goals on both runtimes — Codex /goal + the Claude Code equivalent)
|
|
27
|
+
|
|
28
|
+
Every defined unit of work should declare one objective + one budget, so it stays anchored and the operator sees burn-vs-budget. The framework had this for Codex (`/goal`) but `rules/codex-goal.md` explicitly told Claude Code to "skip — no equivalent surface." Claude Code DOES have an equivalent (the session task-list + turn budget); this wires it up and broadens goal-setting to every work-unit skill.
|
|
29
|
+
|
|
30
|
+
### Changed — `rules/codex-goal.md` is now a both-runtimes "work-unit goal" rule
|
|
31
|
+
- One shared helper (`codex-goal.js {scope}`, via the host-adapter-rendered `${QUALIA_BIN}`) produces the objective + token budget from STATE.md/ROADMAP.md.
|
|
32
|
+
- **Codex** path unchanged: native `/goal` / `update_goal`.
|
|
33
|
+
- **Claude Code** path (new): set the goal via the harness **task-list** (a tracked task titled with the objective, in_progress→completed) + state the budget in the banner. Same discipline — one named objective + budget per unit — native surface on each runtime.
|
|
34
|
+
|
|
35
|
+
### Changed — goal-setting wired into every work-unit skill
|
|
36
|
+
- Existing blocks in `/qualia-plan`, `/qualia-build`, `/qualia-feature` relabeled from "Codex goal (Codex runtime only)" to runtime-neutral **"Set the work-unit goal."**
|
|
37
|
+
- Added to `/qualia-fix` (scope `quick`/`feature`) and `/qualia-update` (scope `feature` — it runs its own lean loop without `/qualia-plan`, so it needs its own goal). `/qualia-milestone` deliberately omitted — it routes into `/qualia-plan`, which sets the goal (no double-set).
|
|
38
|
+
|
|
39
|
+
No bin or schema change; all 17 suites pass.
|
|
40
|
+
|
|
41
|
+
## [6.20.0] - 2026-06-21 (scope integrity — the roadmap finishes the project, and the team can't drift off it)
|
|
42
|
+
|
|
43
|
+
The deepest fix this cycle. Teams were drifting off-plan — inventing milestones, building features with no link to the roadmap — and the root cause was upstream: **`/qualia-new` under-scoped the project** (a v1 slice capped at 5 milestones, overflow dumped into an unplanned "v2"), so the agreed work literally wasn't in the arc and the team was *forced* to improvise. Two layers: make genesis cover the whole project, then bind the team to it. (Layer 1 shipped in the prior commit; this entry covers the full feature.)
|
|
44
|
+
|
|
45
|
+
### Layer 1 — genesis covers the whole project (commit `feat(genesis)`)
|
|
46
|
+
- **Interview reworked** (`templates/project-discovery.md`): added §9 **capability inventory** (every capability needed for DONE — the whole thing) + §10 **whole-project definition-of-done**; dropped the old §9 "stop at 3–5 chapters" self-cap. Full path 14 → 15 questions, refocused from brand-vibe to functional completeness.
|
|
47
|
+
- **Milestone cap removed** (`agents/roadmapper.md`, `templates/journey.md`, `templates/requirements.md`): the arc spans until the §9 inventory reaches the §10 done-state — as many milestones as needed. `Post-Handoff`/`Out of Scope` holds ONLY explicit client deferrals (§8), never overflow. Handoff optional for internal/ongoing products.
|
|
48
|
+
- **Coverage gate** (`/qualia-new` Step 14): genesis refuses to present a journey that leaves any §9 capability unmapped (0 unmapped before the approval ladder).
|
|
49
|
+
|
|
50
|
+
### Layer 2 — bind the team to the arc (this commit)
|
|
51
|
+
- **Milestone close gates on requirements** (`bin/state.js`): new `MILESTONE_REQS_INCOMPLETE` — close refuses (strict) / warns (standard) when a REQ-ID mapped to the milestone in REQUIREMENTS.md isn't `Complete`. Stops "finishing a milestone with scope still open". New `state.js reqs-check [--milestone N]` exposes the same check (exit 0/1) for `/qualia-milestone` to show coverage before closing.
|
|
52
|
+
- **Off-road work is recorded, not silent** (`bin/state.js` note path): `transition --to note` gains `--scope in|off --ref {REQ/why}`. Off-road work increments `lifetime.offroad_count` and appends to an `offroad[]` ledger (OWNER + ERP visible), mirroring branch-guard's accountability model.
|
|
53
|
+
- **`/qualia-feature` + `/qualia-fix` scope gate**: before building, both check the active milestone. In-scope → proceed, tagged `--scope in`. Off-road → **strict blocks** (route to `/qualia-scope`/`/qualia-milestone` to fold it into the arc) / **standard records** (`--scope off`, counted). The drift vector the user named is now governed at the source.
|
|
54
|
+
|
|
55
|
+
### Tests
|
|
56
|
+
- `tests/state.test.sh`: +5 cases — `reqs-check` (complete/incomplete/milestone-filter/untracked), `--scope off` tally + ledger, `--scope in` no-op, `--force` bypass. 96 state assertions green; all 17 suites pass. (Genesis is prose/templates — validated by the skills + refs suites.)
|
|
57
|
+
|
|
58
|
+
## [6.19.0] - 2026-06-21 (trunk integration — ship is the merge point, report sweeps)
|
|
59
|
+
|
|
60
|
+
Fixes a real lifecycle gap + doc drift: **no skill ever integrated feature → main.** Branches/PRs accumulated with nothing closing them; `/qualia-ship` deployed *from the feature branch* and said "never push to main," so production ran branch code while `main` lagged ("main is always deployable" was false in practice); and three sources disagreed on the policy (the hard rule said "through review", `branch-guard` 6.10 said "accountability not block", `infrastructure.md` still claimed PR-review was enforced). This completes the 6.10 "accountability over block" turn into a coherent trunk model.
|
|
61
|
+
|
|
62
|
+
### Changed — `/qualia-ship` integrates to main, deploys from main, closes the branch
|
|
63
|
+
- New §3: commit → fast-forward-integrate the feature branch into `main` (auto-rebase if `main` moved; STOP on conflict) → push. `branch-guard` records the main push (accountability). §4 deploys from `main` HEAD, so the deployed artifact == `main` byte-for-byte. New §4b deletes the integrated branch on a verified deploy. The normal path now leaves **zero lingering branches/PRs**.
|
|
64
|
+
|
|
65
|
+
### Added — `bin/branch-hygiene.js` + `/qualia-report` sweep (the safety net)
|
|
66
|
+
- `branch-hygiene.js`: read-only clock-out sweep — finds local branches with commits **ahead of `main` that were never shipped** (stranded work) and **stale open PRs** (best-effort via `gh`, skipped when absent). Exit 0 clean / 1 found / 2 not-a-repo; `--json`; library `analyze`. Detects `main` or `master` as base.
|
|
67
|
+
- `/qualia-report` Step 5b runs it so stranded work surfaces to the employee + OWNER at clock-out instead of rotting.
|
|
68
|
+
|
|
69
|
+
### Fixed — policy drift now single-voiced
|
|
70
|
+
- `rules/infrastructure.md`: the stale "main requires PR reviews (enforced by guards)" line replaced with the real model (integrate-at-ship; main pushes allowed + recorded; report sweeps; keep GitHub branch protection off, or switch ship to an auto-merged PR if you re-enable it).
|
|
71
|
+
- Canonical hard rule (`templates/instructions.md` → recompiled `CLAUDE.md`/`AGENTS.md`): "ship through review" → "`/qualia-ship` integrates it to main." Drift guard green.
|
|
72
|
+
|
|
73
|
+
### Tests
|
|
74
|
+
- `tests/branch-hygiene.test.sh` (new, 13 cases): not-a-repo, clean, stranded branch (ahead count + json), ff-merged-no-longer-stranded, `master` base detection, `analyze()` lib. run-all now 17 suites; manifest + `lib.test.sh` install set updated.
|
|
75
|
+
|
|
76
|
+
## [6.18.0] - 2026-06-21 (v7 kernel, step 8 — R7: /qualia-eval lane for AI features)
|
|
77
|
+
|
|
78
|
+
Qualia gates UI and code — `contract-runner` proves the code exists, `verify-panel` proves it's correct — but it had **no gate for the AI artifacts a project builds**. "The chatbot answers the refund question" / "the RAG answer is grounded" / "the agent stays under 2s" is not checkable by a grep. R7 adds the equivalent gate, layered: cheap deterministic assertions first, model judgment only where a model is required.
|
|
79
|
+
|
|
80
|
+
### Added — `bin/eval-runner.js` (layered assertion runner, zero-dependency)
|
|
81
|
+
- Runs an eval suite (JSON — no YAML parser pulled in) of cases against captured AI outputs. **Deterministic assertion types** settled with no model: `contains`, `not_contains`, `equals`, `regex`, `not_regex`, `min_length`, `max_length`, `json_valid`, `json_path` (`equals`/`contains`), `max_latency_ms`, `max_cost_usd`. Outputs inline or via `output_file`.
|
|
82
|
+
- **`llm_rubric`** is the only model-dependent type — it carries a `verdict` (pass|fail) the skill fills by spawning a judge BEFORE the runner (same pattern `verify-panel` uses for skeptic votes). An unjudged rubric is PENDING and **fails** the suite — never a silent pass. Asserting a latency/cost budget with no metric recorded also fails (no silent pass).
|
|
83
|
+
- Exit 0 = all cases pass, 1 = failure/unjudged, 2 = bad input. `--write` emits `.planning/evals/eval-{feature}.json`. Library exports `run`, `runAssertion`, `getPath`.
|
|
84
|
+
|
|
85
|
+
### Added — `/qualia-eval` skill (new active surface)
|
|
86
|
+
- The lane: capture the AI feature's real outputs → spawn one judge per `llm_rubric` (reusing the `qualia-verifier` agent, role-anchored) → `eval-runner.js` settles deterministic assertions + folds in verdicts → gate. Usable standalone (`/qualia-eval suite.json`) or as a phase verify-step gate (`/qualia-eval {N}`), where a FAIL has the same standing as a failing contract. Registered in `command-surface.js` `ACTIVE_SKILLS`.
|
|
87
|
+
|
|
88
|
+
### Tests
|
|
89
|
+
- `tests/eval-runner.test.sh` (new, 19 cases): deterministic pass/fail, latency budget (incl. missing-metric → fail), `json_valid`/`json_path`, `llm_rubric` pass/fail/pending, `output_file` resolution + graceful missing-file, `--write` artifact, `runAssertion`/`getPath` units, malformed→exit 2. run-all now 16 suites; manifest + `lib.test.sh` install set updated; `qualia-eval` passes the skill smoke + refs suites.
|
|
90
|
+
|
|
91
|
+
## [6.17.0] - 2026-06-21 (v7 kernel, step 7 — R16: dependency-derived wave width + --parallel knob)
|
|
92
|
+
|
|
93
|
+
`/qualia-build` spawned EVERY task in a contract "wave" concurrently, with no cap — two failure modes at once: over-serialization (the planner's hand-numbered waves can be deeper than the dependency graph requires) and over-parallelization (a wide wave spawns 9 builders past the 3–5 sweet spot where coordination cost overwhelms the gain — the LangGraph `max_concurrency` lesson). R16 replaces orchestrator guesswork with a deterministic scheduler derived from the task DAG.
|
|
94
|
+
|
|
95
|
+
### Added — `bin/wave-plan.js` (deterministic build scheduler, zero-dependency)
|
|
96
|
+
- Recomputes **minimal-depth waves** from `depends_on` (topological levels = maximal safe parallelism), then splits each level into **batches capped at `max_concurrency`**. Output is an ordered `batches[]` the orchestrator spawns one at a time. Same contract + cap → same schedule.
|
|
97
|
+
- `max_concurrency`: `--parallel N` → exactly N; **auto** (default) → 1 if <3 tasks ("don't parallelize tiny phases"), else 5.
|
|
98
|
+
- Flags **over-serialization** (a task whose declared wave is deeper than the DAG requires — the schedule runs it earlier) and wide-level capping. Cycle in the DAG → exit 1; library exports `deriveLevels`, `resolveConcurrency`, `plan`.
|
|
99
|
+
|
|
100
|
+
### Changed — `/qualia-build` consumes the derived schedule
|
|
101
|
+
- **§2** now runs `wave-plan.js .planning/phase-{N}-contract.json [--parallel K] --json` and spawns the emitted batches in order (not the raw contract `wave` numbers, not all-at-once). New `--parallel K` usage knob.
|
|
102
|
+
- **Batch fan-in barrier:** `agent-status.js` (R2) gains a `barrier --tasks T1,T2` mode that gates on an explicit batch (no contract needed) — required because derived waves needn't match the contract's declared wave numbers, so the per-wave barrier would mismatch. The build now barriers per batch, keeping R16 + R2 coherent.
|
|
103
|
+
|
|
104
|
+
### Tests
|
|
105
|
+
- `tests/wave-plan.test.sh` (new, 23 cases): chain/independent/tiny/diamond DAGs, auto vs `--parallel` cap, wide-level batching, over-serialization flag, cycle→exit 1, `deriveLevels`/`resolveConcurrency` units. `tests/agent-status.test.sh` +3 `barrier --tasks` cases. run-all now 15 suites; manifest + `lib.test.sh` install set updated.
|
|
106
|
+
|
|
107
|
+
## [6.16.0] - 2026-06-21 (v7 kernel, step 6 — R8: verifier panel + adversarial skeptics)
|
|
108
|
+
|
|
109
|
+
A single LLM judge is adversarially fragile — the literature reports that a single stray token can trigger ~35% false positives, and that self-grading bias hides ~70% of findings. `/qualia-verify` was a single cooperative verifier with an optional second pass. R8 replaces it with a **panel** (one verifier per lens) + **per-finding skeptics** (majority-survives), and — crucially — makes the SURVIVE/KILL and PASS/FAIL decision **deterministic math**, not another LLM judgment.
|
|
110
|
+
|
|
111
|
+
### Added — `bin/verify-panel.js` (deterministic aggregator, zero-dependency)
|
|
112
|
+
- **`aggregate(panel)`**: dedupes findings across lenses (same `file:line:title` → one finding; highest severity wins, lenses union, votes sum), applies **majority-survives** (a finding is killed only when a strict majority of its skeptics vote it not-real — ties and unvoted findings survive: unverified ≠ disproven), and computes category + per-lens scores via the **`rules/grounding.md` formula** (`5 − floor(weighted_sum/8)`). The verdict is FAIL iff any CRITICAL/HIGH finding survives. Exit 0 = PASS, 1 = FAIL.
|
|
113
|
+
- **`assemble <phase>`**: globs the per-lens `phase-{N}-panel-{lens}.json` files into one `phase-{N}-panel.json` skeleton (votes zeroed) so the orchestrator never hand-builds the panel.
|
|
114
|
+
- `--write` emits `.planning/phase-{N}-verification-panel.{json,md}`. Library exports (`aggregate`, `dedupeFindings`, `survives`, `scoreFromCounts`, `assemble`) for reuse.
|
|
115
|
+
|
|
116
|
+
### Changed — `/qualia-verify` is now panel-based
|
|
117
|
+
- **§3 Panel:** spawns one `qualia-verifier` per *relevant* lens (correctness always; security/performance/design by what the phase touches — cost scales to risk, not a flat 4×), in parallel, each anchored on the same contract-run + harness-eval evidence as shared ground truth, each emitting structured findings JSON.
|
|
118
|
+
- **§3c Skeptics + aggregation:** assemble → 3 skeptics per CRITICAL/HIGH finding (5 with `--adversarial`/Handoff/security lens), each prompted to *refute* with evidence → tally votes → `verify-panel.js` produces the verdict. MEDIUM/LOW auto-survive (documented cost bound, not a silent cap). The old single-verifier + adversarial-second-pass sections are replaced.
|
|
119
|
+
- **§4:** the phase is PASS only if the panel verdict, harness-eval, AND anti-slop all agree. Reuses the existing `qualia-verifier` agent (lens/skeptic are prompt modes — no new agent registration).
|
|
120
|
+
|
|
121
|
+
### Tests
|
|
122
|
+
- `tests/verify-panel.test.sh` (new, 28 cases): empty→PASS, surviving CRITICAL→FAIL, skeptic-killed→PASS, tie/no-vote survive, cross-lens dedupe (severity-max + vote-sum + lens-union), grounding-formula scores, MEDIUM/LOW-only→PASS, `--write` artifacts, `assemble` round-trip, malformed→exit 2. Registered in `run-all.sh` (now 14 suites); `lib.test.sh` trust-score install set carries `verify-panel.js`.
|
|
123
|
+
|
|
124
|
+
## [6.15.0] - 2026-06-21 (v7 kernel, step 5 — R4+R5: single-source the dual-runtime surface)
|
|
125
|
+
|
|
126
|
+
Dual-runtime drift is the #1 risk of supporting both Claude Code and Codex. `CLAUDE.md` and `AGENTS.md` were hand-maintained twins — and they *had already drifted* (the MVP-first line and the substrate list differed between them). This batch makes that class of bug unmergeable: one canonical source, compiled per host, with a drift guard in CI.
|
|
127
|
+
|
|
128
|
+
### Added — R4: one canonical instruction source, compiled to both files
|
|
129
|
+
- **`templates/instructions.md`** is now the single source of truth. `CLAUDE.md` and `AGENTS.md` are **generated artifacts** (committed, like a lockfile) carrying a `GENERATED` header.
|
|
130
|
+
- **`bin/compile-instructions.js`** compiles the canonical into both files. `--check` mode is the **drift guard**: it exits non-zero if either committed file is stale, making "edited one twin, forgot the other" impossible to merge. Wired into the test suite + an `npm run compile:instructions` script.
|
|
131
|
+
- Host-specific content uses conditional blocks (`<!--QUALIA-HOST claude-->…<!--/QUALIA-HOST-->`): the Claude file keeps the Pocock budget note, the Codex file keeps the cross-vendor (Cursor/Continue/Aider/Devin) note — the **body is byte-identical**, only the footer differs. The pre-existing drift is resolved (AGENTS.md regained the full MVP-first line + the constitution substrate entry).
|
|
132
|
+
|
|
133
|
+
### Changed — R5: `host-adapters.js` is now the single per-host contract
|
|
134
|
+
- The adapter is the **one place** anything runtime-specific is declared: `instructionFile`, `configFile`, `agentDir`, `agentExt`, and the Claude→Codex `naming` map (lifted out of a hardcoded swap buried in `renderText`). Nothing else branches on runtime — callers ask the adapter. A third runtime becomes one `HOSTS` entry, not a grep-and-patch.
|
|
135
|
+
- Render pipeline split into composable stages: `applyNaming` (display-string swaps) + `applyPaths` (`${QUALIA_*}` tokens + `.claude→.codex`), with `renderText = applyPaths∘applyNaming` (unchanged public behavior) and `compileInstructions = applyNaming∘stripHostBlocks` (naming + blocks, paths/`{{ROLE}}` left for install).
|
|
136
|
+
- **`install.js`** now routes both instruction files through `adapter(host).instructionFile` and renders `AGENTS.md` with `codexText()` — a **latent bug fix**: CLAUDE.md always got token/path rendering, AGENTS.md never did, so any `${QUALIA_*}`/`.claude/` reference in the Codex file would have shipped unresolved.
|
|
137
|
+
|
|
138
|
+
### Tests
|
|
139
|
+
- `tests/instructions.test.sh` (new, 25 cases): drift guard passes on HEAD + fails on an uncompiled canonical edit (with restore); CLAUDE/AGENTS bodies identical + host-specific footers preserved; adapter contract facts; `stripHostBlocks` keep/drop per host; `compileInstructions` swaps naming but leaves tokens/`{{ROLE}}`; `renderText` path regression. Registered in `run-all.sh` (now 13 suites). End-to-end install verified: both files render with the role substituted and correct footers.
|
|
140
|
+
|
|
141
|
+
## [6.14.0] - 2026-06-20 (v7 kernel, step 4 — R3: the cross-artifact analyze gate)
|
|
142
|
+
|
|
143
|
+
Spec-Kit's most-copied feature, ported. Qualia validated each artifact in isolation — `plan-contract.js` proves the contract is internally well-formed, `harness-eval` scores the built phase — but **nothing diffed scope ↔ plan**. That's exactly where a junior's idea silently loses intent: the scope asks for X, the plan quietly drops it, and no deterministic check notices. This adds that check, between plan and build.
|
|
144
|
+
|
|
145
|
+
### Added — `bin/analyze-gate.js` (deterministic, zero-LLM, zero-dependency)
|
|
146
|
+
- Diffs the plan contract against **intent**: scope acceptance criteria (`.planning/phase-{N}-context.md` `## Acceptance Criteria`) and the CONTEXT.md glossary. Pure keyword/token coverage — same inputs → same output.
|
|
147
|
+
- **Four checks:** (1) *uncovered scope AC* — a scope requirement whose key terms don't appear in the contract (HIGH; the plan dropped it); (2) *orphan success criterion* — a contract success criterion no task covers (MEDIUM); (3) *glossary violation* — the plan uses a term CONTEXT.md lists under `Avoid:` (MEDIUM; a genuine spec↔plan contradiction); (4) *scope-reduction language* in task actions/ACs (HIGH; reuses `plan-contract.findScopeReductionPhrases`).
|
|
148
|
+
- CLI `analyze-gate.js <phase>` auto-discovers contract + scope + CONTEXT.md; `--json` for machine output. Exit 0 = clean, 1 = findings, 2 = invocation error. Library exports (`analyze`, `coverage`, `parseScopeAcceptanceCriteria`, `parseGlossaryBannedTerms`) for reuse.
|
|
149
|
+
|
|
150
|
+
### Changed — wired into `qualia-build` as the plan→build gate (§1a), profile-aware
|
|
151
|
+
- Runs `analyze-gate.js {N}` before any build. **strict** profile → a HIGH finding is a stop (route to `/qualia-plan --gaps` or `/qualia-scope`); **standard** → surface + proceed with an explicit ack and a logged waiver. **No scope file = scope-coverage skipped, not a failure** — scope-less phases and `/qualia-feature` trivia still build.
|
|
152
|
+
- Deliberately **not** a `state.js` hard precondition: scope files are optional, so gating the `built` transition there would brick builds for projects that never ran `/qualia-scope`. The gate lives at the skill seam and degrades gracefully.
|
|
153
|
+
- Shipped into installed `bin/` via `runtime-manifest.js`.
|
|
154
|
+
|
|
155
|
+
### Tests
|
|
156
|
+
- `tests/analyze-gate.test.sh` (new, 21 cases): clean pass, under-covered scope AC, orphan success criterion, glossary violation, no-scope skip, missing-contract error, `coverage()` overlap/disjoint units, AC-parser label-strip + section-boundary. Registered in `run-all.sh` (now 12 suites). `lib.test.sh` trust-score install set carries `analyze-gate.js`.
|
|
157
|
+
|
|
158
|
+
## [6.13.0] - 2026-06-20 (v7 kernel, step 3 — enforce what you already declare: 3 missing primitives)
|
|
159
|
+
|
|
160
|
+
The v7 brief's loudest meta-theme is **"enforce, don't just declare"** — every static contract Qualia already computes should have a runtime gate. This batch wires the three primitives the research flagged as genuinely missing (R1, R2 + the slop-gate quick win), turning prose instructions and plan-time checks into deterministic, hook-governed guardrails. No new dependencies; all 11 suites green.
|
|
161
|
+
|
|
162
|
+
### Added — R1: runtime plan-contract file-scope guard (`hooks/task-write-guard.js`)
|
|
163
|
+
- `plan-contract.js` proves file-disjointness across parallel tasks at **plan** time, but nothing stopped a builder writing outside its declared set at **run** time — the documented #1 cross-wave-conflict + AI-entropy vector. New PreToolUse `Edit|Write` hook closes it.
|
|
164
|
+
- **Scoped:** a no-op unless a build is in flight (≥1 `RUNNING` entry in `.agent-status/` — the R2 signal), so it never interferes with the orchestrator, verifier, or ordinary editing. During a build it **blocks any write to a path not declared by some task** in the active phase contract (`files_modify ∪ files_create`). `.planning/` and `.agent-status/` are always writable. **Fails open** on any error; OWNER escape `QUALIA_ALLOW_OUTSIDE_CONTRACT=1`.
|
|
165
|
+
- **Honest limitation (documented in-file):** Claude Code gives a stateless hook no task identity, so the hook enforces "declared by SOME task", not "by THIS task" — plan-time disjointness + the builder's `<wave_context>` prompt cover the residual gap. Registered in `install.js` (`QUALIA_HOOK_SET` + the `Edit|Write` block); hook count 14 → 15.
|
|
166
|
+
|
|
167
|
+
### Added — R2: machine-readable per-task status + wave fan-in barrier (`bin/agent-status.js`)
|
|
168
|
+
- `contract-runner.js` only checked exit codes; wave completion relied on the orchestrator LLM **reading** each builder's "DONE/BLOCKED/PARTIAL" prose. New helper persists each builder's outcome to `.agent-status/<task>.json` (`RUNNING | DONE | BLOCKED | PARTIAL` + commit hash) — the parallel-worktrees convention.
|
|
169
|
+
- CLI: `write` / `read` / `list` / `clear`, plus `barrier <contract.json> [--wave W]` which **exits 0 ⇔ every expected task is DONE** (a pollable barrier, not "did the model notice"). Task ids validated against `^T\d+$` (rejects path traversal). `buildActive()` export is the signal R1 keys off.
|
|
170
|
+
- `qualia-build`: builders now write `RUNNING` at start + `DONE/BLOCKED/PARTIAL` at end; the orchestrator runs the barrier after each wave (a `BLOCKED`/`PARTIAL` task holds the wave) and clears scratch at completion. Shipped into installed `bin/` via `runtime-manifest.js`.
|
|
171
|
+
|
|
172
|
+
### Added — slop-detect wired into the verify + ship gates (quick win)
|
|
173
|
+
- The anti-slop scanner (`bin/slop-detect.mjs`) already existed and exits non-zero on CRITICAL design tells; this adds the **gate wiring** (same role as `migration-guard`/`branch-guard`). `pre-deploy-gate.js` re-runs it at `vercel --prod` time as the hard, non-bypassable block (CRITICAL → deploy refused). Skipped silently when the scanner isn't installed (brownfield/older installs); OWNER-only `QUALIA_SKIP_SLOP=1` escape mirrors `QUALIA_SKIP_LINT`. Surfaced in `qualia-verify` (CRITICAL = verification FAIL) and `qualia-ship` Quality Gates.
|
|
174
|
+
|
|
175
|
+
### Tests
|
|
176
|
+
- `tests/agent-status.test.sh` (new, 24 cases): round-trip, validation/traversal rejection, wave + phase barriers, BLOCKED-holds, list/clear, `buildActive`. Registered in `run-all.sh`.
|
|
177
|
+
- `tests/hooks.test.sh`: +10 `task-write-guard` cases (idle no-op, declared allow, undeclared block, `files_create`, framework-path exemptions, escape hatch, absolute paths, quiet-after-DONE, fail-open) and +4 `pre-deploy-gate` anti-slop cases (gradient block, clean pass, OWNER-only skip, graceful skip when absent).
|
|
178
|
+
- Hook-count assertions bumped 14 → 15 (`install-smoke`, `bin`); `runner.js` install test corrected to 15; `lib.test.sh` trust-score install set carries `agent-status.js`.
|
|
179
|
+
|
|
180
|
+
## [6.12.0] - 2026-06-20 (v7 kernel, step 2 — lifecycle: build → operate, the forced handoff is gone)
|
|
181
|
+
|
|
182
|
+
The v7.0-defining change the redesign brief called out as #1 (Section 6, "the change you raised"): a project that has **launched** should stop being a milestone-journey dragged to a Handoff, and become an **update stream**. This is the v7 thesis in miniature — a behavior hard-coded in prose ("the final milestone is always Handoff, never negotiable") is now a **branch on explicit state**.
|
|
183
|
+
|
|
184
|
+
### Added — a `lifecycle: "build" | "operate"` dimension to the state machine
|
|
185
|
+
- **`bin/state.js`:** new projects default to `lifecycle: "build"` (unchanged behavior). New `state.js launch [--deployed-url U] [--source erp|manual]` flips a project to `"operate"`, stamping `launched_at` + `launch_source` (idempotent — relaunching is a no-op). This is the discrete, **ERP-drivable** "is_live" event (the ERP can call it when it detects a project is live), so "launched" is state, not a milestone the team must mislabel as handoff.
|
|
186
|
+
- **The forced-handoff funnel is now lifecycle-gated:** `checkPreconditions` requires `HANDOFF.md` for `handed_off` **only in `build` mode**. An `operate` project can complete without ever producing a handoff. (Build mode is byte-for-byte unchanged — verified by a regression test.)
|
|
187
|
+
- **`nextCommand` is lifecycle-aware:** in `operate`, a final-phase `verified(pass)` routes to `/qualia-update` (not `/qualia-polish → ship → handoff`); a `verified(pass)` on the last phase increments `lifetime.updates_completed` (the operate analogue of closing a milestone). `cmdCheck` now surfaces `lifecycle` + `launched_at`.
|
|
188
|
+
- **`/qualia-update` skill (new):** the operate-mode counterpart to `/qualia-milestone` — a lean plan → build → verify → ship loop with no milestone/handoff machinery. Added to `ACTIVE_SKILLS`.
|
|
189
|
+
- **ERP contract:** `bin/report-payload.js` now sends `lifecycle` (+ `launched_at`/`launch_source` when set) so the ERP counts updates vs milestones and stops expecting a handoff for a live product. `templates/journey.md` rule 3 demoted from "the final milestone is always Handoff, never negotiable" to a **build-mode convention**.
|
|
190
|
+
- **Tests:** 7 new `state.test.sh` cases (build default; launch→operate stamping + routing; idempotent launch; operate verified→`/qualia-update` + `updates_completed` bump; **build mode still requires HANDOFF.md**; operate mode allows `handed_off` without it). All 10 suites green (89 state cases).
|
|
191
|
+
|
|
192
|
+
> Verification evidence (6.10.0) stays fully in force in operate mode — an update's contract still runs and its evidence must be clean to PASS. Lifecycle relaxes the *handoff* requirement, never the *evidence* requirement.
|
|
193
|
+
|
|
194
|
+
## [6.11.0] - 2026-06-20 (main-push: accountability instead of a block)
|
|
195
|
+
|
|
196
|
+
Owner policy change: an employee pushing to `main`/`master` is **no longer blocked** — it is **counted**. The framework records each employee main-push locally (per-employee running total) and reports it to the ERP as a policy-event the OWNER can see, plus a visible on-push notice. OWNER pushes are unaffected and silent.
|
|
197
|
+
|
|
198
|
+
### Changed — `hooks/branch-guard.js` is now allow-and-record, never block
|
|
199
|
+
- Was: `fail()`/exit 2 (BLOCK) on any non-OWNER push to a protected branch. Now: detects the same protected-branch pushes (current branch **and** `<src>:main` refspec), records an `employee_main_push` event to `~/.claude/.main-push-events.json` (`{counts:{<actor>:{total}}, events:[…]}`, mode 0600), enqueues it to the ERP `/api/v1/policy-events` endpoint via `erp-retry.js` (idempotent, `client_report_id` = `QS-MAINPUSH-<actor>-<count>`), prints a non-blocking NOTICE, and **exits 0**. Mirrors the existing `fawzi-approval-guard` model. The hook no longer blocks on missing/malformed config either — it never blocks.
|
|
200
|
+
- **ERP side:** `docs/erp-contract.md` now documents the `employee_main_push` event type (stored by `(type, actor_code)` for a per-employee tally, `branch` field replaces `sample`).
|
|
201
|
+
- Docs updated to the new reality (`EMPLOYEE-QUICKSTART.md`, `docs/qualia-manual.html`, installer comments + the push hook's status message now reads "Recording branch activity").
|
|
202
|
+
- Tests rewritten (`tests/hooks.test.sh`): employee main-push → exit 0 + recorded + count increments + notice; refspec `:main` → recorded; feature-branch / OWNER / missing-config → allowed, not recorded. All 10 suites green.
|
|
203
|
+
|
|
204
|
+
> Trade-off (by owner decision): `branch-guard` is no longer a hard gate on `main`; "main is always deployable" now rests on review discipline + the visible per-employee tally rather than a block. `git-guardrails` still blocks genuinely destructive pushes (force-push to main, branch -D).
|
|
205
|
+
|
|
206
|
+
## [6.10.0] - 2026-06-20 (v7 kernel, step 1 — verification evidence is no longer optional)
|
|
207
|
+
|
|
208
|
+
Closes the headline finding of the v7 redesign brief: **the verification gate could PASS with zero evidence.** This was real — verified on disk against 6.9.2. The fix is the first incremental step of the v7 "invariants in an enforceable kernel, not prose" thesis, shipped on the 6.x line (main stays deployable).
|
|
209
|
+
|
|
210
|
+
### Fixed — a phase can no longer reach `verified(pass)` without machine evidence
|
|
211
|
+
- **Root cause:** the enforcement machinery already existed (`contract-runner.js`, `state.js checkMachineEvidence`), but it was *bypassable by omission*. `checkMachineEvidence` only engages when `phase-N-contract.json` exists (`state.js:1024` returned `{ok:true}` when absent), and nothing forced a contract — the `planned` precondition checked the plan file but never the contract. So the common no-contract path fell through to the prose verifier.
|
|
212
|
+
- **Structural fix (`bin/state.js`):** the `planned` precondition now requires a valid `phase-N-contract.json` (exists + parseable + non-empty `tasks[]`), failing `MISSING_CONTRACT`/`INVALID_CONTRACT` otherwise. Because every planned phase now has a contract, `checkMachineEvidence` always engages at the `verified` gate — machine evidence (`evidence/phase-N-contract-run.json` with `ok:true`) becomes mandatory for PASS. "I built it" is no longer sufficient; "the contract ran clean" is required. (Implements the 2026-05-22 harness audit's Finding 3.)
|
|
213
|
+
- **Defense-in-depth (`agents/verifier.md`):** the design-rubric instruction "Default to 3 unless evidence supports otherwise" — which *contradicted* `rules/grounding.md` ("Score without evidence = 0") — is replaced. An unevidenced dimension is now `INSUFFICIENT EVIDENCE` and scores 1 (FAIL). A verifier that runs no checks and writes "3" across the board now produces a FAIL, and `state.js:983` already refuses PASS on that literal.
|
|
214
|
+
- **Tests (`tests/state.test.sh`):** `make_valid_plan` now emits a contract + passing evidence by default so happy-path setups satisfy the new preconditions; the two cases that exercise *absence* (missing-evidence guard, `--require-contract` missing) strip them explicitly. All 10 suites green.
|
|
215
|
+
|
|
216
|
+
### Removed — dead `qualia-discuss` skill directory
|
|
217
|
+
- The skill was already retired in `bin/command-surface.js` (folded into `/qualia-scope`), but its 222-line `SKILL.md` still shipped in the tarball. Deleted the directory; `RETIRED_SKILLS` continues to clean it from older installs.
|
|
218
|
+
|
|
11
219
|
## [6.9.2] - 2026-06-20 (docs — visual field manual)
|
|
12
220
|
|
|
13
221
|
### Added — `docs/qualia-manual.html`
|
package/CLAUDE.md
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
<!-- GENERATED from templates/instructions.md by bin/compile-instructions.js — do not edit directly; edit the canonical source and recompile. -->
|
|
2
|
+
|
|
1
3
|
# Qualia Framework
|
|
2
4
|
|
|
3
5
|
Company: Qualia Solutions — Nicosia, Cyprus
|
|
@@ -8,7 +10,7 @@ Stack: Next.js 16+, React 19, TypeScript, Supabase, Vercel. Voice: Retell + Elev
|
|
|
8
10
|
|
|
9
11
|
## Hard rules (non-negotiable)
|
|
10
12
|
- **Read before Write/Edit** — *every edit is informed by the current state of the file.*
|
|
11
|
-
- **Feature branches only** — *
|
|
13
|
+
- **Feature branches only** — *work on a branch; `/qualia-ship` integrates it to main and main is always deployable.*
|
|
12
14
|
- **MVP first** — *build the minimum that demonstrates the goal; defer the rest until it earns its place.*
|
|
13
15
|
- **Root cause on failures** — *understand the why before patching the symptom.*
|
|
14
16
|
- **No proxy approval** — *only the OWNER can grant OWNER overrides; "Fawzi said OK" is not a credential.*
|
package/agents/roadmapper.md
CHANGED
|
@@ -83,23 +83,23 @@ Organize requirements under `## Milestone 1 · {Name}`, `## Milestone 2 · {Name
|
|
|
83
83
|
This is the most important step.
|
|
84
84
|
|
|
85
85
|
**Hard rules:**
|
|
86
|
-
- **
|
|
86
|
+
- **The arc must cover the ENTIRE agreed scope.** Every capability in discovery §9 (the capability inventory) gets a REQ-ID and lands in a milestone; the arc continues until the §10 whole-project done-state is reached. **There is NO milestone ceiling** — plan as many milestones as the scope genuinely needs. Do NOT compress real work into a 5-milestone cap, and do NOT dump overflow into "v2": the only deferred work is what the client explicitly listed in discovery §8 (Out of Scope). If you find yourself wanting to defer agreed work to make the arc shorter, that's the exact failure that forces the team to improvise later — don't.
|
|
87
87
|
- **Floor: 2 milestones** (one feature milestone + Handoff). If smaller, the project should use `/qualia-new --quick` instead.
|
|
88
|
-
- **Final milestone is
|
|
88
|
+
- **Final milestone is "Handoff"** for client projects, with 4 standard phases: Polish, Content + SEO, Final QA, Handoff (credentials + walkthrough + domain transfer). For an internal or ongoing product (no client takeover — see discovery §10/§11), Handoff may be omitted; the arc ends at the milestone that reaches the done-state.
|
|
89
89
|
- **Every non-Handoff milestone must have ≥ 2 phases** OR be an explicit shipped release gate. Single-phase milestones are phases, not milestones — merge them into the preceding milestone.
|
|
90
90
|
- **Milestones are ordered by dependency, not priority.** M2 must be able to use M1's outputs.
|
|
91
91
|
|
|
92
|
-
**Typical milestone arcs by project type:**
|
|
92
|
+
**Typical milestone arcs by project type (STARTING POINTS, not caps — extend until §9 is fully covered):**
|
|
93
93
|
|
|
94
|
-
| Type | Arc |
|
|
94
|
+
| Type | Arc (minimum shape — add milestones as the capability set requires) |
|
|
95
95
|
|---|---|
|
|
96
|
-
| Landing / marketing |
|
|
97
|
-
| SaaS / dashboard |
|
|
98
|
-
| Voice / AI agent |
|
|
99
|
-
| Mobile app |
|
|
100
|
-
| Multi-tenant platform |
|
|
96
|
+
| Landing / marketing | Foundation → Handoff |
|
|
97
|
+
| SaaS / dashboard | Foundation → Core Features → Admin & Reporting → … → Handoff |
|
|
98
|
+
| Voice / AI agent | Foundation → Core Flow → Integrations → … → Handoff |
|
|
99
|
+
| Mobile app | Foundation → Core → Offline & Notifications → Store Prep → … → Handoff |
|
|
100
|
+
| Multi-tenant platform | Foundation → Core → Admin → Scale → … → Handoff |
|
|
101
101
|
|
|
102
|
-
Use the research SUMMARY.md as your
|
|
102
|
+
These are floors, not ceilings. Use the research SUMMARY.md and the §9 capability inventory as your real input — the table just shows the smallest sensible shape. Don't force-fit it; a project with 30 agreed capabilities will have more milestones than one with 6, and that is correct.
|
|
103
103
|
|
|
104
104
|
**For each milestone:**
|
|
105
105
|
- **Name** — short and evocative (e.g., "Core Feature Loop", not "Phase 2 Work")
|
|
@@ -124,11 +124,13 @@ For each phase in the milestone(s) you're detailing:
|
|
|
124
124
|
### 5. Validate Coverage
|
|
125
125
|
|
|
126
126
|
Before writing, verify:
|
|
127
|
-
- [ ] Every
|
|
128
|
-
- [ ] Every
|
|
127
|
+
- [ ] **Every capability in discovery §9 has a REQ-ID** (the whole inventory, not a v1 slice)
|
|
128
|
+
- [ ] Every requirement maps to exactly one milestone, and **every §9 capability is covered by some milestone** — nothing agreed is left unplanned
|
|
129
|
+
- [ ] The final milestone reaches the discovery §10 whole-project done-state
|
|
130
|
+
- [ ] The ONLY items in `Post-Handoff (v2)` / `Out of Scope` are the ones the client explicitly deferred in discovery §8 (no overflow from a milestone cap — there is no cap)
|
|
129
131
|
- [ ] Every milestone has ≥ 2 phases (except Handoff which has the fixed 4)
|
|
130
|
-
- [ ]
|
|
131
|
-
- [ ] Final milestone is
|
|
132
|
+
- [ ] Floor met: ≥ 2 milestones total (no upper bound)
|
|
133
|
+
- [ ] Final milestone is "Handoff" with the 4 standard phases (client projects), or the done-state milestone (internal/ongoing products)
|
|
132
134
|
- [ ] No milestone depends on a later milestone
|
|
133
135
|
- [ ] Milestone 1 has full phase-level detail (goals + success criteria) ready for `/qualia-plan 1`
|
|
134
136
|
- [ ] If `full_detail=false` (default): M2..M{N-1} have phase names + one-line goals (sketch, not full detail)
|
package/agents/verifier.md
CHANGED
|
@@ -176,7 +176,7 @@ If exit code is 1 (critical findings present), the phase FAILS. Quote the findin
|
|
|
176
176
|
|
|
177
177
|
### Step B — Design rubric scoring (9 dimensions)
|
|
178
178
|
|
|
179
|
-
Apply `qualia-design/design-rubric.md`. Score 1-5 per dimension WITH evidence
|
|
179
|
+
Apply `qualia-design/design-rubric.md`. Score 1-5 per dimension WITH cited `file:line` evidence in the Evidence column. **Do NOT default to a passing score.** A dimension you cannot back with evidence is `INSUFFICIENT EVIDENCE` and scores **1 (FAIL)** — the lowest value on this 1-5 scale, which is how `rules/grounding.md`'s "Score without evidence = 0" applies here. A verifier that runs no checks and writes "3" across the board must produce a FAIL, not a PASS. Never write a score you cannot cite.
|
|
180
180
|
|
|
181
181
|
Scoped by phase scope:
|
|
182
182
|
- Component-only phase → score Typography, Color cohesion, States, Motion intent, Microcopy, Container depth, and Visual system & graphics when the component owns a primary visual (skip Layout originality and Spatial rhythm when those are page-level concerns)
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// agent-status.js — machine-readable per-task build status + fan-in barrier.
|
|
3
|
+
//
|
|
4
|
+
// The gap this closes: `qualia-build` spawns one subagent per task and the
|
|
5
|
+
// orchestrator LLM has to *read* each builder's "DONE/BLOCKED/PARTIAL" prose to
|
|
6
|
+
// know a wave finished. That's not a barrier — it's the model noticing. This
|
|
7
|
+
// helper persists each builder's outcome to `.agent-status/<task>.json` so wave
|
|
8
|
+
// completion gates on a deterministic, pollable signal (the parallel-worktrees
|
|
9
|
+
// convention) and drives a live wave view.
|
|
10
|
+
//
|
|
11
|
+
// Builder convention (see skills/qualia-build):
|
|
12
|
+
// 1. At task start: agent-status.js write <task> RUNNING --phase N --wave W
|
|
13
|
+
// 2. At task end: agent-status.js write <task> DONE --commit <hash>
|
|
14
|
+
// (or BLOCKED / PARTIAL with a --note)
|
|
15
|
+
// Orchestrator after spawning a wave:
|
|
16
|
+
// agent-status.js barrier <contract.json> --wave W # exit 0 ⇔ all DONE
|
|
17
|
+
//
|
|
18
|
+
// Zero npm dependencies. Library + tiny CLI.
|
|
19
|
+
|
|
20
|
+
const fs = require("fs");
|
|
21
|
+
const path = require("path");
|
|
22
|
+
const pc = require("./plan-contract.js");
|
|
23
|
+
|
|
24
|
+
// Directory name, joined onto the project root, that holds one `<task>.json`
// status file per task.
const STATUS_DIR = ".agent-status";

// The only status values writeStatus accepts. Insertion order is also the
// order in which they are listed in the "invalid status" error message.
const STATUSES = new Set(["RUNNING", "DONE", "BLOCKED", "PARTIAL"]);
|
|
26
|
+
|
|
27
|
+
/**
 * Resolve the status directory for a project.
 *
 * @param {string} root - Project root directory.
 * @returns {string} `root` joined with STATUS_DIR.
 */
function statusDir(root) {
  const dir = path.join(root, STATUS_DIR);
  return dir;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Check that a value is a well-formed task id: the letter `T` followed by one
 * or more digits and nothing else.
 *
 * The id becomes part of a file name under the status directory, so anything
 * that is not exactly this shape (path separators, `..`, trailing text) is
 * rejected before it can be used to build a path.
 *
 * @param {*} task - Candidate task id.
 * @returns {boolean} True only for strings matching ^T\d+$.
 */
function isTaskId(task) {
  if (typeof task !== "string") {
    return false;
  }
  return /^T\d+$/.test(task);
}
|
|
36
|
+
|
|
37
|
+
/**
 * Build the path of a task's status file: `<status dir>/<task>.json`.
 *
 * This function does not validate `task`; callers are responsible for
 * checking the id before using the returned path.
 *
 * @param {string} root - Project root directory.
 * @param {string} task - Task id used as the file's base name.
 * @returns {string} Path of the task's JSON status file.
 */
function statusFile(root, task) {
  const fileName = `${task}.json`;
  return path.join(statusDir(root), fileName);
}
|
|
40
|
+
|
|
41
|
+
/**
 * Persist one task's status record as pretty-printed JSON.
 *
 * @param {string} root - Project root directory; the status directory is
 *   created beneath it if needed.
 * @param {object} entry - Status fields.
 * @param {string} entry.task - Task id; must satisfy isTaskId.
 * @param {string} entry.status - One of STATUSES (case-insensitive; stored
 *   upper-cased).
 * @param {string} [entry.commit] - Commit hash; stored as null when falsy.
 * @param {string} [entry.note] - Free-text note; stored as null when falsy.
 * @param {string|number} [entry.phase] - Converted with Number(); null when
 *   absent.
 * @param {string|number} [entry.wave] - Converted with Number(); null when
 *   absent.
 * @param {string} [entry.now] - Timestamp override; defaults to the current
 *   time as an ISO string.
 * @returns {object} The record that was written.
 * @throws {Error} When the task id or the status is invalid.
 */
function writeStatus(root, entry) {
  const taskId = entry.task;
  if (!isTaskId(taskId)) {
    throw new Error(`invalid task id: ${taskId} (must match ^T\\d+$)`);
  }

  const status = String(entry.status || "").toUpperCase();
  if (!STATUSES.has(status)) {
    throw new Error(`invalid status: ${entry.status} (must be ${[...STATUSES].join("|")})`);
  }

  fs.mkdirSync(statusDir(root), { recursive: true });

  // Absent (null/undefined) numeric fields are stored as null, not NaN.
  const numberOrNull = (value) => (value != null ? Number(value) : null);

  const record = {
    task: taskId,
    status,
    commit: entry.commit || null,
    note: entry.note || null,
    phase: numberOrNull(entry.phase),
    wave: numberOrNull(entry.wave),
    updated_at: entry.now || new Date().toISOString(),
  };

  const serialized = JSON.stringify(record, null, 2) + "\n";
  fs.writeFileSync(statusFile(root, taskId), serialized);
  return record;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Read one task's persisted status record.
 *
 * The task id is validated with the same rule writeStatus uses, so an id such
 * as `../../package` cannot be used to read a `.json` file outside the status
 * directory. An invalid id is reported the same way as a missing record.
 *
 * @param {string} root - Project root directory.
 * @param {string} task - Task id; must satisfy isTaskId.
 * @returns {object|null} The parsed record, or null when the id is invalid or
 *   the file is missing, unreadable, or not valid JSON.
 */
function readStatus(root, task) {
  if (!isTaskId(task)) {
    return null;
  }
  try {
    return JSON.parse(fs.readFileSync(statusFile(root, task), "utf8"));
  } catch {
    // Deliberate best-effort: "no usable record" is a normal outcome here.
    return null;
  }
}
|
|
67
|
+
|
|
68
|
+
/**
 * Load every status record found in the project's status directory.
 *
 * Only `*.json` files are considered. A file is skipped when it cannot be read
 * or parsed, or when its content is not an object carrying string `task` and
 * `status` fields. Skipping those keeps the sort below, and callers that
 * dereference `record.status`, from throwing on a stray or damaged file.
 *
 * @param {string} root - Project root directory.
 * @returns {object[]} Records sorted by task id using numeric-aware
 *   comparison; empty when the status directory does not exist.
 */
function listStatuses(root) {
  const dir = statusDir(root);

  let names;
  try {
    names = fs.readdirSync(dir);
  } catch (err) {
    // A missing directory (including one removed between calls by a
    // concurrent clear) simply means there are no statuses.
    if (err && (err.code === "ENOENT" || err.code === "ENOTDIR")) {
      return [];
    }
    throw err;
  }

  const out = [];
  for (const name of names) {
    if (!name.endsWith(".json")) continue;

    let record;
    try {
      record = JSON.parse(fs.readFileSync(path.join(dir, name), "utf8"));
    } catch {
      // Best-effort listing: an unreadable or unparsable file is ignored.
      continue;
    }

    const isRecord =
      record !== null &&
      typeof record === "object" &&
      typeof record.task === "string" &&
      typeof record.status === "string";
    if (isRecord) {
      out.push(record);
    }
  }

  out.sort((a, b) => a.task.localeCompare(b.task, undefined, { numeric: true }));
  return out;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Delete every `*.json` file in the status directory, then try to remove the
 * directory itself.
 *
 * Removing the directory is best-effort: if it fails (for example because
 * other files remain in it) the error is ignored.
 *
 * @param {string} root - Project root directory.
 * @returns {number} How many status files were deleted; 0 when the directory
 *   does not exist.
 */
function clearStatuses(root) {
  const dir = statusDir(root);
  if (!fs.existsSync(dir)) {
    return 0;
  }

  const jsonFiles = fs.readdirSync(dir).filter((name) => name.endsWith(".json"));
  jsonFiles.forEach((name) => {
    fs.unlinkSync(path.join(dir, name));
  });

  try {
    fs.rmdirSync(dir);
  } catch {}

  return jsonFiles.length;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Report whether a build is in flight, i.e. at least one persisted status
 * record is RUNNING. Per the file's own notes, the R1 pre-write guard uses
 * this so it only enforces file discipline during a build.
 *
 * @param {string} root - Project root directory.
 * @returns {boolean} True when any record's status is exactly "RUNNING".
 */
function buildActive(root) {
  for (const record of listStatuses(root)) {
    if (record.status === "RUNNING") {
      return true;
    }
  }
  return false;
}
|
|
98
|
+
|
|
99
|
+
/**
 * Collect the task ids a barrier should wait for.
 *
 * @param {object|null} contract - Parsed plan contract. A missing contract,
 *   or one whose `tasks` is not an array, yields an empty list instead of
 *   throwing.
 * @param {string|number|null} [wave] - When given (not null/undefined), only
 *   tasks whose `wave` is numerically equal to it are included.
 * @returns {Array} The `id` of each selected task, in contract order.
 */
function expectedTaskIds(contract, wave) {
  const declared = contract ? contract.tasks : null;
  // Guard against a malformed contract: only an array can be filtered/mapped.
  const tasks = Array.isArray(declared) ? declared : [];

  if (wave == null) {
    return tasks.map((t) => t.id);
  }

  const wanted = Number(wave);
  return tasks.filter((t) => Number(t.wave) === wanted).map((t) => t.id);
}
|
|
104
|
+
|
|
105
|
+
/**
 * Fan-in barrier: compare the persisted statuses with the set of tasks that
 * are expected to have finished.
 *
 * The expected set is `opts.tasks` when that is a non-empty array (an explicit
 * batch), otherwise the contract's task ids, optionally limited to
 * `opts.wave`. The barrier passes only when there is at least one expected
 * task and every one of them is DONE; a task with no record is reported as
 * MISSING, and any non-DONE status holds the barrier.
 *
 * @param {string} root - Project root directory.
 * @param {object|null} contract - Parsed plan contract (may be null when
 *   `opts.tasks` supplies the set).
 * @param {object} [opts]
 * @param {string[]} [opts.tasks] - Explicit list of task ids to gate on.
 * @param {string|number} [opts.wave] - Wave used to scope the contract tasks.
 * @returns {{ok: boolean, wave: (number|null), expected: number, done: number,
 *   blocked: number, partial: number, running: number, missing: number,
 *   tasks: object[]}} Summary counts plus one row per expected task.
 */
function barrier(root, contract, opts = {}) {
  let expected;
  if (Array.isArray(opts.tasks) && opts.tasks.length) {
    expected = opts.tasks;
  } else {
    expected = expectedTaskIds(contract, opts.wave);
  }

  // Index the persisted records by task id for lookup.
  const byTask = new Map();
  for (const record of listStatuses(root)) {
    byTask.set(record.task, record);
  }

  const tally = new Map();
  const tasks = [];
  for (const id of expected) {
    const found = byTask.get(id);
    const row = {
      task: id,
      status: found ? found.status : "MISSING",
      commit: found ? found.commit : null,
      note: found ? found.note : null,
    };
    tasks.push(row);
    tally.set(row.status, (tally.get(row.status) || 0) + 1);
  }

  const countOf = (status) => tally.get(status) || 0;
  const done = countOf("DONE");

  return {
    ok: expected.length > 0 && done === expected.length,
    wave: opts.wave != null ? Number(opts.wave) : null,
    expected: expected.length,
    done,
    blocked: countOf("BLOCKED"),
    partial: countOf("PARTIAL"),
    running: countOf("RUNNING"),
    missing: countOf("MISSING"),
    tasks,
  };
}
|
|
133
|
+
|
|
134
|
+
// ── CLI ───────────────────────────────────────────────────────────────
|
|
135
|
+
/**
 * Parse CLI arguments starting at index `start`.
 *
 * `--json` is a boolean switch. `--cwd`, `--wave`, `--tasks`, `--commit`,
 * `--note` and `--phase` take a value, written either as the next argument
 * (`--wave 2`) or inline (`--wave=2`). Every other argument is collected, in
 * order, as a positional.
 *
 * @param {string[]} argv - Full argument vector.
 * @param {number} start - Index of the first argument to parse.
 * @returns {object} Parsed flags; positionals are in the `_` array. A value
 *   flag given last with no value is present with the value `undefined`.
 */
function parseFlags(argv, start) {
  const valueFlags = ["cwd", "wave", "tasks", "commit", "note", "phase"];
  const flags = { _: [] };

  let i = start;
  while (i < argv.length) {
    const arg = argv[i];
    i += 1;

    if (arg === "--json") {
      flags.json = true;
      continue;
    }

    // `--name VALUE`: the value is the following argument, which is consumed.
    const separate = valueFlags.find((name) => arg === `--${name}`);
    if (separate !== undefined) {
      flags[separate] = argv[i];
      i += 1;
      continue;
    }

    // `--name=VALUE`: the value is everything after "--name=".
    const inline = valueFlags.find((name) => arg.startsWith(`--${name}=`));
    if (inline !== undefined) {
      flags[inline] = arg.slice(inline.length + 3);
      continue;
    }

    flags._.push(arg);
  }

  return flags;
}
|
|
156
|
+
|
|
157
|
+
/**
 * Print the CLI usage text to stderr.
 */
function usage() {
  const commands = [
    " agent-status.js write <task> <status> [--commit H] [--note N] [--phase P] [--wave W] [--cwd DIR]",
    " agent-status.js read <task> [--cwd DIR] [--json]",
    " agent-status.js list [--cwd DIR] [--json]",
    " agent-status.js barrier <contract.json> [--wave W] [--cwd DIR] [--json]",
    " agent-status.js barrier --tasks T1,T2 [--cwd DIR] [--json] (batch gate; no contract needed)",
    " agent-status.js clear [--cwd DIR]",
  ];
  const footer = [
    "",
    "status ∈ RUNNING | DONE | BLOCKED | PARTIAL",
    "barrier exits 0 ⇔ every expected task is DONE.",
  ];
  const text = ["Usage:", ...commands, ...footer].join("\n");
  console.error(text);
}
|
|
171
|
+
|
|
172
|
+
/**
 * CLI entry point: dispatch `write`, `read`, `list`, `barrier` or `clear`.
 *
 * Exit codes as returned here: 0 on success (for `barrier`, when it passes);
 * 1 when `read` finds no record or `barrier` holds; 2 for usage errors, an
 * unknown command, a rejected `write`, or a contract that fails to load.
 *
 * @param {string[]} argv - Full argument vector; `argv[2]` is the command.
 * @returns {number} The process exit code.
 */
function main(argv) {
  const cmd = argv[2];
  const wantsHelp = !cmd || cmd === "-h" || cmd === "--help";
  if (wantsHelp) {
    usage();
    return 2;
  }

  const flags = parseFlags(argv, 3);
  const root = path.resolve(flags.cwd || process.cwd());
  const positional = flags._;

  // Optional trailing pieces shared by the human-readable output lines.
  const commitSuffix = (r) => (r.commit ? ` @ ${r.commit}` : "");
  const noteSuffix = (r) => (r.note ? ` — ${r.note}` : "");

  switch (cmd) {
    case "write": {
      const task = positional[0];
      const status = positional[1];
      if (!task || !status) {
        usage();
        return 2;
      }
      try {
        const rec = writeStatus(root, {
          task,
          status,
          commit: flags.commit,
          note: flags.note,
          phase: flags.phase,
          wave: flags.wave,
        });
        if (flags.json) {
          console.log(JSON.stringify(rec));
        } else {
          console.log(`${rec.task} ${rec.status}${commitSuffix(rec)}`);
        }
        return 0;
      } catch (e) {
        console.error(`ERROR: ${e.message}`);
        return 2;
      }
    }

    case "read": {
      const task = positional[0];
      if (!task) {
        usage();
        return 2;
      }
      const rec = readStatus(root, task);
      if (!rec) {
        console.error(`no status for ${task}`);
        return 1;
      }
      if (flags.json) {
        console.log(JSON.stringify(rec));
      } else {
        console.log(`${rec.task} ${rec.status}${commitSuffix(rec)}${noteSuffix(rec)}`);
      }
      return 0;
    }

    case "list": {
      const all = listStatuses(root);
      if (flags.json) {
        console.log(JSON.stringify(all, null, 2));
        return 0;
      }
      if (all.length === 0) {
        console.log("(no agent statuses)");
        return 0;
      }
      all.forEach((s) => {
        console.log(`${s.task} ${s.status.padEnd(8)}${commitSuffix(s)}${noteSuffix(s)}`);
      });
      return 0;
    }

    case "barrier": {
      const contractPath = positional[0];
      const taskList = flags.tasks
        ? flags.tasks.split(",").map((s) => s.trim()).filter(Boolean)
        : null;

      // --tasks gates on an explicit batch; otherwise the contract supplies
      // the expected set and must load successfully.
      let contract = null;
      if (!taskList) {
        if (!contractPath) {
          usage();
          return 2;
        }
        const loaded = pc.readContractFile(contractPath);
        if (!loaded.ok) {
          if (flags.json) {
            console.log(JSON.stringify({ ok: false, ...loaded }));
          } else {
            console.error(`${loaded.error}: ${loaded.message}`);
          }
          return 2;
        }
        contract = loaded.contract;
      }

      const result = barrier(root, contract, { wave: flags.wave, tasks: taskList });
      const exitCode = result.ok ? 0 : 1;
      if (flags.json) {
        console.log(JSON.stringify(result, null, 2));
        return exitCode;
      }

      let scope;
      if (taskList) {
        scope = `batch ${taskList.join(",")}`;
      } else if (result.wave != null) {
        scope = `wave ${result.wave}`;
      } else {
        scope = "phase";
      }

      if (result.ok) {
        console.log(`BARRIER PASS (${scope}): ${result.done}/${result.expected} DONE`);
        return exitCode;
      }

      console.error(`BARRIER HOLD (${scope}): ${result.done}/${result.expected} DONE` +
        ` (running=${result.running} blocked=${result.blocked} partial=${result.partial} missing=${result.missing})`);
      result.tasks
        .filter((t) => t.status !== "DONE")
        .forEach((t) => {
          console.error(` - ${t.task}: ${t.status}${noteSuffix(t)}`);
        });
      return exitCode;
    }

    case "clear": {
      const n = clearStatuses(root);
      console.log(`cleared ${n} status file(s)`);
      return 0;
    }

    default:
      usage();
      return 2;
  }
}
|
|
249
|
+
|
|
250
|
+
module.exports = {
|
|
251
|
+
STATUSES,
|
|
252
|
+
STATUS_DIR,
|
|
253
|
+
writeStatus,
|
|
254
|
+
readStatus,
|
|
255
|
+
listStatuses,
|
|
256
|
+
clearStatuses,
|
|
257
|
+
buildActive,
|
|
258
|
+
expectedTaskIds,
|
|
259
|
+
barrier,
|
|
260
|
+
};
|
|
261
|
+
|
|
262
|
+
if (require.main === module) {
|
|
263
|
+
process.exit(main(process.argv));
|
|
264
|
+
}
|