npm - qualia-framework - Versions diffs - 7.2.2 → 7.3.0 - Mend

qualia-framework 7.2.2 → 7.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/.claude-plugin/marketplace.json +20 -0
package/.claude-plugin/plugin.json +17 -0
package/AGENTS.md +1 -1
package/CHANGELOG.md +42 -0
package/CLAUDE.md +1 -1
package/README.md +17 -4
package/TROUBLESHOOTING.md +8 -7
package/agents/verifier.md +1 -1
package/bin/agent-status.js +115 -11
package/bin/auto-report.js +15 -7
package/bin/cli.js +173 -4
package/bin/erp-retry.js +92 -8
package/bin/install.js +102 -2
package/bin/qualia-doctor.js +115 -1
package/bin/state.js +102 -13
package/bin/verify-panel.js +409 -0
package/docs/onboarding.html +1 -1
package/hooks/branch-guard.js +19 -5
package/hooks/fawzi-approval-guard.js +16 -3
package/hooks/hooks.json +60 -0
package/hooks/migration-guard.js +143 -66
package/hooks/session-start.js +27 -0
package/package.json +3 -1
package/skills/qualia/SKILL.md +20 -13
package/skills/qualia-build/SKILL.md +20 -9
package/skills/qualia-verify/SKILL.md +43 -5
package/templates/instructions.md +2 -2
package/tests/bin.test.sh +183 -0
package/tests/hooks.test.sh +124 -0
package/tests/install-smoke.test.sh +14 -0
package/tests/instructions.test.sh +2 -2
package/tests/lib.test.sh +149 -0
package/tests/plugin-manifest.test.sh +168 -0
package/tests/refs.test.sh +64 -0
package/tests/run-all.sh +1 -0
package/tests/state.test.sh +174 -0
package/tests/verify-panel.test.sh +236 -0

package/hooks/migration-guard.js CHANGED Viewed

@@ -1,7 +1,10 @@
 #!/usr/bin/env node
 // ~/.claude/hooks/migration-guard.js — catch dangerous SQL patterns in migrations.
-// PreToolUse hook on Edit/Write tool calls. Reads tool input as JSON on stdin.
-// Exits 2 to BLOCK. Exits 0 to allow.
+// PreToolUse hook on Edit/Write tool calls AND Bash tool calls. Reads tool input
+// as JSON on stdin. Exits 2 to BLOCK. Exits 0 to allow.
+// Edit|Write: scans file content for .sql / migrations/ files.
+// Bash: scans inline SQL that bypasses supabase/migrations/ (heredoc→.sql,
+//   psql -c/-f, supabase db execute/push) — same destructive/RLS check.
 // Cross-platform (Windows/macOS/Linux).
 const fs = require("fs");
@@ -42,6 +45,7 @@ function readInput() {
 const input = readInput();
 const ti = input.tool_input || {};
+const toolName = String(input.tool_name || "");
 const file = String(ti.file_path || "").replace(/\\/g, "/");
 // For Edit tool calls, dangerous SQL might live in old_string OR new_string.
@@ -52,6 +56,11 @@ const content = [ti.old_string, ti.new_string, ti.content]
   .map((v) => String(v))
   .join("\n");
+// For Bash tool calls, dangerous SQL can be written or executed through the
+// shell, bypassing the Edit|Write matcher entirely. We mirror the Bash-content
+// scan pattern from supabase-destructive-guard.js (reads tool_input.command).
+const command = String(ti.command || "");
 function _trace(hookName, result, extra) {
   try {
     const os = require("os");
@@ -73,13 +82,6 @@ function _trace(hookName, result, extra) {
   } catch {}
 }
-// Only inspect SQL files or files that live inside a migrations/ directory.
-// Prior regex was over-broad (matched MigrationModal.tsx, migrations.md, etc.).
-if (!/(^|\/)migrations?\//i.test(file) && !/\.sql$/i.test(file)) {
-  _trace("migration-guard", "allow", { reason: "non-migration file" });
-  process.exit(0);
-}
 // Strip SQL comments before pattern matching so rolled-back/explanatory
 // statements inside `-- ...` line comments or `/* ... */` block comments
 // don't trigger false positives.
@@ -91,76 +93,85 @@ function stripSqlComments(src) {
   return out;
 }
-const scan = stripSqlComments(content);
+function splitStatements(src) {
+  return src.split(/;/g).map((s) => s.trim()).filter(Boolean);
+}
-const errors = [];
+// Scan a blob of SQL for destructive / RLS-violating patterns. Returns an array
+// of human-readable error strings (empty = clean). Shared by the Edit|Write
+// path (scans file content) and the Bash path (scans inline shell SQL) so the
+// constitution's destructive-SQL + RLS check applies no matter how the SQL
+// reaches disk or the database.
+function scanSql(rawSql) {
+  const scan = stripSqlComments(rawSql);
+  const errors = [];
-// DROP TABLE without IF EXISTS
-if (/DROP\s+TABLE/i.test(scan) && !/IF\s+EXISTS/i.test(scan)) {
-  errors.push("DROP TABLE without IF EXISTS");
-}
+  // DROP TABLE without IF EXISTS
+  if (/DROP\s+TABLE/i.test(scan) && !/IF\s+EXISTS/i.test(scan)) {
+    errors.push("DROP TABLE without IF EXISTS");
+  }
-// DROP DATABASE — almost never appropriate in app migrations
-if (/DROP\s+DATABASE/i.test(scan)) {
-  errors.push("DROP DATABASE detected — refuse unless explicitly approved");
-}
+  // DROP DATABASE — almost never appropriate in app migrations
+  if (/DROP\s+DATABASE/i.test(scan)) {
+    errors.push("DROP DATABASE detected — refuse unless explicitly approved");
+  }
-// DROP SCHEMA — destructive, especially with CASCADE
-if (/DROP\s+SCHEMA/i.test(scan)) {
-  errors.push("DROP SCHEMA detected — refuse unless explicitly approved");
-}
+  // DROP SCHEMA — destructive, especially with CASCADE
+  if (/DROP\s+SCHEMA/i.test(scan)) {
+    errors.push("DROP SCHEMA detected — refuse unless explicitly approved");
+  }
-// ALTER TABLE ... DROP COLUMN — destructive schema change
-if (/ALTER\s+TABLE\s+[^;]*\bDROP\s+COLUMN\b/i.test(scan)) {
-  errors.push("ALTER TABLE ... DROP COLUMN is destructive");
-}
+  // ALTER TABLE ... DROP COLUMN — destructive schema change
+  if (/ALTER\s+TABLE\s+[^;]*\bDROP\s+COLUMN\b/i.test(scan)) {
+    errors.push("ALTER TABLE ... DROP COLUMN is destructive");
+  }
-// DELETE / UPDATE without WHERE — check per-statement, not file-global.
-// Previously a file containing "DELETE FROM foo;" followed by any later
-// "... WHERE ..." (in a SELECT, JOIN, etc.) would pass the check.
-function splitStatements(src) {
-  return src.split(/;/g).map((s) => s.trim()).filter(Boolean);
-}
-const statements = splitStatements(scan);
-for (const stmt of statements) {
-  if (/^\s*DELETE\s+FROM\b/i.test(stmt) && !/\bWHERE\b/i.test(stmt)) {
-    errors.push("DELETE FROM without WHERE clause");
-    break;
+  // DELETE / UPDATE without WHERE — check per-statement, not file-global.
+  // Previously a file containing "DELETE FROM foo;" followed by any later
+  // "... WHERE ..." (in a SELECT, JOIN, etc.) would pass the check.
+  const statements = splitStatements(scan);
+  for (const stmt of statements) {
+    if (/^\s*DELETE\s+FROM\b/i.test(stmt) && !/\bWHERE\b/i.test(stmt)) {
+      errors.push("DELETE FROM without WHERE clause");
+      break;
+    }
   }
-}
-for (const stmt of statements) {
-  if (/^\s*UPDATE\s+\w+(?:\.\w+)?\s+SET\b/i.test(stmt) && !/\bWHERE\b/i.test(stmt)) {
-    errors.push("UPDATE without WHERE clause — affects every row");
-    break;
+  for (const stmt of statements) {
+    if (/^\s*UPDATE\s+\w+(?:\.\w+)?\s+SET\b/i.test(stmt) && !/\bWHERE\b/i.test(stmt)) {
+      errors.push("UPDATE without WHERE clause — affects every row");
+      break;
+    }
   }
-}
-// TRUNCATE (almost always wrong in migrations)
-if (/TRUNCATE/i.test(scan)) {
-  errors.push("TRUNCATE detected — are you sure?");
-}
+  // TRUNCATE (almost always wrong in migrations)
+  if (/TRUNCATE/i.test(scan)) {
+    errors.push("TRUNCATE detected — are you sure?");
+  }
-// GRANT ... TO PUBLIC — privilege leak
-if (/GRANT\s+[^;]*\bTO\s+PUBLIC\b/i.test(scan)) {
-  errors.push("GRANT ... TO PUBLIC detected — privilege leak");
-}
+  // GRANT ... TO PUBLIC — privilege leak
+  if (/GRANT\s+[^;]*\bTO\s+PUBLIC\b/i.test(scan)) {
+    errors.push("GRANT ... TO PUBLIC detected — privilege leak");
+  }
-// CREATE TABLE without RLS — but skip TEMP/TEMPORARY tables and partitions.
-// Strategy: enumerate CREATE TABLE statements, drop the ones that don't need RLS,
-// then if any "real" CREATE TABLE remains, require ENABLE ROW LEVEL SECURITY.
-const createTableMatches = scan.match(/CREATE\s+(?:(?:GLOBAL|LOCAL)\s+)?(?:TEMP|TEMPORARY|UNLOGGED)?\s*TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?[^;]*/gi) || [];
-const realCreateTables = createTableMatches.filter((stmt) => {
-  // Skip TEMP/TEMPORARY tables — they're session-scoped, no RLS needed.
-  if (/CREATE\s+(?:(?:GLOBAL|LOCAL)\s+)?(?:TEMP|TEMPORARY)\b/i.test(stmt)) return false;
-  // Skip partition tables — RLS lives on the parent table.
-  if (/\bPARTITION\s+OF\b/i.test(stmt)) return false;
-  return true;
-});
-if (realCreateTables.length > 0 && !/ENABLE\s+ROW\s+LEVEL\s+SECURITY/i.test(scan)) {
-  errors.push("CREATE TABLE without ENABLE ROW LEVEL SECURITY");
+  // CREATE TABLE without RLS — but skip TEMP/TEMPORARY tables and partitions.
+  // Strategy: enumerate CREATE TABLE statements, drop the ones that don't need RLS,
+  // then if any "real" CREATE TABLE remains, require ENABLE ROW LEVEL SECURITY.
+  const createTableMatches = scan.match(/CREATE\s+(?:(?:GLOBAL|LOCAL)\s+)?(?:TEMP|TEMPORARY|UNLOGGED)?\s*TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?[^;]*/gi) || [];
+  const realCreateTables = createTableMatches.filter((stmt) => {
+    // Skip TEMP/TEMPORARY tables — they're session-scoped, no RLS needed.
+    if (/CREATE\s+(?:(?:GLOBAL|LOCAL)\s+)?(?:TEMP|TEMPORARY)\b/i.test(stmt)) return false;
+    // Skip partition tables — RLS lives on the parent table.
+    if (/\bPARTITION\s+OF\b/i.test(stmt)) return false;
+    return true;
+  });
+  if (realCreateTables.length > 0 && !/ENABLE\s+ROW\s+LEVEL\s+SECURITY/i.test(scan)) {
+    errors.push("CREATE TABLE without ENABLE ROW LEVEL SECURITY");
+  }
+  return errors;
 }
-if (errors.length > 0) {
+function block(errors) {
   console.error("⬢ Migration guard — dangerous patterns found:");
   for (const e of errors) {
     console.error(`  ✗ ${e}`);
@@ -171,5 +182,71 @@ if (errors.length > 0) {
   process.exit(2);
 }
+// ── Bash path ─────────────────────────────────────────────────────────────
+// SQL written or executed through the shell bypasses the Edit|Write matcher,
+// so it would never reach the destructive/RLS check above. Mirror the Bash-
+// content scan from supabase-destructive-guard.js: extract the inline SQL from
+// the command and run it through the same scanSql() the file path uses.
+// Targets: heredoc redirected into a .sql file, psql -c/-f, and
+// `supabase db execute/push` of inline SQL. Fail-closed on a destructive
+// match; fail-open (allow) on anything we can't confidently parse.
+if (toolName === "Bash" || (command && !file)) {
+  if (!command) {
+    _trace("migration-guard", "allow", { reason: "no-command" });
+    process.exit(0);
+  }
+  // Only bother scanning shell that writes/executes SQL outside the
+  // migrations pipeline. `supabase migration new` + applying files under
+  // supabase/migrations/ go through the proper flow and are not our target.
+  const writesSqlFile = /<<-?\s*['"]?\w+['"]?[\s\S]*?\.sql\b/i.test(command) ||
+    />>?\s*\S*\.sql\b/i.test(command);
+  const psqlInline = /\bpsql\b[\s\S]*\s-(?:c|f)\b/i.test(command);
+  const supabaseExec = /\b(npx\s+)?supabase\s+db\s+(execute|push)\b/i.test(command);
+  if (!writesSqlFile && !psqlInline && !supabaseExec) {
+    _trace("migration-guard", "allow", { reason: "no inline SQL in command" });
+    process.exit(0);
+  }
+  // Statement-level checks (DELETE/UPDATE without WHERE) anchor at the start of
+  // a SQL statement, so scanning the raw shell line (prefixed by `psql -c "`,
+  // `supabase db execute "`, etc.) would never match. Strip the shell wrapper:
+  // pull out heredoc bodies and quoted args so what we scan looks like raw SQL.
+  const sqlFragments = [];
+  // Heredoc bodies: `<<EOF ... EOF` / `<<'TAG' ... TAG` (the delimiter may be
+  // quoted; the closing tag sits on its own line).
+  let hd;
+  const heredocRe = /<<-?\s*(['"]?)(\w+)\1[^\n]*\n([\s\S]*?)\n\s*\2\b/g;
+  while ((hd = heredocRe.exec(command)) !== null) sqlFragments.push(hd[3]);
+  // Quoted argument bodies (single- or double-quoted) — captures the SQL passed
+  // to `-c`, `execute`, etc. without the surrounding shell quotes.
+  let q;
+  const quotedRe = /(['"])([\s\S]*?)\1/g;
+  while ((q = quotedRe.exec(command)) !== null) sqlFragments.push(q[2]);
+  // Scan both the raw command (catches DROP/TRUNCATE/GRANT — not statement-
+  // anchored) and the extracted fragments (catches DELETE/UPDATE-without-WHERE).
+  const errors = scanSql([command, ...sqlFragments].join("\n;\n"));
+  if (errors.length > 0) {
+    block(errors);
+  }
+  _trace("migration-guard", "allow", { reason: "bash SQL clean" });
+  process.exit(0);
+}
+// ── Edit|Write path ───────────────────────────────────────────────────────
+// Only inspect SQL files or files that live inside a migrations/ directory.
+// Prior regex was over-broad (matched MigrationModal.tsx, migrations.md, etc.).
+if (!/(^|\/)migrations?\//i.test(file) && !/\.sql$/i.test(file)) {
+  _trace("migration-guard", "allow", { reason: "non-migration file" });
+  process.exit(0);
+}
+const errors = scanSql(content);
+if (errors.length > 0) {
+  block(errors);
+}
 _trace("migration-guard", "allow");
 process.exit(0);

package/hooks/session-start.js CHANGED Viewed

@@ -167,6 +167,32 @@ function maybeDrainErpQueue() {
   } catch {}
 }
+function surfaceErpQueue() {
+  // One-line, non-blocking notice when the retry queue is non-empty: count +
+  // age of the oldest item. Reads the queue file directly (no spawn) and never
+  // throws — the drain itself runs separately in maybeDrainErpQueue(). Quiet
+  // when the queue is empty so a healthy session stays clean.
+  try {
+    if (!fs.existsSync(ERP_QUEUE)) return;
+    const parsed = JSON.parse(fs.readFileSync(ERP_QUEUE, "utf8"));
+    const q = parsed && Array.isArray(parsed.queue) ? parsed.queue : [];
+    if (q.length === 0) return;
+    const now = Date.now();
+    let oldest = 0;
+    let stuck = 0;
+    for (const it of q) {
+      const t = Date.parse(it && it.enqueued_at);
+      if (Number.isFinite(t)) oldest = Math.max(oldest, now - t);
+      if (it && it.give_up) stuck++;
+    }
+    const oldestHours = Math.floor(oldest / (60 * 60 * 1000));
+    const stuckNote = stuck > 0 ? `, ${stuck} stuck` : "";
+    const msg = `${q.length} ERP report(s) pending upload (oldest ${oldestHours}h${stuckNote}) — runs \`qualia-framework erp-flush\` to retry`;
+    if (fs.existsSync(UI)) runUi("warn", msg);
+    else console.log(`QUALIA: ${msg}`);
+  } catch {}
+}
 function cmpVersions(a, b) {
   // Returns >0 if a>b, <0 if a<b, 0 if equal. Tolerates missing/non-numeric
   // segments by treating them as 0. Pure semver-major.minor.patch compare.
@@ -217,6 +243,7 @@ function renderHealthWarning(missing) {
 try {
   maybeRenderUpdateBanner();
   maybeDrainErpQueue();
+  surfaceErpQueue();
   const healthMissing = checkInstallHealth();
   if (healthMissing) renderHealthWarning(healthMissing);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "qualia-framework",
-  "version": "7.2.2",
+  "version": "7.3.0",
   "description": "Claude Code and Codex workflow framework by Qualia Solutions. Plan, build, verify, ship.",
   "bin": {
     "qualia-framework": "./bin/cli.js"
@@ -36,6 +36,7 @@
     "test:statusline": "bash tests/statusline.test.sh",
     "test:refs": "bash tests/refs.test.sh",
     "test:published-install": "bash tests/published-install-smoke.test.sh",
+    "test:plugin": "bash tests/plugin-manifest.test.sh",
     "test:shell": "bash tests/run-all.sh",
     "test:node": "node --test tests/runner.js",
     "compile:instructions": "node bin/compile-instructions.js"
@@ -44,6 +45,7 @@
     "bin/",
     "agents/",
     "hooks/",
+    ".claude-plugin/",
     "mcp/",
     "rules/",
     "qualia-design/",

package/skills/qualia/SKILL.md CHANGED Viewed

@@ -22,7 +22,7 @@ node ${QUALIA_BIN}/state.js check 2>/dev/null
 The JSON carries a `profile` field (`strict` or `standard`; env `$QUALIA_PROFILE` wins). `strict` = hard gates, no waivers; `standard` = gates advisory, a senior may waive with a reason logged to `.planning/decisions/`. Surface it when a gate is involved.
-**A5 — multi-person layout.** If the JSON has `"layout": "increments"`, this project is concurrency-aware: route by the increment fields, not the single phase cursor. Continue your own claim first (`my_claim`), else start `next_increment` (the first unclaimed, not-done increment). **Never** route the operator to anything in `claimed_increments[]` — those are held by another person on another branch. The status→command mapping in the table below still applies (it's computed into `next_command`); the increment fields just tell you *which* unit is yours. Legacy projects (no `layout` field) route exactly as before.
+**A5 — multi-person layout.** If the JSON has `"layout": "increments"`, this project is concurrency-aware: route by the increment fields, not the single phase cursor. Continue your own claim first (`my_claim`), else start `next_increment` (the first unclaimed, not-done increment). **Never** route the operator to anything in `claimed_increments[]` — those are held by another person on another branch. The status→command routing is already computed into `next_command` (by state.js, the single authority — see step 2); the increment fields just tell you *which* unit is yours. Legacy projects (no `layout` field) route exactly as before.
 Also gather context:
 ```bash
@@ -49,27 +49,34 @@ Read conversation context — what has the user been doing, what errors occurred
 ### 2. Classify and Route
-Use the state.js JSON output plus gathered context:
+**state.js owns status→command routing — do not re-derive it.** The JSON's
+`next_command` field is computed by `nextCommand()` in state.js, the single
+routing authority. It already accounts for status, gap-cycle failures, the
+last-phase terminal move, and the project `lifecycle` (a launched/operate
+project routes to `/qualia-update`, not the build-mode polish→ship→handoff
+chain). **Surface `next_command` verbatim.** Do NOT hand-maintain a status
+table here — that duplicate has drifted before.
-| Situation | Detection | Route |
+Override the JSON `next_command` ONLY for context-only situations state.js
+cannot see (it reads tracked state, not your conversation or the working tree):
+| Situation | Detection (context state.js can't see) | Route |
 |-----------|-----------|-------|
 | `no-project` | state.js returns NO_PROJECT | → `/qualia-new` |
 | `handoff` | `.continue-here.md` exists | → Read it, summarize, route to next step |
 | `mid-work` | Uncommitted changes + phase in progress | → Continue, or write `.continue-here.md` if the user wants to pause |
-| `ready-to-plan` | status == "setup" | → `/qualia-plan {N}` |
-| `ready-to-build` | status == "planned" | → `/qualia-build {N}` |
-| `ready-to-verify` | status == "built" | → `/qualia-verify {N}` |
-| `gaps-found` | status == "verified", verification == "fail", gap_cycles < 2 | → `/qualia-plan {N} --gaps` |
-| `gap-limit` | status == "verified", verification == "fail", gap_cycles >= 2 | → Escalate to Fawzi or re-plan from scratch |
-| `phase-complete` | state.js auto-advanced (status == "setup", phase > 1) | → `/qualia-plan {N}` |
-| `all-verified` | last phase verified pass, status == "verified" | → `/qualia-polish` |
-| `polished` | status == "polished" | → `/qualia-ship` |
-| `shipped` | status == "shipped" | → `/qualia-handoff` |
-| `handed-off` | status == "handed_off" | → `/qualia-report` then done |
+| `gap-limit` | `gap_cycles >= gap_cycle_limit` (both from the JSON) | → Escalate to Fawzi or re-plan from scratch — do NOT keep routing to `--gaps` |
 | `blocked` | STATE.md lists blockers or same error 3+ times | → Diagnose the evidence; `/qualia-fix` if expected behavior is known, `/qualia-review` if broader audit is needed |
 | `bug-loop` | Same files edited 3+ times, user frustrated | → Stop patching; summarize root cause evidence and route to `/qualia-fix` or `/qualia-review` |
 | `need-tests` | User mentions "tests", "coverage", "test this" | → `/qualia-test` |
+Note `gap_cycles` and `gap_cycle_limit` are both in the JSON (the limit is
+configurable via `tracking.json`/`PROJECT.md`, default 2) — compare them, never
+hardcode the threshold. For every non-override situation, route to
+`next_command` as-is. If `next_command` starts with `UNRECOGNIZED_STATUS`, the
+project state is corrupt — surface the diagnostic and route to `/qualia-doctor`;
+never loop back to `/qualia`.
 **Employee escalation:** If role is EMPLOYEE and situation is `gap-limit` or `bug-loop`, suggest: "Want to flag this for Fawzi?"
 ### 3. Display

package/skills/qualia-build/SKILL.md CHANGED Viewed

@@ -70,19 +70,23 @@ node ${QUALIA_BIN}/plan-contract.js validate .planning/phase-{N}-contract.json
 Parse tasks, waves, file refs. Prefer the JSON contract for task ids, dependencies, file lists, and verification checks; use the Markdown plan as the human-readable context.
-### 1a. Analyze Gate (scope ↔ plan, before any build)
+### 1a. Analyze Gate (scope ↔ plan) — ENFORCED at the planned→built transition
-`plan-contract.js` proves the contract is internally well-formed; this gate diffs it **against intent** — scope acceptance criteria (`phase-{N}-context.md`) + the CONTEXT.md glossary — to catch requirements the plan silently dropped or contradicted. This is the plan→build seam Spec-Kit calls `/analyze`.
+`plan-contract.js` proves the contract is internally well-formed; the analyze gate diffs it **against intent** — scope acceptance criteria (`phase-{N}-context.md`) + the CONTEXT.md glossary — to catch requirements the plan silently dropped or contradicted. This is the plan→build seam Spec-Kit calls `/analyze`.
+This is no longer a "please run" step — **`state.js` enforces it deterministically.** The `state.js transition --to built` call in §5 runs the analyze gate for the phase before it lets the status advance ("a rule worth enforcing is worth a hook"). The behavior is **profile-aware** (the `profile` field from `state.js check`):
+- **strict** → a HIGH analyze finding (an under-covered scope AC or scope-reduction language) **REFUSES** the transition with `error: "SCOPE_DRIFT"`, naming the dropped criterion, and writes a `scope-drift` trace. Route to `/qualia-plan {N} --gaps` (plan dropped a requirement) or `/qualia-scope {N}` (scope itself is wrong). Do not build. To override (senior waiver), re-run the transition with `--force`.
+- **standard** → findings are advisory: the transition proceeds. If you proceed past a HIGH, log the waiver reason to `.planning/decisions/`.
+The gate is **fail-soft on its own error**: a missing/unreadable contract or no scope file (`phase-{N}-context.md`) means the scope-coverage check is skipped, not a failure — `/qualia-feature` trivia and scope-less phases still build.
+To preview the findings *before* attempting the build (optional — the transition enforces it regardless), run:
 ```bash
 node ${QUALIA_BIN}/analyze-gate.js {N}
 ```
-Exit 0 → consistent, proceed. Non-zero → it lists under-covered scope criteria, orphan success criteria, glossary violations, and scope-reduction language. **Profile-aware** (the `profile` field from `state.js check`):
-- **strict** → a HIGH finding is a stop. Route to `/qualia-plan {N} --gaps` (plan dropped a requirement) or `/qualia-scope {N}` (scope itself is wrong). Do not build.
-- **standard** → surface findings to the operator and proceed only with an explicit ack; log the waiver reason to `.planning/decisions/` if you proceed past a HIGH.
-(No scope file = scope-coverage check is skipped, not a failure — `/qualia-feature` trivia and scope-less phases still build.)
+Exit 0 → consistent. Non-zero → it lists under-covered scope criteria, orphan success criteria, glossary violations, and scope-reduction language.
 ### 1b. Recovery Reference
@@ -194,10 +198,15 @@ node ${QUALIA_BIN}/qualia-ui.js done {task_num} "{title}" {commit_hash}
 **After each batch — fan-in barrier (deterministic, not "did the model notice"):**
 ```bash
-node ${QUALIA_BIN}/agent-status.js barrier --tasks {comma-separated task ids in this batch}
+node ${QUALIA_BIN}/agent-status.js barrier --tasks {comma-separated task ids in this batch} --timeout 900
 ```
-Exit 0 ⇔ every task in the batch wrote `DONE`. Non-zero → the barrier lists which tasks are RUNNING/BLOCKED/PARTIAL/MISSING. Do NOT spawn the next batch until the barrier passes; a BLOCKED/PARTIAL task is a wave failure (§4). `agent-status.js list` shows the live view. (Gating per batch — not per contract wave — keeps the barrier aligned with the `wave-plan.js` schedule, whose derived waves needn't match the contract's declared wave numbers.)
+Exit codes (always pass `--timeout`; use `900` seconds (15 min) unless the batch contains genuinely large tasks, in which case raise it):
+- **0 — PASS.** Every task in the batch wrote `DONE`. Move to the next batch.
+- **1 — HOLD.** A task is still `RUNNING`/`MISSING` *within* the deadline. Transient — wait and re-poll; do NOT spawn the next batch.
+- **3 — FAIL.** A task is `BLOCKED`/`PARTIAL`, or (past `--timeout`) a `RUNNING` heartbeat went stale or a builder returned without writing any status (`STALE`). This is terminal — a crashed/stalled builder is a wave failure. Do NOT re-poll and do NOT spawn the next batch: route straight to §4 (Handle Failures). The barrier names the offending task(s) and their staleness age.
+The `--timeout` is what stops a crashed builder from stalling an unattended `--auto` wave forever: a builder that dies without writing `DONE`/`BLOCKED` would otherwise hold the barrier indefinitely; past the deadline it becomes a clean FAIL the wave can recover from. The barrier lists which tasks are RUNNING/BLOCKED/PARTIAL/MISSING/STALE; `agent-status.js list` shows the live view. (Gating per batch — not per contract wave — keeps the barrier aligned with the `wave-plan.js` schedule, whose derived waves needn't match the contract's declared wave numbers.)
 **After each batch:** move to the next batch in the schedule, show summary.
@@ -219,12 +228,14 @@ Builder returns deviation/blocker:
 - **Minor:** Log, continue
 - **Major:** Show to employee, ask how to proceed
 - **Blocker:** Show, suggest fix or escalation
+- **Stalled (barrier exit 3 with a `STALE` task):** the builder crashed or returned without writing terminal status — no deviation to read. Show the stalled task id(s) and staleness age from the barrier output. On `--auto`, re-spawn that single task once (fresh builder); if it stalls again, escalate as a Blocker. Interactively, surface it and ask how to proceed.
 ### 5. Update State
 ```bash
 node ${QUALIA_BIN}/state.js transition --to built --phase {N} --tasks-done {done} --tasks-total {total} --wave {wave}
 ```
+This is also where the **scope-drift gate** (§1a) fires: in a strict-profile project a HIGH analyze finding returns `error: "SCOPE_DRIFT"` and the status does NOT advance. Surface the named dropped criterion and route to `/qualia-plan {N} --gaps` or `/qualia-scope {N}`; re-run with `--force` only for an authorized senior waiver.
 Error → show, stop.
 Do NOT edit STATE.md or tracking.json manually; state.js handles both.

package/skills/qualia-verify/SKILL.md CHANGED Viewed

@@ -109,7 +109,15 @@ Wait for both verifier + QA before step 3. Playwright MCP unavailable → QA ret
 The panel FINDS; skeptics decide what's REAL; `verify-panel.js` decides the verdict — math, not a vibe.
-**1. Assemble** the per-lens finding files into one panel skeleton (votes zeroed):
+**0. Execution lens (run the app, don't just grep it).** A grep proves a symbol EXISTS; it does not prove the feature RUNS — and a builder can satisfy a grep pattern while the project fails to compile or its tests are red (the dominant reward-hacking failure mode). Run the project's OWN checks and emit them as a panel lens BEFORE assembling, so a red build folds into the verdict as a CRITICAL the panel cannot grep around:
+```bash
+node ${QUALIA_BIN}/verify-panel.js execution {N}   # → .planning/phase-{N}-panel-execution.json
+```
+This runs `npx tsc --noEmit` (when `tsconfig.json` exists), the `test` script (when `package.json` defines one), and `build` (when defined). **Fail-soft:** an absent tool is SKIPPED, not a failure — its absence is not evidence of breakage. **Fail-closed:** a present check that exits non-zero becomes a CRITICAL finding. Output is the same `[{file,line,severity,title}]` array shape `assemble` consumes, so the next step folds it in with no special-casing.
+**1. Assemble** the per-lens finding files (including `panel-execution.json` from step 0) into one panel skeleton (votes zeroed):
 ```bash
 node ${QUALIA_BIN}/verify-panel.js assemble {N}   # → .planning/phase-{N}-panel.json
@@ -133,7 +141,16 @@ Return exactly one line: REAL — {file:line reason}   OR   NOT_REAL — {file:l
 Skeptics deliberately **omit `model=`** so they inherit the session's frontier model: their REAL/NOT_REAL judgment is what flips a CRITICAL/HIGH verdict, and that is the one step in the pipeline where model strength most changes the outcome. Route cheap on the finding pass, never on the adjudication.
-Tally each finding's votes into `.planning/phase-{N}-panel.json` (`votes.real` / `votes.notReal`).
+**Tally deterministically — do NOT hand-edit the JSON.** For each finding, collect its skeptics' one-line replies (the `REAL — …` / `NOT_REAL — …` lines, one per line) and pipe them to the `skeptic` tally subcommand. The subcommand counts the verdicts and writes `votes.real`/`votes.notReal` onto the matching finding for you, so a miscount or dropped reply can no longer silently flip a CRITICAL's survival:
+```bash
+# finding-key is "<file>:<line>:<slugged-title>" — the key verify-panel.js uses
+# internally (slug = lowercased title, non-alphanumerics → spaces, ≤48 chars).
+printf '%s\n' "$SKEPTIC_REPLY_1" "$SKEPTIC_REPLY_2" "$SKEPTIC_REPLY_3" \
+  | node ${QUALIA_BIN}/verify-panel.js skeptic {N} "{file}:{line}:{slugged-title}"
+```
+Same skeptic replies in → identical counts out; the majority-survives decision stays in the aggregator (step 3).
 **3. Aggregate** deterministically:
@@ -194,22 +211,43 @@ node ${QUALIA_BIN}/qualia-ui.js end "PHASE {N} GAPS FOUND" "/qualia-plan {N} --g
 ### 4. Update State
-Write the deterministic eval artifact before changing state:
+Write the deterministic eval artifact before changing state, and capture its exit into a gate artifact in the same step:
 ```bash
 node ${QUALIA_BIN}/harness-eval.js --phase {N} --run --write
+node ${QUALIA_BIN}/verify-panel.js gate {N} harness-eval --exit $?   # hard FAIL → blocking CRITICAL
 ```
-Run the zero-token deterministic gates (same role as `migration-guard`/`branch-guard` — each exits non-zero on a hard fault). A non-zero exit is a verification FAIL, not a soft note:
+Run the zero-token deterministic gates (same role as `migration-guard`/`branch-guard` — each exits non-zero on a hard fault), and **record each gate's REAL exit code mechanically** into a normalized gate artifact. The recorder writes the JSON from the gate's exit code — never hand-write it — so a dropped exit code can no longer silently flip a CRITICAL:
 ```bash
+# slop-detect: run at --severity=critical, so exit 1 = CRITICAL slop only.
 node ${QUALIA_BIN}/slop-detect.mjs --severity=critical   # CRITICAL design tells (the slop half)
+node ${QUALIA_BIN}/verify-panel.js gate {N} slop-detect --exit $?
+# dep-verify: a hallucinated/slopsquatted import → blocking CRITICAL (auto-survives, no skeptic).
 node ${QUALIA_BIN}/dep-verify.mjs --severity=critical     # hallucinated/slopsquatted imports (the correctness half)
+node ${QUALIA_BIN}/verify-panel.js gate {N} dep-verify --exit $?
 ```
 `dep-verify` flags any import whose package is BOTH undeclared in `package.json` AND absent from `node_modules` — the exact signature of an AI-invented or typosquatted dependency (the #1 named AI-generated-code security failure mode). It is the correctness/security companion to the design-focused `slop-detect`.
-The phase is PASS only if ALL of these agree: the panel verdict (§3c `verify-panel.js` exit 0), the harness-eval status, the anti-slop scan, and the dependency scan. If any is FAIL/non-zero, mark the phase FAIL. The state machine also refuses PASS when a contract exists but `.planning/evidence/phase-{N}-contract-run.json` is missing/failing, or when the verification report contains `INSUFFICIENT EVIDENCE`.
+If you ran `/qualia-eval` for this phase, record **one gate per failing SUITE** (per-suite, not per-case) so each red suite is its own blocking CRITICAL:
+```bash
+# for each failing eval suite S (eval-runner.js exit != 0 for that suite):
+node ${QUALIA_BIN}/verify-panel.js gate {N} eval-{S} --exit 1 --title "qualia-eval suite {S} FAILED"
+```
+**The phase verdict is one deterministic call** — it replaces the prose "ALL of these agree" AND. `verdict {N}` globs the per-lens panel files (with skeptic survival already applied) AND the recorded `gate-{name}.json` artifacts, folds them with the SAME severity weighting `aggregate()` uses, and exits **0 = PASS / 1 = FAIL**. That exit code IS the phase verdict:
+```bash
+node ${QUALIA_BIN}/verify-panel.js verdict {N} --write   # → .planning/phase-{N}-verdict.{json,md}
+```
+A surviving blocking CRITICAL/HIGH from ANY input (a panel lens, the execution lens, or a recorded gate) → FAIL; non-blocking gate findings (e.g. a soft harness-eval sub-check recorded `--severity MEDIUM`) are recorded and visible but never flip the verdict — the aggregator only makes the existing AND deterministic, it does not start blocking anything that passes today. The state machine still independently refuses PASS when a contract exists but `.planning/evidence/phase-{N}-contract-run.json` is missing/failing, or when the verification report contains `INSUFFICIENT EVIDENCE`.
+**Browser QA is NOT folded into the verdict (deferred).** The §3b browser-QA result stays a prose `## Browser QA` note in the verification file; `verdict {N}` does not ingest it yet (it has no normalized gate artifact). A BLOCKED browser QA remains a note, not a phase failure, exactly as today. Ingesting browser-QA as a gate is a future increment (see ADR-0002).
 ```bash
 node ${QUALIA_BIN}/state.js transition --to verified --phase {N} --verification {pass|fail} --evidence .planning/evals/harness-eval-*.json

package/templates/instructions.md CHANGED Viewed

@@ -25,8 +25,8 @@ Stack: Next.js 16+, React 19, TypeScript, Supabase, Vercel. Voice: Retell + Elev
 `/qualia` — state router tells you the next command.
 <!--QUALIA-HOST claude-->
-<!-- Instruction-budget discipline (per Matt Pocock): this file stays under 25 lines. Steering rules go into discoverable skills, not into the global system prompt. CLI preferences go into hooks. Stack/architecture details are trivially discoverable in package.json/config. -->
+<!-- Instruction-budget discipline (per Matt Pocock): this file stays lean — instruction content kept minimal. Steering rules go into discoverable skills, not into the global system prompt. CLI preferences go into hooks. Stack/architecture details are trivially discoverable in package.json/config. -->
 <!--/QUALIA-HOST-->
 <!--QUALIA-HOST codex-->
-<!-- AGENTS.md mirrors CLAUDE.md for cross-vendor compatibility (Codex, Cursor, Continue, Aider, Devin). Both files stay under 25 lines per Matt Pocock's instruction-budget discipline (LLMs realistically hold 300–500 instructions; bloating this file hamstrings every spawn). -->
+<!-- AGENTS.md mirrors CLAUDE.md for cross-vendor compatibility (Codex, Cursor, Continue, Aider, Devin). Both files stay lean per Matt Pocock's instruction-budget discipline (LLMs realistically hold 300–500 instructions; bloating this file hamstrings every spawn). -->
 <!--/QUALIA-HOST-->