npm - qualia-framework - Versions diffs - 7.2.2 → 7.3.0 - Mend

qualia-framework 7.2.2 → 7.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

package/.claude-plugin/marketplace.json +20 -0
package/.claude-plugin/plugin.json +17 -0
package/AGENTS.md +1 -1
package/CHANGELOG.md +42 -0
package/CLAUDE.md +1 -1
package/README.md +17 -4
package/TROUBLESHOOTING.md +8 -7
package/agents/verifier.md +1 -1
package/bin/agent-status.js +115 -11
package/bin/auto-report.js +15 -7
package/bin/cli.js +173 -4
package/bin/erp-retry.js +92 -8
package/bin/install.js +102 -2
package/bin/qualia-doctor.js +115 -1
package/bin/state.js +102 -13
package/bin/verify-panel.js +409 -0
package/docs/onboarding.html +1 -1
package/hooks/branch-guard.js +19 -5
package/hooks/fawzi-approval-guard.js +16 -3
package/hooks/hooks.json +60 -0
package/hooks/migration-guard.js +143 -66
package/hooks/session-start.js +27 -0
package/package.json +3 -1
package/skills/qualia/SKILL.md +20 -13
package/skills/qualia-build/SKILL.md +20 -9
package/skills/qualia-verify/SKILL.md +43 -5
package/templates/instructions.md +2 -2
package/tests/bin.test.sh +183 -0
package/tests/hooks.test.sh +124 -0
package/tests/install-smoke.test.sh +14 -0
package/tests/instructions.test.sh +2 -2
package/tests/lib.test.sh +149 -0
package/tests/plugin-manifest.test.sh +168 -0
package/tests/refs.test.sh +64 -0
package/tests/run-all.sh +1 -0
package/tests/state.test.sh +174 -0
package/tests/verify-panel.test.sh +236 -0

package/.claude-plugin/marketplace.json ADDED Viewed

@@ -0,0 +1,20 @@
+{
+  "$schema": "https://json.schemastore.org/claude-code-marketplace.json",
+  "name": "qualia",
+  "owner": {
+    "name": "Qualia Solutions",
+    "email": "hello@qualia.solutions"
+  },
+  "description": "Qualia Solutions workflow framework for Claude Code and Codex — plan, build, verify, ship.",
+  "plugins": [
+    {
+      "name": "qualia-framework",
+      "source": "./",
+      "description": "Claude Code and Codex workflow framework by Qualia Solutions. Plan, build, verify, ship.",
+      "version": "7.3.0",
+      "author": { "name": "Qualia Solutions", "email": "hello@qualia.solutions" },
+      "category": "workflow",
+      "keywords": ["claude-code", "workflow", "qualia", "agents", "automation"]
+    }
+  ]
+}

package/.claude-plugin/plugin.json ADDED Viewed

@@ -0,0 +1,17 @@
+{
+  "$schema": "https://json.schemastore.org/claude-code-plugin-manifest.json",
+  "name": "qualia-framework",
+  "displayName": "Qualia Framework",
+  "description": "Claude Code and Codex workflow framework by Qualia Solutions. Plan, build, verify, ship.",
+  "version": "7.3.0",
+  "author": {
+    "name": "Qualia Solutions",
+    "email": "hello@qualia.solutions",
+    "url": "https://github.com/Qualiasolutions"
+  },
+  "homepage": "https://github.com/Qualiasolutions/qualia-framework#readme",
+  "repository": "https://github.com/Qualiasolutions/qualia-framework",
+  "license": "MIT",
+  "keywords": ["claude-code", "claude", "ai", "framework", "workflow", "qualia", "agents", "automation"],
+  "hooks": "./hooks/hooks.json"
+}

package/AGENTS.md CHANGED Viewed

@@ -26,4 +26,4 @@ Stack: Next.js 16+, React 19, TypeScript, Supabase, Vercel. Voice: Retell + Elev
 ## Lost?
 `/qualia` — state router tells you the next command.
-<!-- AGENTS.md mirrors CLAUDE.md for cross-vendor compatibility (Codex, Cursor, Continue, Aider, Devin). Both files stay under 25 lines per Matt Pocock's instruction-budget discipline (LLMs realistically hold 300–500 instructions; bloating this file hamstrings every spawn). -->
+<!-- AGENTS.md mirrors CLAUDE.md for cross-vendor compatibility (Codex, Cursor, Continue, Aider, Devin). Both files stay lean per Matt Pocock's instruction-budget discipline (LLMs realistically hold 300–500 instructions; bloating this file hamstrings every spawn). -->

package/CHANGELOG.md CHANGED Viewed

@@ -8,6 +8,48 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 > Note: git tags for historical versions were not retained; commit references are approximate
 > and dates reflect commit history rather than npm publish timestamps.
+## [7.3.0] - 2026-06-30 (x10 pass — deterministic verify, update safety, closed bypasses)
+A focused improvement pass from a full framework audit. Every change was
+adversarially verified and the full suite (31 shell suites + node tests) is green.
+Three ADRs (`.planning/decisions/ADR-0001..0003`) record the additive decisions.
+### Security
+- **migration-guard Bash bypass closed** — the guard fired only on `Edit|Write`,
+  so destructive SQL via heredoc / `psql -c/-f` / `supabase db execute` skipped the
+  destructive-SQL + RLS check. A Bash-content scan path is now wired on the Bash
+  matcher for **both Claude and Codex**; the hook self-gates on inline SQL.
+- **Fail-loud role resolution** — `branch-guard` and `fawzi-approval-guard` now emit
+  a stderr diagnostic when role resolution fails instead of silently allowing.
+### Added
+- **Bare `qualia-framework` command** — the installer self-links a PATH shim
+  (`~/.local/bin`), so `qualia-framework doctor` works without `npx` or `npm i -g`.
+- **`qualia-framework rollback`** — `update` now snapshots the framework-owned
+  subtrees before updating; `rollback` restores the previous version (ADR-0003).
+- **Claude Code plugin + marketplace** packaging (additive; npx stays primary) —
+  `/plugin marketplace add Qualiasolutions/qualia-framework` (ADR-0001).
+- **Deterministic verdict aggregator** — one `verify-panel.js verdict` folds every
+  machine-JSON gate into a single PASS/FAIL, replacing the orchestrator-LLM prose
+  combine; no-regression severity policy (ADR-0002).
+- **Execution-grounded verify lens** — verify now runs tsc/tests/smoke, not just greps.
+- **Scope-drift gate enforced** in `state.js` (was prose-only in qualia-build).
+- **Doctor version-reconciliation** + doc/code **coherence gate**.
+- **Token-budget telemetry** on agent-status DONE records (per-wave burn vs budget).
+### Fixed
+- **Routing/lifecycle** — `lifecycle` is now threaded at every `nextCommand()` call
+  site; the `/qualia` skill's drifted status→command table collapsed to "surface
+  `next_command` verbatim"; unknown status no longer self-recommends `/qualia`.
+- **Deterministic skeptic tally** — votes recorded mechanically, not hand-edited.
+- **Barrier liveness** — `agent-status barrier --timeout` fails a stalled wave
+  instead of holding forever.
+- **ERP queue** — `give_up` items no longer starve the drain cap; length cap + TTL
+  prune added; non-empty queue surfaced at session start.
+- **auto-report** writes its dedupe marker before the POST (closes a double-post window).
+- **Version single-source** — install writes one `PKG_VERSION` to every store; doctor
+  flags drift.
 ## [7.2.2] - 2026-06-27 (install UX — masked codes, clean references, update-on-/qualia)
 ### Fixed

package/CLAUDE.md CHANGED Viewed

@@ -26,4 +26,4 @@ Stack: Next.js 16+, React 19, TypeScript, Supabase, Vercel. Voice: Retell + Elev
 ## Lost?
 `/qualia` — state router tells you the next command.
-<!-- Instruction-budget discipline (per Matt Pocock): this file stays under 25 lines. Steering rules go into discoverable skills, not into the global system prompt. CLI preferences go into hooks. Stack/architecture details are trivially discoverable in package.json/config. -->
+<!-- Instruction-budget discipline (per Matt Pocock): this file stays lean — instruction content kept minimal. Steering rules go into discoverable skills, not into the global system prompt. CLI preferences go into hooks. Stack/architecture details are trivially discoverable in package.json/config. -->

package/README.md CHANGED Viewed

@@ -30,16 +30,29 @@ Enter your team code when prompted. Get your code from Fawzi.
 > **Why `@latest`?** npx caches packages at `~/.npm/_npx/` and has no time-based TTL — `npx qualia-framework install` (without `@latest`) will silently run whatever version you happened to fetch the first time, even if a newer one shipped. Always pin `@latest` when installing or upgrading. If a stale cache still bites you: `npx clear-npx-cache` then re-run.
-**Other commands:**
+> **Bare command:** the installer self-links a `qualia-framework` shim into `~/.local/bin` (no `npm i -g` needed — that prefix varies per machine and forks a second, drifting copy). After install you can drop the `npx` prefix: `qualia-framework doctor`. If the installer warns `~/.local/bin` isn't on your PATH, run the one-liner it prints, then re-open your shell. `update` re-points the shim automatically.
+**Other commands** (with the shim, drop the `npx qualia-framework@latest` prefix — just `qualia-framework <cmd>`):
 ```bash
 npx qualia-framework@latest version    # Check installed version + updates
-npx qualia-framework@latest update     # Update to latest (remembers your code)
-npx qualia-framework@latest uninstall  # Clean removal from installed Claude/Codex homes
+npx qualia-framework@latest update     # Update to latest (remembers your code; re-links the shim)
+npx qualia-framework@latest uninstall  # Clean removal from installed Claude/Codex homes (+ the shim)
 npx qualia-framework@latest team list  # Show team members
 npx qualia-framework@latest team add   # Add a team member
 npx qualia-framework@latest traces     # View recent hook telemetry
 ```
+### Install as a plugin (experimental)
+The npx installer above is the primary, supported path. A second, **experimental** Claude Code plugin distribution is also available — it ships the same hook gates (branch-guard, secret-guard, migration-guard, pre-deploy-gate, …) wired through `${CLAUDE_PLUGIN_ROOT}`:
+```
+/plugin marketplace add Qualiasolutions/qualia-framework
+/plugin install qualia-framework@qualia
+```
+> **Experimental:** the plugin path is layered *alongside* the npx install, not a replacement. Skill bash blocks still resolve their helpers via the install-time `${QUALIA_BIN}` path, so a pure plugin install (without the npx installer also having run) gets working hooks, but its skills still assume `~/.claude/bin`. Reconciling that path model is deferred — see [`.planning/decisions/ADR-0001-plugin-packaging.md`](.planning/decisions/ADR-0001-plugin-packaging.md). For a fully self-contained install today, use the npx flow above.
 ## Usage
 Open Claude Code or Codex in any project directory.
@@ -141,7 +154,7 @@ Project
 - **9 agents** (each runs in fresh context): planner, builder, verifier, qa-browser, researcher, research-synthesizer, roadmapper, plan-checker, visual-evaluator
 - **16 hooks** (pure Node.js, cross-platform): session-start, auto-update, git-guardrails, branch-guard, pre-push tracking stamp, migration-guard, pre-deploy-gate, stop-session-log, fawzi-approval-guard, vercel-account-guard, env-empty-guard, supabase-destructive-guard, secret-guard, task-write-guard, pre-compact, usage-capture
 - **12 installed rules** (`rules/`): constitution, grounding, security, access, infrastructure, deployment, speed, architecture, trust-boundary, codex-goal, one-opinion, and always-on command-output transparency.
-- **7 lazy-loaded design files** (`qualia-design/`): design-laws, design-brand, design-product, design-rubric, design-reference, frontend, graphics — `Read` on demand by design-aware skills/agents only.
+- **8 lazy-loaded design files** (`qualia-design/`): design-laws, design-brand, design-product, design-dials, design-rubric, design-reference, frontend, graphics — `Read` on demand by design-aware skills/agents only.
 - **25 template files**: project.md, journey.md, plan.md (story-file format), state.md, DESIGN.md, CONTEXT.md (domain glossary), work-packet.md (ERP-approved session context), decisions/ADR-template.md, tracking.json (with `milestone_name` + `milestones[]`), requirements.md (multi-milestone), roadmap.md (current milestone only), phase-context.md, 4 project-type templates (website, ai-agent, voice-agent, mobile-app), 5 research-project templates (STACK, FEATURES, ARCHITECTURE, PITFALLS, SUMMARY), knowledge templates, help.html
 - **Planning hygiene guard**: `planning-hygiene.js` scans `.planning/` for loose reports/assets and can organize them under `reports/`, `assets/`, `design/`, or `archive/loose/` only with explicit `--write`
 - **1 reference** — questioning.md methodology for deep project initialization

package/TROUBLESHOOTING.md CHANGED Viewed

@@ -92,10 +92,11 @@ node ~/.claude/bin/prune-deprecated.js ~/.codex
 **Cause:** You stacked install methods. The most common broken setup is `/plugin install` first, then `npx qualia-framework install`. Each path tries to own the same files.
 **Fix:**
 ```bash
-# 1. Pick ONE path. Recommended: the plugin (for v2.1+ Claude Code).
-# 2. Uninstall the other:
-npx qualia-framework@latest uninstall   # if you went plugin-first, this clears npm-installed copies
-# 3. Restart Claude Code so it reloads the plugin manifest cleanly.
+# 1. Pick ONE path. Recommended: the npx installer (fully self-contained;
+#    the /plugin path is experimental and layered alongside it — see README).
+# 2. Re-run the canonical installer to reclaim ownership of the files:
+npx qualia-framework@latest install
+# 3. Restart Claude Code so it reloads skills/hooks cleanly.
 ```
 ### `Run: npx qualia-framework@latest install`
@@ -122,12 +123,12 @@ npx qualia-framework@latest uninstall   # if you went plugin-first, this clears
 **What it means:** A direct caller into the retry queue is missing a required field. You generally don't see this — `/qualia-report` builds the payload for you.
 **Fix:** Use `/qualia-report` rather than calling `erp-retry.js` directly.
-### `qualia-framework report` queued instead of sent
+### `/qualia-report` queued instead of sent
 **What it means:** ERP was unreachable. The payload is now in `bin/erp-retry.js`'s persistent queue.
 **Fix:**
 ```bash
-qualia-framework erp-status     # see queue depth
-qualia-framework erp-flush      # retry sending now
+qualia-framework erp-flush show     # see queue depth
+qualia-framework erp-flush          # retry sending now
 ```
 Queue auto-drains on next session start when ERP is reachable.

package/agents/verifier.md CHANGED Viewed

@@ -23,7 +23,7 @@ You verify that a phase achieved its GOAL, not just completed its TASKS.
 LLMs are unreliable narrators — they prioritize confidence over accuracy and hallucinate when the evidence isn't in front of them. This file overrides that default.
-1. **Tool-use is mandatory.** Before stating that a file, function, route, import, or behavior exists, run `Read`, `Grep`, or `Bash` and put the result in your scratchpad. No claim from memory.
+1. **Tool-use is mandatory.** Before stating that a file, function, route, import, or behavior exists, run `Read`, `Grep`, or `Bash` and put the result in your scratchpad. No claim from memory. **Execution is now its own lens, not just grep:** `/qualia-verify` runs `verify-panel.js execution {N}` (tsc / test / build) and folds a red result into the panel as a CRITICAL — a grep-satisfying symbol that doesn't compile or whose tests fail is NOT a pass. Grep tells you a thing EXISTS; the execution lens tells you it RUNS.
 2. **Every finding carries `file:line — "quoted snippet"`.** Format exactly as in `rules/grounding.md`. Findings without this format are discarded by the orchestrator — they will not appear in the final report regardless of how confidently you wrote them.
 3. **No hedging language.** "It seems", "appears to", "probably", "might", "likely" — banned. Either you ran a tool and have evidence (cite), or you did not (write `INSUFFICIENT EVIDENCE: searched {files} with {commands}`).
 4. **Score with criterion citation.** Every 1–5 score in the design rubric needs evidence on the very next line. Severity (CRITICAL/HIGH/MEDIUM/LOW) requires quoting the matching row from `rules/grounding.md` Severity Rubric.

package/bin/agent-status.js CHANGED Viewed

@@ -51,6 +51,12 @@ function writeStatus(root, entry) {
     note: entry.note || null,
     phase: entry.phase != null ? Number(entry.phase) : null,
     wave: entry.wave != null ? Number(entry.wave) : null,
+    // Burn-vs-budget telemetry (codex-goal discipline): a task may report the
+    // tokens it spent vs the budget it was given. Both OPTIONAL — older records
+    // and writers that omit them stay null, and every reader/rollup tolerates
+    // the absence (backward compatible).
+    tokens_used: entry.tokens_used != null && entry.tokens_used !== "" ? Number(entry.tokens_used) : null,
+    token_budget: entry.token_budget != null && entry.token_budget !== "" ? Number(entry.token_budget) : null,
     updated_at: entry.now || new Date().toISOString(),
   };
   fs.writeFileSync(statusFile(root, entry.task), JSON.stringify(record, null, 2) + "\n");
@@ -107,19 +113,51 @@ function expectedTaskIds(contract, wave) {
 // derived waves needn't match the contract's declared wave numbers), else the
 // contract task ids optionally scoped to opts.wave. ok ⇔ every expected task is
 // DONE. Anything else (missing/running/blocked/partial) holds the barrier.
+//
+// Timeout (opts.timeout, seconds): a wall-clock deadline so a crashed builder
+// that never wrote terminal status can't stall the wave forever. When set, a
+// RUNNING task whose updated_at is older than `timeout` seconds, or a MISSING
+// task (a builder that returned without writing any status), is reclassified
+// STALE and the barrier FAILS (distinct from a transient HOLD) so the wave can
+// route to failure handling instead of spinning. updated_at is compared against
+// opts.now (defaults to wall-clock now) — both ISO strings, injectable for tests.
+// With no timeout, behavior is unchanged: RUNNING/MISSING just hold the barrier.
 function barrier(root, contract, opts = {}) {
   const expected = Array.isArray(opts.tasks) && opts.tasks.length
     ? opts.tasks
     : expectedTaskIds(contract, opts.wave);
+  const timeout = opts.timeout != null && opts.timeout !== "" ? Number(opts.timeout) : null;
+  const hasTimeout = timeout != null && Number.isFinite(timeout) && timeout > 0;
+  const nowMs = hasTimeout ? Date.parse(opts.now || new Date().toISOString()) : null;
   const byTask = new Map(listStatuses(root).map((s) => [s.task, s]));
   const tasks = expected.map((id) => {
     const s = byTask.get(id);
-    return { task: id, status: s ? s.status : "MISSING", commit: s ? s.commit : null, note: s ? s.note : null };
+    let status = s ? s.status : "MISSING";
+    let age = null;
+    if (hasTimeout) {
+      // A builder that never wrote status (MISSING) past the deadline is a
+      // crash, not a pending start: fail it. A RUNNING entry is stale once its
+      // last heartbeat (updated_at) predates the deadline window.
+      if (status === "MISSING") {
+        status = "STALE";
+      } else if (status === "RUNNING") {
+        const t = s && s.updated_at ? Date.parse(s.updated_at) : NaN;
+        age = Number.isFinite(t) ? Math.round((nowMs - t) / 1000) : null;
+        if (!Number.isFinite(t) || nowMs - t > timeout * 1000) status = "STALE";
+      }
+    }
+    return { task: id, status, commit: s ? s.commit : null, note: s ? s.note : null, age_sec: age };
   });
   const count = (st) => tasks.filter((t) => t.status === st).length;
   const done = count("DONE");
+  const stale = count("STALE");
+  // FAIL = a terminal-bad outcome that should stop the wave: blocked/partial
+  // (a builder reported failure) or stale (a builder vanished past timeout).
+  const failed = (stale + count("BLOCKED") + count("PARTIAL")) > 0;
   return {
     ok: expected.length > 0 && done === expected.length,
+    failed,
+    timeout: hasTimeout ? timeout : null,
     wave: opts.wave != null ? Number(opts.wave) : null,
     expected: expected.length,
     done,
@@ -127,10 +165,41 @@ function barrier(root, contract, opts = {}) {
     partial: count("PARTIAL"),
     running: count("RUNNING"),
     missing: count("MISSING"),
+    stale,
     tasks,
   };
 }
+// Burn-vs-budget rollup: sum tokens_used vs token_budget across persisted
+// statuses, optionally scoped to opts.wave. A wave can thus surface its total
+// burn against its total budget — the cheap per-wave equivalent of the
+// codex-goal objective+budget for a unit of work. tokens_used/token_budget are
+// OPTIONAL on a record; records that omit a field contribute 0 to that sum and
+// are still counted as tasks, so an absent field never breaks the rollup.
+function budget(root, opts = {}) {
+  const wave = opts.wave != null && opts.wave !== "" ? Number(opts.wave) : null;
+  const all = listStatuses(root).filter((s) => wave == null || Number(s.wave) === wave);
+  let tokensUsed = 0;
+  let tokenBudget = 0;
+  const tasks = all.map((s) => {
+    const used = Number.isFinite(Number(s.tokens_used)) ? Number(s.tokens_used) : null;
+    const budgeted = Number.isFinite(Number(s.token_budget)) ? Number(s.token_budget) : null;
+    if (used != null) tokensUsed += used;
+    if (budgeted != null) tokenBudget += budgeted;
+    return { task: s.task, status: s.status, wave: s.wave != null ? Number(s.wave) : null, tokens_used: used, token_budget: budgeted };
+  });
+  return {
+    wave,
+    tasks: tasks.length,
+    tokens_used: tokensUsed,
+    token_budget: tokenBudget,
+    // remaining/over_budget only meaningful when a budget was reported.
+    remaining: tokenBudget > 0 ? tokenBudget - tokensUsed : null,
+    over_budget: tokenBudget > 0 ? tokensUsed > tokenBudget : null,
+    details: tasks,
+  };
+}
 // ── CLI ───────────────────────────────────────────────────────────────
 function parseFlags(argv, start) {
   const flags = { _: [] };
@@ -149,6 +218,14 @@ function parseFlags(argv, start) {
     else if (a.startsWith("--note=")) flags.note = a.slice(7);
     else if (a === "--phase") flags.phase = argv[++i];
     else if (a.startsWith("--phase=")) flags.phase = a.slice(8);
+    else if (a === "--timeout") flags.timeout = argv[++i];
+    else if (a.startsWith("--timeout=")) flags.timeout = a.slice(10);
+    else if (a === "--now") flags.now = argv[++i];
+    else if (a.startsWith("--now=")) flags.now = a.slice(6);
+    else if (a === "--tokens") flags.tokens = argv[++i];
+    else if (a.startsWith("--tokens=")) flags.tokens = a.slice(9);
+    else if (a === "--budget") flags.budget = argv[++i];
+    else if (a.startsWith("--budget=")) flags.budget = a.slice(9);
     else flags._.push(a);
   }
   return flags;
@@ -157,15 +234,17 @@ function parseFlags(argv, start) {
 function usage() {
   console.error([
     "Usage:",
-    "  agent-status.js write <task> <status> [--commit H] [--note N] [--phase P] [--wave W] [--cwd DIR]",
+    "  agent-status.js write <task> <status> [--commit H] [--note N] [--phase P] [--wave W] [--tokens N] [--budget N] [--cwd DIR]",
     "  agent-status.js read <task> [--cwd DIR] [--json]",
     "  agent-status.js list [--cwd DIR] [--json]",
-    "  agent-status.js barrier <contract.json> [--wave W] [--cwd DIR] [--json]",
-    "  agent-status.js barrier --tasks T1,T2 [--cwd DIR] [--json]   (batch gate; no contract needed)",
+    "  agent-status.js barrier <contract.json> [--wave W] [--timeout SEC] [--cwd DIR] [--json]",
+    "  agent-status.js barrier --tasks T1,T2 [--timeout SEC] [--cwd DIR] [--json]   (batch gate; no contract needed)",
+    "  agent-status.js budget [--wave W] [--cwd DIR] [--json]   (per-wave token burn vs budget rollup)",
     "  agent-status.js clear [--cwd DIR]",
     "",
     "status ∈ RUNNING | DONE | BLOCKED | PARTIAL",
-    "barrier exits 0 ⇔ every expected task is DONE.",
+    "barrier exit 0 ⇔ every expected task is DONE; exit 1 ⇔ still HOLDING (running/missing);",
+    "exit 3 ⇔ FAIL — a task is BLOCKED/PARTIAL, or (with --timeout) RUNNING-stale/MISSING past deadline.",
   ].join("\n"));
 }
@@ -181,6 +260,7 @@ function main(argv) {
     try {
       const rec = writeStatus(root, {
         task, status, commit: flags.commit, note: flags.note, phase: flags.phase, wave: flags.wave,
+        tokens_used: flags.tokens, token_budget: flags.budget,
       });
       if (flags.json) console.log(JSON.stringify(rec));
       else console.log(`${rec.task} ${rec.status}${rec.commit ? ` @ ${rec.commit}` : ""}`);
@@ -224,17 +304,40 @@ function main(argv) {
       }
       contract = loaded.contract;
     }
-    const result = barrier(root, contract, { wave: flags.wave, tasks: taskList });
-    if (flags.json) { console.log(JSON.stringify(result, null, 2)); return result.ok ? 0 : 1; }
+    const result = barrier(root, contract, { wave: flags.wave, tasks: taskList, timeout: flags.timeout, now: flags.now });
+    // Exit codes: 0 PASS · 3 FAIL (terminal — blocked/partial/stale; route to
+    // failure handling, do not re-poll) · 1 HOLD (transient — running/missing
+    // within timeout; safe to poll again).
+    // The distinct FAIL code is opt-in via --timeout — without it, every
+    // non-DONE outcome stays exit 1 (backward compatible with pollers that only
+    // distinguish 0 from non-0).
+    const code = result.ok ? 0 : (result.timeout != null && result.failed) ? 3 : 1;
+    if (flags.json) { console.log(JSON.stringify(result, null, 2)); return code; }
     const scope = taskList ? `batch ${taskList.join(",")}` : (result.wave != null ? `wave ${result.wave}` : "phase");
     if (result.ok) {
       console.log(`BARRIER PASS (${scope}): ${result.done}/${result.expected} DONE`);
     } else {
-      console.error(`BARRIER HOLD (${scope}): ${result.done}/${result.expected} DONE` +
-        ` (running=${result.running} blocked=${result.blocked} partial=${result.partial} missing=${result.missing})`);
-      for (const t of result.tasks) if (t.status !== "DONE") console.error(`  - ${t.task}: ${t.status}${t.note ? ` — ${t.note}` : ""}`);
+      const verb = result.failed ? "FAIL" : "HOLD";
+      console.error(`BARRIER ${verb} (${scope}): ${result.done}/${result.expected} DONE` +
+        ` (running=${result.running} blocked=${result.blocked} partial=${result.partial}` +
+        ` missing=${result.missing} stale=${result.stale})` +
+        (result.timeout != null ? ` [timeout=${result.timeout}s]` : ""));
+      for (const t of result.tasks) if (t.status !== "DONE") {
+        const aged = t.age_sec != null ? ` (${t.age_sec}s)` : "";
+        console.error(`  - ${t.task}: ${t.status}${aged}${t.note ? ` — ${t.note}` : ""}`);
+      }
     }
-    return result.ok ? 0 : 1;
+    return code;
+  }
+  if (cmd === "budget") {
+    const result = budget(root, { wave: flags.wave });
+    if (flags.json) { console.log(JSON.stringify(result, null, 2)); return 0; }
+    const scope = result.wave != null ? `wave ${result.wave}` : "all";
+    const pct = result.token_budget > 0 ? ` (${Math.round((result.tokens_used / result.token_budget) * 100)}%)` : "";
+    const over = result.over_budget ? " OVER BUDGET" : "";
+    console.log(`BUDGET (${scope}): ${result.tokens_used}/${result.token_budget} tokens${pct} across ${result.tasks} task(s)${over}`);
+    return 0;
   }
   if (cmd === "clear") {
@@ -257,6 +360,7 @@ module.exports = {
   buildActive,
   expectedTaskIds,
   barrier,
+  budget,
 };
 if (require.main === module) {

package/bin/auto-report.js CHANGED Viewed

@@ -104,11 +104,6 @@ async function maybeAutoReport({ cwd = process.cwd(), home = os.homedir(), env =
     const body = JSON.stringify(payload);
     const url = erpUrl(cfg);
-    const result = await postOnce(
-      { url, payload: body, idempotency_key: idempotencyKey },
-      apiKey,
-    );
     const writeMarker = (extra) => {
       try {
         fs.writeFileSync(
@@ -119,14 +114,27 @@ async function maybeAutoReport({ cwd = process.cwd(), home = os.homedir(), env =
       } catch {}
     };
+    // Close the double-post window: write the dedupe marker BEFORE the POST.
+    // A crash between this and the network call can no longer cause a re-post —
+    // the next run sees marker.last === unit and short-circuits at Guard 3. The
+    // failure path below still guarantees eventual delivery by enqueuing the
+    // same client_report_id, so the early marker never drops an unsent report.
+    writeMarker({ posting: true });
+    const result = await postOnce(
+      { url, payload: body, idempotency_key: idempotencyKey },
+      apiKey,
+    );
     if (result.code === "200") {
       writeMarker({ posted: true });
       return { posted: clientReportId, unit };
     }
     // Any non-200 → enqueue for the retry queue (session-start drains it).
-    // Mark the unit so we don't re-allocate a new id on the next turn; the
-    // queued item carries this client_report_id and the ERP dedupes on it.
+    // The marker is already written, so we never re-allocate a new id on the
+    // next turn; the queued item carries this client_report_id and the ERP
+    // dedupes on it.
     try {
       enqueue({
         client_report_id: clientReportId,