qualia-framework 7.2.2 → 7.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +20 -0
- package/.claude-plugin/plugin.json +17 -0
- package/AGENTS.md +1 -1
- package/CHANGELOG.md +42 -0
- package/CLAUDE.md +1 -1
- package/README.md +17 -4
- package/TROUBLESHOOTING.md +8 -7
- package/agents/verifier.md +1 -1
- package/bin/agent-status.js +115 -11
- package/bin/auto-report.js +15 -7
- package/bin/cli.js +173 -4
- package/bin/erp-retry.js +92 -8
- package/bin/install.js +102 -2
- package/bin/qualia-doctor.js +115 -1
- package/bin/state.js +102 -13
- package/bin/verify-panel.js +409 -0
- package/docs/onboarding.html +1 -1
- package/hooks/branch-guard.js +19 -5
- package/hooks/fawzi-approval-guard.js +16 -3
- package/hooks/hooks.json +60 -0
- package/hooks/migration-guard.js +143 -66
- package/hooks/session-start.js +27 -0
- package/package.json +3 -1
- package/skills/qualia/SKILL.md +20 -13
- package/skills/qualia-build/SKILL.md +20 -9
- package/skills/qualia-verify/SKILL.md +43 -5
- package/templates/instructions.md +2 -2
- package/tests/bin.test.sh +183 -0
- package/tests/hooks.test.sh +124 -0
- package/tests/install-smoke.test.sh +14 -0
- package/tests/instructions.test.sh +2 -2
- package/tests/lib.test.sh +149 -0
- package/tests/plugin-manifest.test.sh +168 -0
- package/tests/refs.test.sh +64 -0
- package/tests/run-all.sh +1 -0
- package/tests/state.test.sh +174 -0
- package/tests/verify-panel.test.sh +236 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json.schemastore.org/claude-code-marketplace.json",
|
|
3
|
+
"name": "qualia",
|
|
4
|
+
"owner": {
|
|
5
|
+
"name": "Qualia Solutions",
|
|
6
|
+
"email": "hello@qualia.solutions"
|
|
7
|
+
},
|
|
8
|
+
"description": "Qualia Solutions workflow framework for Claude Code and Codex — plan, build, verify, ship.",
|
|
9
|
+
"plugins": [
|
|
10
|
+
{
|
|
11
|
+
"name": "qualia-framework",
|
|
12
|
+
"source": "./",
|
|
13
|
+
"description": "Claude Code and Codex workflow framework by Qualia Solutions. Plan, build, verify, ship.",
|
|
14
|
+
"version": "7.3.0",
|
|
15
|
+
"author": { "name": "Qualia Solutions", "email": "hello@qualia.solutions" },
|
|
16
|
+
"category": "workflow",
|
|
17
|
+
"keywords": ["claude-code", "workflow", "qualia", "agents", "automation"]
|
|
18
|
+
}
|
|
19
|
+
]
|
|
20
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json.schemastore.org/claude-code-plugin-manifest.json",
|
|
3
|
+
"name": "qualia-framework",
|
|
4
|
+
"displayName": "Qualia Framework",
|
|
5
|
+
"description": "Claude Code and Codex workflow framework by Qualia Solutions. Plan, build, verify, ship.",
|
|
6
|
+
"version": "7.3.0",
|
|
7
|
+
"author": {
|
|
8
|
+
"name": "Qualia Solutions",
|
|
9
|
+
"email": "hello@qualia.solutions",
|
|
10
|
+
"url": "https://github.com/Qualiasolutions"
|
|
11
|
+
},
|
|
12
|
+
"homepage": "https://github.com/Qualiasolutions/qualia-framework#readme",
|
|
13
|
+
"repository": "https://github.com/Qualiasolutions/qualia-framework",
|
|
14
|
+
"license": "MIT",
|
|
15
|
+
"keywords": ["claude-code", "claude", "ai", "framework", "workflow", "qualia", "agents", "automation"],
|
|
16
|
+
"hooks": "./hooks/hooks.json"
|
|
17
|
+
}
|
package/AGENTS.md
CHANGED
|
@@ -26,4 +26,4 @@ Stack: Next.js 16+, React 19, TypeScript, Supabase, Vercel. Voice: Retell + Elev
|
|
|
26
26
|
## Lost?
|
|
27
27
|
`/qualia` — state router tells you the next command.
|
|
28
28
|
|
|
29
|
-
<!-- AGENTS.md mirrors CLAUDE.md for cross-vendor compatibility (Codex, Cursor, Continue, Aider, Devin). Both files stay
|
|
29
|
+
<!-- AGENTS.md mirrors CLAUDE.md for cross-vendor compatibility (Codex, Cursor, Continue, Aider, Devin). Both files stay lean per Matt Pocock's instruction-budget discipline (LLMs realistically hold 300–500 instructions; bloating this file hamstrings every spawn). -->
|
package/CHANGELOG.md
CHANGED
|
@@ -8,6 +8,48 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
8
8
|
> Note: git tags for historical versions were not retained; commit references are approximate
|
|
9
9
|
> and dates reflect commit history rather than npm publish timestamps.
|
|
10
10
|
|
|
11
|
+
## [7.3.0] - 2026-06-30 (x10 pass — deterministic verify, update safety, closed bypasses)
|
|
12
|
+
|
|
13
|
+
A focused improvement pass from a full framework audit. Every change was
|
|
14
|
+
adversarially verified and the full suite (31 shell suites + node tests) is green.
|
|
15
|
+
Three ADRs (`.planning/decisions/ADR-0001..0003`) record the additive decisions.
|
|
16
|
+
|
|
17
|
+
### Security
|
|
18
|
+
- **migration-guard Bash bypass closed** — the guard fired only on `Edit|Write`,
|
|
19
|
+
so destructive SQL via heredoc / `psql -c/-f` / `supabase db execute` skipped the
|
|
20
|
+
destructive-SQL + RLS check. A Bash-content scan path is now wired on the Bash
|
|
21
|
+
matcher for **both Claude and Codex**; the hook self-gates on inline SQL.
|
|
22
|
+
- **Fail-loud role resolution** — `branch-guard` and `fawzi-approval-guard` now emit
|
|
23
|
+
a stderr diagnostic when role resolution fails instead of silently allowing.
|
|
24
|
+
|
|
25
|
+
### Added
|
|
26
|
+
- **Bare `qualia-framework` command** — the installer self-links a PATH shim
|
|
27
|
+
(`~/.local/bin`), so `qualia-framework doctor` works without `npx` or `npm i -g`.
|
|
28
|
+
- **`qualia-framework rollback`** — `update` now snapshots the framework-owned
|
|
29
|
+
subtrees before updating; `rollback` restores the previous version (ADR-0003).
|
|
30
|
+
- **Claude Code plugin + marketplace** packaging (additive; npx stays primary) —
|
|
31
|
+
`/plugin marketplace add Qualiasolutions/qualia-framework` (ADR-0001).
|
|
32
|
+
- **Deterministic verdict aggregator** — one `verify-panel.js verdict` folds every
|
|
33
|
+
machine-JSON gate into a single PASS/FAIL, replacing the orchestrator-LLM prose
|
|
34
|
+
combine; no-regression severity policy (ADR-0002).
|
|
35
|
+
- **Execution-grounded verify lens** — verify now runs tsc/tests/smoke, not just greps.
|
|
36
|
+
- **Scope-drift gate enforced** in `state.js` (was prose-only in qualia-build).
|
|
37
|
+
- **Doctor version-reconciliation** + doc/code **coherence gate**.
|
|
38
|
+
- **Token-budget telemetry** on agent-status DONE records (per-wave burn vs budget).
|
|
39
|
+
|
|
40
|
+
### Fixed
|
|
41
|
+
- **Routing/lifecycle** — `lifecycle` is now threaded at every `nextCommand()` call
|
|
42
|
+
site; the `/qualia` skill's drifted status→command table collapsed to "surface
|
|
43
|
+
`next_command` verbatim"; unknown status no longer self-recommends `/qualia`.
|
|
44
|
+
- **Deterministic skeptic tally** — votes recorded mechanically, not hand-edited.
|
|
45
|
+
- **Barrier liveness** — `agent-status barrier --timeout` fails a stalled wave
|
|
46
|
+
instead of holding forever.
|
|
47
|
+
- **ERP queue** — `give_up` items no longer starve the drain cap; length cap + TTL
|
|
48
|
+
prune added; non-empty queue surfaced at session start.
|
|
49
|
+
- **auto-report** writes its dedupe marker before the POST (closes a double-post window).
|
|
50
|
+
- **Version single-source** — install writes one `PKG_VERSION` to every store; doctor
|
|
51
|
+
flags drift.
|
|
52
|
+
|
|
11
53
|
## [7.2.2] - 2026-06-27 (install UX — masked codes, clean references, update-on-/qualia)
|
|
12
54
|
|
|
13
55
|
### Fixed
|
package/CLAUDE.md
CHANGED
|
@@ -26,4 +26,4 @@ Stack: Next.js 16+, React 19, TypeScript, Supabase, Vercel. Voice: Retell + Elev
|
|
|
26
26
|
## Lost?
|
|
27
27
|
`/qualia` — state router tells you the next command.
|
|
28
28
|
|
|
29
|
-
<!-- Instruction-budget discipline (per Matt Pocock): this file stays
|
|
29
|
+
<!-- Instruction-budget discipline (per Matt Pocock): this file stays lean — instruction content kept minimal. Steering rules go into discoverable skills, not into the global system prompt. CLI preferences go into hooks. Stack/architecture details are trivially discoverable in package.json/config. -->
|
package/README.md
CHANGED
|
@@ -30,16 +30,29 @@ Enter your team code when prompted. Get your code from Fawzi.
|
|
|
30
30
|
|
|
31
31
|
> **Why `@latest`?** npx caches packages at `~/.npm/_npx/` and has no time-based TTL — `npx qualia-framework install` (without `@latest`) will silently run whatever version you happened to fetch the first time, even if a newer one shipped. Always pin `@latest` when installing or upgrading. If a stale cache still bites you: `npx clear-npx-cache` then re-run.
|
|
32
32
|
|
|
33
|
-
**
|
|
33
|
+
> **Bare command:** the installer self-links a `qualia-framework` shim into `~/.local/bin` (no `npm i -g` needed — the global npm prefix varies per machine, and a global install forks a second, drifting copy). After install you can drop the `npx` prefix: `qualia-framework doctor`. If the installer warns `~/.local/bin` isn't on your PATH, run the one-liner it prints, then re-open your shell. `update` re-points the shim automatically.
|
|
34
|
+
|
|
35
|
+
**Other commands** (with the shim, drop the `npx qualia-framework@latest` prefix — just `qualia-framework <cmd>`):
|
|
34
36
|
```bash
|
|
35
37
|
npx qualia-framework@latest version # Check installed version + updates
|
|
36
|
-
npx qualia-framework@latest update # Update to latest (remembers your code)
|
|
37
|
-
npx qualia-framework@latest uninstall # Clean removal from installed Claude/Codex homes
|
|
38
|
+
npx qualia-framework@latest update # Update to latest (remembers your code; re-links the shim)
|
|
39
|
+
npx qualia-framework@latest uninstall # Clean removal from installed Claude/Codex homes (+ the shim)
|
|
38
40
|
npx qualia-framework@latest team list # Show team members
|
|
39
41
|
npx qualia-framework@latest team add # Add a team member
|
|
40
42
|
npx qualia-framework@latest traces # View recent hook telemetry
|
|
41
43
|
```
|
|
42
44
|
|
|
45
|
+
### Install as a plugin (experimental)
|
|
46
|
+
|
|
47
|
+
The npx installer above is the primary, supported path. A second, **experimental** Claude Code plugin distribution is also available — it ships the same hook gates (branch-guard, secret-guard, migration-guard, pre-deploy-gate, …) wired through `${CLAUDE_PLUGIN_ROOT}`:
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
/plugin marketplace add Qualiasolutions/qualia-framework
|
|
51
|
+
/plugin install qualia-framework@qualia
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
> **Experimental:** the plugin path is layered *alongside* the npx install, not a replacement. Skill bash blocks still resolve their helpers via the install-time `${QUALIA_BIN}` path, so a pure plugin install (without the npx installer also having run) gets working hooks, but its skills still assume their helpers live in `~/.claude/bin`. Reconciling that path model is deferred — see [`.planning/decisions/ADR-0001-plugin-packaging.md`](.planning/decisions/ADR-0001-plugin-packaging.md). For a fully self-contained install today, use the npx flow above.
|
|
55
|
+
|
|
43
56
|
## Usage
|
|
44
57
|
|
|
45
58
|
Open Claude Code or Codex in any project directory.
|
|
@@ -141,7 +154,7 @@ Project
|
|
|
141
154
|
- **9 agents** (each runs in fresh context): planner, builder, verifier, qa-browser, researcher, research-synthesizer, roadmapper, plan-checker, visual-evaluator
|
|
142
155
|
- **16 hooks** (pure Node.js, cross-platform): session-start, auto-update, git-guardrails, branch-guard, pre-push tracking stamp, migration-guard, pre-deploy-gate, stop-session-log, fawzi-approval-guard, vercel-account-guard, env-empty-guard, supabase-destructive-guard, secret-guard, task-write-guard, pre-compact, usage-capture
|
|
143
156
|
- **12 installed rules** (`rules/`): constitution, grounding, security, access, infrastructure, deployment, speed, architecture, trust-boundary, codex-goal, one-opinion, and always-on command-output transparency.
|
|
144
|
-
- **
|
|
157
|
+
- **8 lazy-loaded design files** (`qualia-design/`): design-laws, design-brand, design-product, design-dials, design-rubric, design-reference, frontend, graphics — `Read` on demand by design-aware skills/agents only.
|
|
145
158
|
- **25 template files**: project.md, journey.md, plan.md (story-file format), state.md, DESIGN.md, CONTEXT.md (domain glossary), work-packet.md (ERP-approved session context), decisions/ADR-template.md, tracking.json (with `milestone_name` + `milestones[]`), requirements.md (multi-milestone), roadmap.md (current milestone only), phase-context.md, 4 project-type templates (website, ai-agent, voice-agent, mobile-app), 5 research-project templates (STACK, FEATURES, ARCHITECTURE, PITFALLS, SUMMARY), knowledge templates, help.html
|
|
146
159
|
- **Planning hygiene guard**: `planning-hygiene.js` scans `.planning/` for loose reports/assets and can organize them under `reports/`, `assets/`, `design/`, or `archive/loose/` only with explicit `--write`
|
|
147
160
|
- **1 reference** — questioning.md methodology for deep project initialization
|
package/TROUBLESHOOTING.md
CHANGED
|
@@ -92,10 +92,11 @@ node ~/.claude/bin/prune-deprecated.js ~/.codex
|
|
|
92
92
|
**Cause:** You stacked install methods. The most common broken setup is `/plugin install` first, then `npx qualia-framework install`. Each path tries to own the same files.
|
|
93
93
|
**Fix:**
|
|
94
94
|
```bash
|
|
95
|
-
# 1. Pick ONE path. Recommended: the
|
|
96
|
-
#
|
|
97
|
-
|
|
98
|
-
|
|
95
|
+
# 1. Pick ONE path. Recommended: the npx installer (fully self-contained;
|
|
96
|
+
# the /plugin path is experimental and layered alongside it — see README).
|
|
97
|
+
# 2. Re-run the canonical installer to reclaim ownership of the files:
|
|
98
|
+
npx qualia-framework@latest install
|
|
99
|
+
# 3. Restart Claude Code so it reloads skills/hooks cleanly.
|
|
99
100
|
```
|
|
100
101
|
|
|
101
102
|
### `Run: npx qualia-framework@latest install`
|
|
@@ -122,12 +123,12 @@ npx qualia-framework@latest uninstall # if you went plugin-first, this clears
|
|
|
122
123
|
**What it means:** A direct caller into the retry queue is missing a required field. You generally don't see this — `/qualia-report` builds the payload for you.
|
|
123
124
|
**Fix:** Use `/qualia-report` rather than calling `erp-retry.js` directly.
|
|
124
125
|
|
|
125
|
-
###
|
|
126
|
+
### `/qualia-report` queued instead of sent
|
|
126
127
|
**What it means:** ERP was unreachable. The payload is now in `bin/erp-retry.js`'s persistent queue.
|
|
127
128
|
**Fix:**
|
|
128
129
|
```bash
|
|
129
|
-
qualia-framework erp-
|
|
130
|
-
qualia-framework erp-flush
|
|
130
|
+
qualia-framework erp-flush show # see queue depth
|
|
131
|
+
qualia-framework erp-flush # retry sending now
|
|
131
132
|
```
|
|
132
133
|
Queue auto-drains on next session start when ERP is reachable.
|
|
133
134
|
|
package/agents/verifier.md
CHANGED
|
@@ -23,7 +23,7 @@ You verify that a phase achieved its GOAL, not just completed its TASKS.
|
|
|
23
23
|
|
|
24
24
|
LLMs are unreliable narrators — they prioritize confidence over accuracy and hallucinate when the evidence isn't in front of them. This file overrides that default.
|
|
25
25
|
|
|
26
|
-
1. **Tool-use is mandatory.** Before stating that a file, function, route, import, or behavior exists, run `Read`, `Grep`, or `Bash` and put the result in your scratchpad. No claim from memory.
|
|
26
|
+
1. **Tool-use is mandatory.** Before stating that a file, function, route, import, or behavior exists, run `Read`, `Grep`, or `Bash` and put the result in your scratchpad. No claim from memory. **Execution is now its own lens, not just grep:** `/qualia-verify` runs `verify-panel.js execution {N}` (tsc / test / build) and folds a red result into the panel as a CRITICAL — a grep-satisfying symbol that doesn't compile or whose tests fail is NOT a pass. Grep tells you a thing EXISTS; the execution lens tells you it RUNS.
|
|
27
27
|
2. **Every finding carries `file:line — "quoted snippet"`.** Format exactly as in `rules/grounding.md`. Findings without this format are discarded by the orchestrator — they will not appear in the final report regardless of how confidently you wrote them.
|
|
28
28
|
3. **No hedging language.** "It seems", "appears to", "probably", "might", "likely" — banned. Either you ran a tool and have evidence (cite), or you did not (write `INSUFFICIENT EVIDENCE: searched {files} with {commands}`).
|
|
29
29
|
4. **Score with criterion citation.** Every 1–5 score in the design rubric needs evidence on the very next line. Severity (CRITICAL/HIGH/MEDIUM/LOW) requires quoting the matching row from `rules/grounding.md` Severity Rubric.
|
package/bin/agent-status.js
CHANGED
|
@@ -51,6 +51,12 @@ function writeStatus(root, entry) {
|
|
|
51
51
|
note: entry.note || null,
|
|
52
52
|
phase: entry.phase != null ? Number(entry.phase) : null,
|
|
53
53
|
wave: entry.wave != null ? Number(entry.wave) : null,
|
|
54
|
+
// Burn-vs-budget telemetry (codex-goal discipline): a task may report the
|
|
55
|
+
// tokens it spent vs the budget it was given. Both OPTIONAL — older records
|
|
56
|
+
// and writers that omit them stay null, and every reader/rollup tolerates
|
|
57
|
+
// the absence (backward compatible).
|
|
58
|
+
tokens_used: entry.tokens_used != null && entry.tokens_used !== "" ? Number(entry.tokens_used) : null,
|
|
59
|
+
token_budget: entry.token_budget != null && entry.token_budget !== "" ? Number(entry.token_budget) : null,
|
|
54
60
|
updated_at: entry.now || new Date().toISOString(),
|
|
55
61
|
};
|
|
56
62
|
fs.writeFileSync(statusFile(root, entry.task), JSON.stringify(record, null, 2) + "\n");
|
|
@@ -107,19 +113,51 @@ function expectedTaskIds(contract, wave) {
|
|
|
107
113
|
// derived waves needn't match the contract's declared wave numbers), else the
|
|
108
114
|
// contract task ids optionally scoped to opts.wave. ok ⇔ every expected task is
|
|
109
115
|
// DONE. Anything else (missing/running/blocked/partial) holds the barrier.
|
|
116
|
+
//
|
|
117
|
+
// Timeout (opts.timeout, seconds): a wall-clock deadline so a crashed builder
|
|
118
|
+
// that never wrote terminal status can't stall the wave forever. When set, a
|
|
119
|
+
// RUNNING task whose updated_at is older than `timeout` seconds, or a MISSING
|
|
120
|
+
// task (a builder that returned without writing any status), is reclassified
|
|
121
|
+
// STALE and the barrier FAILS (distinct from a transient HOLD) so the wave can
|
|
122
|
+
// route to failure handling instead of spinning. updated_at is compared against
|
|
123
|
+
// opts.now (defaults to wall-clock now) — both ISO strings, injectable for tests.
|
|
124
|
+
// With no timeout, behavior is unchanged: RUNNING/MISSING just hold the barrier.
|
|
110
125
|
function barrier(root, contract, opts = {}) {
|
|
111
126
|
const expected = Array.isArray(opts.tasks) && opts.tasks.length
|
|
112
127
|
? opts.tasks
|
|
113
128
|
: expectedTaskIds(contract, opts.wave);
|
|
129
|
+
const timeout = opts.timeout != null && opts.timeout !== "" ? Number(opts.timeout) : null;
|
|
130
|
+
const hasTimeout = timeout != null && Number.isFinite(timeout) && timeout > 0;
|
|
131
|
+
const nowMs = hasTimeout ? Date.parse(opts.now || new Date().toISOString()) : null;
|
|
114
132
|
const byTask = new Map(listStatuses(root).map((s) => [s.task, s]));
|
|
115
133
|
const tasks = expected.map((id) => {
|
|
116
134
|
const s = byTask.get(id);
|
|
117
|
-
|
|
135
|
+
let status = s ? s.status : "MISSING";
|
|
136
|
+
let age = null;
|
|
137
|
+
if (hasTimeout) {
|
|
138
|
+
// A builder that never wrote status (MISSING) past the deadline is a
|
|
139
|
+
// crash, not a pending start: fail it. A RUNNING entry is stale once its
|
|
140
|
+
// last heartbeat (updated_at) predates the deadline window.
|
|
141
|
+
if (status === "MISSING") {
|
|
142
|
+
status = "STALE";
|
|
143
|
+
} else if (status === "RUNNING") {
|
|
144
|
+
const t = s && s.updated_at ? Date.parse(s.updated_at) : NaN;
|
|
145
|
+
age = Number.isFinite(t) ? Math.round((nowMs - t) / 1000) : null;
|
|
146
|
+
if (!Number.isFinite(t) || nowMs - t > timeout * 1000) status = "STALE";
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
return { task: id, status, commit: s ? s.commit : null, note: s ? s.note : null, age_sec: age };
|
|
118
150
|
});
|
|
119
151
|
const count = (st) => tasks.filter((t) => t.status === st).length;
|
|
120
152
|
const done = count("DONE");
|
|
153
|
+
const stale = count("STALE");
|
|
154
|
+
// FAIL = a terminal-bad outcome that should stop the wave: blocked/partial
|
|
155
|
+
// (a builder reported failure) or stale (a builder vanished past timeout).
|
|
156
|
+
const failed = (stale + count("BLOCKED") + count("PARTIAL")) > 0;
|
|
121
157
|
return {
|
|
122
158
|
ok: expected.length > 0 && done === expected.length,
|
|
159
|
+
failed,
|
|
160
|
+
timeout: hasTimeout ? timeout : null,
|
|
123
161
|
wave: opts.wave != null ? Number(opts.wave) : null,
|
|
124
162
|
expected: expected.length,
|
|
125
163
|
done,
|
|
@@ -127,10 +165,41 @@ function barrier(root, contract, opts = {}) {
|
|
|
127
165
|
partial: count("PARTIAL"),
|
|
128
166
|
running: count("RUNNING"),
|
|
129
167
|
missing: count("MISSING"),
|
|
168
|
+
stale,
|
|
130
169
|
tasks,
|
|
131
170
|
};
|
|
132
171
|
}
|
|
133
172
|
|
|
173
|
+
// Burn-vs-budget rollup: sum tokens_used vs token_budget across persisted
|
|
174
|
+
// statuses, optionally scoped to opts.wave. A wave can thus surface its total
|
|
175
|
+
// burn against its total budget — the cheap per-wave equivalent of the
|
|
176
|
+
// codex-goal objective+budget for a unit of work. tokens_used/token_budget are
|
|
177
|
+
// OPTIONAL on a record; records that omit a field contribute 0 to that sum and
|
|
178
|
+
// are still counted as tasks, so an absent field never breaks the rollup.
|
|
179
|
+
function budget(root, opts = {}) {
|
|
180
|
+
const wave = opts.wave != null && opts.wave !== "" ? Number(opts.wave) : null;
|
|
181
|
+
const all = listStatuses(root).filter((s) => wave == null || Number(s.wave) === wave);
|
|
182
|
+
let tokensUsed = 0;
|
|
183
|
+
let tokenBudget = 0;
|
|
184
|
+
const tasks = all.map((s) => {
|
|
185
|
+
const used = Number.isFinite(Number(s.tokens_used)) ? Number(s.tokens_used) : null;
|
|
186
|
+
const budgeted = Number.isFinite(Number(s.token_budget)) ? Number(s.token_budget) : null;
|
|
187
|
+
if (used != null) tokensUsed += used;
|
|
188
|
+
if (budgeted != null) tokenBudget += budgeted;
|
|
189
|
+
return { task: s.task, status: s.status, wave: s.wave != null ? Number(s.wave) : null, tokens_used: used, token_budget: budgeted };
|
|
190
|
+
});
|
|
191
|
+
return {
|
|
192
|
+
wave,
|
|
193
|
+
tasks: tasks.length,
|
|
194
|
+
tokens_used: tokensUsed,
|
|
195
|
+
token_budget: tokenBudget,
|
|
196
|
+
// remaining/over_budget only meaningful when a budget was reported.
|
|
197
|
+
remaining: tokenBudget > 0 ? tokenBudget - tokensUsed : null,
|
|
198
|
+
over_budget: tokenBudget > 0 ? tokensUsed > tokenBudget : null,
|
|
199
|
+
details: tasks,
|
|
200
|
+
};
|
|
201
|
+
}
|
|
202
|
+
|
|
134
203
|
// ── CLI ───────────────────────────────────────────────────────────────
|
|
135
204
|
function parseFlags(argv, start) {
|
|
136
205
|
const flags = { _: [] };
|
|
@@ -149,6 +218,14 @@ function parseFlags(argv, start) {
|
|
|
149
218
|
else if (a.startsWith("--note=")) flags.note = a.slice(7);
|
|
150
219
|
else if (a === "--phase") flags.phase = argv[++i];
|
|
151
220
|
else if (a.startsWith("--phase=")) flags.phase = a.slice(8);
|
|
221
|
+
else if (a === "--timeout") flags.timeout = argv[++i];
|
|
222
|
+
else if (a.startsWith("--timeout=")) flags.timeout = a.slice(10);
|
|
223
|
+
else if (a === "--now") flags.now = argv[++i];
|
|
224
|
+
else if (a.startsWith("--now=")) flags.now = a.slice(6);
|
|
225
|
+
else if (a === "--tokens") flags.tokens = argv[++i];
|
|
226
|
+
else if (a.startsWith("--tokens=")) flags.tokens = a.slice(9);
|
|
227
|
+
else if (a === "--budget") flags.budget = argv[++i];
|
|
228
|
+
else if (a.startsWith("--budget=")) flags.budget = a.slice(9);
|
|
152
229
|
else flags._.push(a);
|
|
153
230
|
}
|
|
154
231
|
return flags;
|
|
@@ -157,15 +234,17 @@ function parseFlags(argv, start) {
|
|
|
157
234
|
function usage() {
|
|
158
235
|
console.error([
|
|
159
236
|
"Usage:",
|
|
160
|
-
" agent-status.js write <task> <status> [--commit H] [--note N] [--phase P] [--wave W] [--cwd DIR]",
|
|
237
|
+
" agent-status.js write <task> <status> [--commit H] [--note N] [--phase P] [--wave W] [--tokens N] [--budget N] [--cwd DIR]",
|
|
161
238
|
" agent-status.js read <task> [--cwd DIR] [--json]",
|
|
162
239
|
" agent-status.js list [--cwd DIR] [--json]",
|
|
163
|
-
" agent-status.js barrier <contract.json> [--wave W] [--cwd DIR] [--json]",
|
|
164
|
-
" agent-status.js barrier --tasks T1,T2 [--cwd DIR] [--json] (batch gate; no contract needed)",
|
|
240
|
+
" agent-status.js barrier <contract.json> [--wave W] [--timeout SEC] [--cwd DIR] [--json]",
|
|
241
|
+
" agent-status.js barrier --tasks T1,T2 [--timeout SEC] [--cwd DIR] [--json] (batch gate; no contract needed)",
|
|
242
|
+
" agent-status.js budget [--wave W] [--cwd DIR] [--json] (per-wave token burn vs budget rollup)",
|
|
165
243
|
" agent-status.js clear [--cwd DIR]",
|
|
166
244
|
"",
|
|
167
245
|
"status ∈ RUNNING | DONE | BLOCKED | PARTIAL",
|
|
168
|
-
"barrier
|
|
246
|
+
"barrier exit 0 ⇔ every expected task is DONE; exit 1 ⇔ still HOLDING (running/missing);",
|
|
247
|
+
"exit 3 ⇔ FAIL — a task is BLOCKED/PARTIAL, or (with --timeout) RUNNING-stale/MISSING past deadline.",
|
|
169
248
|
].join("\n"));
|
|
170
249
|
}
|
|
171
250
|
|
|
@@ -181,6 +260,7 @@ function main(argv) {
|
|
|
181
260
|
try {
|
|
182
261
|
const rec = writeStatus(root, {
|
|
183
262
|
task, status, commit: flags.commit, note: flags.note, phase: flags.phase, wave: flags.wave,
|
|
263
|
+
tokens_used: flags.tokens, token_budget: flags.budget,
|
|
184
264
|
});
|
|
185
265
|
if (flags.json) console.log(JSON.stringify(rec));
|
|
186
266
|
else console.log(`${rec.task} ${rec.status}${rec.commit ? ` @ ${rec.commit}` : ""}`);
|
|
@@ -224,17 +304,40 @@ function main(argv) {
|
|
|
224
304
|
}
|
|
225
305
|
contract = loaded.contract;
|
|
226
306
|
}
|
|
227
|
-
const result = barrier(root, contract, { wave: flags.wave, tasks: taskList });
|
|
228
|
-
|
|
307
|
+
const result = barrier(root, contract, { wave: flags.wave, tasks: taskList, timeout: flags.timeout, now: flags.now });
|
|
308
|
+
// Exit codes: 0 PASS · 3 FAIL (terminal — blocked/partial/stale; route to
|
|
309
|
+
// failure handling, do not re-poll) · 1 HOLD (transient — running/missing
|
|
310
|
+
// within timeout; safe to poll again).
|
|
311
|
+
// The distinct FAIL code is opt-in via --timeout — without it, every
|
|
312
|
+
// non-DONE outcome stays exit 1 (backward compatible with pollers that only
|
|
313
|
+
// distinguish 0 from non-0).
|
|
314
|
+
const code = result.ok ? 0 : (result.timeout != null && result.failed) ? 3 : 1;
|
|
315
|
+
if (flags.json) { console.log(JSON.stringify(result, null, 2)); return code; }
|
|
229
316
|
const scope = taskList ? `batch ${taskList.join(",")}` : (result.wave != null ? `wave ${result.wave}` : "phase");
|
|
230
317
|
if (result.ok) {
|
|
231
318
|
console.log(`BARRIER PASS (${scope}): ${result.done}/${result.expected} DONE`);
|
|
232
319
|
} else {
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
320
|
+
const verb = result.failed ? "FAIL" : "HOLD";
|
|
321
|
+
console.error(`BARRIER ${verb} (${scope}): ${result.done}/${result.expected} DONE` +
|
|
322
|
+
` (running=${result.running} blocked=${result.blocked} partial=${result.partial}` +
|
|
323
|
+
` missing=${result.missing} stale=${result.stale})` +
|
|
324
|
+
(result.timeout != null ? ` [timeout=${result.timeout}s]` : ""));
|
|
325
|
+
for (const t of result.tasks) if (t.status !== "DONE") {
|
|
326
|
+
const aged = t.age_sec != null ? ` (${t.age_sec}s)` : "";
|
|
327
|
+
console.error(` - ${t.task}: ${t.status}${aged}${t.note ? ` — ${t.note}` : ""}`);
|
|
328
|
+
}
|
|
236
329
|
}
|
|
237
|
-
return
|
|
330
|
+
return code;
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
if (cmd === "budget") {
|
|
334
|
+
const result = budget(root, { wave: flags.wave });
|
|
335
|
+
if (flags.json) { console.log(JSON.stringify(result, null, 2)); return 0; }
|
|
336
|
+
const scope = result.wave != null ? `wave ${result.wave}` : "all";
|
|
337
|
+
const pct = result.token_budget > 0 ? ` (${Math.round((result.tokens_used / result.token_budget) * 100)}%)` : "";
|
|
338
|
+
const over = result.over_budget ? " OVER BUDGET" : "";
|
|
339
|
+
console.log(`BUDGET (${scope}): ${result.tokens_used}/${result.token_budget} tokens${pct} across ${result.tasks} task(s)${over}`);
|
|
340
|
+
return 0;
|
|
238
341
|
}
|
|
239
342
|
|
|
240
343
|
if (cmd === "clear") {
|
|
@@ -257,6 +360,7 @@ module.exports = {
|
|
|
257
360
|
buildActive,
|
|
258
361
|
expectedTaskIds,
|
|
259
362
|
barrier,
|
|
363
|
+
budget,
|
|
260
364
|
};
|
|
261
365
|
|
|
262
366
|
if (require.main === module) {
|
package/bin/auto-report.js
CHANGED
|
@@ -104,11 +104,6 @@ async function maybeAutoReport({ cwd = process.cwd(), home = os.homedir(), env =
|
|
|
104
104
|
const body = JSON.stringify(payload);
|
|
105
105
|
const url = erpUrl(cfg);
|
|
106
106
|
|
|
107
|
-
const result = await postOnce(
|
|
108
|
-
{ url, payload: body, idempotency_key: idempotencyKey },
|
|
109
|
-
apiKey,
|
|
110
|
-
);
|
|
111
|
-
|
|
112
107
|
const writeMarker = (extra) => {
|
|
113
108
|
try {
|
|
114
109
|
fs.writeFileSync(
|
|
@@ -119,14 +114,27 @@ async function maybeAutoReport({ cwd = process.cwd(), home = os.homedir(), env =
|
|
|
119
114
|
} catch {}
|
|
120
115
|
};
|
|
121
116
|
|
|
117
|
+
// Close the double-post window: write the dedupe marker BEFORE the POST.
|
|
118
|
+
// A crash between this and the network call can no longer cause a re-post —
|
|
119
|
+
// the next run sees marker.last === unit and short-circuits at Guard 3. The
|
|
120
|
+
// failure path below still guarantees eventual delivery by enqueuing the
|
|
121
|
+
// same client_report_id, so the early marker never drops an unsent report.
|
|
122
|
+
writeMarker({ posting: true });
|
|
123
|
+
|
|
124
|
+
const result = await postOnce(
|
|
125
|
+
{ url, payload: body, idempotency_key: idempotencyKey },
|
|
126
|
+
apiKey,
|
|
127
|
+
);
|
|
128
|
+
|
|
122
129
|
if (result.code === "200") {
|
|
123
130
|
writeMarker({ posted: true });
|
|
124
131
|
return { posted: clientReportId, unit };
|
|
125
132
|
}
|
|
126
133
|
|
|
127
134
|
// Any non-200 → enqueue for the retry queue (session-start drains it).
|
|
128
|
-
//
|
|
129
|
-
// queued item carries this client_report_id and the ERP
|
|
135
|
+
// The marker is already written, so we never re-allocate a new id on the
|
|
136
|
+
// next turn; the queued item carries this client_report_id and the ERP
|
|
137
|
+
// dedupes on it.
|
|
130
138
|
try {
|
|
131
139
|
enqueue({
|
|
132
140
|
client_report_id: clientReportId,
|