qualia-framework 4.1.1 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +15 -11
  2. package/agents/builder.md +28 -0
  3. package/agents/research-synthesizer.md +7 -0
  4. package/bin/agent-runs.js +233 -0
  5. package/bin/cli.js +355 -16
  6. package/bin/install.js +87 -6
  7. package/bin/knowledge-flush.js +164 -0
  8. package/bin/knowledge.js +317 -0
  9. package/bin/plan-contract.js +220 -0
  10. package/bin/state.js +15 -9
  11. package/docs/agent-runs.md +273 -0
  12. package/docs/journey-demo.html +1008 -0
  13. package/docs/plan-contract.md +321 -0
  14. package/docs/reviews/v4.1.0-audit.html +1488 -0
  15. package/docs/reviews/v4.1.0-audit.md +263 -0
  16. package/hooks/auto-update.js +3 -7
  17. package/hooks/git-guardrails.js +167 -0
  18. package/hooks/pre-compact.js +22 -11
  19. package/hooks/pre-deploy-gate.js +16 -2
  20. package/hooks/pre-push.js +22 -2
  21. package/hooks/stop-session-log.js +180 -0
  22. package/package.json +8 -2
  23. package/skills/qualia-build/SKILL.md +5 -5
  24. package/skills/qualia-debug/SKILL.md +1 -1
  25. package/skills/qualia-design/SKILL.md +15 -0
  26. package/skills/qualia-flush/SKILL.md +200 -0
  27. package/skills/qualia-learn/SKILL.md +47 -37
  28. package/skills/qualia-new/SKILL.md +1 -1
  29. package/skills/qualia-plan/SKILL.md +3 -2
  30. package/skills/qualia-postmortem/SKILL.md +238 -0
  31. package/skills/qualia-quick/SKILL.md +1 -1
  32. package/skills/qualia-report/SKILL.md +1 -1
  33. package/skills/qualia-review/SKILL.md +3 -2
  34. package/skills/qualia-ship/SKILL.md +12 -10
  35. package/skills/qualia-verify/SKILL.md +60 -0
  36. package/templates/help.html +13 -7
  37. package/templates/knowledge/agents.md +71 -0
  38. package/templates/knowledge/index.md +47 -0
  39. package/tests/bin.test.sh +322 -12
  40. package/tests/hooks.test.sh +131 -20
  41. package/tests/lib.test.sh +217 -0
  42. package/tests/runner.js +103 -77
  43. package/tests/state.test.sh +4 -3
@@ -0,0 +1,220 @@
1
+ #!/usr/bin/env node
2
+ // Plan contract validator + helpers. See docs/plan-contract.md.
3
+ //
4
+ // Pure library — no CLI dispatch. Required by state.js and by skills that
5
+ // emit/consume `.planning/phase-{N}-contract.json`.
6
+ //
7
+ // Zero npm dependencies. Hand-rolled validator, ~100 LOC.
8
+
9
+ const fs = require("fs");
10
+ const path = require("path");
11
+ const crypto = require("crypto");
12
+
13
// Expected value of contract.version — checked in validate().
const SCHEMA_VERSION = 1;

// Allowed values for task.persona; "none" opts out explicitly.
const PERSONAS = new Set([
  "security",
  "architect",
  "ux",
  "frontend",
  "backend",
  "data",
  "performance",
  "none",
]);

// Supported verification check types (dispatched on in validateCheck).
// Insertion order matters: it is joined into the ".type" error message.
const CHECK_TYPES = new Set([
  "file-exists",
  "grep-match",
  "command-exit",
  "behavioral",
]);
23
+
24
// True when `v` is an array whose every element is a string.
// An empty array qualifies (vacuously all-strings).
function isStringArray(v) {
  if (!Array.isArray(v)) return false;
  for (const item of v) {
    if (typeof item !== "string") return false;
  }
  return true;
}
27
+
28
+ function isPlainObject(v) {
29
+ return v && typeof v === "object" && !Array.isArray(v);
30
+ }
31
+
32
// Validate one verification check belonging to task `taskId` at position
// `idx` within its task's verification array. Returns an array of
// human-readable error strings; empty means the check is well-formed.
function validateCheck(check, taskId, idx) {
  const where = `tasks[id=${taskId}].verification[${idx}]`;
  if (!isPlainObject(check)) return [`${where}: not an object`];

  const errs = [];
  // Push the standard "required string" error unless `value` is a non-empty
  // string; returns whether the value was acceptable.
  const needString = (value, label) => {
    if (typeof value === "string" && value) return true;
    errs.push(`${label}: required string`);
    return false;
  };
  // Report regex-compilation failures without letting them throw.
  const needRegex = (source, label) => {
    try { new RegExp(source); } catch { errs.push(`${label}: invalid regex`); }
  };

  if (!CHECK_TYPES.has(check.type)) {
    errs.push(`${where}.type: must be one of ${[...CHECK_TYPES].join("|")}`);
    return errs;
  }

  if (check.type === "file-exists") {
    needString(check.path, `${where}.path`);
    if (check.must_contain != null && typeof check.must_contain !== "string") {
      errs.push(`${where}.must_contain: must be string`);
    }
  } else if (check.type === "grep-match") {
    needString(check.path, `${where}.path`);
    if (needString(check.pattern, `${where}.pattern`)) needRegex(check.pattern, `${where}.pattern`);
    if (check.expect !== "present" && check.expect !== "absent") {
      errs.push(`${where}.expect: must be "present" or "absent"`);
    }
  } else if (check.type === "command-exit") {
    // Metacharacters in `command` signal a misuse of the field where
    // `args[]` was intended.
    if (needString(check.command, `${where}.command`) && /[;&|`$<>(){}\\]/.test(check.command)) {
      errs.push(`${where}.command: shell metacharacters not allowed (use args[])`);
    }
    if (!isStringArray(check.args || [])) errs.push(`${where}.args: must be string[]`);
    if (typeof check.expected_exit !== "number") errs.push(`${where}.expected_exit: required number`);
    if (check.timeout_ms != null && (typeof check.timeout_ms !== "number" || check.timeout_ms <= 0)) {
      errs.push(`${where}.timeout_ms: must be positive number`);
    }
    if (check.expect_stdout_match != null) {
      if (typeof check.expect_stdout_match !== "string") {
        errs.push(`${where}.expect_stdout_match: must be string`);
      } else {
        needRegex(check.expect_stdout_match, `${where}.expect_stdout_match`);
      }
    }
  } else {
    // "behavioral" — the only remaining member of CHECK_TYPES.
    needString(check.description, `${where}.description`);
    if (!Array.isArray(check.evidence_required) || check.evidence_required.length === 0) {
      errs.push(`${where}.evidence_required: must be a non-empty array`);
    } else {
      check.evidence_required.forEach((ev, i) => {
        const w = `${where}.evidence_required[${i}]`;
        if (!isPlainObject(ev)) { errs.push(`${w}: not an object`); return; }
        needString(ev.path, `${w}.path`);
        needString(ev.description, `${w}.description`);
        if (ev.matcher != null) {
          if (typeof ev.matcher !== "string") errs.push(`${w}.matcher: must be string`);
          else needRegex(ev.matcher, `${w}.matcher`);
        }
      });
    }
  }
  return errs;
}
84
+
85
// Validate one task object at position `idx`. `allIds` is the set of every
// task id present in the contract, used to resolve depends_on references.
// Returns an array of error strings; empty means the task is valid.
function validateTask(task, idx, allIds) {
  const where = `tasks[${idx}]`;
  if (!isPlainObject(task)) return [`${where}: not an object`];

  const errs = [];
  if (typeof task.id !== "string" || !/^T\d+$/.test(task.id)) errs.push(`${where}.id: must match ^T\\d+$`);
  if (typeof task.title !== "string" || !task.title) errs.push(`${where}.title: required string`);
  if (typeof task.wave !== "number" || task.wave < 1) errs.push(`${where}.wave: must be positive number`);
  if (!isStringArray(task.depends_on || [])) errs.push(`${where}.depends_on: must be string[]`);
  if (task.persona != null && !PERSONAS.has(task.persona)) errs.push(`${where}.persona: invalid value`);

  // All three file lists are optional but must be string[] when present.
  for (const field of ["files_modify", "files_create", "files_delete"]) {
    if (!isStringArray(task[field] || [])) errs.push(`${where}.${field}: must be string[]`);
  }

  const criteria = task.acceptance_criteria || [];
  if (!isStringArray(criteria) || criteria.length === 0) {
    errs.push(`${where}.acceptance_criteria: must be a non-empty string[]`);
  }

  if (typeof task.action !== "string") {
    errs.push(`${where}.action: required string`);
  } else if (task.action.length > 500) {
    errs.push(`${where}.action: must be ≤ 500 characters (got ${task.action.length})`);
  }

  if (!isStringArray(task.context_files || [])) errs.push(`${where}.context_files: must be string[]`);

  if (!Array.isArray(task.verification) || task.verification.length === 0) {
    errs.push(`${where}.verification: must be a non-empty array`);
  } else {
    task.verification.forEach((check, i) => errs.push(...validateCheck(check, task.id, i)));
  }

  // A path may appear in at most one of files_modify/files_create/files_delete.
  const modify = new Set(task.files_modify || []);
  const create = new Set(task.files_create || []);
  const remove = new Set(task.files_delete || []);
  for (const p of modify) {
    if (create.has(p) || remove.has(p)) errs.push(`${where}: ${p} appears in multiple file lists`);
  }
  for (const p of create) {
    if (remove.has(p)) errs.push(`${where}: ${p} appears in multiple file lists`);
  }

  // Every dependency must name a task that exists in this contract.
  for (const dep of task.depends_on || []) {
    if (!allIds.has(dep)) errs.push(`${where}.depends_on: references unknown id "${dep}"`);
  }
  return errs;
}
122
+
123
// Depth-first three-color cycle detection over the task dependency graph.
// Returns an array of "A → B → A"-style path strings; empty when acyclic.
// Edges pointing at unknown ids are skipped — validateTask reports those.
//
// Fix: on finding a back-edge, record the cycle and continue instead of
// returning mid-traversal. The previous early return left every node on the
// current path permanently GRAY, so a later traversal from another root that
// merely reached one of those stale GRAY nodes reported a false cycle.
function detectCycles(tasks) {
  const graph = new Map(tasks.map((t) => [t.id, t.depends_on || []]));
  const WHITE = 0, GRAY = 1, BLACK = 2;
  const color = new Map([...graph.keys()].map((k) => [k, WHITE]));
  const cycles = [];
  function dfs(u, stack) {
    color.set(u, GRAY);
    stack.push(u);
    for (const v of graph.get(u) || []) {
      if (!graph.has(v)) continue;
      const cv = color.get(v);
      // GRAY neighbor = back-edge to a node on the current DFS path.
      if (cv === GRAY) { cycles.push([...stack, v].join(" → ")); continue; }
      if (cv === WHITE) dfs(v, stack);
    }
    // Always blacken and pop, even after recording a cycle, so no node is
    // left GRAY for subsequent traversals.
    color.set(u, BLACK);
    stack.pop();
  }
  for (const k of graph.keys()) if (color.get(k) === WHITE) dfs(k, []);
  return cycles;
}
143
+
144
// Validate an entire contract object against schema v1. Returns an array of
// error strings; an empty array means the contract is valid.
function validate(contract) {
  if (!isPlainObject(contract)) return ["contract: not an object"];

  const errs = [];
  if (contract.version !== SCHEMA_VERSION) errs.push(`version: must be ${SCHEMA_VERSION}, got ${contract.version}`);
  if (typeof contract.phase !== "number" || contract.phase < 1) errs.push("phase: must be positive number");
  for (const field of ["goal", "why"]) {
    if (typeof contract[field] !== "string" || !contract[field]) errs.push(`${field}: required string`);
  }
  if (typeof contract.generated_at !== "string") errs.push("generated_at: required ISO 8601 string");
  if (!["planner", "compile-plan", "manual"].includes(contract.generated_by)) {
    errs.push('generated_by: must be "planner" | "compile-plan" | "manual"');
  }
  if (typeof contract.source_plan_hash !== "string") errs.push("source_plan_hash: required string (empty for manual)");

  const criteria = contract.success_criteria || [];
  if (!isStringArray(criteria) || criteria.length === 0) {
    errs.push("success_criteria: must be a non-empty string[]");
  }

  // Without a tasks array none of the per-task checks below can run.
  if (!Array.isArray(contract.tasks) || contract.tasks.length === 0) {
    errs.push("tasks: must be a non-empty array");
    return errs;
  }

  // Collect ids up front so each task's depends_on can be resolved even
  // against tasks defined later in the array.
  const ids = new Set();
  for (const t of contract.tasks) {
    if (t && typeof t.id === "string") {
      if (ids.has(t.id)) errs.push(`tasks: duplicate id "${t.id}"`);
      ids.add(t.id);
    }
  }

  contract.tasks.forEach((t, i) => errs.push(...validateTask(t, i, ids)));

  const cycles = detectCycles(contract.tasks);
  if (cycles.length) errs.push(`tasks: cycle detected: ${cycles[0]}`);

  // A task must run in a strictly later wave than every task it depends on.
  const byId = new Map(contract.tasks.map((t) => [t.id, t]));
  for (const t of contract.tasks) {
    const depWaves = (t.depends_on || [])
      .map((id) => byId.get(id))
      .filter(Boolean)
      .map((d) => d.wave || 0);
    if (depWaves.length === 0) continue;
    const maxDepWave = Math.max(...depWaves);
    if ((t.wave || 0) <= maxDepWave) {
      errs.push(`tasks[id=${t.id}].wave: must be > ${maxDepWave} (max wave of its dependencies)`);
    }
  }

  return errs;
}
190
+
191
// Parse JSON without throwing. Returns { ok: true, value } on success or
// { ok: false, error } carrying the parser's message on failure.
function parseSafely(text) {
  try {
    const value = JSON.parse(text);
    return { ok: true, value };
  } catch (err) {
    return { ok: false, error: err.message };
  }
}
198
+
199
// Content hash of a plan document, prefixed with the algorithm name so the
// stored value stays self-describing if the algorithm ever changes.
function hashPlan(text) {
  const digest = crypto.createHash("sha256").update(text, "utf8").digest("hex");
  return `sha256:${digest}`;
}
202
+
203
// Compare the plan hash stored in a contract file against the current
// content of the plan markdown. `ok: false` means the comparison could not
// be performed (missing or unparseable inputs) — not that drift occurred;
// drift itself is reported via the `drift` flag on an `ok: true` result.
function checkDrift(contractPath, planMdPath) {
  const preconditions = [
    [contractPath, "contract-missing"],
    [planMdPath, "plan-md-missing"],
  ];
  for (const [p, reason] of preconditions) {
    if (!fs.existsSync(p)) return { ok: false, reason };
  }

  const parsed = parseSafely(fs.readFileSync(contractPath, "utf8"));
  if (!parsed.ok) return { ok: false, reason: "contract-unparseable", detail: parsed.error };

  const stored = parsed.value.source_plan_hash;
  // Manual contracts carry no hash — nothing to drift against.
  if (!stored) return { ok: true, drift: false, reason: "no-hash-stored" };

  const current = hashPlan(fs.readFileSync(planMdPath, "utf8"));
  return { ok: true, drift: stored !== current, stored, current };
}
213
+
214
+ module.exports = {
215
+ SCHEMA_VERSION,
216
+ validate,
217
+ parseSafely,
218
+ hashPlan,
219
+ checkDrift,
220
+ };
package/bin/state.js CHANGED
@@ -104,11 +104,14 @@ function acquireLock(timeoutMs = 5000) {
104
104
  sleepSync(50);
105
105
  }
106
106
  }
107
- // Couldn't acquire inside the budget — proceed unlocked rather than
108
- // hard-block the user. Surface this in analytics so repeated contention
109
- // is visible instead of silent.
110
- try { _trace("state-lock", "fallthrough", { waited_ms: Date.now() - start }); } catch {}
111
- return null;
107
+ const waited = Date.now() - start;
108
+ try { _trace("state-lock", "timeout", { waited_ms: waited }); } catch {}
109
+ const err = new Error(
110
+ `Could not acquire ${LOCK_FILE} after ${waited}ms. Another state mutation may still be running.`
111
+ );
112
+ err.code = "STATE_LOCK_TIMEOUT";
113
+ err.waited_ms = waited;
114
+ throw err;
112
115
  }
113
116
 
114
117
  function releaseLock(lock) {
@@ -1297,9 +1300,8 @@ const opts = parseArgs(rest);
1297
1300
 
1298
1301
  // Mutators must hold the .planning/.state.lock for the duration of their
1299
1302
  // dual STATE.md + tracking.json writes. Read commands (check, validate-plan)
1300
- // don't need the lock. The lock is best-effort: if it can't be acquired
1301
- // inside acquireLock's timeout, the command proceeds anyway we'd rather
1302
- // risk a rare race than hard-block the user.
1303
+ // don't need the lock. A lock timeout is a hard failure for mutators; racing
1304
+ // state writes are worse than asking the user to retry.
1303
1305
  const READ_ONLY = new Set(["check", "validate-plan"]);
1304
1306
  let __lock = null;
1305
1307
  if (!READ_ONLY.has(cmd)) {
@@ -1307,7 +1309,11 @@ if (!READ_ONLY.has(cmd)) {
1307
1309
  // previous mutator. Runs for mutators only; read commands should still
1308
1310
  // return the actual on-disk state even if it's mid-recovery.
1309
1311
  try { recoverFromJournal(); } catch {}
1310
- __lock = acquireLock();
1312
+ try {
1313
+ __lock = acquireLock();
1314
+ } catch (err) {
1315
+ output(fail(err.code || "STATE_LOCK_ERROR", err.message));
1316
+ }
1311
1317
  process.on("exit", () => releaseLock(__lock));
1312
1318
  process.on("SIGINT", () => { releaseLock(__lock); process.exit(130); });
1313
1319
  process.on("SIGTERM", () => { releaseLock(__lock); process.exit(143); });
@@ -0,0 +1,273 @@
1
+ # Agent Runs Telemetry
2
+
3
+ Append-only JSONL ledger of every subagent spawn, recorded per project. Substrate for `qualia-framework agents`, postmortem analysis, and ERP enrichment.
4
+
5
+ Status: **draft, v1.** Pressure-test the shape against real spawns before locking.
6
+
7
+ ## Why this exists
8
+
9
+ Today, `traces.jsonl` records hook-level events only. There is zero per-agent telemetry: no record of which builder ran for how long on which task, which verifier failed and why, which researcher hit a rate limit. The data needed to answer "which task failed twice and required a postmortem" doesn't exist.
10
+
11
+ This file specifies a per-spawn record that lives next to the project (not in `~/.claude/`) so it travels with the repo, is committed alongside other planning artifacts, and stays attributable to a specific phase.
12
+
13
+ ## File layout
14
+
15
+ ```
16
+ .planning/
17
+ agent-runs.jsonl # all-time, append-only
18
+ agent-runs/
19
+ 2026-04-28.jsonl # daily rotation (optional, see below)
20
+ ```
21
+
22
+ **Rotation:** start with single-file. If `agent-runs.jsonl` exceeds 5MB, rotate to dated subfile. Cheap, no dependency.
23
+
24
+ **Privacy:** records contain file paths, task ids, durations, token counts, error strings — never command output, never file contents, never user prompts. The schema below is the upper bound of what we capture. `QUALIA_TELEMETRY=off` env var disables writes.
25
+
26
+ ## Schema (v1)
27
+
28
+ OpenTelemetry GenAI semantic conventions where they fit; framework-specific fields where they don't.
29
+
30
+ ```ts
31
+ interface AgentRunRecord {
32
+ // Identity
33
+ schema_version: 1;
34
+ run_id: string; // ULID — sortable, monotonic
35
+ parent_run_id?: string; // ONLY for true nesting (an agent spawned this one); null otherwise
36
+ skill_invocation_id: string; // groups runs from one skill call (sequential or parallel siblings)
37
+ session_id?: string; // Claude Code session id when reachable; per-process UUID fallback
38
+
39
+ // What ran
40
+ agent_type: AgentType;
41
+ agent_name?: string; // for custom agents (e.g. "frontend-agent")
42
+ model: string; // "claude-opus-4-7", "claude-sonnet-4-6", etc.
43
+ effort?: "low" | "medium" | "high" | "max";
44
+
45
+ // Where in the road
46
+ project: string; // tracking.json.project
47
+ phase?: number; // current phase if applicable
48
+ milestone?: number;
49
+ task_id?: string; // contract task id ("T1", "T2") for builders
50
+ wave?: number;
51
+ retry_of?: string; // run_id of the prior failed attempt this one is retrying
52
+
53
+ // Lifecycle
54
+ status: AgentStatus;
55
+ started_at: string; // ISO 8601 UTC
56
+ finished_at: string; // ISO 8601 UTC
57
+ duration_ms: number;
58
+
59
+ // Cost (OTel-aligned, optional — only if obtainable from spawn shape)
60
+ input_tokens?: number; // gen_ai.usage.input_tokens
61
+ output_tokens?: number; // gen_ai.usage.output_tokens
62
+ cache_read_tokens?: number; // gen_ai.usage.cache_read.input_tokens
63
+ cache_creation_tokens?: number; // gen_ai.usage.cache_creation.input_tokens
64
+
65
+ // Activity
66
+ tool_calls_count?: number;
67
+ files_changed?: string[]; // repo-relative, deduped
68
+ commit_sha?: string; // if the run produced a commit
69
+
70
+ // Outcome detail
71
+ // status = did the agent process complete cleanly (success/failure/timeout/...)
72
+ // verification_result = did the code under test pass (only on agent_type="verifier")
73
+ // a verifier with status="success" + verification_result="fail" = the verifier ran fine and the code failed.
74
+ // a verifier with status="failure" = the verifier itself errored (timeout, infra, etc.)
75
+ verifier_score?: number; // 1-5 if agent_type=verifier
76
+ verification_result?: "pass" | "fail" | "partial";
77
+ failure_reason?: string; // short, machine-classifiable; see "Failure taxonomy" below
78
+ failure_detail?: string; // last 500 chars of stderr/error — keep the tail (newest content), drop the head
79
+
80
+ // Self-link — only set on failure
81
+ log_file?: string; // .planning/agent-runs/<run_id>.log if status != success
82
+ }
83
+
84
+ type AgentType =
85
+ | "planner"
86
+ | "plan-checker"
87
+ | "builder"
88
+ | "verifier"
89
+ | "qa-browser"
90
+ | "researcher"
91
+ | "research-synthesizer"
92
+ | "roadmapper"
93
+ | "team-orchestrator"
94
+ | "custom"; // user-defined agents
95
+
96
+ type AgentStatus =
97
+ | "success" // completed, no failure_reason
98
+ | "partial" // completed but flagged issues (e.g. builder PARTIAL)
99
+ | "blocked" // builder hit a precondition gate (e.g. file lock)
100
+ | "failure" // explicit failure (verifier fail, builder error)
101
+ | "timeout" // exceeded budget
102
+ | "interrupted"; // user cancelled / parent killed
103
+ ```
104
+
105
+ ### Failure taxonomy
106
+
107
+ `failure_reason` is a closed enum so analytics can classify without parsing free text. Add new values via PR — don't free-text.
108
+
109
+ | Code | Meaning |
110
+ |---|---|
111
+ | `tsc-failed` | TypeScript compilation errors |
112
+ | `lint-failed` | ESLint violations |
113
+ | `tests-failed` | Test runner non-zero exit |
114
+ | `build-failed` | Production build broke |
115
+ | `verification-criteria-unmet` | Verifier ran cleanly but criteria failed |
116
+ | `verification-evidence-missing` | Behavioral check lacked required citations |
117
+ | `verification-execution-error` | Check itself errored (binary missing, timeout, cwd missing) — distinct from criteria failure |
118
+ | `file-not-found` | Referenced file absent |
119
+ | `dependency-missing` | Referenced npm/pip/etc package absent |
120
+ | `lock-timeout` | `.planning/.state.lock` not acquired |
121
+ | `network-error` | Outbound HTTP failed (research, ERP) |
122
+ | `rate-limited` | LLM API 429 |
123
+ | `context-overflow` | Prompt exceeded model context |
124
+ | `tool-misuse` | Builder called a forbidden tool |
125
+ | `precondition-unmet` | Required state/file missing before run |
126
+ | `unknown` | Catch-all; should be rare and trigger triage |
127
+
128
+ ## Example records
129
+
130
+ **Successful builder run** (sequential under `/qualia-build` skill invocation `sk_42`):
131
+ ```json
132
+ {"schema_version":1,"run_id":"01HXY8N3W2K7Q5MZP9V4F8R6T1","skill_invocation_id":"sk_42","session_id":"sess_abc123","agent_type":"builder","model":"claude-sonnet-4-6","effort":"medium","project":"acme-portal","phase":2,"milestone":1,"task_id":"T1","wave":1,"status":"success","started_at":"2026-04-28T14:32:11Z","finished_at":"2026-04-28T14:34:02Z","duration_ms":111000,"input_tokens":12450,"output_tokens":1820,"cache_read_tokens":11200,"tool_calls_count":7,"files_changed":["src/lib/auth.ts","src/lib/auth-schema.ts"],"commit_sha":"a3f5e1c"}
133
+ ```
134
+
135
+ **Failed verifier run** (same skill invocation, no parent — it's a sibling of the builder, not nested):
136
+ ```json
137
+ {"schema_version":1,"run_id":"01HXY8P5R8K7Q5MZP9V4F8R6T2","skill_invocation_id":"sk_42","session_id":"sess_abc123","agent_type":"verifier","model":"claude-opus-4-7","project":"acme-portal","phase":2,"milestone":1,"status":"failure","started_at":"2026-04-28T14:38:10Z","finished_at":"2026-04-28T14:39:55Z","duration_ms":105000,"input_tokens":18200,"output_tokens":2100,"tool_calls_count":12,"verifier_score":2,"verification_result":"fail","failure_reason":"verification-criteria-unmet","failure_detail":"Task T2 acceptance criterion 'Redirect to /dashboard on 200' could not be verified — page.tsx contains no redirect() call","log_file":".planning/agent-runs/01HXY8P5R8K7Q5MZP9V4F8R6T2.log"}
138
+ ```
139
+
140
+ **Researcher spawned by team-orchestrator** (true nesting — parent_run_id is set):
141
+ ```json
142
+ {"schema_version":1,"run_id":"01HXY8QF1ZK7Q5MZP9V4F8R6T3","parent_run_id":"01HXY8QE9V2K7Q5MZP9V4F8R6T0","skill_invocation_id":"sk_43","session_id":"sess_abc123","agent_type":"researcher","model":"claude-sonnet-4-6","project":"acme-portal","status":"failure","started_at":"2026-04-28T14:42:00Z","finished_at":"2026-04-28T14:42:12Z","duration_ms":12000,"tool_calls_count":1,"failure_reason":"rate-limited","failure_detail":"WebFetch returned 429 from context7.com after 1 attempt","log_file":".planning/agent-runs/01HXY8QF1ZK7Q5MZP9V4F8R6T3.log"}
143
+ ```
144
+
145
+ ### `parent_run_id` vs `skill_invocation_id` — when to use which
146
+
147
+ - **`skill_invocation_id`:** every record carries one. Groups all agents that ran under a single user-triggered skill (`/qualia-build phase 2` → the planner, every builder, and the verifier all share one id).
148
+ - **`parent_run_id`:** rare. Set only when one agent literally spawned another via the `Agent` tool — for example, `team-orchestrator` fanning out to `frontend-agent` + `backend-agent`. Sequential planner→builder→verifier under one skill is *not* nesting; those are siblings.
149
+
150
+ ## How records get written
151
+
152
+ A small helper at `bin/lib/agent-runs.js`:
153
+
154
+ ```js
155
+ // pseudocode
156
+ const ar = require('./lib/agent-runs');
157
+
158
+ const run = ar.start({
159
+ agent_type: 'builder',
160
+ task_id: 'T1',
161
+ phase: 2,
162
+ model: process.env.QUALIA_MODEL || 'claude-sonnet-4-6',
163
+ });
164
+
165
+ // ... spawn agent, capture result ...
166
+
167
+ ar.finish(run, {
168
+ status: 'success',
169
+ files_changed: ['src/lib/auth.ts'],
170
+ commit_sha: getHeadSha(),
171
+ input_tokens: 12450,
172
+ output_tokens: 1820,
173
+ });
174
+ ```
175
+
176
+ `start()` returns a token; `finish()` writes the full record via a single `fs.appendFileSync` call. Crash between start and finish leaves no partial record on disk — the in-memory record is lost, which is fine.
177
+
178
+ **Concurrency:** `qualia-build` spawns multiple builders in the same wave, each calling `finish()` concurrently. Atomicity guarantee: `fs.appendFileSync` opens with `O_APPEND` and issues a single `write()` syscall per call. On Linux ext4/btrfs/xfs and macOS APFS, `write()` to a regular file with `O_APPEND` is atomic for sizes up to the filesystem block size (typically 4096 bytes). Records run ~600–800 bytes — well under. On Windows NTFS, `O_APPEND` semantics are implemented by Node's libuv via an internal seek+write under a file lock; effectively atomic for our sizes. This is *not* the POSIX pipe `PIPE_BUF` guarantee — that applies to pipes, not regular files. The protection here is the kernel's regular-file `O_APPEND` + single-`write()` behavior. If records ever exceed ~3.5KB, switch to a per-write lock (`proper-lockfile` or a `.planning/.agent-runs.lock` flock).
179
+
180
+ On `status != "success"`, `finish()` also writes `.planning/agent-runs/<run_id>.log` with the full stderr/error output. JSONL stays lean for analytics; debugging context lives in the side files. Successful runs leave no log file.
181
+
182
+ **Where called from:**
183
+ - The skills that orchestrate spawns (`/qualia-build`, `/qualia-verify`, `/qualia-plan`, etc.) wrap each Agent invocation in `ar.start` / `ar.finish`.
184
+ - Skills can't easily measure tokens — those fields are populated when the harness exposes them via `Task` tool result metadata and left undefined otherwise. Don't gate the design on data we may or may not have.
185
+
186
+ ## How records get read
187
+
188
+ `qualia-framework agents` — summary table:
189
+ ```
190
+ $ qualia-framework agents
191
+ Agent runs (last 50, project: acme-portal)
192
+
193
+ TIME AGENT PHASE TASK STATUS DURATION TOKENS NOTE
194
+ 14:34 builder 2 T1 success 111s 14k in src/lib/auth.ts
195
+ 14:38 verifier 2 — failure 105s 20k in verification-criteria-unmet
196
+ 14:42 builder 2 T1 success 89s 13k in fix: redirect after signin
197
+ 14:45 verifier 2 — success 97s 19k in pass
198
+ ```
199
+
200
+ `qualia-framework agents --failed` — only failure/partial/timeout/blocked:
201
+ ```
202
+ $ qualia-framework agents --failed
203
+ 2 failures in last 7 days
204
+
205
+ 2026-04-28 14:38 verifier phase 2 verification-criteria-unmet
206
+ 2026-04-26 09:22 builder phase 1 tsc-failed
207
+ ```
208
+
209
+ `qualia-framework agents --task T1` — all runs for one task (gap cycles visible):
210
+ ```
211
+ $ qualia-framework agents --task T1
212
+ T1 — Add email/password sign-in handler (3 runs)
213
+
214
+ 2026-04-28 14:32 builder success 111s
215
+ 2026-04-28 14:38 verifier failure 105s verification-criteria-unmet
216
+ 2026-04-28 14:42 builder success 89s ← retry after gap
217
+ ```
218
+
219
+ `qualia-framework analytics` extends with: agent failure rate, slowest agents (p50/p95), verifier fail rate by phase, repeated gap-cycles by task.
220
+
221
+ ## ERP integration (additive, non-breaking)
222
+
223
+ Report payload v2 (in `docs/erp-contract.md`) gains:
224
+ ```json
225
+ "agent_runs": {
226
+ "count": 14,
227
+ "failures": 2,
228
+ "verifier_fail_rate": 0.14,
229
+ "slowest_agent_ms": 312000,
230
+ "by_type": { "builder": 9, "verifier": 4, "planner": 1 }
231
+ }
232
+ ```
233
+
234
+ Aggregated counts only — never raw records. ERP backend treats the field as optional; old reports without it still parse. The full JSONL stays local.
235
+
236
+ ## Privacy and opt-out
237
+
238
+ | Captured | Not captured |
239
+ |---|---|
240
+ | Agent type, model | User prompts |
241
+ | Phase, task id | LLM responses |
242
+ | File paths (repo-relative) | File contents |
243
+ | Token counts | Command output |
244
+ | Duration, status | Stderr/stdout beyond `failure_detail` last 500 chars |
245
+ | Failure code | Network response bodies |
246
+ | Commit SHA | Git diffs |
247
+
248
+ Disable new writes: `export QUALIA_TELEMETRY=off`. The helper short-circuits *writes* — reads (`qualia-framework agents`) still surface previously recorded data. Opting out doesn't erase history.
249
+
250
+ ## Design decisions (locked v1)
251
+
252
+ These were called out as open questions during draft; resolved here so implementation can proceed.
253
+
254
+ 1. **Token counts:** all token fields are optional. Populate when the harness exposes them via `Task` tool metadata; leave undefined otherwise. The schema doesn't depend on always having them.
255
+ 2. **`session_id`:** optional. If Claude Code exposes a stable id to skills/hooks, use it. Otherwise the `bin/lib/agent-runs.js` writer generates a per-process UUID on first call and reuses it for the lifetime of that process.
256
+ 3. **Tool-call telemetry:** aggregate `tool_calls_count` only. No per-call spans. If a future analytics need demands per-call detail, add a separate `agent-tool-calls.jsonl` — don't bloat the main ledger.
257
+ 4. **`parent_run_id` vs `skill_invocation_id`:** added `skill_invocation_id` as the common grouping key (every record has one). `parent_run_id` is reserved strictly for true agent-spawned-agent nesting (team-orchestrator → fan-out children). Documented inline above.
258
+ 5. **`failure_detail`:** capped at 500 chars; keep the tail (most recent stderr), drop the head. The newest content is usually the most useful for classification.
259
+ 6. **Side log files:** on `status != success`, `finish()` writes `.planning/agent-runs/<run_id>.log` with full stderr. Lean JSONL stays grep-friendly; debugging context survives.
260
+ 7. **Cross-project rollup:** rejected. ERP does fleet-wide aggregation. A `~/.claude/agent-runs.jsonl` mirror would add a sync surface for marginal benefit.
261
+ 8. **Append atomicity:** relies on `O_APPEND` + single-`write()` syscall behavior for regular files (Linux/macOS/Windows). Atomic up to filesystem block size; our records are well under. Detailed in the "Concurrency" note above.
262
+ 9. **Cleanup:** `qualia-framework agents prune --before YYYY-MM-DD` removes records and matching log files older than the cutoff. Never auto-prunes — operator-driven only.
263
+ 10. **`QUALIA_TELEMETRY=off` semantics:** disables *writes* only. Reads (`qualia-framework agents`) still surface existing records — opting out of new collection does not retroactively hide history. Set before a session to silence that session's spawns.
264
+
265
+ ## Migration plan
266
+
267
+ 1. Add `bin/lib/agent-runs.js` (writer) + `bin/cli.js agents` (reader). Helper is a no-op if `.planning/` doesn't exist.
268
+ 2. Wire `ar.start`/`ar.finish` calls into the orchestrating skills (`/qualia-build`, `/qualia-verify`, `/qualia-plan`, `/qualia-research`, `/qualia-postmortem`).
269
+ 3. Add `agents` table to `qualia-framework analytics`.
270
+ 4. After two milestones produce real data, extend ERP payload (v2) with aggregated metrics. Coordinate with ERP backend.
271
+ 5. Defer postmortem feedback loop and ERP feedback analyzer until ≥4 weeks of real data exist.
272
+
273
+ No hard cutover. Pre-existing projects acquire the JSONL on first spawn after upgrade — older runs are simply absent.