qualia-framework 4.1.1 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -11
- package/agents/builder.md +28 -0
- package/agents/research-synthesizer.md +7 -0
- package/bin/agent-runs.js +233 -0
- package/bin/cli.js +355 -16
- package/bin/install.js +87 -6
- package/bin/knowledge-flush.js +164 -0
- package/bin/knowledge.js +317 -0
- package/bin/plan-contract.js +220 -0
- package/bin/state.js +15 -9
- package/docs/agent-runs.md +273 -0
- package/docs/journey-demo.html +1008 -0
- package/docs/plan-contract.md +321 -0
- package/docs/reviews/v4.1.0-audit.html +1488 -0
- package/docs/reviews/v4.1.0-audit.md +263 -0
- package/hooks/auto-update.js +3 -7
- package/hooks/git-guardrails.js +167 -0
- package/hooks/pre-compact.js +22 -11
- package/hooks/pre-deploy-gate.js +16 -2
- package/hooks/pre-push.js +22 -2
- package/hooks/stop-session-log.js +180 -0
- package/package.json +8 -2
- package/skills/qualia-build/SKILL.md +5 -5
- package/skills/qualia-debug/SKILL.md +1 -1
- package/skills/qualia-design/SKILL.md +15 -0
- package/skills/qualia-flush/SKILL.md +200 -0
- package/skills/qualia-learn/SKILL.md +47 -37
- package/skills/qualia-new/SKILL.md +1 -1
- package/skills/qualia-plan/SKILL.md +3 -2
- package/skills/qualia-postmortem/SKILL.md +238 -0
- package/skills/qualia-quick/SKILL.md +1 -1
- package/skills/qualia-report/SKILL.md +1 -1
- package/skills/qualia-review/SKILL.md +3 -2
- package/skills/qualia-ship/SKILL.md +12 -10
- package/skills/qualia-verify/SKILL.md +60 -0
- package/templates/help.html +13 -7
- package/templates/knowledge/agents.md +71 -0
- package/templates/knowledge/index.md +47 -0
- package/tests/bin.test.sh +322 -12
- package/tests/hooks.test.sh +131 -20
- package/tests/lib.test.sh +217 -0
- package/tests/runner.js +103 -77
- package/tests/state.test.sh +4 -3
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Plan contract validator + helpers. See docs/plan-contract.md.
|
|
3
|
+
//
|
|
4
|
+
// Pure library — no CLI dispatch. Required by state.js and by skills that
|
|
5
|
+
// emit/consume `.planning/phase-{N}-contract.json`.
|
|
6
|
+
//
|
|
7
|
+
// Zero npm dependencies. Hand-rolled validator, ~100 LOC.
|
|
8
|
+
|
|
9
|
+
const fs = require("fs");
|
|
10
|
+
const path = require("path");
|
|
11
|
+
const crypto = require("crypto");
|
|
12
|
+
|
|
13
|
+
// Bump when the contract shape changes incompatibly; validate() rejects any
// contract whose version field differs from this.
const SCHEMA_VERSION = 1;

// Allowed values for task.persona; "none" opts a task out of persona routing.
const PERSONAS = new Set([
  "security", "architect", "ux", "frontend",
  "backend", "data", "performance", "none",
]);

// Allowed verification check types. Per-type required fields are enforced in
// validateCheck().
const CHECK_TYPES = new Set([
  "file-exists", "grep-match", "command-exit", "behavioral",
]);
|
|
23
|
+
|
|
24
|
+
// True when v is an array whose every element is a string (empty arrays pass).
function isStringArray(v) {
  if (!Array.isArray(v)) return false;
  for (const item of v) {
    if (typeof item !== "string") return false;
  }
  return true;
}
|
|
27
|
+
|
|
28
|
+
// True when v is a non-null, non-array object (the JSON-object shape the
// validator expects). Returns a real boolean: the original leaked the raw
// falsy value (null/undefined/0) through the && short-circuit, which worked
// for truthiness checks but surprised strict-equality callers.
function isPlainObject(v) {
  return Boolean(v) && typeof v === "object" && !Array.isArray(v);
}
|
|
31
|
+
|
|
32
|
+
// Validates one verification check of a task. Returns an array of error
// strings (empty means valid). taskId/idx are used only to build readable
// error locations like `tasks[id=T1].verification[0]`.
function validateCheck(check, taskId, idx) {
  const errs = [];
  const where = `tasks[id=${taskId}].verification[${idx}]`;
  if (!isPlainObject(check)) return [`${where}: not an object`];
  // Unknown type: report once and stop — per-type field checks below would
  // only produce noise.
  if (!CHECK_TYPES.has(check.type)) {
    errs.push(`${where}.type: must be one of ${[...CHECK_TYPES].join("|")}`);
    return errs;
  }
  switch (check.type) {
    case "file-exists":
      if (typeof check.path !== "string" || !check.path) errs.push(`${where}.path: required string`);
      // must_contain is optional; validate only when present (!= null also
      // skips undefined).
      if (check.must_contain != null && typeof check.must_contain !== "string") errs.push(`${where}.must_contain: must be string`);
      break;
    case "grep-match":
      if (typeof check.path !== "string" || !check.path) errs.push(`${where}.path: required string`);
      if (typeof check.pattern !== "string" || !check.pattern) errs.push(`${where}.pattern: required string`);
      // Compile the pattern up front so a bad regex fails validation, not
      // check execution.
      else { try { new RegExp(check.pattern); } catch { errs.push(`${where}.pattern: invalid regex`); } }
      if (check.expect !== "present" && check.expect !== "absent") errs.push(`${where}.expect: must be "present" or "absent"`);
      break;
    case "command-exit":
      if (typeof check.command !== "string" || !check.command) errs.push(`${where}.command: required string`);
      // Reject shell metacharacters in the command itself; arguments belong
      // in args[] so the runner never needs a shell.
      else if (/[;&|`$<>(){}\\]/.test(check.command)) errs.push(`${where}.command: shell metacharacters not allowed (use args[])`);
      if (!isStringArray(check.args || [])) errs.push(`${where}.args: must be string[]`);
      if (typeof check.expected_exit !== "number") errs.push(`${where}.expected_exit: required number`);
      if (check.timeout_ms != null && (typeof check.timeout_ms !== "number" || check.timeout_ms <= 0)) {
        errs.push(`${where}.timeout_ms: must be positive number`);
      }
      // Optional stdout matcher — same compile-early treatment as pattern.
      if (check.expect_stdout_match != null) {
        if (typeof check.expect_stdout_match !== "string") errs.push(`${where}.expect_stdout_match: must be string`);
        else { try { new RegExp(check.expect_stdout_match); } catch { errs.push(`${where}.expect_stdout_match: invalid regex`); } }
      }
      break;
    case "behavioral":
      if (typeof check.description !== "string" || !check.description) errs.push(`${where}.description: required string`);
      if (!Array.isArray(check.evidence_required) || check.evidence_required.length === 0) {
        errs.push(`${where}.evidence_required: must be a non-empty array`);
      } else {
        check.evidence_required.forEach((ev, i) => {
          const w = `${where}.evidence_required[${i}]`;
          if (!isPlainObject(ev)) { errs.push(`${w}: not an object`); return; }
          if (typeof ev.path !== "string" || !ev.path) errs.push(`${w}.path: required string`);
          if (typeof ev.description !== "string" || !ev.description) errs.push(`${w}.description: required string`);
          if (ev.matcher != null) {
            if (typeof ev.matcher !== "string") errs.push(`${w}.matcher: must be string`);
            else { try { new RegExp(ev.matcher); } catch { errs.push(`${w}.matcher: invalid regex`); } }
          }
        });
      }
      break;
  }
  return errs;
}
|
|
84
|
+
|
|
85
|
+
// Validates one task object. Returns an array of error strings (empty means
// valid). allIds is the Set of every task id in the contract, used to check
// that depends_on references resolve.
function validateTask(task, idx, allIds) {
  const errs = [];
  const where = `tasks[${idx}]`;
  if (!isPlainObject(task)) return [`${where}: not an object`];
  if (typeof task.id !== "string" || !/^T\d+$/.test(task.id)) errs.push(`${where}.id: must match ^T\\d+$`);
  if (typeof task.title !== "string" || !task.title) errs.push(`${where}.title: required string`);
  if (typeof task.wave !== "number" || task.wave < 1) errs.push(`${where}.wave: must be positive number`);
  // `|| []` lets all list fields be omitted entirely; only their type is
  // enforced when present.
  if (!isStringArray(task.depends_on || [])) errs.push(`${where}.depends_on: must be string[]`);
  if (task.persona != null && !PERSONAS.has(task.persona)) errs.push(`${where}.persona: invalid value`);
  if (!isStringArray(task.files_modify || [])) errs.push(`${where}.files_modify: must be string[]`);
  if (!isStringArray(task.files_create || [])) errs.push(`${where}.files_create: must be string[]`);
  if (!isStringArray(task.files_delete || [])) errs.push(`${where}.files_delete: must be string[]`);
  if (!isStringArray(task.acceptance_criteria || []) || (task.acceptance_criteria || []).length === 0) {
    errs.push(`${where}.acceptance_criteria: must be a non-empty string[]`);
  }
  if (typeof task.action !== "string") errs.push(`${where}.action: required string`);
  else if (task.action.length > 500) errs.push(`${where}.action: must be ≤ 500 characters (got ${task.action.length})`);
  if (!isStringArray(task.context_files || [])) errs.push(`${where}.context_files: must be string[]`);
  if (!Array.isArray(task.verification) || task.verification.length === 0) {
    errs.push(`${where}.verification: must be a non-empty array`);
  } else {
    task.verification.forEach((c, i) => errs.push(...validateCheck(c, task.id, i)));
  }

  // disjointness across files_modify/create/delete — a path may appear in at
  // most one of the three lists.
  const m = new Set(task.files_modify || []);
  const c = new Set(task.files_create || []);
  const d = new Set(task.files_delete || []);
  for (const p of m) if (c.has(p) || d.has(p)) errs.push(`${where}: ${p} appears in multiple file lists`);
  for (const p of c) if (d.has(p)) errs.push(`${where}: ${p} appears in multiple file lists`);

  // depends_on references must exist
  for (const dep of task.depends_on || []) {
    if (!allIds.has(dep)) errs.push(`${where}.depends_on: references unknown id "${dep}"`);
  }
  return errs;
}
|
|
122
|
+
|
|
123
|
+
// Detects dependency cycles among tasks via three-color DFS. Returns an array
// of human-readable cycle strings ("T1 → T2 → T1"); empty when acyclic.
//
// Fix over the original: finding a back-edge used to `return` out of dfs()
// without popping the stack or coloring the node BLACK, so any cycle reported
// after the first carried a corrupted path and sibling cycles could be missed
// entirely. Recording the cycle and continuing keeps the traversal O(V+E)
// while leaving the stack/color bookkeeping consistent. The first reported
// cycle string is unchanged.
function detectCycles(tasks) {
  const graph = new Map(tasks.map((t) => [t.id, t.depends_on || []]));
  const WHITE = 0, GRAY = 1, BLACK = 2;
  const color = new Map([...graph.keys()].map((k) => [k, WHITE]));
  const cycles = [];
  function dfs(u, stack) {
    color.set(u, GRAY);
    stack.push(u);
    for (const v of graph.get(u) || []) {
      // Dangling depends_on ids are reported by validateTask, not here.
      if (!graph.has(v)) continue;
      const cv = color.get(v);
      if (cv === GRAY) {
        // Back-edge: v is on the current path — record the cycle and keep
        // scanning remaining edges so the stack unwinds correctly.
        cycles.push([...stack, v].join(" → "));
      } else if (cv === WHITE) {
        dfs(v, stack);
      }
    }
    color.set(u, BLACK);
    stack.pop();
  }
  for (const k of graph.keys()) if (color.get(k) === WHITE) dfs(k, []);
  return cycles;
}
|
|
143
|
+
|
|
144
|
+
// Validates a whole contract object. Returns an array of error strings;
// empty array means the contract is valid. Never throws on bad input.
function validate(contract) {
  const errs = [];
  if (!isPlainObject(contract)) return ["contract: not an object"];
  if (contract.version !== SCHEMA_VERSION) errs.push(`version: must be ${SCHEMA_VERSION}, got ${contract.version}`);
  if (typeof contract.phase !== "number" || contract.phase < 1) errs.push("phase: must be positive number");
  if (typeof contract.goal !== "string" || !contract.goal) errs.push("goal: required string");
  if (typeof contract.why !== "string" || !contract.why) errs.push("why: required string");
  // Only the type is checked here; the string is not parsed as a date.
  if (typeof contract.generated_at !== "string") errs.push("generated_at: required ISO 8601 string");
  if (!["planner", "compile-plan", "manual"].includes(contract.generated_by)) {
    errs.push('generated_by: must be "planner" | "compile-plan" | "manual"');
  }
  if (typeof contract.source_plan_hash !== "string") errs.push("source_plan_hash: required string (empty for manual)");
  if (!isStringArray(contract.success_criteria || []) || (contract.success_criteria || []).length === 0) {
    errs.push("success_criteria: must be a non-empty string[]");
  }
  // Without a non-empty tasks array the per-task / graph checks below are
  // meaningless — stop early.
  if (!Array.isArray(contract.tasks) || contract.tasks.length === 0) {
    errs.push("tasks: must be a non-empty array");
    return errs;
  }

  // Collect all ids first so validateTask can resolve depends_on against the
  // full set (including forward references), and duplicates are flagged.
  const ids = new Set();
  for (const t of contract.tasks) {
    if (t && typeof t.id === "string") {
      if (ids.has(t.id)) errs.push(`tasks: duplicate id "${t.id}"`);
      ids.add(t.id);
    }
  }

  contract.tasks.forEach((t, i) => errs.push(...validateTask(t, i, ids)));

  // Only the first cycle is surfaced — one actionable error beats a flood.
  const cycles = detectCycles(contract.tasks);
  if (cycles.length) errs.push(`tasks: cycle detected: ${cycles[0]}`);

  // wave > max(deps wave): a task must be scheduled strictly after every task
  // it depends on.
  const byId = new Map(contract.tasks.map((t) => [t.id, t]));
  for (const t of contract.tasks) {
    const deps = (t.depends_on || []).map((id) => byId.get(id)).filter(Boolean);
    if (deps.length === 0) continue;
    const maxDepWave = Math.max(...deps.map((d) => d.wave || 0));
    if ((t.wave || 0) <= maxDepWave) {
      errs.push(`tasks[id=${t.id}].wave: must be > ${maxDepWave} (max wave of its dependencies)`);
    }
  }

  return errs;
}
|
|
190
|
+
|
|
191
|
+
// JSON.parse that never throws. Returns { ok: true, value } on success and
// { ok: false, error } (the parser's message) on failure.
function parseSafely(text) {
  let value;
  try {
    value = JSON.parse(text);
  } catch (e) {
    return { ok: false, error: e.message };
  }
  return { ok: true, value };
}
|
|
198
|
+
|
|
199
|
+
// Hashes plan text to a prefixed hex digest ("sha256:<hex>") for drift
// comparison against contract.source_plan_hash.
function hashPlan(text) {
  const digest = crypto.createHash("sha256").update(text, "utf8").digest("hex");
  return `sha256:${digest}`;
}
|
|
202
|
+
|
|
203
|
+
// Compares the hash stored in a contract file against the current hash of the
// plan markdown. Returns { ok: false, reason } when the comparison cannot be
// made, otherwise { ok: true, drift, ... } describing whether the plan changed
// since the contract was generated.
function checkDrift(contractPath, planMdPath) {
  const missing =
    !fs.existsSync(contractPath) ? "contract-missing" :
    !fs.existsSync(planMdPath) ? "plan-md-missing" :
    null;
  if (missing) return { ok: false, reason: missing };

  const parsed = parseSafely(fs.readFileSync(contractPath, "utf8"));
  if (!parsed.ok) {
    return { ok: false, reason: "contract-unparseable", detail: parsed.error };
  }

  const stored = parsed.value.source_plan_hash;
  // Contracts without a stored hash (e.g. manual ones) are never "drifted".
  if (!stored) return { ok: true, drift: false, reason: "no-hash-stored" };

  const current = hashPlan(fs.readFileSync(planMdPath, "utf8"));
  return { ok: true, drift: stored !== current, stored, current };
}
|
|
213
|
+
|
|
214
|
+
// Public surface. validateCheck/validateTask/detectCycles stay internal —
// callers validate whole contracts, not fragments.
module.exports = {
  SCHEMA_VERSION,
  validate,
  parseSafely,
  hashPlan,
  checkDrift,
};
|
package/bin/state.js
CHANGED
|
@@ -104,11 +104,14 @@ function acquireLock(timeoutMs = 5000) {
|
|
|
104
104
|
sleepSync(50);
|
|
105
105
|
}
|
|
106
106
|
}
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
107
|
+
const waited = Date.now() - start;
|
|
108
|
+
try { _trace("state-lock", "timeout", { waited_ms: waited }); } catch {}
|
|
109
|
+
const err = new Error(
|
|
110
|
+
`Could not acquire ${LOCK_FILE} after ${waited}ms. Another state mutation may still be running.`
|
|
111
|
+
);
|
|
112
|
+
err.code = "STATE_LOCK_TIMEOUT";
|
|
113
|
+
err.waited_ms = waited;
|
|
114
|
+
throw err;
|
|
112
115
|
}
|
|
113
116
|
|
|
114
117
|
function releaseLock(lock) {
|
|
@@ -1297,9 +1300,8 @@ const opts = parseArgs(rest);
|
|
|
1297
1300
|
|
|
1298
1301
|
// Mutators must hold the .planning/.state.lock for the duration of their
|
|
1299
1302
|
// dual STATE.md + tracking.json writes. Read commands (check, validate-plan)
|
|
1300
|
-
// don't need the lock.
|
|
1301
|
-
//
|
|
1302
|
-
// risk a rare race than hard-block the user.
|
|
1303
|
+
// don't need the lock. A lock timeout is a hard failure for mutators; racing
|
|
1304
|
+
// state writes are worse than asking the user to retry.
|
|
1303
1305
|
const READ_ONLY = new Set(["check", "validate-plan"]);
|
|
1304
1306
|
let __lock = null;
|
|
1305
1307
|
if (!READ_ONLY.has(cmd)) {
|
|
@@ -1307,7 +1309,11 @@ if (!READ_ONLY.has(cmd)) {
|
|
|
1307
1309
|
// previous mutator. Runs for mutators only; read commands should still
|
|
1308
1310
|
// return the actual on-disk state even if it's mid-recovery.
|
|
1309
1311
|
try { recoverFromJournal(); } catch {}
|
|
1310
|
-
|
|
1312
|
+
try {
|
|
1313
|
+
__lock = acquireLock();
|
|
1314
|
+
} catch (err) {
|
|
1315
|
+
output(fail(err.code || "STATE_LOCK_ERROR", err.message));
|
|
1316
|
+
}
|
|
1311
1317
|
process.on("exit", () => releaseLock(__lock));
|
|
1312
1318
|
process.on("SIGINT", () => { releaseLock(__lock); process.exit(130); });
|
|
1313
1319
|
process.on("SIGTERM", () => { releaseLock(__lock); process.exit(143); });
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
# Agent Runs Telemetry
|
|
2
|
+
|
|
3
|
+
Append-only JSONL ledger of every subagent spawn, recorded per project. Substrate for `qualia-framework agents`, postmortem analysis, and ERP enrichment.
|
|
4
|
+
|
|
5
|
+
Status: **draft, v1.** Pressure-test the shape against real spawns before locking.
|
|
6
|
+
|
|
7
|
+
## Why this exists
|
|
8
|
+
|
|
9
|
+
Today, `traces.jsonl` records hook-level events only. There is zero per-agent telemetry: no record of which builder ran for how long on which task, which verifier failed and why, which researcher hit a rate limit. The data needed to answer "which task failed twice and required a postmortem" doesn't exist.
|
|
10
|
+
|
|
11
|
+
This file specifies a per-spawn record that lives next to the project (not in `~/.claude/`) so it travels with the repo, is committed alongside other planning artifacts, and stays attributable to a specific phase.
|
|
12
|
+
|
|
13
|
+
## File layout
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
.planning/
|
|
17
|
+
agent-runs.jsonl # all-time, append-only
|
|
18
|
+
agent-runs/
|
|
19
|
+
2026-04-28.jsonl # daily rotation (optional, see below)
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
**Rotation:** start with a single file. If `agent-runs.jsonl` exceeds 5MB, rotate to a dated subfile. Cheap, no dependency.
|
|
23
|
+
|
|
24
|
+
**Privacy:** records contain file paths, task ids, durations, token counts, error strings — never command output, never file contents, never user prompts. The schema below is the upper bound of what we capture. `QUALIA_TELEMETRY=off` env var disables writes.
|
|
25
|
+
|
|
26
|
+
## Schema (v1)
|
|
27
|
+
|
|
28
|
+
OpenTelemetry GenAI semantic conventions where they fit; framework-specific fields where they don't.
|
|
29
|
+
|
|
30
|
+
```ts
|
|
31
|
+
interface AgentRunRecord {
|
|
32
|
+
// Identity
|
|
33
|
+
schema_version: 1;
|
|
34
|
+
run_id: string; // ULID — sortable, monotonic
|
|
35
|
+
parent_run_id?: string; // ONLY for true nesting (an agent spawned this one); null otherwise
|
|
36
|
+
skill_invocation_id: string; // groups runs from one skill call (sequential or parallel siblings)
|
|
37
|
+
session_id?: string; // Claude Code session id when reachable; per-process UUID fallback
|
|
38
|
+
|
|
39
|
+
// What ran
|
|
40
|
+
agent_type: AgentType;
|
|
41
|
+
agent_name?: string; // for custom agents (e.g. "frontend-agent")
|
|
42
|
+
model: string; // "claude-opus-4-7", "claude-sonnet-4-6", etc.
|
|
43
|
+
effort?: "low" | "medium" | "high" | "max";
|
|
44
|
+
|
|
45
|
+
// Where in the road
|
|
46
|
+
project: string; // tracking.json.project
|
|
47
|
+
phase?: number; // current phase if applicable
|
|
48
|
+
milestone?: number;
|
|
49
|
+
task_id?: string; // contract task id ("T1", "T2") for builders
|
|
50
|
+
wave?: number;
|
|
51
|
+
retry_of?: string; // run_id of the prior failed attempt this one is retrying
|
|
52
|
+
|
|
53
|
+
// Lifecycle
|
|
54
|
+
status: AgentStatus;
|
|
55
|
+
started_at: string; // ISO 8601 UTC
|
|
56
|
+
finished_at: string; // ISO 8601 UTC
|
|
57
|
+
duration_ms: number;
|
|
58
|
+
|
|
59
|
+
// Cost (OTel-aligned, optional — only if obtainable from spawn shape)
|
|
60
|
+
input_tokens?: number; // gen_ai.usage.input_tokens
|
|
61
|
+
output_tokens?: number; // gen_ai.usage.output_tokens
|
|
62
|
+
cache_read_tokens?: number; // gen_ai.usage.cache_read.input_tokens
|
|
63
|
+
cache_creation_tokens?: number; // gen_ai.usage.cache_creation.input_tokens
|
|
64
|
+
|
|
65
|
+
// Activity
|
|
66
|
+
tool_calls_count?: number;
|
|
67
|
+
files_changed?: string[]; // repo-relative, deduped
|
|
68
|
+
commit_sha?: string; // if the run produced a commit
|
|
69
|
+
|
|
70
|
+
// Outcome detail
|
|
71
|
+
// status = did the agent process complete cleanly (success/failure/timeout/...)
|
|
72
|
+
// verification_result = did the code under test pass (only on agent_type="verifier")
|
|
73
|
+
// a verifier with status="success" + verification_result="fail" = the verifier ran fine and the code failed.
|
|
74
|
+
// a verifier with status="failure" = the verifier itself errored (timeout, infra, etc.)
|
|
75
|
+
verifier_score?: number; // 1-5 if agent_type=verifier
|
|
76
|
+
verification_result?: "pass" | "fail" | "partial";
|
|
77
|
+
failure_reason?: string; // short, machine-classifiable; see "Failure taxonomy" below
|
|
78
|
+
failure_detail?: string; // last 500 chars of stderr/error — keep the tail (newest content), drop the head
|
|
79
|
+
|
|
80
|
+
// Self-link — only set on failure
|
|
81
|
+
log_file?: string; // .planning/agent-runs/<run_id>.log if status != success
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
type AgentType =
|
|
85
|
+
| "planner"
|
|
86
|
+
| "plan-checker"
|
|
87
|
+
| "builder"
|
|
88
|
+
| "verifier"
|
|
89
|
+
| "qa-browser"
|
|
90
|
+
| "researcher"
|
|
91
|
+
| "research-synthesizer"
|
|
92
|
+
| "roadmapper"
|
|
93
|
+
| "team-orchestrator"
|
|
94
|
+
| "custom"; // user-defined agents
|
|
95
|
+
|
|
96
|
+
type AgentStatus =
|
|
97
|
+
| "success" // completed, no failure_reason
|
|
98
|
+
| "partial" // completed but flagged issues (e.g. builder PARTIAL)
|
|
99
|
+
| "blocked" // builder hit a precondition gate (e.g. file lock)
|
|
100
|
+
| "failure" // explicit failure (verifier fail, builder error)
|
|
101
|
+
| "timeout" // exceeded budget
|
|
102
|
+
| "interrupted"; // user cancelled / parent killed
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Failure taxonomy
|
|
106
|
+
|
|
107
|
+
`failure_reason` is a closed enum so analytics can classify without parsing free text. Add new values via PR — don't free-text.
|
|
108
|
+
|
|
109
|
+
| Code | Meaning |
|
|
110
|
+
|---|---|
|
|
111
|
+
| `tsc-failed` | TypeScript compilation errors |
|
|
112
|
+
| `lint-failed` | ESLint violations |
|
|
113
|
+
| `tests-failed` | Test runner non-zero exit |
|
|
114
|
+
| `build-failed` | Production build broke |
|
|
115
|
+
| `verification-criteria-unmet` | Verifier ran cleanly but criteria failed |
|
|
116
|
+
| `verification-evidence-missing` | Behavioral check lacked required citations |
|
|
117
|
+
| `verification-execution-error` | Check itself errored (binary missing, timeout, cwd missing) — distinct from criteria failure |
|
|
118
|
+
| `file-not-found` | Referenced file absent |
|
|
119
|
+
| `dependency-missing` | Referenced npm/pip/etc package absent |
|
|
120
|
+
| `lock-timeout` | `.planning/.state.lock` not acquired |
|
|
121
|
+
| `network-error` | Outbound HTTP failed (research, ERP) |
|
|
122
|
+
| `rate-limited` | LLM API 429 |
|
|
123
|
+
| `context-overflow` | Prompt exceeded model context |
|
|
124
|
+
| `tool-misuse` | Builder called a forbidden tool |
|
|
125
|
+
| `precondition-unmet` | Required state/file missing before run |
|
|
126
|
+
| `unknown` | Catch-all; should be rare and trigger triage |
|
|
127
|
+
|
|
128
|
+
## Example records
|
|
129
|
+
|
|
130
|
+
**Successful builder run** (sequential under `/qualia-build` skill invocation `sk_42`):
|
|
131
|
+
```json
|
|
132
|
+
{"schema_version":1,"run_id":"01HXY8N3W2K7Q5MZP9V4F8R6T1","skill_invocation_id":"sk_42","session_id":"sess_abc123","agent_type":"builder","model":"claude-sonnet-4-6","effort":"medium","project":"acme-portal","phase":2,"milestone":1,"task_id":"T1","wave":1,"status":"success","started_at":"2026-04-28T14:32:11Z","finished_at":"2026-04-28T14:34:02Z","duration_ms":111000,"input_tokens":12450,"output_tokens":1820,"cache_read_tokens":11200,"tool_calls_count":7,"files_changed":["src/lib/auth.ts","src/lib/auth-schema.ts"],"commit_sha":"a3f5e1c"}
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
**Failed verifier run** (same skill invocation, no parent — it's a sibling of the builder, not nested):
|
|
136
|
+
```json
|
|
137
|
+
{"schema_version":1,"run_id":"01HXY8P5R8K7Q5MZP9V4F8R6T2","skill_invocation_id":"sk_42","session_id":"sess_abc123","agent_type":"verifier","model":"claude-opus-4-7","project":"acme-portal","phase":2,"milestone":1,"status":"failure","started_at":"2026-04-28T14:38:10Z","finished_at":"2026-04-28T14:39:55Z","duration_ms":105000,"input_tokens":18200,"output_tokens":2100,"tool_calls_count":12,"verifier_score":2,"verification_result":"fail","failure_reason":"verification-criteria-unmet","failure_detail":"Task T2 acceptance criterion 'Redirect to /dashboard on 200' could not be verified — page.tsx contains no redirect() call","log_file":".planning/agent-runs/01HXY8P5R8K7Q5MZP9V4F8R6T2.log"}
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
**Researcher spawned by team-orchestrator** (true nesting — parent_run_id is set):
|
|
141
|
+
```json
|
|
142
|
+
{"schema_version":1,"run_id":"01HXY8QF1ZK7Q5MZP9V4F8R6T3","parent_run_id":"01HXY8QE9V2K7Q5MZP9V4F8R6T0","skill_invocation_id":"sk_43","session_id":"sess_abc123","agent_type":"researcher","model":"claude-sonnet-4-6","project":"acme-portal","status":"failure","started_at":"2026-04-28T14:42:00Z","finished_at":"2026-04-28T14:42:12Z","duration_ms":12000,"tool_calls_count":1,"failure_reason":"rate-limited","failure_detail":"WebFetch returned 429 from context7.com after 1 attempt","log_file":".planning/agent-runs/01HXY8QF1ZK7Q5MZP9V4F8R6T3.log"}
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### `parent_run_id` vs `skill_invocation_id` — when to use which
|
|
146
|
+
|
|
147
|
+
- **`skill_invocation_id`:** every record carries one. Groups all agents that ran under a single user-triggered skill (`/qualia-build phase 2` → planner, all builders, verifier all share an id).
|
|
148
|
+
- **`parent_run_id`:** rare. Set only when one agent literally spawned another via the `Agent` tool — for example, `team-orchestrator` fanning out to `frontend-agent` + `backend-agent`. Sequential planner→builder→verifier under one skill is *not* nesting; those are siblings.
|
|
149
|
+
|
|
150
|
+
## How records get written
|
|
151
|
+
|
|
152
|
+
A small helper at `bin/lib/agent-runs.js`:
|
|
153
|
+
|
|
154
|
+
```js
|
|
155
|
+
// pseudocode
|
|
156
|
+
const ar = require('./lib/agent-runs');
|
|
157
|
+
|
|
158
|
+
const run = ar.start({
|
|
159
|
+
agent_type: 'builder',
|
|
160
|
+
task_id: 'T1',
|
|
161
|
+
phase: 2,
|
|
162
|
+
model: process.env.QUALIA_MODEL || 'claude-sonnet-4-6',
|
|
163
|
+
});
|
|
164
|
+
|
|
165
|
+
// ... spawn agent, capture result ...
|
|
166
|
+
|
|
167
|
+
ar.finish(run, {
|
|
168
|
+
status: 'success',
|
|
169
|
+
files_changed: ['src/lib/auth.ts'],
|
|
170
|
+
commit_sha: getHeadSha(),
|
|
171
|
+
input_tokens: 12450,
|
|
172
|
+
output_tokens: 1820,
|
|
173
|
+
});
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
`start()` returns a token; `finish()` writes the full record via a single `fs.appendFileSync` call. Crash between start and finish leaves no partial record on disk — the in-memory record is lost, which is fine.
|
|
177
|
+
|
|
178
|
+
**Concurrency:** `qualia-build` spawns multiple builders in the same wave, each calling `finish()` concurrently. Atomicity guarantee: `fs.appendFileSync` opens with `O_APPEND` and issues a single `write()` syscall per call. On Linux ext4/btrfs/xfs and macOS APFS, `write()` to a regular file with `O_APPEND` is atomic for sizes up to the filesystem block size (typically 4096 bytes). Records run ~600–800 bytes — well under. On Windows NTFS, `O_APPEND` semantics are implemented by Node's libuv via an internal seek+write under a file lock; effectively atomic for our sizes. This is *not* the POSIX pipe `PIPE_BUF` guarantee — that applies to pipes, not regular files. The protection here is the kernel's regular-file `O_APPEND` + single-`write()` behavior. If records ever exceed ~3.5KB, switch to a per-write lock (`proper-lockfile` or a `.planning/.agent-runs.lock` flock).
|
|
179
|
+
|
|
180
|
+
On `status != "success"`, `finish()` also writes `.planning/agent-runs/<run_id>.log` with the full stderr/error output. JSONL stays lean for analytics; debugging context lives in the side files. Successful runs leave no log file.
|
|
181
|
+
|
|
182
|
+
**Where called from:**
|
|
183
|
+
- The skills that orchestrate spawns (`/qualia-build`, `/qualia-verify`, `/qualia-plan`, etc.) wrap each Agent invocation in `ar.start` / `ar.finish`.
|
|
184
|
+
- Skills can't easily measure tokens — those fields are populated when the harness exposes them via `Task` tool result metadata and are left undefined otherwise. Don't gate the design on data we may or may not have.
|
|
185
|
+
|
|
186
|
+
## How records get read
|
|
187
|
+
|
|
188
|
+
`qualia-framework agents` — summary table:
|
|
189
|
+
```
|
|
190
|
+
$ qualia-framework agents
|
|
191
|
+
Agent runs (last 50, project: acme-portal)
|
|
192
|
+
|
|
193
|
+
TIME AGENT PHASE TASK STATUS DURATION TOKENS NOTE
|
|
194
|
+
14:34 builder 2 T1 success 111s 14k in src/lib/auth.ts
|
|
195
|
+
14:38 verifier 2 — failure 105s 20k in verification-criteria-unmet
|
|
196
|
+
14:42 builder 2 T1 success 89s 13k in fix: redirect after signin
|
|
197
|
+
14:45 verifier 2 — success 97s 19k in pass
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
`qualia-framework agents --failed` — only failure/partial/timeout/blocked:
|
|
201
|
+
```
|
|
202
|
+
$ qualia-framework agents --failed
|
|
203
|
+
2 failures in last 7 days
|
|
204
|
+
|
|
205
|
+
2026-04-28 14:38 verifier phase 2 verification-criteria-unmet
|
|
206
|
+
2026-04-26 09:22 builder phase 1 tsc-failed
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
`qualia-framework agents --task T1` — all runs for one task (gap cycles visible):
|
|
210
|
+
```
|
|
211
|
+
$ qualia-framework agents --task T1
|
|
212
|
+
T1 — Add email/password sign-in handler (3 runs)
|
|
213
|
+
|
|
214
|
+
2026-04-28 14:32 builder success 111s
|
|
215
|
+
2026-04-28 14:38 verifier failure 105s verification-criteria-unmet
|
|
216
|
+
2026-04-28 14:42 builder success 89s ← retry after gap
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
`qualia-framework analytics` extends with: agent failure rate, slowest agents (p50/p95), verifier fail rate by phase, repeated gap-cycles by task.
|
|
220
|
+
|
|
221
|
+
## ERP integration (additive, non-breaking)
|
|
222
|
+
|
|
223
|
+
Report payload v2 (in `docs/erp-contract.md`) gains:
|
|
224
|
+
```json
|
|
225
|
+
"agent_runs": {
|
|
226
|
+
"count": 14,
|
|
227
|
+
"failures": 2,
|
|
228
|
+
"verifier_fail_rate": 0.14,
|
|
229
|
+
"slowest_agent_ms": 312000,
|
|
230
|
+
"by_type": { "builder": 9, "verifier": 4, "planner": 1 }
|
|
231
|
+
}
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
Aggregated counts only — never raw records. ERP backend treats the field as optional; old reports without it still parse. The full JSONL stays local.
|
|
235
|
+
|
|
236
|
+
## Privacy and opt-out
|
|
237
|
+
|
|
238
|
+
| Captured | Not captured |
|
|
239
|
+
|---|---|
|
|
240
|
+
| Agent type, model | User prompts |
|
|
241
|
+
| Phase, task id | LLM responses |
|
|
242
|
+
| File paths (repo-relative) | File contents |
|
|
243
|
+
| Token counts | Command output |
|
|
244
|
+
| Duration, status | Stderr/stdout beyond `failure_detail` last 500 chars |
|
|
245
|
+
| Failure code | Network response bodies |
|
|
246
|
+
| Commit SHA | Git diffs |
|
|
247
|
+
|
|
248
|
+
Disable new writes: `export QUALIA_TELEMETRY=off`. The helper short-circuits *writes* — reads (`qualia-framework agents`) still surface previously recorded data. Opting out doesn't erase history.
|
|
249
|
+
|
|
250
|
+
## Design decisions (locked v1)
|
|
251
|
+
|
|
252
|
+
These were called out as open questions during draft; resolved here so implementation can proceed.
|
|
253
|
+
|
|
254
|
+
1. **Token counts:** all token fields are optional. Populate when the harness exposes them via `Task` tool metadata; leave undefined otherwise. The schema doesn't depend on always having them.
|
|
255
|
+
2. **`session_id`:** optional. If Claude Code exposes a stable id to skills/hooks, use it. Otherwise the `bin/lib/agent-runs.js` writer generates a per-process UUID on first call and reuses it for the lifetime of that process.
|
|
256
|
+
3. **Tool-call telemetry:** aggregate `tool_calls_count` only. No per-call spans. If a future analytics need demands per-call detail, add a separate `agent-tool-calls.jsonl` — don't bloat the main ledger.
|
|
257
|
+
4. **`parent_run_id` vs `skill_invocation_id`:** added `skill_invocation_id` as the common grouping key (every record has one). `parent_run_id` is reserved strictly for true agent-spawned-agent nesting (team-orchestrator → fan-out children). Documented inline above.
|
|
258
|
+
5. **`failure_detail`:** capped at 500 chars; keep the tail (most recent stderr), drop the head. The newest content is usually the most useful for classification.
|
|
259
|
+
6. **Side log files:** on `status != success`, `finish()` writes `.planning/agent-runs/<run_id>.log` with full stderr. Lean JSONL stays grep-friendly; debugging context survives.
|
|
260
|
+
7. **Cross-project rollup:** rejected. ERP does fleet-wide aggregation. A `~/.claude/agent-runs.jsonl` mirror would add a sync surface for marginal benefit.
|
|
261
|
+
8. **Append atomicity:** relies on `O_APPEND` + single-`write()` syscall behavior for regular files (Linux/macOS/Windows). Atomic up to filesystem block size; our records are well under. Detailed in the "Concurrency" note above.
|
|
262
|
+
9. **Cleanup:** `qualia-framework agents prune --before YYYY-MM-DD` removes records and matching log files older than the cutoff. Never auto-prunes — operator-driven only.
|
|
263
|
+
10. **`QUALIA_TELEMETRY=off` semantics:** disables *writes* only. Reads (`qualia-framework agents`) still surface existing records — opting out of new collection does not retroactively hide history. Set before a session to silence that session's spawns.
|
|
264
|
+
|
|
265
|
+
## Migration plan
|
|
266
|
+
|
|
267
|
+
1. Add `bin/lib/agent-runs.js` (writer) + `bin/cli.js agents` (reader). Helper is a no-op if `.planning/` doesn't exist.
|
|
268
|
+
2. Wire `ar.start`/`ar.finish` calls into the orchestrating skills (`/qualia-build`, `/qualia-verify`, `/qualia-plan`, `/qualia-research`, `/qualia-postmortem`).
|
|
269
|
+
3. Add `agents` table to `qualia-framework analytics`.
|
|
270
|
+
4. After two milestones produce real data, extend ERP payload (v2) with aggregated metrics. Coordinate with ERP backend.
|
|
271
|
+
5. Defer postmortem feedback loop and ERP feedback analyzer until ≥4 weeks of real data exist.
|
|
272
|
+
|
|
273
|
+
No hard cutover. Pre-existing projects acquire the JSONL on first spawn after upgrade — older runs are simply absent.
|