qualia-framework 4.3.0 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/CLAUDE.md +13 -1
  2. package/README.md +16 -13
  3. package/agents/builder.md +12 -20
  4. package/agents/plan-checker.md +18 -0
  5. package/agents/planner.md +9 -0
  6. package/agents/verifier.md +62 -0
  7. package/bin/agent-runs.js +233 -0
  8. package/bin/cli.js +225 -21
  9. package/bin/install.js +25 -5
  10. package/bin/plan-contract.js +220 -0
  11. package/bin/slop-detect.mjs +357 -0
  12. package/bin/state.js +199 -10
  13. package/docs/agent-runs.md +273 -0
  14. package/docs/erp-contract.md +5 -0
  15. package/docs/plan-contract.md +321 -0
  16. package/hooks/auto-update.js +3 -7
  17. package/hooks/pre-compact.js +22 -11
  18. package/hooks/pre-deploy-gate.js +16 -2
  19. package/hooks/pre-push.js +22 -2
  20. package/hooks/stop-session-log.js +1 -1
  21. package/package.json +8 -2
  22. package/rules/design-brand.md +110 -0
  23. package/rules/design-laws.md +144 -0
  24. package/rules/design-product.md +110 -0
  25. package/rules/design-rubric.md +153 -0
  26. package/skills/qualia-build/SKILL.md +5 -5
  27. package/skills/qualia-flush/SKILL.md +1 -1
  28. package/skills/qualia-new/SKILL.md +40 -3
  29. package/skills/qualia-polish/SKILL.md +180 -136
  30. package/skills/qualia-quick/SKILL.md +1 -1
  31. package/skills/qualia-report/SKILL.md +25 -5
  32. package/skills/qualia-ship/SKILL.md +12 -10
  33. package/skills/zoho-workflow/SKILL.md +64 -0
  34. package/templates/DESIGN.md +229 -435
  35. package/templates/PRODUCT.md +95 -0
  36. package/templates/help.html +13 -7
  37. package/tests/bin.test.sh +6 -3
  38. package/tests/hooks.test.sh +9 -20
  39. package/tests/lib.test.sh +217 -0
  40. package/tests/runner.js +96 -75
  41. package/tests/state.test.sh +4 -3
  42. package/skills/qualia-design/SKILL.md +0 -169
package/CLAUDE.md CHANGED
@@ -36,8 +36,20 @@ For each milestone, for each phase:
36
36
 
37
37
  /qualia-milestone → close milestone, archive artifacts, prep next (human gate)
38
38
  ↓ (repeat for each milestone until Handoff)
39
+ Design as a thread (v4.5.0+): every road agent loads PRODUCT.md +
40
+ DESIGN.md + design-laws.md substrate. Builders run slop-detect on every
41
+ frontend commit. Verifiers score 8 design dimensions per phase.
42
+
43
+ /qualia-polish is now a flexible verb usable at any scope:
44
+ /qualia-polish src/components/Button.tsx ~30s component touch-up
45
+ /qualia-polish app/dashboard ~3m section pass
46
+ /qualia-polish ~12m whole app, fan-out
47
+ /qualia-polish --redesign ~30m ground-up redesign
48
+ /qualia-polish --critique read-only scored audit
49
+ /qualia-polish --quick ~1m gates only
50
+
39
51
  Final milestone = Handoff:
40
- /qualia-polish → design/UX pass (Phase 1 of Handoff)
52
+ /qualia-polish → final design pass (whole app)
41
53
  (content + SEO) → Phase 2
42
54
  (final QA) → Phase 3
43
55
  /qualia-ship → deploy to production (quality gates → deploy → verify)
package/README.md CHANGED
@@ -40,7 +40,7 @@ Open Claude Code in any project directory.
40
40
  ...repeat plan/build/verify per phase...
41
41
  /qualia-milestone # Close current milestone, open next (loads next scope from JOURNEY.md)
42
42
  ...repeat per milestone until the final "Handoff" milestone...
43
- /qualia-polish # Design and UX pass (first phase of the Handoff milestone)
43
+ /qualia-polish # Design pass, flexible scope: component, route, app, redesign, critique, quick
44
44
  /qualia-ship # Deploy to production
45
45
  /qualia-handoff # Enforce the 4 mandatory handoff deliverables
46
46
  /qualia-report # Mandatory end-of-session report + ERP upload
@@ -77,7 +77,6 @@ Two human gates per project. One halt case (gap-cycle limit exceeded on a failin
77
77
 
78
78
  ```
79
79
  /qualia-debug # Structured debugging
80
- /qualia-design # One-shot design transformation
81
80
  /qualia-review # Production audit (scored diagnostics)
82
81
  /qualia-optimize # Deep optimization pass (parallel specialist agents)
83
82
  /qualia-quick # Fast path for trivial fixes (skips planning)
@@ -115,13 +114,13 @@ Project
115
114
 
116
115
  **Why it matters:** non-technical team members can follow the ladder from any entry point. `/qualia` and `/qualia-milestone` render JOURNEY.md as a visual ladder with current position highlighted.
117
116
 
118
- ## What's Inside (v4.0.0)
117
+ ## What's Inside (v4.3.0)
119
118
 
120
- - **26 skills** — from setup to handoff, plus debug, design, review, optimize, diagnostic (`qualia-idk`), session management, skill authoring, per-phase depth (discuss, research, map), and full-journey additions (`--auto` chaining, milestone closure)
119
+ - **28 skills** — from setup to handoff, plus debug, design, review, optimize, diagnostic (`qualia-idk`), memory flush, postmortem, session management, skill authoring, per-phase depth (discuss, research, map), and full-journey additions (`--auto` chaining, milestone closure)
121
120
  - **8 agents** (each runs in fresh context): planner, builder, verifier, qa-browser, researcher, research-synthesizer, roadmapper, plan-checker
122
- - **7 hooks** (pure Node.js, cross-platform): session-start, branch-guard, pre-push tracking sync, migration-guard, pre-deploy-gate, pre-compact state save, auto-update
123
- - **5 rules**: security, frontend, design-reference, deployment, infrastructure
124
- - **19 template files**: project.md, **journey.md** (new in v4), plan.md (story-file format), state.md, DESIGN.md, tracking.json (now with `milestone_name` + `milestones[]`), requirements.md (multi-milestone), roadmap.md (current milestone only), phase-context.md, 4 project-type templates (website, ai-agent, voice-agent, mobile-app), 5 research-project templates (STACK, FEATURES, ARCHITECTURE, PITFALLS, SUMMARY), help.html
121
+ - **9 hooks** (pure Node.js, cross-platform): session-start, auto-update, git-guardrails, branch-guard, pre-push tracking sync, migration-guard, pre-deploy-gate, pre-compact state save, stop-session-log
122
+ - **6 rules**: security, frontend, design-reference, deployment, infrastructure, grounding
123
+ - **21 template files**: project.md, **journey.md** (new in v4), plan.md (story-file format), state.md, DESIGN.md, tracking.json (now with `milestone_name` + `milestones[]`), requirements.md (multi-milestone), roadmap.md (current milestone only), phase-context.md, 4 project-type templates (website, ai-agent, voice-agent, mobile-app), 5 research-project templates (STACK, FEATURES, ARCHITECTURE, PITFALLS, SUMMARY), knowledge templates, help.html
125
124
  - **1 reference** — questioning.md methodology for deep project initialization
126
125
 
127
126
  ## Supported Platforms
@@ -156,13 +155,17 @@ Splitting planner, builder, and verifier into separate agents with separate cont
156
155
 
157
156
  ### Production-Grade Hooks
158
157
 
159
- All 7 hooks are real ops engineering, not theoretical:
158
+ All 9 hooks are real ops engineering, not theoretical:
160
159
 
161
160
  - **Pre-deploy gate** — TypeScript, lint, tests, build, and `service_role` leak scan before `vercel --prod`
161
+ - **Session start** — Shows project state, next command, update notices, and health warnings at session start
162
+ - **Auto-update** — Daily update check with cached failures so offline/npm issues do not slow every command
163
+ - **Git guardrails** — Blocks destructive git operations like force-push to main/master, `git clean -fd`, and `rm -rf .git`
162
164
  - **Branch guard** — Role-aware: owner can push to main, employees can't (parses refspec so `feature/x:main` bypass is blocked)
163
165
  - **Migration guard** — Catches `DROP TABLE` without `IF EXISTS`, `DELETE`/`UPDATE` without `WHERE`, `CREATE TABLE` without RLS, `GRANT ... TO PUBLIC`, `ALTER TABLE ... DROP COLUMN`
164
166
  - **Pre-push** — Stamps tracking.json via a bot commit so the ERP always sees fresh data
165
167
  - **Pre-compact** — Saves state before context compression
168
+ - **Stop-session log** — Writes lightweight daily session checkpoints into the knowledge layer
166
169
 
167
170
  ### Enforced State Machine
168
171
 
@@ -183,12 +186,12 @@ npx qualia-framework@latest install
183
186
  |
184
187
  v
185
188
  ~/.claude/
186
- ├── skills/ 26 slash commands
189
+ ├── skills/ 28 slash commands
187
190
  ├── agents/ 8 agent definitions (planner, builder, verifier, qa-browser, roadmapper, research-synthesizer, researcher, plan-checker)
188
- ├── hooks/ 7 Node.js hooks — cross-platform (no bash dependency)
189
- ├── bin/ state.js (state machine) + qualia-ui.js (cosmetics, banners, journey-tree) + statusline.js
191
+ ├── hooks/ 9 Node.js hooks — cross-platform (no bash dependency)
192
+ ├── bin/ state.js + qualia-ui.js + statusline.js + knowledge.js + knowledge-flush.js
190
193
  ├── knowledge/ learned-patterns.md, common-fixes.md, client-prefs.md
191
- ├── rules/ security, frontend, design-reference, deployment, infrastructure
194
+ ├── rules/ security, frontend, design-reference, deployment, infrastructure, grounding
192
195
  ├── qualia-templates/ project.md, journey.md, plan.md (story-file), state.md, DESIGN.md, tracking.json, requirements.md, roadmap.md, + projects/*.md + research-project/*.md + help.html
193
196
  ├── qualia-references/ questioning.md (deep project initialization methodology)
194
197
  ├── CLAUDE.md global instructions (role-configured per team member)
@@ -201,6 +204,6 @@ Stack: Next.js 16+, React 19, TypeScript, Supabase, Vercel. Voice: Retell AI, El
201
204
 
202
205
  ## Changelog
203
206
 
204
- See [CHANGELOG.md](./CHANGELOG.md) for the full version history. v4.0.0 release notes are the most recent section.
207
+ See [CHANGELOG.md](./CHANGELOG.md) for the full version history. v4.3.0 release notes are the most recent section.
205
208
 
206
209
  Built by [Qualia Solutions](https://qualiasolutions.net) — Nicosia, Cyprus.
package/agents/builder.md CHANGED
@@ -84,10 +84,11 @@ Before committing:
84
84
  1. Run every command in **Validation:** — they must pass
85
85
  2. Mentally walk through each **Acceptance Criterion** — does the code actually produce that observable behavior?
86
86
  3. Run `npx tsc --noEmit` if you touched TypeScript files
87
- 4. No `// TODO`, no placeholder text, no stub functions
88
- 5. Imports are wired not just declared but actually used
87
+ 4. **If you touched any `.tsx/.jsx/.css/.scss/.html` file: run `node bin/slop-detect.mjs {touched paths}`. Exit 1 (critical findings) BLOCKS the commit.** Fix the findings (apply the rewrite recipe in the script's output), re-run, repeat until exit 0.
88
+ 5. No `// TODO`, no placeholder text, no stub functions
89
+ 6. Imports are wired — not just declared but actually used
89
90
 
90
- If any Validation command fails or any AC is not met, fix before committing. Do not commit and hope the verifier catches it.
91
+ If any Validation command fails, slop-detect returns 1, or any AC is not met, fix before committing. Do not commit and hope the verifier catches it.
91
92
 
92
93
  ### 5. Commit
93
94
  One atomic commit per task:
@@ -132,23 +133,14 @@ Rule of thumb: If you can explain the change in one sentence in a commit message
132
133
  - Always check auth server-side
133
134
  - Enable RLS on every table
134
135
  - Validate input with Zod at system boundaries
135
- 5. **Frontend standards (mandatory for any .tsx/.jsx/.css file):**
136
- - Before writing any frontend code: read `.planning/DESIGN.md` if it exists it's the design source of truth
137
- - If no DESIGN.md, apply rules from `rules/frontend.md` (Qualia defaults)
138
- - Distinctive fonts (never Inter, Roboto, Arial, system-ui, Space Grotesk)
139
- - Cohesive color palette via CSS variables sharp accent for CTAs
140
- - All text: WCAG AA contrast (4.5:1 normal, 3:1 large text)
141
- - Full-width fluid layouts no hardcoded max-width caps
142
- - Every interactive element needs ALL states: hover, focus (visible ring), active, disabled, loading, error, empty
143
- - Semantic HTML (`nav`, `main`, `section`, `article`) — not div soup
144
- - Keyboard accessible: Tab, Enter, Escape, Arrow keys work
145
- - Touch targets: 44px minimum
146
- - Form inputs: visible labels (not placeholder-only), error messages with `aria-describedby`
147
- - Motion: 150–200ms hover, 250ms expand, stagger children on load, respect `prefers-reduced-motion`
148
- - Mobile-first responsive: stack on mobile, expand on desktop, fluid typography
149
- - Skip link on every page, heading hierarchy (one h1, sequential order)
150
- - No emoji as icons — use SVGs
151
- - `cursor: pointer` on all clickable elements
136
+ 5. **Frontend standards (mandatory for any .tsx/.jsx/.css/.scss/.html file):**
137
+ - **Read substrate first.** Before any frontend code: read `PRODUCT.md`, `DESIGN.md`, `rules/design-laws.md`, AND the matching register file (`rules/design-brand.md` if `register: brand`, `rules/design-product.md` if `register: product`). These ARE the source of truth.
138
+ - **Honor the task's `**Design:**` contract.** If the planner specified `Tokens used: var(--accent), --space-4`, those are the tokens you use — don't introduce new ones without flagging.
139
+ - **OKLCH only.** No `#000`, no `#fff`, no scattered hex. Reference design tokens via `var(--name)`.
140
+ - **Banned fonts:** Inter, Roboto, Arial, Helvetica, system-ui, Space Grotesk. Use the font defined in DESIGN.md §3.
141
+ - **No purple-blue gradients, no gradient text, no side-stripe borders, no glassmorphism by default, no identical card grids, no modal as first thought, no em dashes** (per `rules/design-laws.md` §8 absolute bans).
142
+ - **Pre-commit guard:** run `node bin/slop-detect.mjs {touched files}`. Exit 1 = blocked.
143
+ - All other rules (states, semantics, keyboard, touch targets, motion, responsive, headings, skip links, no-emoji-icons, cursor:pointer, WCAG AA) carry over from `rules/design-laws.md` and the register file.
152
144
  6. **No empty catch blocks.** At minimum, log the error.
153
145
  7. **No dangerouslySetInnerHTML.** No eval().
154
146
  8. **React/Next.js performance:**
@@ -105,6 +105,24 @@ If `.planning/phase-{N}-context.md` exists, read its "Locked Decisions" section.
105
105
 
106
106
  **FAIL if:** plan contradicts a locked decision (e.g., context says "use library X" but plan uses library Y).
107
107
 
108
+ ### Rule 7b: Frontend tasks have a design contract (v4.5.0+)
109
+
110
+ A "frontend task" is any task whose **Files:** list contains a `.tsx`, `.jsx`, `.css`, `.scss`, `.html`, `.svelte`, `.vue`, or `.astro` path.
111
+
112
+ Every frontend task MUST include a `**Design:**` field with:
113
+ - `Register: brand` or `Register: product`
114
+ - `Tokens used:` non-empty list of CSS custom properties (e.g. `var(--accent), --space-4`) — proves the task references DESIGN.md tokens, not raw hex/px
115
+ - `Scope: component|section|page|app`
116
+ - `Anti-pattern guard:` line confirming builder runs `bin/slop-detect.mjs` pre-commit
117
+
118
+ **FAIL if:**
119
+ - Frontend task missing `**Design:**` field entirely
120
+ - Register is neither `brand` nor `product`
121
+ - Tokens used is empty or contains raw hex (`#ff0000`) instead of CSS-var references
122
+ - Plan violates an absolute ban (per `rules/design-laws.md` §8): grep the plan for `gradient text`, `glassmorphism`, `purple gradient`, `hero metric template`, `identical card grid`, `modal as first thought`, `border-left:.4px` decorative, `font-family: Inter`, `Space Grotesk`. Any hit = REVISE.
123
+
124
+ Non-frontend tasks (backend, migrations, API routes without UI) SHOULD NOT have a `**Design:**` field. If one is added by mistake, warn but do not fail the plan.
125
+
108
126
  ### Rule 8: Validation commands test behavior, not just existence
109
127
 
110
128
  Each task's `**Validation:**` list must contain at least one `grep-match` or `command-exit` check — a command that proves the code DOES something. A task whose ONLY validation is `test -f {file}` will pass even if the file contains only `// TODO`.
package/agents/planner.md CHANGED
@@ -11,6 +11,9 @@ You create phase plans. Plans are prompts — they ARE the instructions the buil
11
11
  ## Input
12
12
 
13
13
  - `<project_context>` — inlined `.planning/PROJECT.md` contents
14
+ - `<product_context>` — inlined `PRODUCT.md` (if present — required from v4.5.0 onward; substrate for any frontend task)
15
+ - `<design_spec>` — inlined `DESIGN.md` (if present — visual contract for any frontend task)
16
+ - `<design_substrate>` — inlined `rules/design-laws.md` + matching register file (`rules/design-brand.md` OR `rules/design-product.md` based on PRODUCT.md `register:` field)
14
17
  - `<current_state>` — inlined `.planning/STATE.md` contents
15
18
  - `<phase_details>` — phase goal + success criteria + REQ-IDs from ROADMAP.md
16
19
  - `<locked_decisions>` (optional) — Locked Decisions from `.planning/phase-{N}-context.md` if it exists
@@ -101,6 +104,12 @@ waves: {count}
101
104
 
102
105
  **Context:** Read @{file references}
103
106
 
107
+ **Design:** (REQUIRED for any task touching .tsx/.jsx/.css/.scss/.html — omit otherwise)
108
+ - Register: {brand|product}
109
+ - Tokens used: {var(--accent), var(--text), --space-4, ...}
110
+ - Scope: {component|section|page|app}
111
+ - Anti-pattern guard: builder runs `node bin/slop-detect.mjs {target}` pre-commit; commit blocked on critical findings
112
+
104
113
  ## Success Criteria
105
114
  - [ ] {phase-level truth 1}
106
115
  - [ ] {phase-level truth 2}
@@ -14,6 +14,9 @@ You verify that a phase achieved its GOAL, not just completed its TASKS.
14
14
 
15
15
  - `<plan_path>` — path to `.planning/phase-{N}-plan.md`
16
16
  - `<project_context>` — inlined `.planning/PROJECT.md` contents (for Quality scoring against project conventions)
17
+ - `<product_context>` — inlined `PRODUCT.md` (if present, v4.5.0+) — register, anti-references, principles
18
+ - `<design_spec>` — inlined `DESIGN.md` (if present) — visual contract for design rubric scoring
19
+ - `<design_substrate>` — inlined `rules/design-laws.md`, `rules/design-rubric.md`, and the matching register file
17
20
  - `<previous_verification>` (optional) — inlined `.planning/phase-{N}-verification.md` from a prior run
18
21
 
19
22
  ## Output
@@ -118,6 +121,65 @@ grep -c "async.*=> {}\|() => {}" {file}
118
121
 
119
122
  If Level 2 finds more than 2 stub patterns in a single file, mark that criterion as **FAIL** regardless of other checks. Stubs are not implementations.
120
123
 
124
+ ## Design Verification (v4.5.0+)
125
+
126
+ If the phase touched any frontend file (`.tsx/.jsx/.css/.scss/.html`), run the design verification block IN ADDITION to the functional verification above. Design FAIL blocks the phase the same way a functional FAIL does.
127
+
128
+ ### Step A — slop-detect gate (must pass)
129
+
130
+ ```bash
131
+ node bin/slop-detect.mjs {touched frontend paths from git diff}
132
+ ```
133
+
134
+ If exit code is 1 (critical findings present), the phase FAILS. Quote the findings in the report. Do not score the rubric — fix slop first.
135
+
136
+ ### Step B — Design rubric scoring (8 dimensions)
137
+
138
+ Apply `rules/design-rubric.md`. Score 1-5 per dimension WITH evidence on the next line. Default to 3 unless evidence supports otherwise.
139
+
140
+ Scoped by phase scope:
141
+ - Component-only phase → score Typography, Color cohesion, States, Motion intent, Microcopy, Container depth (skip Layout originality, Spatial rhythm — those are page-level concerns)
142
+ - Page/section phase → all 8 dimensions
143
+ - Full app phase → all 8 dimensions across 2-3 representative routes, average
144
+
145
+ Output format (mandatory, append to verification.md):
146
+
147
+ ```markdown
148
+ ## Design Rubric — Phase {N}
149
+
150
+ | Dim | Score | Evidence |
151
+ |---|---|---|
152
+ | Typography | 4 | `app/page.tsx:14` Fraunces + JetBrains Mono pair, weights 400/500/700 |
153
+ | Color cohesion | 3 | All CSS vars in `app/globals.css:8-22`, OKLCH used, strategy: Restrained |
154
+ | ... | ... | ... |
155
+
156
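+ **Aggregate:** {sum}/{5 × dimensions scored} (avg {sum ÷ dimensions scored}; that is /40 and ÷8 when all 8 dimensions are scored, /30 and ÷6 for a component-only phase)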
157
+ **Design verdict:** PASS (all dims ≥ 3) | FAIL (Layout Originality at 2 — three-column grid, see `app/page.tsx:42`)
158
+ ```
159
+
160
+ ### Step C — Drift audit (full app verification only)
161
+
162
+ Compare implementation against DESIGN.md tokens. Flag tokens used in code but not declared, and raw hex values still appearing.
163
+
164
+ ```bash
165
+ # Orphan tokens (used in code, missing from DESIGN.md)
166
+ grep -rohE "var\(--[a-z0-9-]+\)" src/ app/ components/ 2>/dev/null | \
167
+ awk -F'var\\(--' '{print $2}' | awk -F'\\)' '{print $1}' | sort -u > /tmp/used-tokens
168
+ grep -E "^\s*--[a-z0-9-]+:" DESIGN.md 2>/dev/null | sed -E 's/.*--([a-z0-9-]+):.*/\1/' | sort -u > /tmp/declared
169
+ comm -23 /tmp/used-tokens /tmp/declared
170
+ ```
171
+
172
+ Drift findings are reported, not auto-failing. Drift may be intentional. But if 5+ orphan tokens appear, flag as MEDIUM finding for the next polish cycle.
173
+
174
+ ### Phase verdict (combined)
175
+
176
+ ```
177
+ phase_pass = functional_pass AND slop_detect_pass AND design_rubric_pass
178
+ phase_fail = ANY of the above failed
179
+ ```
180
+
181
+ A perfect functional verification with a Design Rubric score of 2 in any dimension is a phase FAIL. Design is not a "would be nice" — it's a verification dimension equal to functionality.
182
+
121
183
  ### Wiring Check (Level 3)
122
184
 
123
185
  ```bash
@@ -0,0 +1,233 @@
1
+ #!/usr/bin/env node
2
+ // Agent runs telemetry — JSONL writer + reader. See docs/agent-runs.md.
3
+ //
4
+ // Pure library. Atomic writes via fs.appendFileSync (single write() syscall
5
+ // to an O_APPEND file descriptor; safe at our record sizes — see the spec).
6
+ //
7
+ // Zero npm dependencies.
8
+
9
+ const fs = require("fs");
10
+ const path = require("path");
11
+ const crypto = require("crypto");
12
+
13
// Version stamped into every record as `schema_version`.
const SCHEMA_VERSION = 1;

// Agent type names recognised by this module.
const VALID_AGENT_TYPES = new Set([
  "planner",
  "plan-checker",
  "builder",
  "verifier",
  "qa-browser",
  "researcher",
  "research-synthesizer",
  "roadmapper",
  "team-orchestrator",
  "custom",
]);

// Terminal run states recognised by this module.
const VALID_STATUS = new Set([
  "success",
  "partial",
  "blocked",
  "failure",
  "timeout",
  "interrupted",
]);
24
+
25
// One UUID per process — fallback when Claude Code doesn't expose a session id.
let _processSessionId = null;

/**
 * Return the process-wide session id, generating it on first use.
 *
 * The value is a lowercase RFC 4122 version-4 UUID and stays the same for
 * every later call in this process.
 *
 * @returns {string} UUID in 8-4-4-4-12 hex form.
 */
function processSessionId() {
  if (_processSessionId) return _processSessionId;

  if (typeof crypto.randomUUID === "function") {
    // Prefer the standard-library generator when this Node version has it.
    _processSessionId = crypto.randomUUID();
  } else {
    // Older Node: build the v4 UUID by hand from 16 random bytes.
    const buf = crypto.randomBytes(16);
    buf[6] = (buf[6] & 0x0f) | 0x40; // version nibble = 4
    buf[8] = (buf[8] & 0x3f) | 0x80; // variant bits = 10xx
    const h = buf.toString("hex");
    _processSessionId = `${h.slice(0, 8)}-${h.slice(8, 12)}-${h.slice(12, 16)}-${h.slice(16, 20)}-${h.slice(20)}`;
  }
  return _processSessionId;
}
38
+
39
// ULID-ish: timestamp prefix + random suffix. Sortable by time.
/**
 * Build a 26-character run id: the current time in upper-case base 36,
 * left-padded to 10 characters, followed by upper-case random hex.
 *
 * @returns {string} 26-character identifier.
 */
function newRunId() {
  const stamp = Date.now().toString(36).toUpperCase().padStart(10, "0");
  const entropy = crypto.randomBytes(10).toString("hex").toUpperCase();
  const combined = stamp + entropy;
  return combined.slice(0, 26);
}
45
+
46
/**
 * Resolve the `.planning` directory under a project root.
 *
 * @param {string} [cwd] - project root; a falsy value means process.cwd().
 * @returns {string} path to `<root>/.planning`.
 */
function planningDir(cwd) {
  const root = cwd ? cwd : process.cwd();
  return path.join(root, ".planning");
}
49
+
50
/**
 * Path of the JSONL run log inside the planning directory.
 *
 * @param {string} [cwd] - project root, forwarded to planningDir().
 * @returns {string} path ending in `agent-runs.jsonl`.
 */
function jsonlPath(cwd) {
  const dir = planningDir(cwd);
  return path.join(dir, "agent-runs.jsonl");
}
53
+
54
/**
 * Directory that holds the per-run side logs inside the planning directory.
 *
 * @param {string} [cwd] - project root, forwarded to planningDir().
 * @returns {string} path ending in `agent-runs`.
 */
function logDir(cwd) {
  const dir = planningDir(cwd);
  return path.join(dir, "agent-runs");
}
57
+
58
/**
 * Report whether telemetry is enabled.
 *
 * Telemetry is on unless the QUALIA_TELEMETRY environment variable equals
 * "off" (compared case-insensitively).
 *
 * @returns {boolean} false only when the variable is set to "off".
 */
function telemetryEnabled() {
  const setting = process.env.QUALIA_TELEMETRY || "";
  return setting.toLowerCase() !== "off";
}
61
+
62
/**
 * Create directory `p` (including missing parents) unless the path already exists.
 *
 * @param {string} p - directory path to create.
 */
function ensureDir(p) {
  if (fs.existsSync(p)) return;
  fs.mkdirSync(p, { recursive: true });
}
65
+
66
/**
 * Keep at most the last `max` characters of a string.
 *
 * @param {*} s - value to truncate; anything that is not a string yields undefined.
 * @param {number} max - maximum length of the returned string.
 * @returns {string|undefined} `s` unchanged when short enough, otherwise its tail.
 */
function truncTail(s, max) {
  if (typeof s !== "string") return undefined;
  return s.length > max ? s.slice(s.length - max) : s;
}
71
+
72
+ // ─── Writer ────────────────────────────────────────────────────────────
73
+
74
// start({ agent_type, model, ... }) → opaque token used by finish()
/**
 * Begin tracking one agent run. Nothing is written to disk here.
 *
 * The ISO start time and the millisecond start time come from a single clock
 * reading, so a duration computed from `started_ms` always agrees with the
 * `started_at` stored in the record.
 *
 * @param {object} [opts] - run metadata (agent_type, model, phase, task_id, ...).
 *   A missing `opts` is treated as an empty object.
 * @returns {{started_at: string, started_ms: number, record: object}} token to
 *   hand to finish().
 */
function start(opts) {
  const o = opts || {};
  const startedMs = Date.now();
  const startedAt = new Date(startedMs).toISOString();
  return {
    started_at: startedAt,
    started_ms: startedMs,
    record: {
      schema_version: SCHEMA_VERSION,
      run_id: o.run_id || newRunId(),
      parent_run_id: o.parent_run_id || undefined,
      skill_invocation_id: o.skill_invocation_id || processSessionId(),
      session_id: o.session_id || processSessionId(),
      agent_type: o.agent_type,
      agent_name: o.agent_name || undefined,
      model: o.model,
      effort: o.effort || undefined,
      project: o.project || undefined,
      // `!= null` keeps legitimate zero values for the numeric-looking fields.
      phase: o.phase != null ? o.phase : undefined,
      milestone: o.milestone != null ? o.milestone : undefined,
      task_id: o.task_id || undefined,
      wave: o.wave != null ? o.wave : undefined,
      retry_of: o.retry_of || undefined,
      started_at: startedAt,
    },
  };
}
101
+
102
// finish(token, { status, ... }) → writes the JSONL line + optional log file
/**
 * Complete a run begun with start() and append its record to the JSONL file.
 *
 * Nothing is written when telemetry is off or when the project has no
 * `.planning` directory. For non-success runs that supply `full_stderr`, the
 * text is also saved to a per-run side log (best effort).
 *
 * @param {object} token - value returned by start(); must carry `record`.
 * @param {object} [result] - outcome fields: status, token counts,
 *   files_changed, commit_sha, verifier fields, failure_reason/detail,
 *   full_stderr, and optional `cwd` (project root, default process.cwd()).
 *   A missing result is recorded as a failure instead of raising a TypeError.
 * @returns {{written: boolean, reason?: string, run_id?: string, log_file?: string}}
 * @throws {Error} when `token` is missing or has no `record`.
 */
function finish(token, result) {
  if (!token || !token.record) throw new Error("finish: invalid token");
  if (!telemetryEnabled()) return { written: false, reason: "telemetry-off" };

  // Tolerate finish(token): the status check below turns it into a failure record.
  const outcome = result || {};

  const cwd = outcome.cwd || process.cwd();
  if (!fs.existsSync(planningDir(cwd))) {
    return { written: false, reason: "no-planning-dir" };
  }

  const finishedMs = Date.now();
  const record = {
    ...token.record,
    status: outcome.status,
    started_at: token.record.started_at,
    finished_at: new Date(finishedMs).toISOString(),
    duration_ms: finishedMs - token.started_ms,
    input_tokens: outcome.input_tokens,
    output_tokens: outcome.output_tokens,
    cache_read_tokens: outcome.cache_read_tokens,
    cache_creation_tokens: outcome.cache_creation_tokens,
    tool_calls_count: outcome.tool_calls_count,
    // De-duplicate while keeping first-seen order.
    files_changed: Array.isArray(outcome.files_changed) ? [...new Set(outcome.files_changed)] : undefined,
    commit_sha: outcome.commit_sha || undefined,
    verifier_score: outcome.verifier_score,
    verification_result: outcome.verification_result,
    failure_reason: outcome.failure_reason,
    // Only the last 500 characters of the detail are kept in the record.
    failure_detail: truncTail(outcome.failure_detail, 500),
  };

  if (!VALID_AGENT_TYPES.has(record.agent_type)) {
    record.failure_reason = record.failure_reason || "unknown";
    // don't reject — we want the trace even if the caller misnamed itself
  }
  if (!VALID_STATUS.has(record.status)) {
    record.status = "failure";
    record.failure_reason = record.failure_reason || "unknown";
  }

  // Side log for non-success runs.
  if (record.status !== "success" && typeof outcome.full_stderr === "string" && outcome.full_stderr.length) {
    try {
      ensureDir(logDir(cwd));
      const logFile = path.join(logDir(cwd), `${record.run_id}.log`);
      fs.writeFileSync(logFile, outcome.full_stderr);
      // Stored relative to the project root, always with forward slashes.
      record.log_file = path.relative(cwd, logFile).split(path.sep).join("/");
    } catch {
      // Side-log is best-effort — never block the JSONL write.
    }
  }

  // Drop undefined keys for a compact line.
  const clean = {};
  for (const [k, v] of Object.entries(record)) if (v !== undefined) clean[k] = v;

  const line = JSON.stringify(clean) + "\n";
  ensureDir(planningDir(cwd));
  fs.appendFileSync(jsonlPath(cwd), line);
  return { written: true, run_id: record.run_id, log_file: record.log_file };
}
162
+
163
+ // ─── Reader ────────────────────────────────────────────────────────────
164
+
165
/**
 * Load run records from the JSONL file, optionally filtered.
 *
 * Lines that are not valid JSON are skipped. Filters apply in this order:
 * failed, task_id, phase; `limit` then keeps the last N of what remains.
 *
 * @param {string} [cwd] - project root passed to jsonlPath().
 * @param {object} [opts] - optional filters:
 *   `failed` (truthy: drop records whose status is "success"),
 *   `task_id` (exact match), `phase` (exact match, applied when not null/undefined),
 *   `limit` (truthy: keep only the last `limit` records).
 * @returns {Array} parsed records in file order; empty when the file is missing.
 */
function read(cwd, opts) {
  const file = jsonlPath(cwd);
  if (!fs.existsSync(file)) return [];

  const parsed = [];
  fs.readFileSync(file, "utf8")
    .split(/\r?\n/)
    .filter(Boolean)
    .forEach((raw) => {
      try {
        parsed.push(JSON.parse(raw));
      } catch {
        /* skip corrupt line; we never want a single bad record to mask the rest */
      }
    });

  const wanted = opts || {};
  let selected = parsed;
  if (wanted.failed) selected = selected.filter((rec) => rec.status !== "success");
  if (wanted.task_id) selected = selected.filter((rec) => rec.task_id === wanted.task_id);
  if (wanted.phase != null) selected = selected.filter((rec) => rec.phase === wanted.phase);
  if (wanted.limit) selected = selected.slice(-wanted.limit);
  return selected;
}
189
+
190
/**
 * Remove records older than a cutoff from the JSONL file, together with
 * their side logs.
 *
 * A record is removed when its `finished_at` (or, failing that, `started_at`)
 * parses to a time strictly before the cutoff. Lines that are not JSON, lines
 * that parse to something other than an object, and records without a usable
 * timestamp are all kept unchanged.
 *
 * Side logs are deleted only for run ids that are plain file names, so a
 * malformed or missing `run_id` can never make prune unlink a file outside
 * the log directory.
 *
 * @param {string} [cwd] - project root passed to jsonlPath()/logDir().
 * @param {string} beforeIso - cutoff date, anything Date.parse accepts.
 * @returns {{removed: number, logs_removed: number}} counts of dropped
 *   records and deleted side logs.
 * @throws {Error} when `beforeIso` is not a parseable date.
 */
function prune(cwd, beforeIso) {
  const file = jsonlPath(cwd);
  if (!fs.existsSync(file)) return { removed: 0, logs_removed: 0 };
  const cutoff = Date.parse(beforeIso);
  if (!Number.isFinite(cutoff)) throw new Error(`prune: invalid date "${beforeIso}"`);

  const lines = fs.readFileSync(file, "utf8").split(/\r?\n/).filter(Boolean);
  const kept = [];
  const removedRunIds = [];
  for (const line of lines) {
    let rec;
    try { rec = JSON.parse(line); }
    catch { kept.push(line); continue; } // preserve unparseable; never destroy data we don't understand
    // Valid JSON that is not an object (e.g. `null`) is not a record; keep it too.
    if (rec === null || typeof rec !== "object") { kept.push(line); continue; }
    const ts = Date.parse(rec.finished_at || rec.started_at || "");
    if (Number.isFinite(ts) && ts < cutoff) {
      removedRunIds.push(rec.run_id);
    } else {
      kept.push(line);
    }
  }
  fs.writeFileSync(file, kept.join("\n") + (kept.length ? "\n" : ""));

  let logsRemoved = 0;
  if (fs.existsSync(logDir(cwd))) {
    for (const id of removedRunIds) {
      // Only plain file names may be unlinked: skip missing ids and ids with path separators.
      if (typeof id !== "string" || id === "" || path.basename(id) !== id) continue;
      const lf = path.join(logDir(cwd), `${id}.log`);
      try {
        fs.unlinkSync(lf);
        logsRemoved++;
      } catch {
        // No side log for this run (or it cannot be removed) — nothing to count.
      }
    }
  }
  return { removed: removedRunIds.length, logs_removed: logsRemoved };
}
221
+
222
+ module.exports = {
223
+ SCHEMA_VERSION,
224
+ start,
225
+ finish,
226
+ read,
227
+ prune,
228
+ // exposed for tests / introspection
229
+ newRunId,
230
+ processSessionId,
231
+ jsonlPath,
232
+ logDir,
233
+ };