npm - qualia-framework - Versions diffs - 4.3.0 → 4.5.0 - Mend

qualia-framework 4.3.0 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

package/CLAUDE.md +13 -1
package/README.md +16 -13
package/agents/builder.md +12 -20
package/agents/plan-checker.md +18 -0
package/agents/planner.md +9 -0
package/agents/verifier.md +62 -0
package/bin/agent-runs.js +233 -0
package/bin/cli.js +225 -21
package/bin/install.js +25 -5
package/bin/plan-contract.js +220 -0
package/bin/slop-detect.mjs +357 -0
package/bin/state.js +199 -10
package/docs/agent-runs.md +273 -0
package/docs/erp-contract.md +5 -0
package/docs/plan-contract.md +321 -0
package/hooks/auto-update.js +3 -7
package/hooks/pre-compact.js +22 -11
package/hooks/pre-deploy-gate.js +16 -2
package/hooks/pre-push.js +22 -2
package/hooks/stop-session-log.js +1 -1
package/package.json +8 -2
package/rules/design-brand.md +110 -0
package/rules/design-laws.md +144 -0
package/rules/design-product.md +110 -0
package/rules/design-rubric.md +153 -0
package/skills/qualia-build/SKILL.md +5 -5
package/skills/qualia-flush/SKILL.md +1 -1
package/skills/qualia-new/SKILL.md +40 -3
package/skills/qualia-polish/SKILL.md +180 -136
package/skills/qualia-quick/SKILL.md +1 -1
package/skills/qualia-report/SKILL.md +25 -5
package/skills/qualia-ship/SKILL.md +12 -10
package/skills/zoho-workflow/SKILL.md +64 -0
package/templates/DESIGN.md +229 -435
package/templates/PRODUCT.md +95 -0
package/templates/help.html +13 -7
package/tests/bin.test.sh +6 -3
package/tests/hooks.test.sh +9 -20
package/tests/lib.test.sh +217 -0
package/tests/runner.js +96 -75
package/tests/state.test.sh +4 -3
package/skills/qualia-design/SKILL.md +0 -169

package/CLAUDE.md CHANGED Viewed

@@ -36,8 +36,20 @@ For each milestone, for each phase:
      ↓
 /qualia-milestone  → close milestone, archive artifacts, prep next (human gate)
      ↓ (repeat for each milestone until Handoff)
+Design as a thread (v4.5.0+): every road agent loads PRODUCT.md +
+DESIGN.md + design-laws.md substrate. Builders run slop-detect on every
+frontend commit. Verifiers score 8 design dimensions per phase.
+/qualia-polish is now a flexible verb usable at any scope:
+  /qualia-polish src/components/Button.tsx     ~30s component touch-up
+  /qualia-polish app/dashboard                 ~3m  section pass
+  /qualia-polish                               ~12m whole app, fan-out
+  /qualia-polish --redesign                    ~30m ground-up redesign
+  /qualia-polish --critique                    read-only scored audit
+  /qualia-polish --quick                       ~1m  gates only
 Final milestone = Handoff:
-  /qualia-polish   → design/UX pass (Phase 1 of Handoff)
+  /qualia-polish   → final design pass (whole app)
   (content + SEO)  → Phase 2
   (final QA)       → Phase 3
   /qualia-ship     → deploy to production (quality gates → deploy → verify)

package/README.md CHANGED Viewed

@@ -40,7 +40,7 @@ Open Claude Code in any project directory.
 ...repeat plan/build/verify per phase...
 /qualia-milestone   # Close current milestone, open next (loads next scope from JOURNEY.md)
 ...repeat per milestone until the final "Handoff" milestone...
-/qualia-polish      # Design and UX pass (first phase of the Handoff milestone)
+/qualia-polish      # Design pass — flexible scope: component, route, app, redesign, critique, quick
 /qualia-ship        # Deploy to production
 /qualia-handoff     # Enforce the 4 mandatory handoff deliverables
 /qualia-report      # Mandatory end-of-session report + ERP upload
@@ -77,7 +77,6 @@ Two human gates per project. One halt case (gap-cycle limit exceeded on a failin
 ```
 /qualia-debug     # Structured debugging
-/qualia-design    # One-shot design transformation
 /qualia-review    # Production audit (scored diagnostics)
 /qualia-optimize  # Deep optimization pass (parallel specialist agents)
 /qualia-quick     # Fast path for trivial fixes (skips planning)
@@ -115,13 +114,13 @@ Project
 **Why it matters:** non-technical team members can follow the ladder from any entry point. `/qualia` and `/qualia-milestone` render JOURNEY.md as a visual ladder with current position highlighted.
-## What's Inside (v4.0.0)
+## What's Inside (v4.3.0)
-- **26 skills** — from setup to handoff, plus debug, design, review, optimize, diagnostic (`qualia-idk`), session management, skill authoring, per-phase depth (discuss, research, map), and full-journey additions (`--auto` chaining, milestone closure)
+- **28 skills** — from setup to handoff, plus debug, design, review, optimize, diagnostic (`qualia-idk`), memory flush, postmortem, session management, skill authoring, per-phase depth (discuss, research, map), and full-journey additions (`--auto` chaining, milestone closure)
 - **8 agents** (each runs in fresh context): planner, builder, verifier, qa-browser, researcher, research-synthesizer, roadmapper, plan-checker
-- **7 hooks** (pure Node.js, cross-platform): session-start, branch-guard, pre-push tracking sync, migration-guard, pre-deploy-gate, pre-compact state save, auto-update
-- **5 rules**: security, frontend, design-reference, deployment, infrastructure
-- **19 template files**: project.md, **journey.md** (new in v4), plan.md (story-file format), state.md, DESIGN.md, tracking.json (now with `milestone_name` + `milestones[]`), requirements.md (multi-milestone), roadmap.md (current milestone only), phase-context.md, 4 project-type templates (website, ai-agent, voice-agent, mobile-app), 5 research-project templates (STACK, FEATURES, ARCHITECTURE, PITFALLS, SUMMARY), help.html
+- **9 hooks** (pure Node.js, cross-platform): session-start, auto-update, git-guardrails, branch-guard, pre-push tracking sync, migration-guard, pre-deploy-gate, pre-compact state save, stop-session-log
+- **6 rules**: security, frontend, design-reference, deployment, infrastructure, grounding
+- **21 template files**: project.md, **journey.md** (new in v4), plan.md (story-file format), state.md, DESIGN.md, tracking.json (now with `milestone_name` + `milestones[]`), requirements.md (multi-milestone), roadmap.md (current milestone only), phase-context.md, 4 project-type templates (website, ai-agent, voice-agent, mobile-app), 5 research-project templates (STACK, FEATURES, ARCHITECTURE, PITFALLS, SUMMARY), knowledge templates, help.html
 - **1 reference** — questioning.md methodology for deep project initialization
 ## Supported Platforms
@@ -156,13 +155,17 @@ Splitting planner, builder, and verifier into separate agents with separate cont
 ### Production-Grade Hooks
-All 7 hooks are real ops engineering, not theoretical:
+All 9 hooks are real ops engineering, not theoretical:
 - **Pre-deploy gate** — TypeScript, lint, tests, build, and `service_role` leak scan before `vercel --prod`
+- **Session start** — Shows project state, next command, update notices, and health warnings at session start
+- **Auto-update** — Daily update check with cached failures so offline/npm issues do not slow every command
+- **Git guardrails** — Blocks destructive git operations like force-push to main/master, `git clean -fd`, and `rm -rf .git`
 - **Branch guard** — Role-aware: owner can push to main, employees can't (parses refspec so `feature/x:main` bypass is blocked)
 - **Migration guard** — Catches `DROP TABLE` without `IF EXISTS`, `DELETE`/`UPDATE` without `WHERE`, `CREATE TABLE` without RLS, `GRANT ... TO PUBLIC`, `ALTER TABLE ... DROP COLUMN`
 - **Pre-push** — Stamps tracking.json via a bot commit so the ERP always sees fresh data
 - **Pre-compact** — Saves state before context compression
+- **Stop-session log** — Writes lightweight daily session checkpoints into the knowledge layer
 ### Enforced State Machine
@@ -183,12 +186,12 @@ npx qualia-framework@latest install
      |
      v
 ~/.claude/
-  ├── skills/             26 slash commands
+  ├── skills/             28 slash commands
   ├── agents/             8 agent definitions (planner, builder, verifier, qa-browser, roadmapper, research-synthesizer, researcher, plan-checker)
-  ├── hooks/              7 Node.js hooks — cross-platform (no bash dependency)
-  ├── bin/                state.js (state machine) + qualia-ui.js (cosmetics, banners, journey-tree) + statusline.js
+  ├── hooks/              9 Node.js hooks — cross-platform (no bash dependency)
+  ├── bin/                state.js + qualia-ui.js + statusline.js + knowledge.js + knowledge-flush.js
   ├── knowledge/          learned-patterns.md, common-fixes.md, client-prefs.md
-  ├── rules/              security, frontend, design-reference, deployment, infrastructure
+  ├── rules/              security, frontend, design-reference, deployment, infrastructure, grounding
   ├── qualia-templates/   project.md, journey.md, plan.md (story-file), state.md, DESIGN.md, tracking.json, requirements.md, roadmap.md, + projects/*.md + research-project/*.md + help.html
   ├── qualia-references/  questioning.md (deep project initialization methodology)
   ├── CLAUDE.md           global instructions (role-configured per team member)
@@ -201,6 +204,6 @@ Stack: Next.js 16+, React 19, TypeScript, Supabase, Vercel. Voice: Retell AI, El
 ## Changelog
-See [CHANGELOG.md](./CHANGELOG.md) for the full version history. v4.0.0 release notes are the most recent section.
+See [CHANGELOG.md](./CHANGELOG.md) for the full version history. v4.3.0 release notes are the most recent section.
 Built by [Qualia Solutions](https://qualiasolutions.net) — Nicosia, Cyprus.

package/agents/builder.md CHANGED Viewed

@@ -84,10 +84,11 @@ Before committing:
 1. Run every command in **Validation:** — they must pass
 2. Mentally walk through each **Acceptance Criterion** — does the code actually produce that observable behavior?
 3. Run `npx tsc --noEmit` if you touched TypeScript files
-4. No `// TODO`, no placeholder text, no stub functions
-5. Imports are wired — not just declared but actually used
+4. **If you touched any `.tsx/.jsx/.css/.scss/.html` file: run `node bin/slop-detect.mjs {touched paths}`. Exit 1 (critical findings) BLOCKS the commit.** Fix the findings (apply the rewrite recipe in the script's output), re-run, repeat until exit 0.
+5. No `// TODO`, no placeholder text, no stub functions
+6. Imports are wired — not just declared but actually used
-If any Validation command fails or any AC is not met, fix before committing. Do not commit and hope the verifier catches it.
+If any Validation command fails, slop-detect returns 1, or any AC is not met, fix before committing. Do not commit and hope the verifier catches it.
 ### 5. Commit
 One atomic commit per task:
@@ -132,23 +133,14 @@ Rule of thumb: If you can explain the change in one sentence in a commit message
    - Always check auth server-side
    - Enable RLS on every table
    - Validate input with Zod at system boundaries
-5. **Frontend standards (mandatory for any .tsx/.jsx/.css file):**
-   - Before writing any frontend code: read `.planning/DESIGN.md` if it exists — it's the design source of truth
-   - If no DESIGN.md, apply rules from `rules/frontend.md` (Qualia defaults)
-   - Distinctive fonts (never Inter, Roboto, Arial, system-ui, Space Grotesk)
-   - Cohesive color palette via CSS variables — sharp accent for CTAs
-   - All text: WCAG AA contrast (4.5:1 normal, 3:1 large text)
-   - Full-width fluid layouts — no hardcoded max-width caps
-   - Every interactive element needs ALL states: hover, focus (visible ring), active, disabled, loading, error, empty
-   - Semantic HTML (`nav`, `main`, `section`, `article`) — not div soup
-   - Keyboard accessible: Tab, Enter, Escape, Arrow keys work
-   - Touch targets: 44px minimum
-   - Form inputs: visible labels (not placeholder-only), error messages with `aria-describedby`
-   - Motion: 150–200ms hover, 250ms expand, stagger children on load, respect `prefers-reduced-motion`
-   - Mobile-first responsive: stack on mobile, expand on desktop, fluid typography
-   - Skip link on every page, heading hierarchy (one h1, sequential order)
-   - No emoji as icons — use SVGs
-   - `cursor: pointer` on all clickable elements
+5. **Frontend standards (mandatory for any .tsx/.jsx/.css/.scss/.html file):**
+   - **Read substrate first.** Before any frontend code: read `PRODUCT.md`, `DESIGN.md`, `rules/design-laws.md`, AND the matching register file (`rules/design-brand.md` if `register: brand`, `rules/design-product.md` if `register: product`). These ARE the source of truth.
+   - **Honor the task's `**Design:**` contract.** If the planner specified `Tokens used: var(--accent), --space-4`, those are the tokens you use — don't introduce new ones without flagging.
+   - **OKLCH only.** No `#000`, no `#fff`, no scattered hex. Reference design tokens via `var(--name)`.
+   - **Banned fonts:** Inter, Roboto, Arial, Helvetica, system-ui, Space Grotesk. Use the font defined in DESIGN.md §3.
+   - **No purple-blue gradients, no gradient text, no side-stripe borders, no glassmorphism by default, no identical card grids, no modal as first thought, no em dashes** (per `rules/design-laws.md` §8 absolute bans).
+   - **Pre-commit guard:** run `node bin/slop-detect.mjs {touched files}`. Exit 1 = blocked.
+   - All other rules (states, semantics, keyboard, touch targets, motion, responsive, headings, skip links, no-emoji-icons, cursor:pointer, WCAG AA) carry over from `rules/design-laws.md` and the register file.
 6. **No empty catch blocks.** At minimum, log the error.
 7. **No dangerouslySetInnerHTML.** No eval().
 8. **React/Next.js performance:**

package/agents/plan-checker.md CHANGED Viewed

@@ -105,6 +105,24 @@ If `.planning/phase-{N}-context.md` exists, read its "Locked Decisions" section.
 **FAIL if:** plan contradicts a locked decision (e.g., context says "use library X" but plan uses library Y).
+### Rule 7b: Frontend tasks have a design contract (v4.5.0+)
+A "frontend task" is any task whose **Files:** list contains a `.tsx`, `.jsx`, `.css`, `.scss`, `.html`, `.svelte`, `.vue`, or `.astro` path.
+Every frontend task MUST include a `**Design:**` field with:
+- `Register: brand` or `Register: product`
+- `Tokens used:` non-empty list of CSS custom properties (e.g. `var(--accent), --space-4`) — proves the task references DESIGN.md tokens, not raw hex/px
+- `Scope: component|section|page|app`
+- `Anti-pattern guard:` line confirming builder runs `bin/slop-detect.mjs` pre-commit
+**FAIL if:**
+- Frontend task missing `**Design:**` field entirely
+- Register is neither `brand` nor `product`
+- Tokens used is empty or contains raw hex (`#ff0000`) instead of CSS-var references
+- Plan steps on absolute bans (per `rules/design-laws.md` §8): grep the plan for `gradient text`, `glassmorphism`, `purple gradient`, `hero metric template`, `identical card grid`, `modal as first thought`, `border-left:.4px` decorative, `font-family: Inter`, `Space Grotesk`. Any hit = REVISE.
+Non-frontend tasks (backend, migrations, API routes without UI) MUST NOT have a `**Design:**` field. Warn but don't fail if one is mistakenly added.
 ### Rule 8: Validation commands test behavior, not just existence
 Each task's `**Validation:**` list must contain at least one `grep-match` or `command-exit` check — a command that proves the code DOES something. A task whose ONLY validation is `test -f {file}` will pass even if the file contains only `// TODO`.

package/agents/planner.md CHANGED Viewed

@@ -11,6 +11,9 @@ You create phase plans. Plans are prompts — they ARE the instructions the buil
 ## Input
 - `<project_context>` — inlined `.planning/PROJECT.md` contents
+- `<product_context>` — inlined `PRODUCT.md` (if present — required from v4.5.0 onward; substrate for any frontend task)
+- `<design_spec>` — inlined `DESIGN.md` (if present — visual contract for any frontend task)
+- `<design_substrate>` — inlined `rules/design-laws.md` + matching register file (`rules/design-brand.md` OR `rules/design-product.md` based on PRODUCT.md `register:` field)
 - `<current_state>` — inlined `.planning/STATE.md` contents
 - `<phase_details>` — phase goal + success criteria + REQ-IDs from ROADMAP.md
 - `<locked_decisions>` (optional) — Locked Decisions from `.planning/phase-{N}-context.md` if it exists
@@ -101,6 +104,12 @@ waves: {count}
 **Context:** Read @{file references}
+**Design:** (REQUIRED for any task touching .tsx/.jsx/.css/.scss/.html — omit otherwise)
+- Register: {brand|product}
+- Tokens used: {var(--accent), var(--text), --space-4, ...}
+- Scope: {component|section|page|app}
+- Anti-pattern guard: builder runs `node bin/slop-detect.mjs {target}` pre-commit; commit blocked on critical findings
 ## Success Criteria
 - [ ] {phase-level truth 1}
 - [ ] {phase-level truth 2}

package/agents/verifier.md CHANGED Viewed

@@ -14,6 +14,9 @@ You verify that a phase achieved its GOAL, not just completed its TASKS.
 - `<plan_path>` — path to `.planning/phase-{N}-plan.md`
 - `<project_context>` — inlined `.planning/PROJECT.md` contents (for Quality scoring against project conventions)
+- `<product_context>` — inlined `PRODUCT.md` (if present, v4.5.0+) — register, anti-references, principles
+- `<design_spec>` — inlined `DESIGN.md` (if present) — visual contract for design rubric scoring
+- `<design_substrate>` — inlined `rules/design-laws.md`, `rules/design-rubric.md`, and the matching register file
 - `<previous_verification>` (optional) — inlined `.planning/phase-{N}-verification.md` from a prior run
 ## Output
@@ -118,6 +121,65 @@ grep -c "async.*=> {}\|() => {}" {file}
 If Level 2 finds more than 2 stub patterns in a single file, mark that criterion as **FAIL** regardless of other checks. Stubs are not implementations.
+## Design Verification (v4.5.0+)
+If the phase touched any frontend file (`.tsx/.jsx/.css/.scss/.html`), run the design verification block IN ADDITION to the functional verification above. Design FAIL blocks the phase the same way a functional FAIL does.
+### Step A — slop-detect gate (must pass)
+```bash
+node bin/slop-detect.mjs {touched frontend paths from git diff}
+```
+If exit code is 1 (critical findings present), the phase FAILS. Quote the findings in the report. Do not score the rubric — fix slop first.
+### Step B — Design rubric scoring (8 dimensions)
+Apply `rules/design-rubric.md`. Score 1-5 per dimension WITH evidence on the next line. Default to 3 unless evidence supports otherwise.
+Scoped by phase scope:
+- Component-only phase → score Typography, Color cohesion, States, Motion intent, Microcopy, Container depth (skip Layout originality, Spatial rhythm — those are page-level concerns)
+- Page/section phase → all 8 dimensions
+- Full app phase → all 8 dimensions across 2-3 representative routes, average
+Output format (mandatory, append to verification.md):
+```markdown
+## Design Rubric — Phase {N}
+| Dim | Score | Evidence |
+|---|---|---|
+| Typography | 4 | `app/page.tsx:14` Fraunces + JetBrains Mono pair, weights 400/500/700 |
+| Color cohesion | 3 | All CSS vars in `app/globals.css:8-22`, OKLCH used, strategy: Restrained |
+| ... | ... | ... |
+**Aggregate:** {sum}/40 (avg {sum/8})
+**Design verdict:** PASS (all dims ≥ 3) | FAIL (Layout Originality at 2 — three-column grid, see `app/page.tsx:42`)
+```
+### Step C — Drift audit (full app verification only)
+Compare implementation against DESIGN.md tokens. Flag tokens used in code but not declared, and raw hex values still appearing.
+```bash
+# Orphan tokens (used in code, missing from DESIGN.md)
+grep -rE "var\(--[a-z-]+\)" src/ app/ components/ 2>/dev/null | \
+  awk -F'var\\(--' '{print $2}' | awk -F'\\)' '{print $1}' | sort -u > /tmp/used-tokens
+grep -E "^\s*--[a-z-]+:" DESIGN.md 2>/dev/null | sed -E 's/.*--([a-z-]+):.*/\1/' | sort -u > /tmp/declared
+comm -23 /tmp/used-tokens /tmp/declared
+```
+Drift findings are reported, not auto-failing. Drift may be intentional. But if 5+ orphan tokens appear, flag as MEDIUM finding for the next polish cycle.
+### Phase verdict (combined)
+```
+phase_pass = functional_pass AND slop_detect_pass AND design_rubric_pass
+phase_fail = ANY of the above failed
+```
+A perfect functional verification with a Design Rubric score of 2 in any dimension is a phase FAIL. Design is not a "would be nice" — it's a verification dimension equal to functionality.
 ### Wiring Check (Level 3)
 ```bash

package/bin/agent-runs.js ADDED Viewed

@@ -0,0 +1,233 @@
+#!/usr/bin/env node
+// Agent runs telemetry — JSONL writer + reader. See docs/agent-runs.md.
+//
+// Pure library. Atomic writes via fs.appendFileSync (single write() syscall
+// to an O_APPEND file descriptor; safe at our record sizes — see the spec).
+//
+// Zero npm dependencies.
+const fs = require("fs");
+const path = require("path");
+const crypto = require("crypto");
+const SCHEMA_VERSION = 1; // stamped into every record as `schema_version` by start()
+const VALID_AGENT_TYPES = new Set([ // agent_type values finish() accepts without tagging the record
+  "planner", "plan-checker", "builder", "verifier", "qa-browser",
+  "researcher", "research-synthesizer", "roadmapper", "team-orchestrator",
+  "custom",
+]);
+const VALID_STATUS = new Set([ // any other status is rewritten to "failure" by finish()
+  "success", "partial", "blocked", "failure", "timeout", "interrupted",
+]);
+// One UUID per process — fallback when Claude Code doesn't expose a session id.
+let _processSessionId = null;
+function processSessionId() {
+  if (!_processSessionId) {
+    const buf = crypto.randomBytes(16);
+    // RFC 4122 v4
+    buf[6] = (buf[6] & 0x0f) | 0x40;
+    buf[8] = (buf[8] & 0x3f) | 0x80;
+    const h = buf.toString("hex");
+    _processSessionId = `${h.slice(0,8)}-${h.slice(8,12)}-${h.slice(12,16)}-${h.slice(16,20)}-${h.slice(20)}`;
+  }
+  return _processSessionId;
+}
+// ULID-ish: timestamp prefix + random suffix. Sortable by time.
+function newRunId() {
+  const ts = Date.now().toString(36).toUpperCase().padStart(10, "0");
+  const rand = crypto.randomBytes(10).toString("hex").toUpperCase();
+  return `${ts}${rand}`.slice(0, 26);
+}
+function planningDir(cwd) {
+  return path.join(cwd || process.cwd(), ".planning");
+}
+function jsonlPath(cwd) {
+  return path.join(planningDir(cwd), "agent-runs.jsonl");
+}
+function logDir(cwd) {
+  return path.join(planningDir(cwd), "agent-runs");
+}
+function telemetryEnabled() {
+  return (process.env.QUALIA_TELEMETRY || "").toLowerCase() !== "off";
+}
+function ensureDir(p) {
+  if (!fs.existsSync(p)) fs.mkdirSync(p, { recursive: true });
+}
+// Keep at most the last `max` characters of `s`. Non-string input yields
+// undefined.
+function truncTail(s, max) {
+  if (typeof s !== "string") return undefined;
+  return s.length > max ? s.slice(s.length - max) : s;
+}
+// ─── Writer ────────────────────────────────────────────────────────────
+// start({ agent_type, model, ... }) → opaque token used by finish()
+function start(opts) {
+  const now = new Date().toISOString();
+  const token = {
+    started_at: now,
+    started_ms: Date.now(),
+    record: {
+      schema_version: SCHEMA_VERSION,
+      run_id: opts.run_id || newRunId(),
+      parent_run_id: opts.parent_run_id || undefined,
+      skill_invocation_id: opts.skill_invocation_id || processSessionId(),
+      session_id: opts.session_id || processSessionId(),
+      agent_type: opts.agent_type,
+      agent_name: opts.agent_name || undefined,
+      model: opts.model,
+      effort: opts.effort || undefined,
+      project: opts.project || undefined,
+      phase: opts.phase != null ? opts.phase : undefined,
+      milestone: opts.milestone != null ? opts.milestone : undefined,
+      task_id: opts.task_id || undefined,
+      wave: opts.wave != null ? opts.wave : undefined,
+      retry_of: opts.retry_of || undefined,
+      started_at: now,
+    },
+  };
+  return token;
+}
+// finish(token, result) → appends one JSONL record and, for non-success runs
+// that supply `full_stderr`, writes a side log under .planning/agent-runs/.
+//
+// token:  object returned by start(); throws if it is missing or has no record.
+// result: { status, cwd?, input_tokens?, output_tokens?, cache_read_tokens?,
+//           cache_creation_tokens?, tool_calls_count?, files_changed?,
+//           commit_sha?, verifier_score?, verification_result?,
+//           failure_reason?, failure_detail?, full_stderr? }
+// Returns { written: false, reason } when telemetry is off or the .planning
+// directory does not exist; otherwise { written: true, run_id, log_file }.
+function finish(token, result) {
+  if (!token || !token.record) throw new Error("finish: invalid token");
+  if (!telemetryEnabled()) return { written: false, reason: "telemetry-off" };
+  // A missing result is recorded as a failure instead of crashing on `result.cwd`.
+  const res = result || {};
+  const cwd = res.cwd || process.cwd();
+  if (!fs.existsSync(planningDir(cwd))) {
+    return { written: false, reason: "no-planning-dir" };
+  }
+  const finishedMs = Date.now();
+  const record = {
+    ...token.record,
+    status: res.status,
+    started_at: token.record.started_at,
+    finished_at: new Date(finishedMs).toISOString(),
+    duration_ms: finishedMs - token.started_ms,
+    input_tokens: res.input_tokens,
+    output_tokens: res.output_tokens,
+    cache_read_tokens: res.cache_read_tokens,
+    cache_creation_tokens: res.cache_creation_tokens,
+    tool_calls_count: res.tool_calls_count,
+    // De-duplicate while keeping first-seen order.
+    files_changed: Array.isArray(res.files_changed) ? [...new Set(res.files_changed)] : undefined,
+    commit_sha: res.commit_sha || undefined,
+    verifier_score: res.verifier_score,
+    verification_result: res.verification_result,
+    failure_reason: res.failure_reason,
+    failure_detail: truncTail(res.failure_detail, 500),
+  };
+  if (!VALID_AGENT_TYPES.has(record.agent_type)) {
+    record.failure_reason = record.failure_reason || "unknown";
+    // don't reject — we want the trace even if the caller misnamed itself
+  }
+  if (!VALID_STATUS.has(record.status)) {
+    record.status = "failure";
+    record.failure_reason = record.failure_reason || "unknown";
+  }
+  // Side log for non-success runs.
+  if (record.status !== "success" && typeof res.full_stderr === "string" && res.full_stderr.length) {
+    // run_id may be caller-supplied; only use it as a file name when it has no
+    // directory component, so the side log always stays inside logDir().
+    const logName = `${record.run_id}.log`;
+    if (path.basename(logName) === logName) {
+      try {
+        ensureDir(logDir(cwd));
+        const logFile = path.join(logDir(cwd), logName);
+        fs.writeFileSync(logFile, res.full_stderr);
+        // Stored relative to cwd with forward slashes on every platform.
+        record.log_file = path.relative(cwd, logFile).split(path.sep).join("/");
+      } catch {
+        // Side-log is best-effort — never block the JSONL write.
+      }
+    }
+  }
+  // Drop undefined keys for a compact line.
+  const clean = {};
+  for (const [k, v] of Object.entries(record)) if (v !== undefined) clean[k] = v;
+  const line = JSON.stringify(clean) + "\n";
+  ensureDir(planningDir(cwd));
+  fs.appendFileSync(jsonlPath(cwd), line);
+  return { written: true, run_id: record.run_id, log_file: record.log_file };
+}
+// ─── Reader ────────────────────────────────────────────────────────────
+function read(cwd, opts) {
+  const file = jsonlPath(cwd);
+  if (!fs.existsSync(file)) return [];
+  const lines = fs.readFileSync(file, "utf8").split(/\r?\n/).filter(Boolean);
+  const records = [];
+  for (const line of lines) {
+    try { records.push(JSON.parse(line)); }
+    catch { /* skip corrupt line; we never want a single bad record to mask the rest */ }
+  }
+  let out = records;
+  if (opts && opts.failed) {
+    out = out.filter((r) => r.status !== "success");
+  }
+  if (opts && opts.task_id) {
+    out = out.filter((r) => r.task_id === opts.task_id);
+  }
+  if (opts && opts.phase != null) {
+    out = out.filter((r) => r.phase === opts.phase);
+  }
+  if (opts && opts.limit) {
+    out = out.slice(-opts.limit);
+  }
+  return out;
+}
+function prune(cwd, beforeIso) {
+  const file = jsonlPath(cwd);
+  if (!fs.existsSync(file)) return { removed: 0, logs_removed: 0 };
+  const cutoff = Date.parse(beforeIso);
+  if (!Number.isFinite(cutoff)) throw new Error(`prune: invalid date "${beforeIso}"`);
+  const lines = fs.readFileSync(file, "utf8").split(/\r?\n/).filter(Boolean);
+  const kept = [];
+  const removedRunIds = [];
+  for (const line of lines) {
+    let rec;
+    try { rec = JSON.parse(line); }
+    catch { kept.push(line); continue; } // preserve unparseable; never destroy data we don't understand
+    const ts = Date.parse(rec.finished_at || rec.started_at || "");
+    if (Number.isFinite(ts) && ts < cutoff) {
+      removedRunIds.push(rec.run_id);
+    } else {
+      kept.push(line);
+    }
+  }
+  fs.writeFileSync(file, kept.join("\n") + (kept.length ? "\n" : ""));
+  let logsRemoved = 0;
+  if (fs.existsSync(logDir(cwd))) {
+    for (const id of removedRunIds) {
+      const lf = path.join(logDir(cwd), `${id}.log`);
+      try { fs.unlinkSync(lf); logsRemoved++; } catch {}
+    }
+  }
+  return { removed: removedRunIds.length, logs_removed: logsRemoved };
+}
+module.exports = {
+  SCHEMA_VERSION,
+  start, // writer: builds the in-memory run token
+  finish, // writer: appends the JSONL record and the optional side log
+  read, // reader: parses and filters agent-runs.jsonl
+  prune, // maintenance: drops records older than a cutoff, plus their side logs
+  // exposed for tests / introspection
+  newRunId,
+  processSessionId,
+  jsonlPath,
+  logDir,
+};