qualia-framework 4.3.0 → 4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +13 -1
- package/README.md +16 -13
- package/agents/builder.md +12 -20
- package/agents/plan-checker.md +18 -0
- package/agents/planner.md +9 -0
- package/agents/verifier.md +62 -0
- package/bin/agent-runs.js +233 -0
- package/bin/cli.js +225 -21
- package/bin/install.js +25 -5
- package/bin/plan-contract.js +220 -0
- package/bin/slop-detect.mjs +357 -0
- package/bin/state.js +199 -10
- package/docs/agent-runs.md +273 -0
- package/docs/erp-contract.md +5 -0
- package/docs/plan-contract.md +321 -0
- package/hooks/auto-update.js +3 -7
- package/hooks/pre-compact.js +22 -11
- package/hooks/pre-deploy-gate.js +16 -2
- package/hooks/pre-push.js +22 -2
- package/hooks/stop-session-log.js +1 -1
- package/package.json +8 -2
- package/rules/design-brand.md +110 -0
- package/rules/design-laws.md +144 -0
- package/rules/design-product.md +110 -0
- package/rules/design-rubric.md +153 -0
- package/skills/qualia-build/SKILL.md +5 -5
- package/skills/qualia-flush/SKILL.md +1 -1
- package/skills/qualia-new/SKILL.md +40 -3
- package/skills/qualia-polish/SKILL.md +180 -136
- package/skills/qualia-quick/SKILL.md +1 -1
- package/skills/qualia-report/SKILL.md +25 -5
- package/skills/qualia-ship/SKILL.md +12 -10
- package/skills/zoho-workflow/SKILL.md +64 -0
- package/templates/DESIGN.md +229 -435
- package/templates/PRODUCT.md +95 -0
- package/templates/help.html +13 -7
- package/tests/bin.test.sh +6 -3
- package/tests/hooks.test.sh +9 -20
- package/tests/lib.test.sh +217 -0
- package/tests/runner.js +96 -75
- package/tests/state.test.sh +4 -3
- package/skills/qualia-design/SKILL.md +0 -169
package/CLAUDE.md
CHANGED
|
@@ -36,8 +36,20 @@ For each milestone, for each phase:
|
|
|
36
36
|
↓
|
|
37
37
|
/qualia-milestone → close milestone, archive artifacts, prep next (human gate)
|
|
38
38
|
↓ (repeat for each milestone until Handoff)
|
|
39
|
+
Design as a thread (v4.5.0+): every road agent loads PRODUCT.md +
|
|
40
|
+
DESIGN.md + design-laws.md substrate. Builders run slop-detect on every
|
|
41
|
+
frontend commit. Verifiers score 8 design dimensions per phase.
|
|
42
|
+
|
|
43
|
+
/qualia-polish is now a flexible verb usable at any scope:
|
|
44
|
+
/qualia-polish src/components/Button.tsx ~30s component touch-up
|
|
45
|
+
/qualia-polish app/dashboard ~3m section pass
|
|
46
|
+
/qualia-polish ~12m whole app, fan-out
|
|
47
|
+
/qualia-polish --redesign ~30m ground-up redesign
|
|
48
|
+
/qualia-polish --critique read-only scored audit
|
|
49
|
+
/qualia-polish --quick ~1m gates only
|
|
50
|
+
|
|
39
51
|
Final milestone = Handoff:
|
|
40
|
-
/qualia-polish → design
|
|
52
|
+
/qualia-polish → final design pass (whole app)
|
|
41
53
|
(content + SEO) → Phase 2
|
|
42
54
|
(final QA) → Phase 3
|
|
43
55
|
/qualia-ship → deploy to production (quality gates → deploy → verify)
|
package/README.md
CHANGED
|
@@ -40,7 +40,7 @@ Open Claude Code in any project directory.
|
|
|
40
40
|
...repeat plan/build/verify per phase...
|
|
41
41
|
/qualia-milestone # Close current milestone, open next (loads next scope from JOURNEY.md)
|
|
42
42
|
...repeat per milestone until the final "Handoff" milestone...
|
|
43
|
-
/qualia-polish # Design
|
|
43
|
+
/qualia-polish # Design pass — flexible scope: component, route, app, redesign, critique, quick
|
|
44
44
|
/qualia-ship # Deploy to production
|
|
45
45
|
/qualia-handoff # Enforce the 4 mandatory handoff deliverables
|
|
46
46
|
/qualia-report # Mandatory end-of-session report + ERP upload
|
|
@@ -77,7 +77,6 @@ Two human gates per project. One halt case (gap-cycle limit exceeded on a failin
|
|
|
77
77
|
|
|
78
78
|
```
|
|
79
79
|
/qualia-debug # Structured debugging
|
|
80
|
-
/qualia-design # One-shot design transformation
|
|
81
80
|
/qualia-review # Production audit (scored diagnostics)
|
|
82
81
|
/qualia-optimize # Deep optimization pass (parallel specialist agents)
|
|
83
82
|
/qualia-quick # Fast path for trivial fixes (skips planning)
|
|
@@ -115,13 +114,13 @@ Project
|
|
|
115
114
|
|
|
116
115
|
**Why it matters:** non-technical team members can follow the ladder from any entry point. `/qualia` and `/qualia-milestone` render JOURNEY.md as a visual ladder with current position highlighted.
|
|
117
116
|
|
|
118
|
-
## What's Inside (v4.
|
|
117
|
+
## What's Inside (v4.3.0)
|
|
119
118
|
|
|
120
|
-
- **
|
|
119
|
+
- **28 skills** — from setup to handoff, plus debug, design, review, optimize, diagnostic (`qualia-idk`), memory flush, postmortem, session management, skill authoring, per-phase depth (discuss, research, map), and full-journey additions (`--auto` chaining, milestone closure)
|
|
121
120
|
- **8 agents** (each runs in fresh context): planner, builder, verifier, qa-browser, researcher, research-synthesizer, roadmapper, plan-checker
|
|
122
|
-
- **
|
|
123
|
-
- **
|
|
124
|
-
- **
|
|
121
|
+
- **9 hooks** (pure Node.js, cross-platform): session-start, auto-update, git-guardrails, branch-guard, pre-push tracking sync, migration-guard, pre-deploy-gate, pre-compact state save, stop-session-log
|
|
122
|
+
- **6 rules**: security, frontend, design-reference, deployment, infrastructure, grounding
|
|
123
|
+
- **21 template files**: project.md, **journey.md** (new in v4), plan.md (story-file format), state.md, DESIGN.md, tracking.json (now with `milestone_name` + `milestones[]`), requirements.md (multi-milestone), roadmap.md (current milestone only), phase-context.md, 4 project-type templates (website, ai-agent, voice-agent, mobile-app), 5 research-project templates (STACK, FEATURES, ARCHITECTURE, PITFALLS, SUMMARY), knowledge templates, help.html
|
|
125
124
|
- **1 reference** — questioning.md methodology for deep project initialization
|
|
126
125
|
|
|
127
126
|
## Supported Platforms
|
|
@@ -156,13 +155,17 @@ Splitting planner, builder, and verifier into separate agents with separate cont
|
|
|
156
155
|
|
|
157
156
|
### Production-Grade Hooks
|
|
158
157
|
|
|
159
|
-
All
|
|
158
|
+
All 9 hooks are real ops engineering, not theoretical:
|
|
160
159
|
|
|
161
160
|
- **Pre-deploy gate** — TypeScript, lint, tests, build, and `service_role` leak scan before `vercel --prod`
|
|
161
|
+
- **Session start** — Shows project state, next command, update notices, and health warnings at session start
|
|
162
|
+
- **Auto-update** — Daily update check with cached failures so offline/npm issues do not slow every command
|
|
163
|
+
- **Git guardrails** — Blocks destructive git operations like force-push to main/master, `git clean -fd`, and `rm -rf .git`
|
|
162
164
|
- **Branch guard** — Role-aware: owner can push to main, employees can't (parses refspec so `feature/x:main` bypass is blocked)
|
|
163
165
|
- **Migration guard** — Catches `DROP TABLE` without `IF EXISTS`, `DELETE`/`UPDATE` without `WHERE`, `CREATE TABLE` without RLS, `GRANT ... TO PUBLIC`, `ALTER TABLE ... DROP COLUMN`
|
|
164
166
|
- **Pre-push** — Stamps tracking.json via a bot commit so the ERP always sees fresh data
|
|
165
167
|
- **Pre-compact** — Saves state before context compression
|
|
168
|
+
- **Stop-session log** — Writes lightweight daily session checkpoints into the knowledge layer
|
|
166
169
|
|
|
167
170
|
### Enforced State Machine
|
|
168
171
|
|
|
@@ -183,12 +186,12 @@ npx qualia-framework@latest install
|
|
|
183
186
|
|
|
|
184
187
|
v
|
|
185
188
|
~/.claude/
|
|
186
|
-
├── skills/
|
|
189
|
+
├── skills/ 28 slash commands
|
|
187
190
|
├── agents/ 8 agent definitions (planner, builder, verifier, qa-browser, roadmapper, research-synthesizer, researcher, plan-checker)
|
|
188
|
-
├── hooks/
|
|
189
|
-
├── bin/ state.js
|
|
191
|
+
├── hooks/ 9 Node.js hooks — cross-platform (no bash dependency)
|
|
192
|
+
├── bin/ state.js + qualia-ui.js + statusline.js + knowledge.js + knowledge-flush.js
|
|
190
193
|
├── knowledge/ learned-patterns.md, common-fixes.md, client-prefs.md
|
|
191
|
-
├── rules/ security, frontend, design-reference, deployment, infrastructure
|
|
194
|
+
├── rules/ security, frontend, design-reference, deployment, infrastructure, grounding
|
|
192
195
|
├── qualia-templates/ project.md, journey.md, plan.md (story-file), state.md, DESIGN.md, tracking.json, requirements.md, roadmap.md, + projects/*.md + research-project/*.md + help.html
|
|
193
196
|
├── qualia-references/ questioning.md (deep project initialization methodology)
|
|
194
197
|
├── CLAUDE.md global instructions (role-configured per team member)
|
|
@@ -201,6 +204,6 @@ Stack: Next.js 16+, React 19, TypeScript, Supabase, Vercel. Voice: Retell AI, El
|
|
|
201
204
|
|
|
202
205
|
## Changelog
|
|
203
206
|
|
|
204
|
-
See [CHANGELOG.md](./CHANGELOG.md) for the full version history. v4.
|
|
207
|
+
See [CHANGELOG.md](./CHANGELOG.md) for the full version history. v4.3.0 release notes are the most recent section.
|
|
205
208
|
|
|
206
209
|
Built by [Qualia Solutions](https://qualiasolutions.net) — Nicosia, Cyprus.
|
package/agents/builder.md
CHANGED
|
@@ -84,10 +84,11 @@ Before committing:
|
|
|
84
84
|
1. Run every command in **Validation:** — they must pass
|
|
85
85
|
2. Mentally walk through each **Acceptance Criterion** — does the code actually produce that observable behavior?
|
|
86
86
|
3. Run `npx tsc --noEmit` if you touched TypeScript files
|
|
87
|
-
4.
|
|
88
|
-
5.
|
|
87
|
+
4. **If you touched any `.tsx/.jsx/.css/.scss/.html` file: run `node bin/slop-detect.mjs {touched paths}`. Exit 1 (critical findings) BLOCKS the commit.** Fix the findings (apply the rewrite recipe in the script's output), re-run, repeat until exit 0.
|
|
88
|
+
5. No `// TODO`, no placeholder text, no stub functions
|
|
89
|
+
6. Imports are wired — not just declared but actually used
|
|
89
90
|
|
|
90
|
-
If any Validation command fails or any AC is not met, fix before committing. Do not commit and hope the verifier catches it.
|
|
91
|
+
If any Validation command fails, slop-detect returns 1, or any AC is not met, fix before committing. Do not commit and hope the verifier catches it.
|
|
91
92
|
|
|
92
93
|
### 5. Commit
|
|
93
94
|
One atomic commit per task:
|
|
@@ -132,23 +133,14 @@ Rule of thumb: If you can explain the change in one sentence in a commit message
|
|
|
132
133
|
- Always check auth server-side
|
|
133
134
|
- Enable RLS on every table
|
|
134
135
|
- Validate input with Zod at system boundaries
|
|
135
|
-
5. **Frontend standards (mandatory for any .tsx/.jsx/.css file):**
|
|
136
|
-
- Before
|
|
137
|
-
-
|
|
138
|
-
-
|
|
139
|
-
-
|
|
140
|
-
-
|
|
141
|
-
-
|
|
142
|
-
-
|
|
143
|
-
- Semantic HTML (`nav`, `main`, `section`, `article`) — not div soup
|
|
144
|
-
- Keyboard accessible: Tab, Enter, Escape, Arrow keys work
|
|
145
|
-
- Touch targets: 44px minimum
|
|
146
|
-
- Form inputs: visible labels (not placeholder-only), error messages with `aria-describedby`
|
|
147
|
-
- Motion: 150–200ms hover, 250ms expand, stagger children on load, respect `prefers-reduced-motion`
|
|
148
|
-
- Mobile-first responsive: stack on mobile, expand on desktop, fluid typography
|
|
149
|
-
- Skip link on every page, heading hierarchy (one h1, sequential order)
|
|
150
|
-
- No emoji as icons — use SVGs
|
|
151
|
-
- `cursor: pointer` on all clickable elements
|
|
136
|
+
5. **Frontend standards (mandatory for any .tsx/.jsx/.css/.scss/.html file):**
|
|
137
|
+
- **Read substrate first.** Before any frontend code: read `PRODUCT.md`, `DESIGN.md`, `rules/design-laws.md`, AND the matching register file (`rules/design-brand.md` if `register: brand`, `rules/design-product.md` if `register: product`). These ARE the source of truth.
|
|
138
|
+
- **Honor the task's `**Design:**` contract.** If the planner specified `Tokens used: var(--accent), --space-4`, those are the tokens you use — don't introduce new ones without flagging.
|
|
139
|
+
- **OKLCH only.** No `#000`, no `#fff`, no scattered hex. Reference design tokens via `var(--name)`.
|
|
140
|
+
- **Banned fonts:** Inter, Roboto, Arial, Helvetica, system-ui, Space Grotesk. Use the font defined in DESIGN.md §3.
|
|
141
|
+
- **No purple-blue gradients, no gradient text, no side-stripe borders, no glassmorphism by default, no identical card grids, no modal as first thought, no em dashes** (per `rules/design-laws.md` §8 absolute bans).
|
|
142
|
+
- **Pre-commit guard:** run `node bin/slop-detect.mjs {touched files}`. Exit 1 = blocked.
|
|
143
|
+
- All other rules (states, semantics, keyboard, touch targets, motion, responsive, headings, skip links, no-emoji-icons, cursor:pointer, WCAG AA) carry over from `rules/design-laws.md` and the register file.
|
|
152
144
|
6. **No empty catch blocks.** At minimum, log the error.
|
|
153
145
|
7. **No dangerouslySetInnerHTML.** No eval().
|
|
154
146
|
8. **React/Next.js performance:**
|
package/agents/plan-checker.md
CHANGED
|
@@ -105,6 +105,24 @@ If `.planning/phase-{N}-context.md` exists, read its "Locked Decisions" section.
|
|
|
105
105
|
|
|
106
106
|
**FAIL if:** plan contradicts a locked decision (e.g., context says "use library X" but plan uses library Y).
|
|
107
107
|
|
|
108
|
+
### Rule 7b: Frontend tasks have a design contract (v4.5.0+)
|
|
109
|
+
|
|
110
|
+
A "frontend task" is any task whose **Files:** list contains a `.tsx`, `.jsx`, `.css`, `.scss`, `.html`, `.svelte`, `.vue`, or `.astro` path.
|
|
111
|
+
|
|
112
|
+
Every frontend task MUST include a `**Design:**` field with:
|
|
113
|
+
- `Register: brand` or `Register: product`
|
|
114
|
+
- `Tokens used:` non-empty list of CSS custom properties (e.g. `var(--accent), --space-4`) — proves the task references DESIGN.md tokens, not raw hex/px
|
|
115
|
+
- `Scope: component|section|page|app`
|
|
116
|
+
- `Anti-pattern guard:` line confirming builder runs `bin/slop-detect.mjs` pre-commit
|
|
117
|
+
|
|
118
|
+
**FAIL if:**
|
|
119
|
+
- Frontend task missing `**Design:**` field entirely
|
|
120
|
+
- Register is neither `brand` nor `product`
|
|
121
|
+
- Tokens used is empty or contains raw hex (`#ff0000`) instead of CSS-var references
|
|
122
|
+
- Plan violates an absolute ban (per `rules/design-laws.md` §8): grep the plan for `gradient text`, `glassmorphism`, `purple gradient`, `hero metric template`, `identical card grid`, `modal as first thought`, `border-left:.4px` decorative, `font-family: Inter`, `Space Grotesk`. Any hit = REVISE.
|
|
123
|
+
|
|
124
|
+
Non-frontend tasks (backend, migrations, API routes without UI) should not have a `**Design:**` field. If one is added by mistake, warn but don't fail.
|
|
125
|
+
|
|
108
126
|
### Rule 8: Validation commands test behavior, not just existence
|
|
109
127
|
|
|
110
128
|
Each task's `**Validation:**` list must contain at least one `grep-match` or `command-exit` check — a command that proves the code DOES something. A task whose ONLY validation is `test -f {file}` will pass even if the file contains only `// TODO`.
|
package/agents/planner.md
CHANGED
|
@@ -11,6 +11,9 @@ You create phase plans. Plans are prompts — they ARE the instructions the buil
|
|
|
11
11
|
## Input
|
|
12
12
|
|
|
13
13
|
- `<project_context>` — inlined `.planning/PROJECT.md` contents
|
|
14
|
+
- `<product_context>` — inlined `PRODUCT.md` (if present — required from v4.5.0 onward; substrate for any frontend task)
|
|
15
|
+
- `<design_spec>` — inlined `DESIGN.md` (if present — visual contract for any frontend task)
|
|
16
|
+
- `<design_substrate>` — inlined `rules/design-laws.md` + matching register file (`rules/design-brand.md` OR `rules/design-product.md` based on PRODUCT.md `register:` field)
|
|
14
17
|
- `<current_state>` — inlined `.planning/STATE.md` contents
|
|
15
18
|
- `<phase_details>` — phase goal + success criteria + REQ-IDs from ROADMAP.md
|
|
16
19
|
- `<locked_decisions>` (optional) — Locked Decisions from `.planning/phase-{N}-context.md` if it exists
|
|
@@ -101,6 +104,12 @@ waves: {count}
|
|
|
101
104
|
|
|
102
105
|
**Context:** Read @{file references}
|
|
103
106
|
|
|
107
|
+
**Design:** (REQUIRED for any task touching .tsx/.jsx/.css/.scss/.html — omit otherwise)
|
|
108
|
+
- Register: {brand|product}
|
|
109
|
+
- Tokens used: {var(--accent), var(--text), --space-4, ...}
|
|
110
|
+
- Scope: {component|section|page|app}
|
|
111
|
+
- Anti-pattern guard: builder runs `node bin/slop-detect.mjs {target}` pre-commit; commit blocked on critical findings
|
|
112
|
+
|
|
104
113
|
## Success Criteria
|
|
105
114
|
- [ ] {phase-level truth 1}
|
|
106
115
|
- [ ] {phase-level truth 2}
|
package/agents/verifier.md
CHANGED
|
@@ -14,6 +14,9 @@ You verify that a phase achieved its GOAL, not just completed its TASKS.
|
|
|
14
14
|
|
|
15
15
|
- `<plan_path>` — path to `.planning/phase-{N}-plan.md`
|
|
16
16
|
- `<project_context>` — inlined `.planning/PROJECT.md` contents (for Quality scoring against project conventions)
|
|
17
|
+
- `<product_context>` — inlined `PRODUCT.md` (if present, v4.5.0+) — register, anti-references, principles
|
|
18
|
+
- `<design_spec>` — inlined `DESIGN.md` (if present) — visual contract for design rubric scoring
|
|
19
|
+
- `<design_substrate>` — inlined `rules/design-laws.md`, `rules/design-rubric.md`, and the matching register file
|
|
17
20
|
- `<previous_verification>` (optional) — inlined `.planning/phase-{N}-verification.md` from a prior run
|
|
18
21
|
|
|
19
22
|
## Output
|
|
@@ -118,6 +121,65 @@ grep -c "async.*=> {}\|() => {}" {file}
|
|
|
118
121
|
|
|
119
122
|
If Level 2 finds more than 2 stub patterns in a single file, mark that criterion as **FAIL** regardless of other checks. Stubs are not implementations.
|
|
120
123
|
|
|
124
|
+
## Design Verification (v4.5.0+)
|
|
125
|
+
|
|
126
|
+
If the phase touched any frontend file (`.tsx/.jsx/.css/.scss/.html`), run the design verification block IN ADDITION to the functional verification above. Design FAIL blocks the phase the same way a functional FAIL does.
|
|
127
|
+
|
|
128
|
+
### Step A — slop-detect gate (must pass)
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
node bin/slop-detect.mjs {touched frontend paths from git diff}
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
If exit code is 1 (critical findings present), the phase FAILS. Quote the findings in the report. Do not score the rubric — fix slop first.
|
|
135
|
+
|
|
136
|
+
### Step B — Design rubric scoring (8 dimensions)
|
|
137
|
+
|
|
138
|
+
Apply `rules/design-rubric.md`. Score 1-5 per dimension WITH evidence on the next line. Default to 3 unless evidence supports otherwise.
|
|
139
|
+
|
|
140
|
+
Scoped by phase scope:
|
|
141
|
+
- Component-only phase → score Typography, Color cohesion, States, Motion intent, Microcopy, Container depth (skip Layout originality, Spatial rhythm — those are page-level concerns)
|
|
142
|
+
- Page/section phase → all 8 dimensions
|
|
143
|
+
- Full app phase → all 8 dimensions across 2-3 representative routes, average
|
|
144
|
+
|
|
145
|
+
Output format (mandatory, append to verification.md):
|
|
146
|
+
|
|
147
|
+
```markdown
|
|
148
|
+
## Design Rubric — Phase {N}
|
|
149
|
+
|
|
150
|
+
| Dim | Score | Evidence |
|
|
151
|
+
|---|---|---|
|
|
152
|
+
| Typography | 4 | `app/page.tsx:14` Fraunces + JetBrains Mono pair, weights 400/500/700 |
|
|
153
|
+
| Color cohesion | 3 | All CSS vars in `app/globals.css:8-22`, OKLCH used, strategy: Restrained |
|
|
154
|
+
| ... | ... | ... |
|
|
155
|
+
|
|
156
|
+
**Aggregate:** {sum}/40 (avg {sum/8})
|
|
157
|
+
**Design verdict:** PASS (all dims ≥ 3) | FAIL (Layout Originality at 2 — three-column grid, see `app/page.tsx:42`)
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### Step C — Drift audit (full app verification only)
|
|
161
|
+
|
|
162
|
+
Compare implementation against DESIGN.md tokens. Flag tokens used in code but not declared, and raw hex values still appearing.
|
|
163
|
+
|
|
164
|
+
```bash
|
|
165
|
+
# Orphan tokens (used in code, missing from DESIGN.md)
# Token names may contain digits, uppercase letters and underscores (e.g. --space-4),
# so the character class is wider than [a-z-].
# -o prints every match on its own line, so several var(--x) references on one
# source line are all counted; -h drops the "file:" prefix that -r would add.
# The match stops right after the name, so var(--x, fallback) is caught too.
grep -rhoE "var\(--[A-Za-z0-9_-]+" src/ app/ components/ 2>/dev/null | \
  sed -E 's/^var\(--//' | sort -u > /tmp/used-tokens
# Declarations are lines that start (after optional indentation) with "--name:".
grep -oE "^[[:space:]]*--[A-Za-z0-9_-]+:" DESIGN.md 2>/dev/null | \
  sed -E 's/^[[:space:]]*--//; s/:$//' | sort -u > /tmp/declared
# Lines only in the first (sorted) file = tokens used but never declared.
comm -23 /tmp/used-tokens /tmp/declared
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Drift findings are reported, not auto-failing. Drift may be intentional. But if 5+ orphan tokens appear, flag as MEDIUM finding for the next polish cycle.
|
|
173
|
+
|
|
174
|
+
### Phase verdict (combined)
|
|
175
|
+
|
|
176
|
+
```
|
|
177
|
+
phase_pass = functional_pass AND slop_detect_pass AND design_rubric_pass
|
|
178
|
+
phase_fail = ANY of the above failed
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
A perfect functional verification with a Design Rubric score of 2 in any dimension is a phase FAIL. Design is not a "would be nice" — it's a verification dimension equal to functionality.
|
|
182
|
+
|
|
121
183
|
### Wiring Check (Level 3)
|
|
122
184
|
|
|
123
185
|
```bash
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Agent runs telemetry — JSONL writer + reader. See docs/agent-runs.md.
|
|
3
|
+
//
|
|
4
|
+
// Pure library. Atomic writes via fs.appendFileSync (single write() syscall
|
|
5
|
+
// to an O_APPEND file descriptor; safe at our record sizes — see the spec).
|
|
6
|
+
//
|
|
7
|
+
// Zero npm dependencies.
|
|
8
|
+
|
|
9
|
+
const fs = require("fs");
|
|
10
|
+
const path = require("path");
|
|
11
|
+
const crypto = require("crypto");
|
|
12
|
+
|
|
13
|
+
// Version stamped into every record as `schema_version`.
const SCHEMA_VERSION = 1;

// Agent type names accepted without being flagged by finish().
const VALID_AGENT_TYPES = new Set([
  "planner",
  "plan-checker",
  "builder",
  "verifier",
  "qa-browser",
  "researcher",
  "research-synthesizer",
  "roadmapper",
  "team-orchestrator",
  "custom",
]);

// Status values accepted by finish(); anything else is rewritten to "failure".
const VALID_STATUS = new Set([
  "success",
  "partial",
  "blocked",
  "failure",
  "timeout",
  "interrupted",
]);
|
|
24
|
+
|
|
25
|
+
// Lazily generated, process-wide identifier. start() falls back to it when the
// caller supplies no session_id / skill_invocation_id.
let _processSessionId = null;

/**
 * Return the per-process session id, creating it on first use.
 * The value is formatted as a version-4 UUID string and is stable for the
 * lifetime of the process.
 * @returns {string}
 */
function processSessionId() {
  if (_processSessionId) return _processSessionId;

  const bytes = crypto.randomBytes(16);
  bytes[6] = (bytes[6] & 0x0f) | 0x40; // version nibble = 4
  bytes[8] = (bytes[8] & 0x3f) | 0x80; // variant bits = 10xx
  const hex = bytes.toString("hex");

  // 8-4-4-4-12 grouping of the 32 hex characters.
  const groups = [[0, 8], [8, 12], [12, 16], [16, 20], [20, 32]];
  _processSessionId = groups.map(([from, to]) => hex.slice(from, to)).join("-");
  return _processSessionId;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Build a 26-character run id: the current time in upper-case base 36
 * (left-padded with zeros to 10 characters) followed by upper-case random hex.
 * Because the timestamp comes first, ids sort by creation time.
 * @returns {string}
 */
function newRunId() {
  const stamp = Date.now().toString(36).toUpperCase().padStart(10, "0");
  const entropy = crypto.randomBytes(10).toString("hex").toUpperCase();
  return (stamp + entropy).substring(0, 26);
}
|
|
45
|
+
|
|
46
|
+
/**
 * Path of the `.planning` directory under a project root.
 * @param {string} [cwd] project root; the process working directory when falsy
 * @returns {string}
 */
function planningDir(cwd) {
  const root = cwd ? cwd : process.cwd();
  return path.join(root, ".planning");
}
|
|
49
|
+
|
|
50
|
+
/**
 * Path of the JSONL run log inside the planning directory.
 * @param {string} [cwd] project root, forwarded to planningDir()
 * @returns {string}
 */
function jsonlPath(cwd) {
  const dir = planningDir(cwd);
  return path.join(dir, "agent-runs.jsonl");
}
|
|
53
|
+
|
|
54
|
+
/**
 * Path of the directory that holds per-run side logs.
 * @param {string} [cwd] project root, forwarded to planningDir()
 * @returns {string}
 */
function logDir(cwd) {
  const dir = planningDir(cwd);
  return path.join(dir, "agent-runs");
}
|
|
57
|
+
|
|
58
|
+
/**
 * Telemetry is on unless the QUALIA_TELEMETRY environment variable is set to
 * "off" (compared case-insensitively).
 * @returns {boolean}
 */
function telemetryEnabled() {
  const flag = process.env.QUALIA_TELEMETRY || "";
  const isOff = flag.toLowerCase() === "off";
  return !isOff;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Create directory `p` (and any missing parents) when nothing exists at that path.
 * @param {string} p
 */
function ensureDir(p) {
  if (fs.existsSync(p)) return;
  fs.mkdirSync(p, { recursive: true });
}
|
|
65
|
+
|
|
66
|
+
/**
 * Keep at most the last `max` characters of a string.
 * @param {*} s value to truncate
 * @param {number} max maximum length to keep
 * @returns {string|undefined} `s` itself when short enough, its tail otherwise,
 *   or undefined when `s` is not a string
 */
function truncTail(s, max) {
  if (typeof s !== "string") return undefined;
  const fits = s.length <= max;
  return fits ? s : s.slice(s.length - max);
}
|
|
71
|
+
|
|
72
|
+
// ─── Writer ────────────────────────────────────────────────────────────
|
|
73
|
+
|
|
74
|
+
/**
 * Begin tracking an agent run and return the token that finish() consumes.
 *
 * Nothing is written to disk here; the token only carries the start time and
 * the identifying fields of the record.
 *
 * @param {object} [opts] run metadata (agent_type, model, run_id, session_id,
 *   phase, milestone, task_id, wave, ...). May be omitted; missing ids fall
 *   back to newRunId() / processSessionId().
 * @returns {{started_at: string, started_ms: number, record: object}}
 */
function start(opts) {
  // Tolerate start() / start(null): the run is still traced, and finish()
  // flags the missing agent_type.
  const o = opts || {};

  // Read the clock once so the ISO string and the millisecond value used for
  // duration_ms describe the same instant.
  const startedMs = Date.now();
  const startedAt = new Date(startedMs).toISOString();

  return {
    started_at: startedAt,
    started_ms: startedMs,
    record: {
      schema_version: SCHEMA_VERSION,
      run_id: o.run_id || newRunId(),
      parent_run_id: o.parent_run_id || undefined,
      skill_invocation_id: o.skill_invocation_id || processSessionId(),
      session_id: o.session_id || processSessionId(),
      agent_type: o.agent_type,
      agent_name: o.agent_name || undefined,
      model: o.model,
      effort: o.effort || undefined,
      project: o.project || undefined,
      // `!= null` (not `||`) so a legitimate 0 is kept.
      phase: o.phase != null ? o.phase : undefined,
      milestone: o.milestone != null ? o.milestone : undefined,
      task_id: o.task_id || undefined,
      wave: o.wave != null ? o.wave : undefined,
      retry_of: o.retry_of || undefined,
      started_at: startedAt,
    },
  };
}
|
|
101
|
+
|
|
102
|
+
/**
 * Complete a run started with start(): append one JSON line to the run log
 * and, for non-success runs that supply `full_stderr`, write a side log file.
 *
 * @param {object} token value returned by start()
 * @param {object} [result] outcome fields (status, cwd, token counts,
 *   files_changed, commit_sha, failure_reason, failure_detail, full_stderr, ...).
 *   When omitted or null the run is recorded as a failure with reason "unknown".
 * @returns {{written: boolean, reason?: string, run_id?: string, log_file?: string}}
 * @throws {Error} when `token` is missing or has no `record`
 */
function finish(token, result) {
  if (!token || !token.record) throw new Error("finish: invalid token");
  if (!telemetryEnabled()) return { written: false, reason: "telemetry-off" };

  // A caller that crashed before building a result object should still leave
  // a trace instead of raising a TypeError here.
  const res = result || {};

  const cwd = res.cwd || process.cwd();
  // Without a planning directory nothing is written and none is created.
  if (!fs.existsSync(planningDir(cwd))) {
    return { written: false, reason: "no-planning-dir" };
  }

  const finishedMs = Date.now();
  const record = {
    ...token.record,
    status: res.status,
    started_at: token.record.started_at,
    finished_at: new Date(finishedMs).toISOString(),
    duration_ms: finishedMs - token.started_ms,
    input_tokens: res.input_tokens,
    output_tokens: res.output_tokens,
    cache_read_tokens: res.cache_read_tokens,
    cache_creation_tokens: res.cache_creation_tokens,
    tool_calls_count: res.tool_calls_count,
    // De-duplicate while keeping first-seen order.
    files_changed: Array.isArray(res.files_changed) ? [...new Set(res.files_changed)] : undefined,
    commit_sha: res.commit_sha || undefined,
    verifier_score: res.verifier_score,
    verification_result: res.verification_result,
    failure_reason: res.failure_reason,
    // Only the last 500 characters of the detail are stored inline.
    failure_detail: truncTail(res.failure_detail, 500),
  };

  if (!VALID_AGENT_TYPES.has(record.agent_type)) {
    record.failure_reason = record.failure_reason || "unknown";
    // don't reject — we want the trace even if the caller misnamed itself
  }
  if (!VALID_STATUS.has(record.status)) {
    record.status = "failure";
    record.failure_reason = record.failure_reason || "unknown";
  }

  // Side log for non-success runs.
  if (record.status !== "success" && typeof res.full_stderr === "string" && res.full_stderr.length) {
    try {
      ensureDir(logDir(cwd));
      const logFile = path.join(logDir(cwd), `${record.run_id}.log`);
      fs.writeFileSync(logFile, res.full_stderr);
      // Stored relative to cwd with forward slashes on every platform.
      record.log_file = path.relative(cwd, logFile).split(path.sep).join("/");
    } catch {
      // Side-log is best-effort — never block the JSONL write.
    }
  }

  // Drop undefined keys for a compact line.
  const clean = {};
  for (const [k, v] of Object.entries(record)) if (v !== undefined) clean[k] = v;

  const line = JSON.stringify(clean) + "\n";
  ensureDir(planningDir(cwd));
  fs.appendFileSync(jsonlPath(cwd), line);
  return { written: true, run_id: record.run_id, log_file: record.log_file };
}
|
|
162
|
+
|
|
163
|
+
// ─── Reader ────────────────────────────────────────────────────────────
|
|
164
|
+
|
|
165
|
+
/**
 * Load run records from the JSONL log, optionally filtered.
 *
 * Lines that are not valid JSON, or that parse to something other than a
 * plain object (e.g. `null`, a number, an array), are skipped so one bad line
 * can neither hide the remaining records nor crash a filter.
 *
 * @param {string} [cwd] project root, forwarded to jsonlPath()
 * @param {object} [opts]
 * @param {boolean} [opts.failed] keep only records whose status is not "success"
 * @param {string} [opts.task_id] keep only records with this exact task_id
 * @param {*} [opts.phase] keep only records whose phase is strictly equal
 * @param {number} [opts.limit] keep only the last N records after filtering
 * @returns {object[]} records in file order; empty when the log does not exist
 */
function read(cwd, opts) {
  const file = jsonlPath(cwd);
  if (!fs.existsSync(file)) return [];

  const records = [];
  for (const line of fs.readFileSync(file, "utf8").split(/\r?\n/)) {
    if (!line) continue;
    let rec;
    try {
      rec = JSON.parse(line);
    } catch {
      continue; // skip corrupt line; we never want a single bad record to mask the rest
    }
    // JSON.parse("null") succeeds; reading `.status` on it would throw below.
    if (rec === null || typeof rec !== "object" || Array.isArray(rec)) continue;
    records.push(rec);
  }

  let out = records;
  if (opts && opts.failed) {
    out = out.filter((r) => r.status !== "success");
  }
  if (opts && opts.task_id) {
    out = out.filter((r) => r.task_id === opts.task_id);
  }
  if (opts && opts.phase != null) {
    out = out.filter((r) => r.phase === opts.phase);
  }
  if (opts && opts.limit) {
    out = out.slice(-opts.limit);
  }
  return out;
}
|
|
189
|
+
|
|
190
|
+
/**
 * Remove records older than a cutoff from the JSONL log, plus their side logs.
 *
 * A record is removed when its `finished_at` (or, failing that, `started_at`)
 * parses to a time strictly before the cutoff. Lines that cannot be parsed,
 * that are not JSON objects, or that carry no usable timestamp are kept.
 *
 * @param {string} [cwd] project root, forwarded to jsonlPath() / logDir()
 * @param {string} beforeIso cutoff date, anything Date.parse accepts
 * @returns {{removed: number, logs_removed: number}}
 * @throws {Error} when `beforeIso` is not a parseable date (only checked when
 *   the log file exists)
 */
function prune(cwd, beforeIso) {
  const file = jsonlPath(cwd);
  if (!fs.existsSync(file)) return { removed: 0, logs_removed: 0 };
  const cutoff = Date.parse(beforeIso);
  if (!Number.isFinite(cutoff)) throw new Error(`prune: invalid date "${beforeIso}"`);

  const lines = fs.readFileSync(file, "utf8").split(/\r?\n/).filter(Boolean);
  const kept = [];
  const removedRunIds = [];
  for (const line of lines) {
    let rec;
    try { rec = JSON.parse(line); }
    catch { kept.push(line); continue; } // preserve unparseable; never destroy data we don't understand
    // "null", numbers and strings are valid JSON but not records: keep them
    // rather than crash on a property access.
    if (rec === null || typeof rec !== "object") { kept.push(line); continue; }
    const ts = Date.parse(rec.finished_at || rec.started_at || "");
    if (Number.isFinite(ts) && ts < cutoff) {
      removedRunIds.push(rec.run_id);
    } else {
      kept.push(line);
    }
  }

  if (removedRunIds.length > 0) {
    // Write the survivors next to the log and rename over it, so an
    // interrupted prune cannot leave a truncated log behind.
    const tmp = `${file}.${process.pid}.tmp`;
    try {
      fs.writeFileSync(tmp, kept.join("\n") + (kept.length ? "\n" : ""));
      fs.renameSync(tmp, file);
    } catch (err) {
      try { fs.unlinkSync(tmp); } catch { /* temp file may not exist */ }
      throw err;
    }
  }

  let logsRemoved = 0;
  if (fs.existsSync(logDir(cwd))) {
    for (const id of removedRunIds) {
      if (typeof id !== "string" && typeof id !== "number") continue;
      const name = String(id);
      // run_id comes from file content: refuse anything that is not a bare
      // file name so "../x" cannot delete logs outside the log directory.
      if (!name || path.basename(name) !== name) continue;
      const lf = path.join(logDir(cwd), `${name}.log`);
      try { fs.unlinkSync(lf); logsRemoved++; } catch { /* no side log for this run */ }
    }
  }
  return { removed: removedRunIds.length, logs_removed: logsRemoved };
}
|
|
221
|
+
|
|
222
|
+
module.exports = {
|
|
223
|
+
SCHEMA_VERSION,
|
|
224
|
+
start,
|
|
225
|
+
finish,
|
|
226
|
+
read,
|
|
227
|
+
prune,
|
|
228
|
+
// exposed for tests / introspection
|
|
229
|
+
newRunId,
|
|
230
|
+
processSessionId,
|
|
231
|
+
jsonlPath,
|
|
232
|
+
logDir,
|
|
233
|
+
};
|