qualia-framework 6.7.1 → 6.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +2304 -0
- package/FLAGS.md +73 -0
- package/SOUL.md +17 -0
- package/TROUBLESHOOTING.md +183 -0
- package/agents/plan-checker.md +4 -0
- package/agents/planner.md +8 -0
- package/agents/qa-browser.md +6 -2
- package/agents/research-synthesizer.md +4 -0
- package/agents/researcher.md +4 -0
- package/agents/roadmapper.md +4 -0
- package/agents/verifier.md +1 -1
- package/agents/visual-evaluator.md +8 -6
- package/bin/cli.js +7 -1
- package/bin/install.js +69 -7
- package/bin/runtime-manifest.js +1 -0
- package/bin/security-scan.js +24 -10
- package/bin/trust-score.js +34 -0
- package/hooks/migration-guard.js +4 -4
- package/hooks/pre-deploy-gate.js +14 -4
- package/hooks/stop-session-log.js +10 -7
- package/package.json +6 -2
- package/qualia-design/design-rubric.md +3 -1
- package/rules/architecture.md +1 -1
- package/rules/grounding.md +3 -1
- package/rules/speed.md +2 -2
- package/skills/qualia-idk/SKILL.md +3 -3
- package/skills/qualia-polish/REFERENCE.md +11 -6
- package/skills/qualia-polish/SKILL.md +20 -3
- package/skills/qualia-polish/scripts/loop.mjs +24 -6
- package/skills/qualia-polish/scripts/playwright-capture.mjs +89 -11
- package/skills/qualia-polish/scripts/vibe-tokens.mjs +57 -1
- package/skills/qualia-research/SKILL.md +1 -1
- package/skills/qualia-road/SKILL.md +6 -0
- package/skills/qualia-scope/SKILL.md +2 -2
- package/skills/qualia-secure/SKILL.md +5 -5
- package/templates/knowledge/index.md +4 -4
- package/tests/bin.test.sh +4 -4
- package/tests/lib.test.sh +2 -2
package/bin/security-scan.js
CHANGED
|
@@ -192,7 +192,7 @@ function categoryScore(findings) {
|
|
|
192
192
|
return { weighted_sum: ws, score: Math.max(1, 5 - Math.floor(ws / 8)), counts };
|
|
193
193
|
}
|
|
194
194
|
|
|
195
|
-
function renderMarkdown({ findings, paths, score }) {
|
|
195
|
+
function renderMarkdown({ findings, paths, score, skipped = [] }) {
|
|
196
196
|
const lines = [];
|
|
197
197
|
lines.push(`# Qualia security scan — ${new Date().toISOString()}`);
|
|
198
198
|
lines.push("");
|
|
@@ -202,10 +202,16 @@ function renderMarkdown({ findings, paths, score }) {
|
|
|
202
202
|
lines.push(`**Score:** ${score.score} / 5 (weighted_sum=${score.weighted_sum})`);
|
|
203
203
|
lines.push("");
|
|
204
204
|
|
|
205
|
-
if (
|
|
205
|
+
if (skipped.length > 0) {
|
|
206
|
+
lines.push(`⚠️ **${skipped.length} file(s) could not be scanned** — result is NOT clean (score degraded, non-zero exit):`);
|
|
207
|
+
for (const s of skipped) lines.push(`- \`${s.file}\` — ${s.error}`);
|
|
208
|
+
lines.push("");
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
if (findings.length === 0 && skipped.length === 0) {
|
|
206
212
|
lines.push("✅ Clean.");
|
|
207
213
|
lines.push("");
|
|
208
|
-
} else {
|
|
214
|
+
} else if (findings.length > 0) {
|
|
209
215
|
findings.sort((a, b) => severityOrder(a.severity) - severityOrder(b.severity));
|
|
210
216
|
for (const f of findings) {
|
|
211
217
|
lines.push(`### [${f.severity}] ${f.name}`);
|
|
@@ -346,6 +352,7 @@ function main() {
|
|
|
346
352
|
const paths = args.paths || defaultPaths();
|
|
347
353
|
const findings = [];
|
|
348
354
|
const scanned = [];
|
|
355
|
+
const skipped = [];
|
|
349
356
|
|
|
350
357
|
for (const p of paths) {
|
|
351
358
|
try {
|
|
@@ -356,14 +363,21 @@ function main() {
|
|
|
356
363
|
const content = fs.readFileSync(p, "utf8");
|
|
357
364
|
scanned.push(p);
|
|
358
365
|
findings.push(...scanContent(p, content));
|
|
359
|
-
} catch {}
|
|
366
|
+
} catch (e) { skipped.push({ file: p, error: e.message }); }
|
|
360
367
|
}
|
|
361
368
|
|
|
362
369
|
const score = categoryScore(findings);
|
|
370
|
+
// An unreadable file must not silently pass as clean. Degrade the score
|
|
371
|
+
// (min 1) and force a non-clean exit when any target could not be scanned.
|
|
372
|
+
if (skipped.length > 0) score.score = Math.max(1, score.score - 1);
|
|
373
|
+
|
|
374
|
+
// A scan that couldn't read every target must not exit clean — surface at
|
|
375
|
+
// least HIGH (1) so an unreadable file can't slip through a CI gate as 0.
|
|
376
|
+
const exitCode = skipped.length > 0 ? Math.max(1, exitCodeFor(findings)) : exitCodeFor(findings);
|
|
363
377
|
|
|
364
378
|
if (args.json) {
|
|
365
|
-
process.stdout.write(JSON.stringify({ findings, scanned, score }, null, 2) + "\n");
|
|
366
|
-
process.exit(
|
|
379
|
+
process.stdout.write(JSON.stringify({ findings, scanned, skipped, score }, null, 2) + "\n");
|
|
380
|
+
process.exit(exitCode);
|
|
367
381
|
}
|
|
368
382
|
|
|
369
383
|
// --deep emits a prompt pack for the /qualia-secure skill to spawn agents.
|
|
@@ -373,17 +387,17 @@ function main() {
|
|
|
373
387
|
try { fs.mkdirSync(planningDir, { recursive: true }); } catch {}
|
|
374
388
|
const staticPath = path.join(planningDir, "security-scan.md");
|
|
375
389
|
const promptPath = path.join(planningDir, "security-deep-prompt.md");
|
|
376
|
-
fs.writeFileSync(staticPath, renderMarkdown({ findings, paths: scanned, score }));
|
|
390
|
+
fs.writeFileSync(staticPath, renderMarkdown({ findings, paths: scanned, score, skipped }));
|
|
377
391
|
fs.writeFileSync(promptPath, renderDeepPromptPack({ findings, scanned, score }));
|
|
378
392
|
console.log(`Wrote ${staticPath}`);
|
|
379
393
|
console.log(`Wrote ${promptPath}`);
|
|
380
394
|
console.log("");
|
|
381
395
|
console.log("Now run `/qualia-secure` in a Claude session — it will read the prompt pack");
|
|
382
396
|
console.log("and spawn the red/blue/auditor agents in parallel.");
|
|
383
|
-
process.exit(
|
|
397
|
+
process.exit(exitCode);
|
|
384
398
|
}
|
|
385
399
|
|
|
386
|
-
const md = renderMarkdown({ findings, paths: scanned, score });
|
|
400
|
+
const md = renderMarkdown({ findings, paths: scanned, score, skipped });
|
|
387
401
|
if (args.write) {
|
|
388
402
|
const outDefault = path.join(process.cwd(), ".planning", "security-scan.md");
|
|
389
403
|
const out = args.writePath || outDefault;
|
|
@@ -395,7 +409,7 @@ function main() {
|
|
|
395
409
|
process.stdout.write(md);
|
|
396
410
|
}
|
|
397
411
|
|
|
398
|
-
process.exit(
|
|
412
|
+
process.exit(exitCode);
|
|
399
413
|
}
|
|
400
414
|
|
|
401
415
|
module.exports = { main, scanContent, defaultPaths, categoryScore, renderDeepPromptPack, SECRET_PATTERNS, CONFIG_SMELLS };
|
package/bin/trust-score.js
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
const fs = require("fs");
|
|
5
5
|
const path = require("path");
|
|
6
6
|
const os = require("os");
|
|
7
|
+
const { spawnSync } = require("child_process");
|
|
7
8
|
const pc = require("./plan-contract.js");
|
|
8
9
|
const ledger = require("./state-ledger.js");
|
|
9
10
|
const { binFiles } = require("./runtime-manifest.js");
|
|
@@ -16,6 +17,12 @@ const HOMES = [
|
|
|
16
17
|
|
|
17
18
|
const REQUIRED_BIN = binFiles();
|
|
18
19
|
|
|
20
|
+
// Spine scripts: the load-bearing executables that, if they fail to even parse,
|
|
21
|
+
// render the install dead. binFiles() does not include these, so probe them
|
|
22
|
+
// explicitly — Node loadability (`--check`) of any that are installed. A syntax-broken
|
|
23
|
+
// cli.js (the C1 regression) must force overall status off PASS.
|
|
24
|
+
const SPINE_SCRIPTS = ["cli.js", "install.js", "state.js"];
|
|
25
|
+
|
|
19
26
|
const REQUIRED_HOOKS = [
|
|
20
27
|
"session-start.js", "auto-update.js", "branch-guard.js", "pre-push.js",
|
|
21
28
|
"pre-deploy-gate.js", "migration-guard.js", "git-guardrails.js",
|
|
@@ -54,10 +61,27 @@ function inspectInstall(homes) {
|
|
|
54
61
|
return { status: "fail", score: 0, issues: ["no Qualia install config found"], targets: [] };
|
|
55
62
|
}
|
|
56
63
|
const issues = [];
|
|
64
|
+
let spineBroken = false;
|
|
57
65
|
for (const home of homes) {
|
|
58
66
|
for (const f of REQUIRED_BIN) {
|
|
59
67
|
if (!exists(path.join(home.dir, "bin", f))) issues.push(`${home.name}: missing bin/${f}`);
|
|
60
68
|
}
|
|
69
|
+
// Spine probe: Node loadability of spine scripts that ARE installed.
|
|
70
|
+
// cli.js / install.js run via npx / the installer and are not part of the
|
|
71
|
+
// runtime manifest, so their ABSENCE is not a failure (state.js absence is
|
|
72
|
+
// already reported via REQUIRED_BIN). But any spine script that IS present
|
|
73
|
+
// must pass `--check` — a syntax-broken installed spine forces status off
|
|
74
|
+
// PASS (this is the check that catches a dead cli.js).
|
|
75
|
+
for (const f of SPINE_SCRIPTS) {
|
|
76
|
+
const fp = path.join(home.dir, "bin", f);
|
|
77
|
+
if (!exists(fp)) continue;
|
|
78
|
+
const r = spawnSync(process.execPath, ["--check", fp], { encoding: "utf8" });
|
|
79
|
+
if (r.status !== 0) {
|
|
80
|
+
const detail = (r.stderr || r.error?.message || "").split("\n")[0].trim();
|
|
81
|
+
issues.push(`${home.name}: spine bin/${f} fails --check${detail ? ` (${detail})` : ""}`);
|
|
82
|
+
spineBroken = true;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
61
85
|
if (home.name === "Claude") {
|
|
62
86
|
if (!exists(path.join(home.dir, "CLAUDE.md"))) issues.push("Claude: missing CLAUDE.md");
|
|
63
87
|
if (!exists(path.join(home.dir, "settings.json"))) issues.push("Claude: missing settings.json");
|
|
@@ -67,6 +91,16 @@ function inspectInstall(homes) {
|
|
|
67
91
|
if (!exists(path.join(home.dir, "config.toml"))) issues.push("Codex: missing config.toml");
|
|
68
92
|
}
|
|
69
93
|
}
|
|
94
|
+
// A broken spine is a hard failure: the install cannot run. Force fail status
|
|
95
|
+
// (drives overall status to FAIL) and zero the score regardless of other issues.
|
|
96
|
+
if (spineBroken) {
|
|
97
|
+
return {
|
|
98
|
+
status: "fail",
|
|
99
|
+
score: 0,
|
|
100
|
+
issues,
|
|
101
|
+
targets: homes.map((h) => h.name),
|
|
102
|
+
};
|
|
103
|
+
}
|
|
70
104
|
return {
|
|
71
105
|
status: issues.length ? "degraded" : "pass",
|
|
72
106
|
score: issues.length ? Math.max(6, 20 - issues.length * 2) : 20,
|
package/hooks/migration-guard.js
CHANGED
|
@@ -161,12 +161,12 @@ if (realCreateTables.length > 0 && !/ENABLE\s+ROW\s+LEVEL\s+SECURITY/i.test(scan
|
|
|
161
161
|
}
|
|
162
162
|
|
|
163
163
|
if (errors.length > 0) {
|
|
164
|
-
console.
|
|
164
|
+
console.error("⬢ Migration guard — dangerous patterns found:");
|
|
165
165
|
for (const e of errors) {
|
|
166
|
-
console.
|
|
166
|
+
console.error(` ✗ ${e}`);
|
|
167
167
|
}
|
|
168
|
-
console.
|
|
169
|
-
console.
|
|
168
|
+
console.error("");
|
|
169
|
+
console.error("Fix these before proceeding. If intentional, ask Fawzi to approve.");
|
|
170
170
|
_trace("migration-guard", "block", { errors });
|
|
171
171
|
process.exit(2);
|
|
172
172
|
}
|
package/hooks/pre-deploy-gate.js
CHANGED
|
@@ -73,7 +73,7 @@ function runGate(label, cmd, args, { required = true } = {}) {
|
|
|
73
73
|
const r = spawnSync(cmd, args, {
|
|
74
74
|
stdio: ["ignore", "pipe", "pipe"],
|
|
75
75
|
encoding: "utf8",
|
|
76
|
-
timeout:
|
|
76
|
+
timeout: 120000,
|
|
77
77
|
shell: process.platform === "win32",
|
|
78
78
|
});
|
|
79
79
|
if (r.status === 0) {
|
|
@@ -170,7 +170,10 @@ function enforceShipPolicy() {
|
|
|
170
170
|
|
|
171
171
|
// If this is not a Qualia-managed project, keep the legacy behavior: run the
|
|
172
172
|
// quality/security gates but do not invent state.
|
|
173
|
-
if (!state || !status)
|
|
173
|
+
if (!state || !status) {
|
|
174
|
+
console.error("⬢ pre-deploy-gate: no ship-state (brownfield) — skipping ship-policy gate, quality gates still run");
|
|
175
|
+
return;
|
|
176
|
+
}
|
|
174
177
|
|
|
175
178
|
const shippable = status === "polished" || (status === "verified" && verification === "pass");
|
|
176
179
|
if (!shippable && !force) {
|
|
@@ -281,9 +284,16 @@ if (fs.existsSync("tsconfig.json")) {
|
|
|
281
284
|
}
|
|
282
285
|
|
|
283
286
|
// Lint (with QUALIA_SKIP_LINT=1 escape — for the documented "lint blocks
|
|
284
|
-
// ship" friction when the lint failures are pre-existing or auto-fixer-broken)
|
|
287
|
+
// ship" friction when the lint failures are pre-existing or auto-fixer-broken).
|
|
288
|
+
// The skip is OWNER-only, mirroring the QUALIA_SHIP_FORCE role-gate above.
|
|
285
289
|
if (hasScript("lint")) {
|
|
286
|
-
|
|
290
|
+
const skipLint = process.env.QUALIA_SKIP_LINT === "1";
|
|
291
|
+
const lintRole = String(readConfig().role || "").toUpperCase();
|
|
292
|
+
if (skipLint && lintRole !== "OWNER") {
|
|
293
|
+
const lintState = readState();
|
|
294
|
+
blockDeploy("QUALIA_SKIP_LINT is OWNER-only.", (lintState && lintState.next_command) || "/qualia");
|
|
295
|
+
}
|
|
296
|
+
if (skipLint) {
|
|
287
297
|
console.log(" ⚠ Lint skipped (QUALIA_SKIP_LINT=1)");
|
|
288
298
|
_trace("pre-deploy-gate", "skip-lint", { reason: "QUALIA_SKIP_LINT=1" });
|
|
289
299
|
} else {
|
package/hooks/stop-session-log.js
CHANGED
|
@@ -91,13 +91,16 @@ try {
|
|
|
91
91
|
// dedupe marker, so it's a cheap no-op on every turn except the one right
|
|
92
92
|
// after a ship. Wrapped + unref'd so it never blocks or breaks the session.
|
|
93
93
|
try {
|
|
94
|
-
const
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
94
|
+
const autoReportPath = path.join(__dirname, "..", "bin", "auto-report.js");
|
|
95
|
+
if (fs.existsSync(autoReportPath)) {
|
|
96
|
+
const { spawn } = require("child_process");
|
|
97
|
+
const child = spawn(
|
|
98
|
+
process.execPath,
|
|
99
|
+
[autoReportPath],
|
|
100
|
+
{ cwd: process.cwd(), detached: true, stdio: "ignore" },
|
|
101
|
+
);
|
|
102
|
+
child.unref();
|
|
103
|
+
}
|
|
101
104
|
} catch {}
|
|
102
105
|
|
|
103
106
|
// ── Skip if too soon since last write ────────────────────
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "qualia-framework",
|
|
3
|
-
"version": "6.
|
|
3
|
+
"version": "6.8.0",
|
|
4
4
|
"description": "Claude Code and Codex workflow framework by Qualia Solutions. Plan, build, verify, ship.",
|
|
5
5
|
"bin": {
|
|
6
6
|
"qualia-framework": "./bin/cli.js"
|
|
@@ -54,7 +54,11 @@
|
|
|
54
54
|
"docs/onboarding.html",
|
|
55
55
|
"CLAUDE.md",
|
|
56
56
|
"AGENTS.md",
|
|
57
|
-
"guide.md"
|
|
57
|
+
"guide.md",
|
|
58
|
+
"SOUL.md",
|
|
59
|
+
"FLAGS.md",
|
|
60
|
+
"TROUBLESHOOTING.md",
|
|
61
|
+
"CHANGELOG.md"
|
|
58
62
|
],
|
|
59
63
|
"engines": {
|
|
60
64
|
"node": ">=18"
|
package/qualia-design/design-rubric.md
CHANGED
|
@@ -33,7 +33,7 @@ A whole-app `/qualia-polish` scores all 9 dimensions across multiple representat
|
|
|
33
33
|
|
|
34
34
|
| Score | Criteria |
|
|
35
35
|
|---|---|
|
|
36
|
-
| 1 | Inter / Roboto / Arial / system-ui / Space Grotesk
|
|
36
|
+
| 1 | Banned font as primary — Inter / Roboto / Arial / Helvetica / system-ui / Space Grotesk / Montserrat / Poppins / Lato / Open Sans. Or single weight throughout. |
|
|
37
37
|
| 2 | Project font loaded but only one weight, no scale, no letter-spacing variation. |
|
|
38
38
|
| 3 | Project font with 2-3 weights, basic scale (h1/h2/body), readable. |
|
|
39
39
|
| 4 | Distinctive display + refined body pair. Letter-spacing varied semantically (tight headlines, open labels). Tabular numerals on numeric data. |
|
|
@@ -41,6 +41,8 @@ A whole-app `/qualia-polish` scores all 9 dimensions across multiple representat
|
|
|
41
41
|
|
|
42
42
|
Evidence format: `font-family: "<name>" used at line N, scale 14/16/24/40 visible, weights 400/500/700`
|
|
43
43
|
|
|
44
|
+
The banned-font list above mirrors the lists enforced by `bin/slop-detect.mjs` (the static anti-pattern scanner) and `agents/visual-evaluator.md` (the vision evaluator). All three are kept in lockstep — Inter, Roboto, Arial, Helvetica, system-ui, Space Grotesk, Montserrat, Poppins, Lato, Open Sans.
|
|
45
|
+
|
|
44
46
|
### 2. Color cohesion
|
|
45
47
|
|
|
46
48
|
| Score | Criteria |
|
package/rules/architecture.md
CHANGED
|
@@ -119,7 +119,7 @@ Read this file (auto-load via skill or `@rules/architecture.md`) when:
|
|
|
119
119
|
|
|
120
120
|
- Planning a new module or feature with multiple components.
|
|
121
121
|
- The user requests `/qualia-optimize --deepen` or `--alignment`.
|
|
122
|
-
- A verifier is scoring "Container depth & nesting" (per `
|
|
122
|
+
- A verifier is scoring "Container depth & nesting" (per `qualia-design/design-rubric.md` dimension 8).
|
|
123
123
|
- An ADR is being drafted for an architectural fork.
|
|
124
124
|
|
|
125
125
|
Do **not** auto-load this on quick fixes, copy edits, single-component touch-ups — that wastes instruction budget. Use judgment.
|
package/rules/grounding.md
CHANGED
|
@@ -102,7 +102,9 @@ Every skill that spawns an agent must order the prompt from most-stable to most-
|
|
|
102
102
|
- Per-task content goes LAST. Mixing task-specific files into `<phase_context>` breaks cache on every spawn within the same wave.
|
|
103
103
|
- Reference files via `@path` when the harness auto-expands, OR inline the content — but pick one and stick with it per section (switching styles breaks prefix match).
|
|
104
104
|
|
|
105
|
-
## Design Quality Rubric (any dimension < 3 = mandatory fix before commit)
|
|
105
|
+
## Design Quality Rubric (pre-commit SUBSET — any dimension < 3 = mandatory fix before commit)
|
|
106
|
+
|
|
107
|
+
> This 6-dimension table (Typography, Color, Spacing, States, Responsiveness, Accessibility) is the fast pre-commit gate every agent runs inline. It is a **deliberate SUBSET** of the authoritative 9-dimension rubric at `@qualia-design/design-rubric.md` (Typography, Color cohesion, Spatial rhythm, Layout originality, Shadow & depth, Motion intent, Microcopy specificity, Container depth & nesting, Visual system & graphics). For verifier scoring and `/qualia-polish --critique`, the 9-dimension rubric is authoritative — read it, don't score from this subset.
|
|
106
108
|
|
|
107
109
|
| Dimension | 1 (Fail) | 3 (Acceptable) | 5 (Excellent) |
|
|
108
110
|
|-----------------|-----------------------------------------|------------------------------------------|--------------------------------------------|
|
package/rules/speed.md
CHANGED
|
@@ -26,14 +26,14 @@ MCP servers impose a **token tax**: their tool definitions consume context-windo
|
|
|
26
26
|
- The MCP returns structured JSON that would be expensive to parse from CLI text output.
|
|
27
27
|
- The MCP enforces governance (RLS-aware queries, scoped DB credentials) that the CLI doesn't.
|
|
28
28
|
|
|
29
|
-
If a `/skill-name` exists that wraps a CLI, prefer the skill — it's been hardened. Canonical example:
|
|
29
|
+
If a `/skill-name` exists that wraps a CLI, prefer the skill — it's been hardened. Canonical example: drive Supabase through `npx supabase` (migrations, type generation, local dev, SQL) instead of the Supabase MCP — the CLI hits the same API at zero token cost and replaces the bulk of the MCP tool surface (see `infrastructure.md`).
|
|
30
30
|
|
|
31
31
|
### MCP tier-list (when each is justified)
|
|
32
32
|
|
|
33
33
|
| Server | Always-on? | Justification |
|
|
34
34
|
|---|---|---|
|
|
35
35
|
| `claude-in-chrome` | On-demand | Browser automation has no CLI equivalent; use for QA flows only |
|
|
36
|
-
| `supabase` MCP | **Off** in favor of
|
|
36
|
+
| `supabase` MCP | **Off** in favor of `npx supabase` CLI | CLI covers 95% of operations; MCP only if you need branch management interactively |
|
|
37
37
|
| `context7` | On-demand | Library docs at runtime — no CLI alternative for Context7 itself |
|
|
38
38
|
| `notebooklm-mcp` | On-demand | NotebookLM has no CLI; only loaded when researching against existing notebooks |
|
|
39
39
|
| `firecrawl-mcp` | On-demand | Web scraping; only loaded when feature requires it |
|
package/skills/qualia-idk/SKILL.md
CHANGED
|
@@ -207,13 +207,13 @@ With all three reports + `<user_confusion>` + `<session_context>` in hand, produ
|
|
|
207
207
|
|---|---|
|
|
208
208
|
| Plan says built but code has stubs | `/qualia-plan {N} --gaps` → `/qualia-build` → `/qualia-verify` |
|
|
209
209
|
| Verify FAILed and no postmortem ran | `/qualia-postmortem` → `/qualia-plan {N} --gaps` → `/qualia-build` |
|
|
210
|
-
| Stale `.continue-here.md`, ongoing context | `/qualia
|
|
210
|
+
| Stale `.continue-here.md`, ongoing context | `/qualia` (restores from `.continue-here.md` / STATE.md) |
|
|
211
211
|
| Brownfield drift (plan and code diverged hard) | `/qualia-map` → `/qualia-plan {N} --gaps` |
|
|
212
212
|
| Phase context missing (no `/qualia-scope` ran) | `/qualia-scope {N}` → `/qualia-plan {N}` |
|
|
213
213
|
| Specific error, scope clear | `/qualia-fix '<symptom>'` |
|
|
214
214
|
| Performance feels off, no profile | `/qualia-fix --perf '<route>'` or `/qualia-optimize --perf` |
|
|
215
215
|
| Design feels off | `/qualia-polish --critique` then `/qualia-polish` |
|
|
216
|
-
| User is overwhelmed | `/qualia-
|
|
216
|
+
| User is overwhelmed | `/qualia-handoff` (save handoff), come back later |
|
|
217
217
|
| Truly nothing actionable found | Ask one specific question; don't invent a sequence |
|
|
218
218
|
|
|
219
219
|
Pick the sequence that fits the actual evidence. Substitute real `{N}` from the Plan-view scan.
|
|
@@ -240,6 +240,6 @@ node ${QUALIA_BIN}/qualia-ui.js end "DIAGNOSED" "{first command in the sequence,
|
|
|
240
240
|
- User knows what they're doing and just wants the next command → `/qualia`
|
|
241
241
|
- User has a specific error message they want fixed → `/qualia-fix '<symptom>'`
|
|
242
242
|
- User wants to review code quality → `/qualia-review`
|
|
243
|
-
- User wants to pause and come back → `/qualia-
|
|
243
|
+
- User wants to pause and come back → `/qualia-handoff`
|
|
244
244
|
|
|
245
245
|
`/qualia-idk` is specifically for **"I'm not sure what I'm even looking at"** situations. Route to sharper tools when the question is sharper.
|
package/skills/qualia-polish/REFERENCE.md
CHANGED
|
@@ -32,6 +32,8 @@ The capture script (`scripts/playwright-capture.mjs`) auto-selects in this order
|
|
|
32
32
|
3. `~/.cache/ms-playwright/chromium-{version}/chrome-{linux64,linux,mac,win}/chrome` — if Playwright was ever installed for browsers but the package isn't import-resolvable
|
|
33
33
|
4. `which google-chrome` / `chromium` / `chromium-browser` / `chrome` — system browser fallback
|
|
34
34
|
|
|
35
|
+
All four backends capture **full-page** (top-to-bottom, including below-fold content), not just the first viewport-height — the evaluator scores card grids, CTAs, and footers, so the capture must contain them. Playwright uses `fullPage: true`; the binary-direct backends omit any fixed `--window-size` height clamp so the headless render extends to the full document height.
|
|
36
|
+
|
|
35
37
|
For backends 3 and 4 (binary-direct), the script uses `--headless=new --screenshot --virtual-time-budget`. Less precise than Playwright's `networkidle` waiting but works without any npm dependency.
|
|
36
38
|
|
|
37
39
|
Setup hints if all four fail:
|
|
@@ -70,6 +72,7 @@ Role: @${QUALIA_AGENTS}/visual-evaluator.md
|
|
|
70
72
|
</product>
|
|
71
73
|
|
|
72
74
|
<screenshots>
|
|
75
|
+
One full-page PNG per viewport (captured top-to-bottom, includes below-fold content):
|
|
73
76
|
- mobile (375px): /tmp/qpl-{ts}/iter-{N}/mobile-375.png
|
|
74
77
|
- tablet (768px): /tmp/qpl-{ts}/iter-{N}/tablet-768.png
|
|
75
78
|
- desktop (1440px): /tmp/qpl-{ts}/iter-{N}/desktop-1440.png
|
|
@@ -150,6 +153,8 @@ the {dim} dimension at {score}. Your single task: fix that one dimension.
|
|
|
150
153
|
|
|
151
154
|
## Iteration log entry (what `loop.mjs record` writes to state.json.iterations[])
|
|
152
155
|
|
|
156
|
+
`tokens_used` is a **deterministic fixed charge** (`PER_ITER_TOKENS = 14500`) added by `loop.mjs` per iteration — NOT a number reported by the evaluator. The evaluator's JSON no longer carries a `tokens_used` field; trusting a model-guessed count to drive the budget kill-switch was unsound, so the loop charges the constant below for every iteration regardless of what any agent claims.
|
|
157
|
+
|
|
153
158
|
```json
|
|
154
159
|
{
|
|
155
160
|
"iteration": 1,
|
|
@@ -189,20 +194,20 @@ When the kill trigger fires, the verdict becomes `killed_regression` and `state.
|
|
|
189
194
|
| 6 | ~90K | default |
|
|
190
195
|
| 8 | ~120K | hard cap; pass `--budget 150000` to allow |
|
|
191
196
|
|
|
192
|
-
Per-iteration cost
|
|
193
|
-
- 3 screenshot reads ≈ 9K
|
|
197
|
+
Per-iteration cost is charged as a **fixed constant** — `loop.mjs` adds `PER_ITER_TOKENS = 14500` per iteration to `state.tokens_used` and the kill-switch trips off that deterministic sum, never off an agent's self-estimate. The breakdown below is the rationale for the constant, not a per-run measurement:
|
|
198
|
+
- 3 full-page screenshot reads ≈ 9K (taller PNGs than viewport-height crops, but still 3 reads)
|
|
194
199
|
- rubric + brief inlined ≈ 2K (cached after iter 1)
|
|
195
200
|
- previous-iteration delta ≈ 0.5K
|
|
196
201
|
- 3 fix-builder spawns × (file read + edit + commit-fix call) ≈ 3K
|
|
197
|
-
- **per-iteration
|
|
202
|
+
- **per-iteration constant = 14.5K** (deterministic — same charge every iteration)
|
|
198
203
|
|
|
199
204
|
## Self-test scenarios (mapping to spec)
|
|
200
205
|
|
|
201
206
|
| # | Fixture | Expected | Verifier |
|
|
202
207
|
|---|---|---|---|
|
|
203
|
-
| 1 | `fixtures/clean.html` | SUCCESS in 1-2 iterations, all dims ≥ 4 | run capture, run evaluator inline, assert pass |
|
|
204
|
-
| 2 | `fixtures/broken.html` | SUCCESS in 4-6 iters; identifies banned font + gradient + 3-card grid + side-stripe + generic CTA | each fix-builder commits a `qpl-N:` change; final eval all dims ≥ 3 |
|
|
205
|
-
| 3 | Kill-switch | KILL at iter ≤ 4 with `LOOP_REGRESSION_DETECTED` | call `loop.mjs record` 3× with the same fingerprint; assert exit 3 + correct verdict |
|
|
208
|
+
| 1 | `fixtures/clean.html` | Each viewport PNG is full-page (height > viewport-height when the fixture scrolls); SUCCESS in 1-2 iterations, all dims ≥ 4 | run capture, assert PNG is full-page, run evaluator inline, assert pass |
|
|
209
|
+
| 2 | `fixtures/broken.html` | SUCCESS in 4-6 iters; identifies banned font + gradient + 3-card grid + side-stripe + generic CTA, INCLUDING below-fold offenders only visible in a full-page shot | each fix-builder commits a `qpl-N:` change; final eval all dims ≥ 3 |
|
|
210
|
+
| 3 | Kill-switch | KILL at iter ≤ 4 with `LOOP_REGRESSION_DETECTED`; `state.tokens_used` increments by exactly `PER_ITER_TOKENS` (14500) per recorded iteration | call `loop.mjs record` 3× with the same fingerprint; assert exit 3 + correct verdict + deterministic token charge |
|
|
206
211
|
|
|
207
212
|
The pilot-results doc at `docs/playwright-loop-pilot-results.md` records the actual outcome from `bash scripts/_self-tests.sh` (Scenario 3 is exercised by a deterministic unit-style invocation; Scenarios 1+2 require a real vision pass and are run by Claude when the loop ships).
|
|
208
213
|
|
package/skills/qualia-polish/SKILL.md
CHANGED
|
@@ -32,7 +32,7 @@ The first argument selects the scope. Stage selection follows from scope.
|
|
|
32
32
|
| `/qualia-polish --redesign` | **Redesign** | ~30m | all + Stage 1 mandatory + 2 vision iterations |
|
|
33
33
|
| `/qualia-polish --critique` | **Critique** | read-only | 0, 4, 5 (no edits) |
|
|
34
34
|
| `/qualia-polish --quick` | **Quick** | ~1m | 0, 2, 7 (gates only, no vision loop) |
|
|
35
|
-
| `/qualia-polish --vibe` | **Aesthetic pivot** | ~3m | token-only direction swap |
|
|
35
|
+
| `/qualia-polish --vibe` | **Aesthetic pivot** | ~3m | token-only direction swap, then 6, 7 (mandatory verify) |
|
|
36
36
|
| `/qualia-polish --loop {url}` | **Loop** | ~5-15m | autonomous see/fix/verify, max 8 iterations |
|
|
37
37
|
|
|
38
38
|
Other flags: `--register=brand|product` overrides register inference. Vibe-specific flags: `--variants N`, `--extract URL|image`, `--sync`, `--write`. Loop-specific flags: `--brief PATH`, `--max N`, `--viewports 375,768,1440`, `--ref PATH`, `--budget 100000`.
|
|
@@ -73,6 +73,21 @@ node ${QUALIA_SKILLS}/qualia-polish/scripts/vibe-extract.mjs --source https://ex
|
|
|
73
73
|
|
|
74
74
|
Default flow proposes one opinionated direction per `rules/one-opinion.md`. `--variants` is opt-in only. `--extract` stages a reference screenshot for the visual evaluator to reverse-engineer into DESIGN.md tokens; the user must review low-confidence extracted values before application. `--sync --write` patches DESIGN.md to reflect tokens already present in code.
|
|
75
75
|
|
|
76
|
+
**Mandatory post-vibe verify (the token swap does NOT terminate the run).** After applying the new tokens, a `--vibe` run MUST NOT stop at the swap — it routes back through **Stage 6 — Verify**. Before claiming done, run:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
# 1. TypeScript still compiles (a token rename can break a typed theme map)
|
|
80
|
+
npx tsc --noEmit 2>&1 | tail -10
|
|
81
|
+
|
|
82
|
+
# 2. Anti-pattern scan on the files the swap touched (new tokens can reintroduce slop)
|
|
83
|
+
node bin/slop-detect.mjs {changed_files}
|
|
84
|
+
[ $? -eq 0 ] || { echo "vibe failed — slop-detect found critical issues in the new tokens"; exit 1; }
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
3. **Scoped vision check (best-effort, only if a dev server is reachable).** Capture one screenshot per viewport via `scripts/playwright-capture.mjs` and spawn a single `agents/visual-evaluator.md` pass scored ONLY on the dimensions a token swap can move: **color, typography, shadow, motion**. (Layout/container/microcopy/graphics are out of scope for `--vibe` — do not score or fix them here.) If there is no dev server or no resolvable browser backend, skip this step and note it in the report; never block the vibe on missing optional tooling. Any in-scope dimension scoring < 3 is a regression in the new direction — re-tune the offending tokens and re-verify; do not ship.
|
|
88
|
+
|
|
89
|
+
Only after Stage 6 passes does the `--vibe` run reach Stage 7 (commit & state).
|
|
90
|
+
|
|
76
91
|
## Setup gates (non-optional, every scope)
|
|
77
92
|
|
|
78
93
|
Before any work — design or otherwise — pass these gates. Skipping them produces generic output that ignores the project.
|
|
@@ -163,6 +178,8 @@ Each agent receives:
|
|
|
163
178
|
|
|
164
179
|
For Component scope: do the work in main context. Read, fix, verify.
|
|
165
180
|
|
|
181
|
+
**Optional best-effort vision check (Component scope).** If a dev server is reachable (`http://localhost:3000` or a port detected via `lsof`) AND `scripts/playwright-capture.mjs` can resolve a browser backend, capture ONE screenshot at a single viewport (desktop 1440 is sufficient for an isolated component) and spawn one `agents/visual-evaluator.md` pass scored on component-relevant dimensions ONLY — Typography, Color, Shadow, Motion, Microcopy (skip Layout/Container/Graphics per the rubric's scope guard). This is opt-in by environment: if there is no dev server or no resolvable backend, **omit the vision check without error and record that it was unavailable** — never block a component touch-up on missing optional tooling.
|
|
182
|
+
|
|
166
183
|
**Apply fixes scoped by what's missing:**
|
|
167
184
|
|
|
168
185
|
| If the file scores low on… | Apply fix from… |
|
|
@@ -216,7 +233,7 @@ If a11y < 90 OR axe critical/serious violations: **fix programmatically** (th
|
|
|
216
233
|
|
|
217
234
|
### Stage 4 — Vision loop (Redesign scope only — max 2 iterations)
|
|
218
235
|
|
|
219
|
-
Use `skills/qualia-polish/scripts/playwright-capture.mjs` (Playwright backend, with Chrome-binary fallback). Capture
|
|
236
|
+
Use `skills/qualia-polish/scripts/playwright-capture.mjs` (Playwright backend, with Chrome-binary fallback). Capture one **full-page** screenshot at each of 3 viewports: 375 / 768 / 1440 — these match what `agents/visual-evaluator.md` expects, and what the `--loop` mode captures. The `height` below sets the viewport window, not the screenshot height — each PNG runs top-to-bottom so the evaluator scores below-fold sections too. Single browser (Chromium) is fine in 2026 — cross-browser CSS rendering differences are vanishingly rare.
|
|
220
237
|
|
|
221
238
|
```
|
|
222
239
|
viewports: [
|
|
@@ -325,6 +342,6 @@ node ${QUALIA_BIN}/qualia-ui.js end "POLISHED" "{next command — depends on con
|
|
|
325
342
|
| `bin/slop-detect.mjs not found` | Framework not installed in project | Run `npx qualia install` or pull script from framework repo |
|
|
326
343
|
| `PRODUCT.md missing` | Project predates the PRODUCT.md substrate | Run setup (ask 5 questions, generate). For Component scope, can proceed with nudge. |
|
|
327
344
|
| `Lighthouse not installed` | Optional tool | Skip Stage 3 numeric gates, note in report. Don't fail. |
|
|
328
|
-
| `
|
|
345
|
+
| `playwright-capture.mjs cannot resolve a browser backend` | None of the four backends resolve: no `playwright`/`playwright-core` package, no cached Chromium under `~/.cache/ms-playwright`, no system Chrome/Chromium on PATH | Skip Stage 4 vision loop (and any screenshot-dependent verification step). Note in report. Install a backend: `npm i -D playwright && npx playwright install chromium` (or put `google-chrome` on PATH). |
|
|
329
346
|
| Vision loop oscillates between iterations | Rubric not anchored properly | Verify rubric prompt instructs "default to 3, only 1-2 = fix". Hard cap at 2 iterations. |
|
|
330
347
|
| User says "you missed X" after polish completes | Scope was too narrow | Re-run with wider scope (`/qualia-polish` whole-app). Don't argue scope. |
|
|
@@ -14,12 +14,13 @@
|
|
|
14
14
|
*
|
|
15
15
|
* Eval JSON contract (what the vision evaluator returns):
|
|
16
16
|
* {
|
|
17
|
-
* "iteration": 1,
|
|
17
|
+
* "iteration": 1, // optional; if present must equal state.iteration+1 (monotonicity assertion)
|
|
18
18
|
* "viewport_results": [{ viewport, scores, top_issues }],
|
|
19
19
|
* "aggregate_scores": { typography, color, spatial, layout, shadow, motion, microcopy, container, graphics },
|
|
20
|
-
* "top_issues": [{ dim, severity, description, likely_file, fix }]
|
|
21
|
-
* "tokens_used": 14500
|
|
20
|
+
* "top_issues": [{ dim, severity, description, likely_file, fix }]
|
|
22
21
|
* }
|
|
22
|
+
* NOTE: any ev.tokens_used is IGNORED — the budget charges a fixed
|
|
23
|
+
* PER_ITER_TOKENS per iteration (deterministic kill-switch).
|
|
23
24
|
*
|
|
24
25
|
* State JSON shape (written by this script, read by Claude):
|
|
25
26
|
* {
|
|
@@ -35,6 +36,12 @@ import { spawnSync } from "node:child_process";
|
|
|
35
36
|
|
|
36
37
|
const DIMS = ["typography", "color", "spatial", "layout", "shadow", "motion", "microcopy", "container", "graphics"];
|
|
37
38
|
|
|
39
|
+
// Deterministic per-iteration token charge (evaluator + 3 builders ≈ 14500,
|
|
40
|
+
// per REFERENCE.md:197). The budget kill-switch must NOT trust ev.tokens_used —
|
|
41
|
+
// that is a model-guessed number and an under-report would defeat the budget
|
|
42
|
+
// guard. Charge a fixed, known cost per iteration instead.
|
|
43
|
+
const PER_ITER_TOKENS = 14500;
|
|
44
|
+
|
|
38
45
|
// ── helpers ──────────────────────────────────────────────────────────────
|
|
39
46
|
function loadState(p) {
|
|
40
47
|
if (!existsSync(p)) { console.error(`state file not found: ${p}`); exit(2); }
|
|
@@ -98,7 +105,17 @@ function cmdRecord() {
|
|
|
98
105
|
const state = loadState(statePath);
|
|
99
106
|
const ev = JSON.parse(readFileSync(evalPath, "utf8"));
|
|
100
107
|
|
|
101
|
-
|
|
108
|
+
// Monotonicity assertion: each record() advances exactly one iteration. If the
|
|
109
|
+
// evaluator reports an iteration that disagrees with the script's own counter,
|
|
110
|
+
// the loop is desynced — fail loudly instead of trusting the eval's number,
|
|
111
|
+
// which would corrupt regression fingerprinting (state.iteration is the key
|
|
112
|
+
// used by the regression check further down in this function) and budget accounting.
|
|
113
|
+
const expected = state.iteration + 1;
|
|
114
|
+
if (ev.iteration != null && ev.iteration !== expected) {
|
|
115
|
+
console.error(`loop desync: eval.iteration=${ev.iteration} expected=${expected}`);
|
|
116
|
+
exit(2);
|
|
117
|
+
}
|
|
118
|
+
state.iteration = expected;
|
|
102
119
|
const scores = ev.aggregate_scores || {};
|
|
103
120
|
const failingDims = DIMS.filter((d) => (scores[d] ?? 3) < 3);
|
|
104
121
|
const total = DIMS.reduce((a, d) => a + (scores[d] ?? 3), 0);
|
|
@@ -133,7 +150,8 @@ function cmdRecord() {
|
|
|
133
150
|
return its.length >= 3 && its.every((n, i, arr) => i === 0 || n === arr[i - 1] + 1);
|
|
134
151
|
});
|
|
135
152
|
|
|
136
|
-
|
|
153
|
+
// Charge the fixed per-iteration cost — do NOT trust ev.tokens_used.
|
|
154
|
+
state.tokens_used += PER_ITER_TOKENS;
|
|
137
155
|
|
|
138
156
|
state.iterations.push({
|
|
139
157
|
iteration: state.iteration,
|
|
@@ -142,7 +160,7 @@ function cmdRecord() {
|
|
|
142
160
|
pass,
|
|
143
161
|
failing_dims: failingDims,
|
|
144
162
|
top_issues: issues,
|
|
145
|
-
tokens_used:
|
|
163
|
+
tokens_used: PER_ITER_TOKENS,
|
|
146
164
|
timestamp: new Date().toISOString(),
|
|
147
165
|
});
|
|
148
166
|
|