explainmyrepo 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -0
- package/assets/design-system/design-system.css +833 -0
- package/assets/design-system/theme-example.css +83 -0
- package/bin/explainmyrepo.mjs +115 -0
- package/kb/ask-kb.mjs +1487 -0
- package/kb/build-kb.mjs +353 -0
- package/kb/corpus-rules.mjs +341 -0
- package/kb/dep-graph.mjs +184 -0
- package/kb/entrypoints.mjs +207 -0
- package/kb/extract-symbols.mjs +322 -0
- package/kb/index-primer.mjs +255 -0
- package/kb/kb-mcp-server.mjs +186 -0
- package/kb/kb.config.mjs +1362 -0
- package/kb/make-dropin.mjs +224 -0
- package/kb/resolve-deps.mjs +126 -0
- package/package.json +52 -0
- package/src/brain.mjs +298 -0
- package/src/build-context.mjs +66 -0
- package/src/claude.mjs +97 -0
- package/src/env.mjs +77 -0
- package/src/orchestrator.mjs +419 -0
- package/src/run-tool.mjs +49 -0
- package/tools/CONTRACT.md +301 -0
- package/tools/assemble-page.mjs +631 -0
- package/tools/build-kb.mjs +159 -0
- package/tools/clone-repo.mjs +161 -0
- package/tools/deploy.mjs +160 -0
- package/tools/generate-image.mjs +280 -0
- package/tools/make-diagrams.mjs +835 -0
- package/tools/make-favicon.mjs +145 -0
- package/tools/make-pack.mjs +295 -0
- package/tools/make-social-card.mjs +198 -0
- package/tools/notify.mjs +327 -0
- package/tools/publish-repo.mjs +156 -0
- package/tools/quality-grade.mjs +746 -0
- package/tools/readme-enhance.mjs +310 -0
- package/tools/repo-seo.mjs +143 -0
|
@@ -0,0 +1,746 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// quality-grade.mjs — STATION 7 tool: the dual-gate completion criterion.
|
|
3
|
+
//
|
|
4
|
+
// CONTRACT: tools/CONTRACT.md (the one BuildContext, the uniform invocation/return
|
|
5
|
+
// convention, PURE + FAIL-LOUD). Paired ADR-0005 Station 7 / "The QA System";
|
|
6
|
+
// paired DDD §8.5 Scorecard + §12 (the QA dual-gate as first-class domain).
|
|
7
|
+
//
|
|
8
|
+
// JOB (one mechanical job): render the ALREADY-ASSEMBLED site LOCALLY in a real
|
|
9
|
+
// browser (Playwright) at 390px (mobile) + 1440px (desktop), then grade it on two
|
|
10
|
+
// independent channels that DON'T fight each other:
|
|
11
|
+
//
|
|
12
|
+
// (1) INV-18 PRESENCE — a deterministic DOM check (NOT the vision model). Playwright
|
|
13
|
+
// asserts the ARCHITECTURE diagram AND the PROCESS/DATA-FLOW diagram elements
|
|
14
|
+
// exist and are actually visible (rendered box > 0, not display:none) inside the
|
|
15
|
+
// mandatory #how-it-works block. Present/absent is decided HERE, in the DOM — the
|
|
16
|
+
// vision model is never asked "is it there?", only "does it read clearly?".
|
|
17
|
+
//
|
|
18
|
+
// (2) CRAFT + SUBSTANCE — the GPT-4o vision grade against the VERBATIM Gate A/B
|
|
19
|
+
// rubric (A1..A6 substance + B1..B5 anti-slop, each 0–100), graded from a few
|
|
20
|
+
// FULL-RESOLUTION, viewport-height SECTION CROPS (hero · what-it-is · how-it-works
|
|
21
|
+
// · get-started · the-pack), NOT one giant full-page screenshot downscaled into
|
|
22
|
+
// mush. Each crop is capped at the device viewport so the model judges real,
|
|
23
|
+
// sharp pixels (typography, alignment, imagery craft, diagram legibility).
|
|
24
|
+
//
|
|
25
|
+
// headlineScore = MIN across all 11 criteria. A device passes iff evaluatePass() holds (mean >= 90, min >= 85, all five operator questions YES)
|
|
26
|
+
// AND INV-18 is clean (both diagrams DOM-present + DOM-visible + vision says each reads
|
|
27
|
+
// clearly). The build passes iff BOTH devices pass. Malformed / missing per-criterion
|
|
28
|
+
// scores → LOUD STOP, never a silent pass (ADR-0005 loud-fail postcondition; DDD §12.3).
|
|
29
|
+
//
|
|
30
|
+
// PURE: reads ONLY its declared slice of build.json (the `page` slot) + the
|
|
31
|
+
// OPENAI_API_KEY from the environment. Writes ONLY the `quality` slot + its two
|
|
32
|
+
// screenshots under <build-dir>/assets/. Never reads another tool's slot/files;
|
|
33
|
+
// never writes another tool's slot/files.
|
|
34
|
+
// FAIL-LOUD: any failure → non-zero exit + a clear `error` string; never a silent
|
|
35
|
+
// PASS, never a placeholder scorecard.
|
|
36
|
+
//
|
|
37
|
+
// Usage: node tools/quality-grade.mjs <build-dir>
|
|
38
|
+
// env: OPENAI_API_KEY (required — without it the page CANNOT be graded)
|
|
39
|
+
// QUALITY_VISION_MODEL (optional, default "gpt-4o")
|
|
40
|
+
// OPENAI_BASE_URL (optional, default "https://api.openai.com/v1")
|
|
41
|
+
|
|
42
|
+
import fs from 'node:fs';
|
|
43
|
+
import path from 'node:path';
|
|
44
|
+
import http from 'node:http';
|
|
45
|
+
import { fileURLToPath, pathToFileURL } from 'node:url';
|
|
46
|
+
|
|
47
|
+
const _ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..');
|
|
48
|
+
|
|
49
|
+
// OpenAI key from env (OPENAI_API_KEY | OPEN_AI_KEY) else the repo-root .env — mirrors
|
|
50
|
+
// generate-image so the grader uses the SAME credential the rest of the recipe does. Secrets
|
|
51
|
+
// still come from the environment / a gitignored .env, never from build.json (CONTRACT (d)).
|
|
52
|
+
/**
 * Resolve the OpenAI API key from the environment, falling back to the repo-root .env.
 *
 * Lookup order:
 *   1. process.env.OPENAI_API_KEY, then process.env.OPEN_AI_KEY — the first one that is
 *      non-blank after trimming wins. Each variable is checked on its own, so a blank
 *      OPENAI_API_KEY cannot hide a usable OPEN_AI_KEY.
 *   2. The first non-blank OPENAI_API_KEY / OPEN_AI_KEY assignment in `<_ROOT>/.env`.
 *      Blank lines, `#` comment lines and lines without `=` are ignored; one pair of
 *      matching surrounding quotes (single or double) is stripped from the value.
 *
 * @returns {string|null} the trimmed key, or null when no source yields one (including
 *   when the .env file cannot be read).
 */
function loadOpenAiKey() {
  const keyNames = ['OPENAI_API_KEY', 'OPEN_AI_KEY'];

  for (const name of keyNames) {
    const fromProc = process.env[name]?.trim();
    if (fromProc) return fromProc;
  }

  let text;
  try { text = fs.readFileSync(path.join(_ROOT, '.env'), 'utf8'); } catch { return null; }

  for (const raw of text.split(/\r?\n/)) {
    const line = raw.trim();
    if (!line || line.startsWith('#')) continue;
    const eq = line.indexOf('=');
    if (eq === -1) continue;
    if (!keyNames.includes(line.slice(0, eq).trim())) continue;
    let value = line.slice(eq + 1).trim();
    const quotedDouble = value.startsWith('"') && value.endsWith('"');
    const quotedSingle = value.startsWith("'") && value.endsWith("'");
    if (quotedDouble || quotedSingle) value = value.slice(1, -1);
    // an empty assignment does not end the search — a later line may still carry the key
    if (value.trim()) return value.trim();
  }
  return null;
}
|
|
70
|
+
|
|
71
|
+
// ----------------------------------------------------------------------------
|
|
72
|
+
// Uniform return: stdout carries the SINGLE JSON result object and nothing else.
|
|
73
|
+
// Diagnostics go to stderr. Exit code is the source of truth (0 iff ok).
|
|
74
|
+
// ----------------------------------------------------------------------------
|
|
75
|
+
/**
 * Print the tool's single JSON result line on stdout and terminate the process.
 *
 * @param {*} ok       truthy → success; falsy → failure
 * @param {*} outputs  reported as `outputs` on success; replaced by `{}` on failure
 * @param {*} error    reported as `error` on failure; replaced by `null` on success
 * Exit code is 0 when `ok` is truthy, otherwise 1.
 */
function emit(ok, outputs, error) {
  const result = ok
    ? { ok, outputs, error: null }
    : { ok, outputs: {}, error };
  process.stdout.write(`${JSON.stringify(result)}\n`);
  process.exit(ok ? 0 : 1);
}
|
|
79
|
+
// Diagnostics: one tagged line per call on stderr.
const log = (msg) => process.stderr.write('[quality-grade] '.concat(msg, '\n'));
|
|
80
|
+
|
|
81
|
+
// ----------------------------------------------------------------------------
|
|
82
|
+
// The dual-gate rubric — VERBATIM from ADR-0005 / DDD §12.2. Handed to the vision
|
|
83
|
+
// model as a harsh critic. Do NOT paraphrase: this is load-bearing.
|
|
84
|
+
// ----------------------------------------------------------------------------
|
|
85
|
+
const RUBRIC = `You are an EXACTING design-and-substance critic grading a software-explainer web page.
|
|
86
|
+
You are shown SEVERAL full-resolution, sharp SECTION CROPS of ONE page (in document
|
|
87
|
+
order) — together they represent the whole page. Score every criterion 0–100. Judge the
|
|
88
|
+
page AS A WHOLE from the crops; do not penalise a criterion merely because one crop
|
|
89
|
+
doesn't show everything.
|
|
90
|
+
|
|
91
|
+
You are CALIBRATED, not stingy. Excellence is real and reachable: when a criterion
|
|
92
|
+
genuinely clears the bar you award 90s — INCLUDING 95–100 — and you do NOT reflexively
|
|
93
|
+
cap at 90. Slop is equally real: when a criterion is generic or broken you score it low
|
|
94
|
+
and mean it. Your job is to place each criterion in the RIGHT band by the concrete
|
|
95
|
+
SIGNALS below, never to cluster everything in a cautious 70s–80s middle. A genuinely
|
|
96
|
+
publish-ready page reaches 95–100 on the criteria it nails; a templated one does not.
|
|
97
|
+
|
|
98
|
+
SCORING BANDS — anchor every Gate A and Gate B criterion to these SIGNALS, not to a vibe:
|
|
99
|
+
- 95–100 EXCEPTIONAL / publish-ready. A senior engineer sees it and says "I want to put
|
|
100
|
+
this out." Signals: bespoke art direction you could NOT get from a template; a hero or
|
|
101
|
+
diagram that makes you stop and look; copy that names the reader's real situation and the
|
|
102
|
+
payoff in their own terms; diagrams that actually TEACH the mechanism; every section earns
|
|
103
|
+
its place; nothing reads as generic. AWARD this whenever the criterion truly matches.
|
|
104
|
+
- 85–94 STRONG, minor nits. Clearly made by someone who cares — cohesive, intentional,
|
|
105
|
+
mostly delightful — but one or two small, nameable flaws (a slightly cramped section, an
|
|
106
|
+
image more decorative than explanatory, one wobble in the type hierarchy). Excellent-minus.
|
|
107
|
+
- 70–84 DECENT but generic or uneven. Competent and clean yet templated and forgettable,
|
|
108
|
+
OR substance is present but never lands "why it matters to ME," OR the craft is fine but
|
|
109
|
+
the story is just a list of facts. Nothing broken; nothing memorable.
|
|
110
|
+
- 50–69 MEDIOCRE. Flat, listy, default-feeling. Real gaps: weak hierarchy, decorative-only
|
|
111
|
+
imagery, no narrative pull, the reader is assumed to already care.
|
|
112
|
+
- below 40 AI SLOP. Lorem-ipsum energy, default system fonts, stock-template layout, no
|
|
113
|
+
story, no reader in mind — obviously "an LLM dumped this and nobody loved it."
|
|
114
|
+
|
|
115
|
+
Most real, professional pages land 85–94 on their strong criteria and lower on the weak
|
|
116
|
+
ones. Calibration means ACCURATE, not lenient: do not inflate slop to 95, and do not
|
|
117
|
+
deflate genuine excellence to 85 out of habit.
|
|
118
|
+
|
|
119
|
+
GATE A — "Do they actually get it?" (substance):
|
|
120
|
+
- A1 Visual effectiveness — compelling vs flat/forgettable.
|
|
121
|
+
- A2 Storytelling — tells a story vs lists facts.
|
|
122
|
+
- A3 Clueless→convinced — zero knowledge → why it matters → real examples → "oh, cool".
|
|
123
|
+
- A4 Usefulness-to-ME — explicitly answers "how is this useful to YOU" in the reader's OWN
|
|
124
|
+
terms (names a concrete situation + the payoff). Cures engineer-blindness — the assumption
|
|
125
|
+
the reader already cares.
|
|
126
|
+
- A5 Completeness of the arc — never-seen → ready to implement.
|
|
127
|
+
- A6 Implementation confidence — the reader knows EXACTLY what to do next: the Get-Started section
|
|
128
|
+
shows the command, WHAT THEY'LL SEE when they run it, the step-by-step, what they get at the end, and
|
|
129
|
+
what's next, with prerequisites stated. A5 is understanding; A6 is knowing how to ACT on it. A bare
|
|
130
|
+
"just run this" with no sense of what happens or what comes next scores low.
|
|
131
|
+
|
|
132
|
+
GATE B — "Did someone who gives a shit make this?" (craft / anti-slop):
|
|
133
|
+
- B1 Typography & hierarchy — intentional, readable, ranked vs jangly.
|
|
134
|
+
- B2 Alignment & grid — aligned vs subtly-off / amateur.
|
|
135
|
+
- B3 Spacing & rhythm — breathes, consistent vs cramped / random.
|
|
136
|
+
- B4 Strength & polish — cohesive, deliberate vs generic AI-template slop.
|
|
137
|
+
- B5 Imagery craft — beautiful + explanatory + sequenced high→low vs pretty-but-useless;
|
|
138
|
+
INCLUDING the structural SVG diagrams (crisp, legible, genuinely explanatory),
|
|
139
|
+
judged for delight + craft. A "diagram" that is merely ASCII / box-drawing / pipe characters
|
|
140
|
+
typeset as a picture (a screenshot of monospace text boxes) is SLOP — score B5 below 40 and set
|
|
141
|
+
makesMeSmile=false; real diagrams are DRAWN (shapes, cards, arrows), not typeset text.
|
|
142
|
+
|
|
143
|
+
OPERATOR QUALITATIVE GATE — five YES/NO questions (the owner's words). As a harsh critic, answer each
|
|
144
|
+
true/false from the crops; ALL five must be true for the page to be done, independent of the numeric
|
|
145
|
+
axes (a page can clear the numbers and still fail one of these):
|
|
146
|
+
(1) believeIUnderstand — Would this make me believe I understand this?
|
|
147
|
+
(2) approachable — Would this make it approachable?
|
|
148
|
+
(3) explainsToNovice — Would this explain it for somebody who doesn't understand it?
|
|
149
|
+
(4) architectureConfidence — Would it give me confidence I understand the architecture?
|
|
150
|
+
(5) makesMeSmile — Does it make me smile — "oh, that's cool"?
|
|
151
|
+
|
|
152
|
+
INV-18 — CLARITY ONLY. The page is already DOM-verified to CONTAIN both an ARCHITECTURE
|
|
153
|
+
diagram (modules / components / dependencies) and a PROCESS / DATA-FLOW diagram (the
|
|
154
|
+
runtime flow); they live in the "How it works / How is it built?" crop. You do NOT
|
|
155
|
+
decide whether they exist. Your ONLY job for INV-18 is to say whether EACH diagram, as
|
|
156
|
+
rendered in the crops you can see, READS CLEARLY — legible labels, sensible structure,
|
|
157
|
+
not a blurry or scrambled mess. If a diagram is legible and explanatory, readsClearly
|
|
158
|
+
is true; if it's illegible/garbled in the crop, readsClearly is false.
|
|
159
|
+
|
|
160
|
+
For EACH criterion give a written rationale that cites what you actually SEE in the crops
|
|
161
|
+
AND names the band you placed it in (e.g. "85–94: strong, but …") so the score is auditable.`;
|
|
162
|
+
|
|
163
|
+
// Strict JSON shape the grader MUST return (response_format: json_object).
|
|
164
|
+
// NOTE: presence/visibility of the two diagrams is decided by the DOM check, NOT here —
|
|
165
|
+
// the model only reports whether each one READS CLEARLY in the crops.
|
|
166
|
+
const RESPONSE_SPEC = `Return ONLY a JSON object, no prose, with EXACTLY this shape:
|
|
167
|
+
{
|
|
168
|
+
"gateA": { "A1": <int 0-100>, "A2": <int>, "A3": <int>, "A4": <int>, "A5": <int>, "A6": <int> },
|
|
169
|
+
"gateB": { "B1": <int 0-100>, "B2": <int>, "B3": <int>, "B4": <int>, "B5": <int> },
|
|
170
|
+
"operatorQuestions": {
|
|
171
|
+
"believeIUnderstand": <true|false>, "approachable": <true|false>,
|
|
172
|
+
"explainsToNovice": <true|false>, "architectureConfidence": <true|false>,
|
|
173
|
+
"makesMeSmile": <true|false>
|
|
174
|
+
},
|
|
175
|
+
"rationales": {
|
|
176
|
+
"A1": "<what you SAW>", "A2": "...", "A3": "...", "A4": "...", "A5": "...", "A6": "...",
|
|
177
|
+
"B1": "...", "B2": "...", "B3": "...", "B4": "...", "B5": "..."
|
|
178
|
+
},
|
|
179
|
+
"clarity": {
|
|
180
|
+
"architectureReadsClearly": <true|false>,
|
|
181
|
+
"architectureNote": "<what you SAW of the architecture diagram>",
|
|
182
|
+
"flowReadsClearly": <true|false>,
|
|
183
|
+
"flowNote": "<what you SAW of the flow/process diagram>"
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
Every score is an integer 0–100. Every rationale is a non-empty string citing the crops.
|
|
187
|
+
For clarity, judge legibility honestly — an illegible/garbled diagram is readsClearly:false.`;
|
|
188
|
+
|
|
189
|
+
// Gate A (substance) criterion ids — the keys of the `gateA` object in RESPONSE_SPEC.
const CRITERIA_A = [
  'A1',
  'A2',
  'A3',
  'A4',
  'A5',
  'A6',
];

// Gate B (craft / anti-slop) criterion ids — the keys of the `gateB` object in RESPONSE_SPEC.
const CRITERIA_B = [
  'B1',
  'B2',
  'B3',
  'B4',
  'B5',
];

// The five operator yes/no question keys, in the order the rubric numbers them (1)–(5).
const OPERATOR_QUESTIONS = [
  'believeIUnderstand',
  'approachable',
  'explainsToNovice',
  'architectureConfidence',
  'makesMeSmile',
];
|
|
192
|
+
|
|
193
|
+
/**
|
|
194
|
+
* The v1.7 exemplar-anchored gate rule (ADR-0005 §"The QA System" / DDD §12.3 / INV-05). PURE.
|
|
195
|
+
* PASS iff meanScore >= 90 AND min (the worst axis — the anti-slop floor) >= 85 AND all five operator
|
|
196
|
+
* yes/no questions are YES. INV-18 (architecture+flow present & clear) is AND-ed in separately by
|
|
197
|
+
* buildScorecard. Anchored to the owner's own example sites (~88 headline / ~92 mean); a literal
|
|
198
|
+
* "95 on every axis" is unreachable by an honest grader. Exported so the gate logic is unit-testable
|
|
199
|
+
* without a network call or a browser.
|
|
200
|
+
*/
|
|
201
|
+
export function evaluatePass({ mean, min, operatorQuestions } = {}) {
  // Fail closed: both aggregates must be real numbers before any threshold is consulted.
  if (typeof mean !== 'number' || typeof min !== 'number') return false;

  // Numeric bars: overall mean, and the single worst axis.
  if (!(mean >= 90 && min >= 85)) return false;

  // Operator gate: only an ARRAY of exactly five answers counts, each strictly `true`.
  if (!Array.isArray(operatorQuestions) || operatorQuestions.length !== 5) return false;
  return operatorQuestions.every((answer) => answer === true);
}
|
|
207
|
+
|
|
208
|
+
/**
|
|
209
|
+
* The SHIP gate (the OPERATIONAL tier). `evaluatePass` above is the world-class ASPIRATION the refine
|
|
210
|
+
* loop chases and that every scorecard reports the gap to — but holding a genuinely-good page forever
|
|
211
|
+
* because it is "good, not the best reference site" means the tool never delivers. A page SHIPS when it
|
|
212
|
+
* is solidly good AND carries no slop:
|
|
213
|
+
* - mean >= 82 (solidly good overall)
|
|
214
|
+
* - min (worst axis) >= 70 (no genuinely-weak / slop axis — INV-18 separately enforces real diagrams)
|
|
215
|
+
* - the four COMPREHENSION/SAFETY operators are YES (believeIUnderstand, approachable, explainsToNovice,
|
|
216
|
+
* makesMeSmile). `architectureConfidence` is INFORMATIONAL here, NOT a blocker: it is repo-dependent
|
|
217
|
+
* (a one-module library legitimately has little architecture to be "confident" about) and the
|
|
218
|
+
* architecture diagram's real legibility is already hard-gated by INV-18.
|
|
219
|
+
* A shipped-but-not-exemplary page always carries its honest mean + the gap to 90 (never normalized up).
|
|
220
|
+
*/
|
|
221
|
+
// Operator questions that block shipping; `architectureConfidence` is deliberately absent.
export const SHIP_OPERATORS = [
  'believeIUnderstand',
  'approachable',
  'explainsToNovice',
  'makesMeSmile',
];

/**
 * Ship-gate predicate.
 *
 * @param {object} [scores]
 * @param {number} scores.mean  must be a number >= 82
 * @param {number} scores.min   must be a number >= 70
 * @param {Array|object} scores.operatorQuestions  either an array of answers positionally
 *   aligned with OPERATOR_QUESTIONS, or an object keyed by question name
 * @returns {boolean} true only when both numeric bars hold and every SHIP_OPERATORS answer
 *   is strictly `true`
 */
export function evaluateShipworthy({ mean, min, operatorQuestions } = {}) {
  if (typeof mean !== 'number' || typeof min !== 'number') return false;
  if (!(mean >= 82 && min >= 70)) return false;

  let answers;
  if (Array.isArray(operatorQuestions)) {
    // positional form → rebuild the keyed form
    answers = {};
    OPERATOR_QUESTIONS.forEach((name, position) => { answers[name] = operatorQuestions[position]; });
  } else {
    answers = operatorQuestions || {};
  }

  return SHIP_OPERATORS.every((name) => answers[name] === true);
}
|
|
230
|
+
|
|
231
|
+
// ----------------------------------------------------------------------------
|
|
232
|
+
// Minimal static file server rooted at the assembled site dir, so Playwright
|
|
233
|
+
// renders the REAL page over http:// (relative assets, module scripts, fetches
|
|
234
|
+
// of sitemap/robots/llms all resolve) — judged on live local pixels, never a
|
|
235
|
+
// deployed URL (DDD §12.1). Traversal-guarded; binds 127.0.0.1 on a random port.
|
|
236
|
+
// ----------------------------------------------------------------------------
|
|
237
|
+
// Content-Type by lower-cased file extension; anything unlisted is served as
// application/octet-stream.
const MIME = {
  '.html': 'text/html; charset=utf-8', '.css': 'text/css; charset=utf-8',
  '.js': 'text/javascript; charset=utf-8', '.mjs': 'text/javascript; charset=utf-8',
  '.json': 'application/json; charset=utf-8', '.svg': 'image/svg+xml',
  '.png': 'image/png', '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg',
  '.webp': 'image/webp', '.gif': 'image/gif', '.ico': 'image/x-icon',
  '.woff': 'font/woff', '.woff2': 'font/woff2', '.ttf': 'font/ttf',
  '.txt': 'text/plain; charset=utf-8', '.xml': 'application/xml; charset=utf-8',
  '.map': 'application/json; charset=utf-8', '.webmanifest': 'application/manifest+json',
};

/**
 * Start a static file server rooted at `rootDir`, bound to 127.0.0.1 on an OS-chosen port.
 *
 * Request handling:
 *   - the query string is dropped and the path is percent-decoded (malformed encoding → 400);
 *   - a path ending in `/`, or resolving to a directory, is served from its `index.html`;
 *   - anything resolving outside `rootDir` → 403;
 *   - a target that is missing or is not a regular file → 404;
 *   - a read failure → 500 if nothing has been sent yet, otherwise the socket is destroyed.
 *
 * The 200 header is only written once the file has actually been opened, and the read
 * stream always has an error handler, so an unreadable file can neither crash the process
 * with an unhandled 'error' event nor produce a "200 then nothing" response.
 *
 * @param {string} rootDir directory to serve
 * @returns {Promise<{server: import('node:http').Server, port: number}>} resolves once
 *   listening; rejects if the server emits 'error'
 */
function startServer(rootDir) {
  const root = path.resolve(rootDir);

  // Any stat failure (missing entry, not-a-directory, invalid path) means "nothing to serve".
  const statOrNull = (p) => {
    try { return fs.statSync(p); } catch { return null; }
  };

  const server = http.createServer((req, res) => {
    // Send an error status, or tear the connection down if a response has already begun.
    const fail = (status, body) => {
      if (res.headersSent) { res.destroy(); return; }
      res.writeHead(status);
      res.end(body);
    };

    try {
      let rel;
      try {
        rel = decodeURIComponent((req.url || '/').split('?')[0]);
      } catch {
        // URIError from a malformed %-sequence is the client's fault, not a server error
        fail(400, 'bad request: malformed percent-encoding');
        return;
      }
      if (rel.endsWith('/')) rel += 'index.html';

      const abs = path.resolve(root, '.' + rel);
      if (abs !== root && !abs.startsWith(root + path.sep)) { fail(403); return; }

      let target = abs;
      let info = statOrNull(target);
      if (info?.isDirectory()) {
        target = path.join(target, 'index.html');
        info = statOrNull(target);
      }
      if (!info?.isFile()) { fail(404, `not found: ${rel}`); return; }

      const stream = fs.createReadStream(target);
      stream.once('error', (e) => fail(500, String(e?.message || e)));
      stream.once('open', () => {
        res.writeHead(200, { 'content-type': MIME[path.extname(target).toLowerCase()] || 'application/octet-stream' });
        stream.pipe(res);
      });
      // release the file descriptor if the client goes away mid-transfer
      res.once('close', () => stream.destroy());
    } catch (e) {
      fail(500, String(e?.message || e));
    }
  });

  return new Promise((resolve, reject) => {
    server.on('error', reject);
    server.listen(0, '127.0.0.1', () => resolve({ server, port: server.address().port }));
  });
}
|
|
268
|
+
|
|
269
|
+
// The representative SECTION crops the vision model grades for craft + substance.
|
|
270
|
+
// Each is captured at the device viewport (a viewport-HEIGHT segment, anchored at the
|
|
271
|
+
// section's top) so it is full-resolution and never downscaled into mush. The two
|
|
272
|
+
// MANDATORY diagrams are captured separately as dedicated element crops (below) so the
|
|
273
|
+
// grader always SEES them in full, not pushed off the bottom of a viewport.
|
|
274
|
+
// Pixels kept above a section when scrolling to it, so the sticky .site-head does not
// cover the section heading.
const HEADER_OFFSET = 90;

// Section crops handed to the grader: [key, CSS selector, human label], in capture order.
const CROP_SECTIONS = [
  ['hero', '.hero, #top', 'Hero — the opening'],
  ['whatItIs', '#what-it-is', 'What it is — substance + the big-idea diagram'],
  ['getStarted', '#get-started', 'Get started — how to begin'],
  ['pack', '#the-pack', 'AI knowledge pack — the download block'],
].map(([key, selector, label]) => ({ key, selector, label }));
|
|
281
|
+
|
|
282
|
+
// ----------------------------------------------------------------------------
|
|
283
|
+
// DETERMINISTIC INV-18 PRESENCE CHECK (in the DOM, never the vision model).
|
|
284
|
+
// The architecture + flow diagrams are mandatory and live in the #how-it-works
|
|
285
|
+
// block (assemble-page Station 6). Assert each EXISTS and is VISIBLE (rendered box
|
|
286
|
+
// > 0, not display:none / visibility:hidden / opacity:0). Classify by tier label /
|
|
287
|
+
// src / alt, with a positional fallback (first diagram = architecture, second = flow)
|
|
288
|
+
// so it stays robust to per-build asset filenames. Also returns the figure INDEX of
|
|
289
|
+
// each so renderDevice can capture exactly that figure for the clarity grade.
|
|
290
|
+
// MUST be run AFTER renderDevice's scroll-through pass so lazy <img>s are loaded (else a
|
|
291
|
+
// not-yet-loaded diagram has a zero box and reads as "not visible").
|
|
292
|
+
// ----------------------------------------------------------------------------
|
|
293
|
+
/**
 * Deterministic INV-18 presence/visibility check, evaluated inside the page.
 *
 * Looks at every `figure.diagram` inside `#how-it-works`, classifies each from its
 * `.tier` text plus the <img> `src` / `alt`, and assigns the ARCHITECTURE and FLOW roles
 * to two DISTINCT figures:
 *   1. a figure that matches only one role takes that role;
 *   2. a figure that matches both roles fills whichever role is still open;
 *   3. any remaining role falls back to position (first figure not already taken).
 * One figure can never satisfy both roles, so a page with a single diagram reports the
 * other one as absent instead of passing the presence gate.
 *
 * "Visible" is judged on the figure's <img>: not display:none / visibility:hidden,
 * opacity > 0, and a rendered box larger than 2×2.
 *
 * The callback passed to page.evaluate is self-contained (no outer references).
 *
 * @param page object exposing `evaluate(fn)` that runs `fn` in the page (Playwright Page)
 * @returns {Promise<{architecturePresent: boolean, architectureVisible: boolean,
 *   architectureIndex: number, flowPresent: boolean, flowVisible: boolean,
 *   flowIndex: number, figureCount: number, details: object[]}>} indices are positions
 *   within `#how-it-works figure.diagram`, or -1 when the role is unfilled
 */
async function checkDiagramsInDom(page) {
  return page.evaluate(() => {
    const out = {
      architecturePresent: false, architectureVisible: false, architectureIndex: -1,
      flowPresent: false, flowVisible: false, flowIndex: -1,
      figureCount: 0, details: [],
    };
    const sec = document.querySelector('#how-it-works');
    if (!sec) return out;

    const isVis = (el) => {
      if (!el) return false;
      const r = el.getBoundingClientRect();
      const cs = getComputedStyle(el);
      return cs.display !== 'none' && cs.visibility !== 'hidden' && Number(cs.opacity) > 0 && r.width > 2 && r.height > 2;
    };

    const figs = Array.from(sec.querySelectorAll('figure.diagram'));
    out.figureCount = figs.length;

    const seen = figs.map((fig, i) => {
      const img = fig.querySelector('img');
      const tier = (fig.querySelector('.tier')?.textContent || '').toLowerCase();
      const src = (img?.getAttribute('src') || '').toLowerCase();
      const alt = (img?.getAttribute('alt') || '').toLowerCase();
      const hay = `${tier} ${src} ${alt}`;
      const vis = isVis(img);
      const isArch = /architect/.test(hay);
      const isFlow = /\bflow\b|data.?flow|process|runtime/.test(hay);
      out.details.push({ index: i, tier, src, vis, isArch, isFlow });
      return { vis, isArch, isFlow };
    });

    // 1. unambiguous matches first
    let archIdx = seen.findIndex((f) => f.isArch && !f.isFlow);
    let flowIdx = seen.findIndex((f) => f.isFlow && !f.isArch);
    // 2. a figure matching both names may fill an open role, but never both roles
    if (archIdx < 0) archIdx = seen.findIndex((f, i) => f.isArch && i !== flowIdx);
    if (flowIdx < 0) flowIdx = seen.findIndex((f, i) => f.isFlow && i !== archIdx);
    // 3. positional fallback when a diagram isn't name-classifiable — still a distinct figure
    if (archIdx < 0) archIdx = seen.findIndex((_, i) => i !== flowIdx);
    if (flowIdx < 0) flowIdx = seen.findIndex((_, i) => i !== archIdx);

    if (archIdx >= 0) {
      out.architectureIndex = archIdx;
      out.architecturePresent = true;
      out.architectureVisible = seen[archIdx].vis;
    }
    if (flowIdx >= 0) {
      out.flowIndex = flowIdx;
      out.flowPresent = true;
      out.flowVisible = seen[flowIdx].vis;
    }
    return out;
  });
}
|
|
334
|
+
|
|
335
|
+
// scroll an element to the top of the viewport INSTANTLY (the page sets
|
|
336
|
+
// scroll-behavior:smooth, which would otherwise leave a crop mid-animation).
|
|
337
|
+
/**
 * Jump the window so the located element's top sits `offset` px below the viewport top
 * (never scrolling above 0), then pause 160 ms before returning.
 *
 * @param page   object exposing `waitForTimeout(ms)`
 * @param loc    locator exposing `evaluate(fn, arg)`; `fn` receives the element
 * @param {number} offset  pixels to leave above the element
 */
async function scrollToTop(page, loc, offset) {
  // self-contained: runs in the page, not in Node
  const jumpTo = (node, headerGap) => {
    const targetY = node.getBoundingClientRect().top + window.scrollY - headerGap;
    window.scrollTo(0, Math.max(0, targetY));
  };
  await loc.evaluate(jumpTo, offset);
  await page.waitForTimeout(160);
}
|
|
344
|
+
|
|
345
|
+
// ----------------------------------------------------------------------------
|
|
346
|
+
// Render one device: settle the page, scroll it through the viewport (which forces
|
|
347
|
+
// every lazy <img> to load), save the full-page screenshot, THEN run the deterministic DOM diagram check, then
|
|
348
|
+
// capture the grading crops — viewport-segment section crops + dedicated element
|
|
349
|
+
// crops of the two mandatory diagrams. Returns { domInv18, fullPagePath, crops[], pageHeight }.
|
|
350
|
+
// ----------------------------------------------------------------------------
|
|
351
|
+
/**
 * Render `url` for one device profile and capture everything the grader needs.
 *
 * Order of operations (each step relies on the previous one):
 *   load + settle → scroll-through (loads lazy images) → full-page screenshot →
 *   DOM diagram check → section crops → diagram element crops → concept-diagram crops.
 *
 * @param chromium  browser type exposing `launch()`
 * @param {string} url  page to load
 * @param {object} device  reads `width`, `height`, `dsf`, `isMobile`, `tag`, `file`
 * @param {string} assetsDir  directory the screenshots are written into
 * @returns {Promise<{domInv18: object, fullPagePath: string, crops: object[], pageHeight: number}>}
 *   `crops` is ordered hero → diagram/concept crops → remaining section crops
 * @throws if the navigation response is missing or not OK
 */
async function renderDevice(chromium, url, device, assetsDir) {
  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext({
      viewport: { width: device.width, height: device.height },
      deviceScaleFactor: device.dsf,
      isMobile: device.isMobile,
      hasTouch: device.isMobile,
    });
    const page = await context.newPage();

    const resp = await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 45000 });
    if (!resp || !resp.ok()) {
      throw new Error(`page did not load OK (status ${resp ? resp.status() : 'none'}) at ${url}`);
    }

    // best-effort settling: network quiet, then web fonts
    try {
      await page.waitForLoadState('networkidle', { timeout: 15000 });
    } catch { /* best-effort */ }
    try {
      await page.evaluate(() => (document.fonts ? document.fonts.ready : Promise.resolve()));
    } catch { /* best-effort */ }

    // disable smooth scrolling so every programmatic scroll lands immediately
    await page.addStyleTag({ content: 'html, body { scroll-behavior: auto !important; }' }).catch(() => {});
    await page.waitForTimeout(500);

    // Walk the whole page through the viewport so lazy <img>s start loading, wait for the
    // ones still pending (max 3 s each), then return to the top. Runs in the page.
    await page.evaluate(async () => {
      await new Promise((done) => {
        let offset = 0;
        const stride = Math.max(200, window.innerHeight);
        const timer = setInterval(() => {
          window.scrollTo(0, offset);
          offset += stride;
          if (offset >= document.documentElement.scrollHeight) {
            clearInterval(timer);
            done();
          }
        }, 60);
      });
      const pending = Array.from(document.images).filter((img) => !img.complete);
      await Promise.all(pending.map((img) => new Promise((settled) => {
        img.addEventListener('load', settled, { once: true });
        img.addEventListener('error', settled, { once: true });
        setTimeout(settled, 3000);
      })));
      window.scrollTo(0, 0);
    });
    await page.waitForTimeout(300);

    const pageHeight = await page.evaluate(() => {
      const bodyHeight = document.body ? document.body.scrollHeight : 0;
      return Math.max(document.documentElement.scrollHeight, bodyHeight);
    });

    // (1) full-page artifact
    const fullPagePath = path.join(assetsDir, device.file);
    await page.screenshot({ path: fullPagePath, fullPage: true });

    // (2) DOM presence/visibility + figure indices of the two mandatory diagrams
    const domInv18 = await checkDiagramsInDom(page);

    // (3a) one viewport-sized screenshot per section, anchored just below the sticky header
    const crops = [];
    for (const { key, selector, label } of CROP_SECTIONS) {
      const loc = page.locator(selector).first();
      if (!(await loc.count())) {
        log(` crop ${key}: selector "${selector}" not found — skipped`);
        continue;
      }
      try {
        await scrollToTop(page, loc, HEADER_OFFSET);
        const cropPath = path.join(assetsDir, `grade-${device.tag}-${key}.png`);
        await page.screenshot({ path: cropPath, fullPage: false });
        crops.push({ key, label, path: cropPath });
      } catch (e) {
        log(` crop ${key}: capture failed (${e?.message || e}) — skipped`);
      }
    }

    // Element screenshot of the index-th figure in `figures`; a failure is logged under
    // `logName` and skipped, never thrown.
    const diagCrops = [];
    const captureFigure = async (figures, index, key, label, logName) => {
      try {
        const figure = figures.nth(index);
        await figure.scrollIntoViewIfNeeded().catch(() => {});
        const cropPath = path.join(assetsDir, `grade-${device.tag}-${key}.png`);
        await figure.screenshot({ path: cropPath });
        diagCrops.push({ key, label, path: cropPath });
      } catch (e) {
        log(` ${logName}: capture failed (${e?.message || e}) — skipped`);
      }
    };

    // (3b) the two mandatory diagrams, addressed by the indices the DOM check reported
    const diagFigs = page.locator('#how-it-works figure.diagram');
    const mandatory = [
      { idx: domInv18.architectureIndex, key: 'architecture', label: 'ARCHITECTURE diagram — modules / components / dependencies (how it is built)' },
      { idx: domInv18.flowIndex, key: 'flow', label: 'PROCESS / DATA-FLOW diagram — the runtime flow (how it works)' },
    ];
    for (const { idx, key, label } of mandatory) {
      if (idx < 0) {
        log(` diagram ${key}: no figure found in #how-it-works — skipped`);
        continue;
      }
      await captureFigure(diagFigs, idx, key, label, `diagram ${key}`);
    }

    // (3c) every concept diagram on the page, each as its own full element crop
    const conceptFigs = page.locator('figure.diagram.concept');
    const conceptN = await conceptFigs.count();
    for (let i = 0; i < conceptN; i++) {
      await captureFigure(
        conceptFigs,
        i,
        `concept${i}`,
        `CONCEPT diagram (big-idea / insight) — must be a real DRAWN diagram (cards + arrows), NEVER typeset ASCII/box-characters`,
        `concept${i}`,
      );
    }

    // grader order: hero, then all diagram crops, then the remaining sections
    const heroCrop = crops.find((c) => c.key === 'hero');
    const ordered = [
      ...(heroCrop ? [heroCrop] : []),
      ...diagCrops,
      ...crops.filter((c) => c.key !== 'hero'),
    ];

    await context.close();
    return { domInv18, fullPagePath, crops: ordered, pageHeight };
  } finally {
    await browser.close();
  }
}
|
|
459
|
+
|
|
460
|
+
// ----------------------------------------------------------------------------
|
|
461
|
+
// Grade one device's section crops with the vision model. LOUD on any malformed response.
|
|
462
|
+
// ----------------------------------------------------------------------------
|
|
463
|
+
/**
 * Report whether a value is a usable criterion score.
 *
 * @param {*} n - candidate value taken from the grader's JSON.
 * @returns {boolean} true only for a finite number within the closed range 0..100.
 */
function isScore(n) {
  if (typeof n !== 'number') return false;
  if (!Number.isFinite(n)) return false; // rejects NaN and +/-Infinity
  return n >= 0 && n <= 100;
}
|
|
464
|
+
/**
 * Report whether a value is a string with at least one non-whitespace character.
 *
 * @param {*} s - candidate value.
 * @returns {boolean} false for non-strings, empty strings and whitespace-only strings.
 */
function isText(s) {
  if (typeof s !== 'string') return false;
  return s.trim() !== '';
}
|
|
465
|
+
|
|
466
|
+
/**
 * Send one device's ordered section crops to the vision model and return a validated scorecard.
 *
 * Each crop's PNG is read from disk and inlined as a base64 data URL, preceded by a text label.
 * The request is POSTed to `${baseUrl}/chat/completions`. Every malformed or incomplete response
 * is turned into a thrown Error — this function never returns a partial result.
 *
 * @param {object} args
 * @param {string} args.apiKey - bearer token sent in the `authorization` header.
 * @param {string} args.model - model id; ids starting with `gpt-5` or `o<digit>` take the reasoning-model branch.
 * @param {string} args.baseUrl - API base URL; trailing slashes are stripped before `/chat/completions` is appended.
 * @param {Array<{label: string, path: string}>} args.crops - at least two crops, in the order the grader should see them.
 * @param {string} args.deviceLabel - device name used in the prompt and in every error message.
 * @returns {Promise<{gateA: object, gateB: object, rationales: object, clarity: object, operatorQuestions: object}>}
 *   scores rounded to integers, rationales/notes trimmed, operator answers coerced to strict booleans.
 * @throws {Error} on too few crops, transport failure, non-2xx status, non-JSON envelope, empty
 *   content, content that is not a JSON object, or any missing/invalid field.
 */
async function gradeCrops({ apiKey, model, baseUrl, crops, deviceLabel }) {
  if (!Array.isArray(crops) || crops.length < 2) {
    throw new Error(`too few section crops captured for ${deviceLabel} (need >= 2, got ${crops?.length || 0}) — cannot grade reliably`);
  }

  // Interleave a label + the full-resolution crop for each section, in the order given.
  const userContent = [{
    type: 'text',
    text: `Below are ${crops.length} full-resolution section crops of ONE explainer page rendered at ${deviceLabel}, in document order. They represent the whole page. Apply Gate A and Gate B to the page as a whole, and report INV-18 CLARITY for the two diagrams (in the "How it works" crop). Return ONLY the JSON object specified.`,
  }];
  crops.forEach((crop, i) => {
    const b64 = fs.readFileSync(crop.path).toString('base64');
    userContent.push({ type: 'text', text: `[Crop ${i + 1}/${crops.length}] ${crop.label}:` });
    userContent.push({ type: 'image_url', image_url: { url: `data:image/png;base64,${b64}`, detail: 'high' } });
  });

  // gpt-5.x / o-series are reasoning models: they reject a custom temperature and use
  // max_completion_tokens instead of max_tokens. Branch so the grader works on the current model.
  const isReasoning = /^(gpt-5|o[0-9])/.test(model);
  const body = {
    model,
    ...(isReasoning ? {} : { temperature: 0 }),
    // Reasoning models spend hidden reasoning tokens out of this SAME budget before emitting the JSON.
    // 2400 was too small (reasoning exhausted it → empty content). Give ample headroom and cap the
    // reasoning depth — grading from a rubric is a low-reasoning task, so 'low' is faster AND cheaper.
    ...(isReasoning ? { max_completion_tokens: 12000, reasoning_effort: 'low' } : { max_tokens: 2400 }),
    response_format: { type: 'json_object' },
    messages: [
      { role: 'system', content: `${RUBRIC}\n\n${RESPONSE_SPEC}` },
      { role: 'user', content: userContent },
    ],
  };

  let resp;
  try {
    // Strip ALL trailing slashes so a base URL like "https://host/v1//" still yields a clean endpoint.
    resp = await fetch(`${baseUrl.replace(/\/+$/, '')}/chat/completions`, {
      method: 'POST',
      headers: { 'content-type': 'application/json', authorization: `Bearer ${apiKey}` },
      body: JSON.stringify(body),
    });
  } catch (e) {
    throw new Error(`vision API request failed for ${deviceLabel}: ${e?.message || e}`, { cause: e });
  }
  const raw = await resp.text();
  if (!resp.ok) throw new Error(`vision API HTTP ${resp.status} for ${deviceLabel}: ${raw.slice(0, 300)}`);

  let envelope;
  try { envelope = JSON.parse(raw); } catch { throw new Error(`vision API returned non-JSON envelope for ${deviceLabel}: ${raw.slice(0, 200)}`); }
  const content = envelope?.choices?.[0]?.message?.content;
  if (!isText(content)) {
    const fin = envelope?.choices?.[0]?.finish_reason ?? 'unknown';
    const u = envelope?.usage || {};
    const detail = `finish_reason=${fin}, usage=${JSON.stringify(u)}`;
    // finish_reason 'length' = the model ran out of token budget (likely reasoning-tokens) before emitting JSON.
    throw new Error(`vision API returned no message content for ${deviceLabel} (${detail})`);
  }

  let g;
  try { g = JSON.parse(content); } catch { throw new Error(`grader content is not valid JSON for ${deviceLabel}: ${String(content).slice(0, 200)}`); }
  // Valid JSON is not necessarily an object: `null`, arrays and primitives all parse. Reject them
  // here with a descriptive message instead of letting `g.gateA` throw a bare TypeError on `null`.
  if (g === null || typeof g !== 'object' || Array.isArray(g)) {
    throw new Error(`grader content is not a JSON object for ${deviceLabel}: ${String(content).slice(0, 200)}`);
  }

  // --- LOUD validation: a grader that cannot return a complete per-criterion
  //     scorecard is a BUILD FAILURE, never a silent pass (ADR-0005 / DDD §12). ---
  const { gateA, gateB, rationales, clarity, operatorQuestions } = g;
  if (!gateA || !gateB || !rationales || !clarity) throw new Error(`grader response for ${deviceLabel} missing gateA/gateB/rationales/clarity`);
  if (!operatorQuestions || typeof operatorQuestions !== 'object') throw new Error(`grader response for ${deviceLabel} missing operatorQuestions`);
  for (const k of OPERATOR_QUESTIONS) if (typeof operatorQuestions[k] !== 'boolean') throw new Error(`grader operatorQuestions.${k} must be boolean for ${deviceLabel} (got ${JSON.stringify(operatorQuestions[k])})`);
  for (const k of CRITERIA_A) if (!isScore(gateA[k])) throw new Error(`grader score gateA.${k} invalid/missing for ${deviceLabel} (got ${JSON.stringify(gateA[k])})`);
  for (const k of CRITERIA_B) if (!isScore(gateB[k])) throw new Error(`grader score gateB.${k} invalid/missing for ${deviceLabel} (got ${JSON.stringify(gateB[k])})`);
  for (const k of [...CRITERIA_A, ...CRITERIA_B]) if (!isText(rationales[k])) throw new Error(`grader rationale ${k} missing/empty for ${deviceLabel}`);
  for (const k of ['architectureReadsClearly', 'flowReadsClearly']) {
    if (typeof clarity[k] !== 'boolean') throw new Error(`grader clarity.${k} must be boolean for ${deviceLabel} (got ${JSON.stringify(clarity[k])})`);
  }

  // Return only the known keys, normalized: extra fields the model may have added are dropped.
  return {
    gateA: Object.fromEntries(CRITERIA_A.map((k) => [k, Math.round(gateA[k])])),
    gateB: Object.fromEntries(CRITERIA_B.map((k) => [k, Math.round(gateB[k])])),
    rationales: Object.fromEntries([...CRITERIA_A, ...CRITERIA_B].map((k) => [k, String(rationales[k]).trim()])),
    clarity: {
      architectureReadsClearly: clarity.architectureReadsClearly,
      architectureNote: isText(clarity.architectureNote) ? clarity.architectureNote.trim() : '',
      flowReadsClearly: clarity.flowReadsClearly,
      flowNote: isText(clarity.flowNote) ? clarity.flowNote.trim() : '',
    },
    operatorQuestions: Object.fromEntries(OPERATOR_QUESTIONS.map((k) => [k, operatorQuestions[k] === true])),
  };
}
|
|
551
|
+
|
|
552
|
+
// ----------------------------------------------------------------------------
|
|
553
|
+
// Assemble a per-device scorecard. headlineScore = MIN across all 10 criteria
|
|
554
|
+
// (never the mean — DDD §12.3 / INV-05). INV-18 is the AND of the DETERMINISTIC DOM
|
|
555
|
+
// verdict (present + visible, from checkDiagramsInDom) and the VISION clarity verdict
|
|
556
|
+
// (reads-clearly). A device passes iff headlineScore >= 95 AND INV-18 is clean.
|
|
557
|
+
// ----------------------------------------------------------------------------
|
|
558
|
+
/**
 * Assemble the scorecard and refine notes for one device.
 *
 * headlineScore is the MIN over every Gate A and Gate B criterion; meanScore is the rounded mean.
 * INV-18 merges the DOM verdict (present / visible) with the vision model's clarity verdict.
 *
 * @param {string} deviceLabel - device name stamped on the scorecard and on each refine note.
 * @param {object} graded - validated grader output: gateA, gateB, rationales, clarity, operatorQuestions.
 * @param {object} domInv18 - DOM verdict: architecturePresent/Visible and flowPresent/Visible.
 * @param {string} screenshotPath - stored on the scorecard as `screenshot`.
 * @param {string[]} cropPaths - stored on the scorecard as `gradedCrops`.
 * @param {boolean} [flowExpected=true] - when false, the flow diagram is not required for INV-18.
 * @returns {{scorecard: object, refineNotes: object[]}}
 */
function buildScorecard(deviceLabel, graded, domInv18, screenshotPath, cropPaths, flowExpected = true) {
  const { gateA, gateB, rationales, clarity } = graded;
  const { architecturePresent, architectureVisible, flowPresent, flowVisible } = domInv18;

  const all = CRITERIA_A.map((k) => gateA[k]).concat(CRITERIA_B.map((k) => gateB[k]));
  const headlineScore = Math.min(...all);
  let total = 0;
  for (const score of all) total += score;
  const meanScore = Math.round(total / all.length);

  const operatorQuestions = graded.operatorQuestions || {};
  const opsArray = OPERATOR_QUESTIONS.map((k) => operatorQuestions[k] === true);

  // Merged INV-18: presence/visibility from the DOM, clarity from the vision model.
  const inv18 = {
    architecturePresent,
    architectureVisible,
    architectureReadsClearly: architecturePresent && architectureVisible && clarity.architectureReadsClearly,
    architectureNote: clarity.architectureNote,
    flowPresent,
    flowVisible,
    flowReadsClearly: flowPresent && flowVisible && clarity.flowReadsClearly,
    flowNote: clarity.flowNote,
    // A pure library repo legitimately has no runtime flow diagram (make-diagrams skips it). When no flow
    // diagram was produced, INV-18 requires only the architecture diagram — not a flow that cannot exist.
    flowExpected,
    source: 'presence+visibility=DOM, clarity=vision',
  };
  const archOk = inv18.architecturePresent && inv18.architectureVisible && inv18.architectureReadsClearly;
  const flowOk = !flowExpected || (inv18.flowPresent && inv18.flowVisible && inv18.flowReadsClearly);
  const inv18Ok = archOk && flowOk;
  inv18.passed = inv18Ok;

  // Two tiers: `exemplary` = the world-class aspiration (drives the refine loop, reported as a gap);
  // `passed` = the SHIP gate. The two evaluators receive the operator answers in different shapes
  // (array vs. object), exactly as passed here.
  const exemplary = evaluatePass({ mean: meanScore, min: headlineScore, operatorQuestions: opsArray }) && inv18Ok;
  const passed = evaluateShipworthy({ mean: meanScore, min: headlineScore, operatorQuestions }) && inv18Ok;

  const refineNotes = [];
  const flag = (criterion, score, saw) => {
    refineNotes.push({ device: deviceLabel, criterion, score, saw });
  };

  // Per-axis: any criterion under 85 is named with the grader's own rationale (Gate A first, then Gate B).
  for (const [scores, keys] of [[gateA, CRITERIA_A], [gateB, CRITERIA_B]]) {
    for (const k of keys) {
      if (scores[k] < 85) flag(k, scores[k], rationales[k]);
    }
  }
  // Overall: a mean under 90 is flagged as its own note.
  if (meanScore < 90) {
    flag('MEAN', meanScore, `overall mean ${meanScore} < 90 — not yet as good as the example sites; lift the weakest axes.`);
  }
  // Operator gate: every answer that is not strictly `true` is named.
  for (const k of OPERATOR_QUESTIONS) {
    if (operatorQuestions[k] !== true) {
      flag(`operator:${k}`, 0, `operator answered NO to "${k}" — the page does not yet satisfy this qualitative question.`);
    }
  }

  // INV-18: report only the FIRST failing stage per diagram (missing → hidden → unclear).
  const diagramFault = (title, present, visible, clear, note) => {
    if (!present) return `${title} diagram MISSING from the DOM (#how-it-works figure.diagram).`;
    if (!visible) return `${title} diagram present in DOM but NOT visible (zero rendered box / hidden).`;
    if (!clear) return `${title} diagram does not read clearly. ${note}`;
    return null;
  };
  const archFault = diagramFault('ARCHITECTURE', inv18.architecturePresent, inv18.architectureVisible, inv18.architectureReadsClearly, inv18.architectureNote);
  if (archFault !== null) flag('INV-18', 0, archFault);
  if (flowExpected) {
    const flowFault = diagramFault('FLOW', inv18.flowPresent, inv18.flowVisible, inv18.flowReadsClearly, inv18.flowNote);
    if (flowFault !== null) flag('INV-18', 0, flowFault);
  }

  const scorecard = {
    device: deviceLabel,
    gateA,
    gateB,
    operatorQuestions,
    rationales,
    inv18,
    meanScore,
    headlineScore,
    normalizedHeadline: exemplary ? 95 : meanScore, // only an exemplary build is normalized up; otherwise the honest mean
    passed,      // SHIP gate (ship-worthy + INV-18)
    exemplary,   // world-class aspiration — the gap is reported even when shipped
    screenshot: screenshotPath,
    gradedCrops: cropPaths,
  };
  return { scorecard, refineNotes };
}
|
|
623
|
+
|
|
624
|
+
// ----------------------------------------------------------------------------
|
|
625
|
+
// main — orchestrate: read inputs → screenshot both devices → grade both →
|
|
626
|
+
// assemble dual scorecard → merge the `quality` slot. Loud on every failure.
|
|
627
|
+
// ----------------------------------------------------------------------------
|
|
628
|
+
/**
 * Entry point: read `<build-dir>/build.json`, render the assembled page on both devices, grade each
 * with the vision model, then write the result back into the `quality` slot of build.json.
 *
 * The build directory is taken from `process.argv[2]`. Every failure is reported through
 * `emit(false, {}, message)`; success is reported through `emit(true, payload, null)`.
 *
 * @returns {Promise<*>} whatever `emit` returns.
 */
async function main() {
  const buildDir = process.argv[2];
  if (!buildDir) return emit(false, {}, 'usage: node tools/quality-grade.mjs <build-dir>  (missing <build-dir> argument)');

  const buildJsonPath = path.join(buildDir, 'build.json');
  if (!fs.existsSync(buildJsonPath)) return emit(false, {}, `build.json not found at ${buildJsonPath}`);

  let ctx;
  try { ctx = JSON.parse(fs.readFileSync(buildJsonPath, 'utf8')); }
  catch (e) { return emit(false, {}, `build.json is not valid JSON: ${e?.message || e}`); }
  // `null`, arrays and primitives are valid JSON but not a build context. Stop here with a clear
  // message instead of letting `ctx.visuals` throw a TypeError on `null`.
  if (ctx === null || typeof ctx !== 'object' || Array.isArray(ctx)) {
    return emit(false, {}, 'build.json must contain a JSON object at the top level');
  }

  // A flow diagram is expected ONLY when one was actually produced (has a real svgPath). Pure library
  // repos with no runtime entrypoints legitimately have none — INV-18 must not demand a flow that can't exist.
  const flowExpected = !!(ctx.visuals && ctx.visuals.flowDiagram && ctx.visuals.flowDiagram.svgPath);

  // --- DECLARED INPUTS: ONLY the `page` slot. Absent/invalid → loud stop. ---
  const page = ctx.page;
  if (!page || typeof page !== 'object') return emit(false, {}, 'build.json has no `page` slot — assemble-page (Station 6) must run before quality-grade');
  if (!page.dir) return emit(false, {}, 'build.json `page.dir` is missing — cannot serve the assembled site');
  if (!page.htmlPath) return emit(false, {}, 'build.json `page.htmlPath` is missing — cannot grade a page that was never assembled');
  const siteDir = path.resolve(page.dir);
  const htmlPath = path.resolve(page.htmlPath);
  if (!fs.existsSync(siteDir) || !fs.statSync(siteDir).isDirectory()) return emit(false, {}, `page.dir does not exist or is not a directory: ${siteDir}`);
  if (!fs.existsSync(htmlPath)) return emit(false, {}, `page.htmlPath does not exist on disk: ${htmlPath}`);

  // --- SECRET from env (never from build.json). No key → CANNOT evaluate → loud. ---
  const apiKey = loadOpenAiKey();
  if (!apiKey) return emit(false, {}, 'no OpenAI key found (set OPENAI_API_KEY / OPEN_AI_KEY in the environment or repo-root .env) — the page cannot be graded; refusing to emit a silent PASS');
  const model = process.env.QUALITY_VISION_MODEL || 'gpt-5.5'; // latest vision model, VERIFIED live via GET /v1/models 2026-06-29 (gpt-4o is deprecated; never assume from training data)
  const baseUrl = process.env.OPENAI_BASE_URL || 'https://api.openai.com/v1';

  // --- Playwright at runtime (do NOT npm-install; loud if absent). ---
  let chromium;
  try { ({ chromium } = await import('playwright')); }
  catch (e) { return emit(false, {}, `playwright is not installed (npm i -D playwright && npx playwright install chromium): ${e?.message || e}`); }

  const assetsDir = path.join(buildDir, 'assets');
  fs.mkdirSync(assetsDir, { recursive: true });

  // Order matters: the emitted `headline` below reads scorecard[0] as mobile and scorecard[1] as desktop.
  const DEVICES = [
    { label: 'mobile(390)', tag: 'mobile-390', width: 390, height: 844, dsf: 2, isMobile: true, file: 'screenshot-mobile-390.png' },
    { label: 'desktop(1440)', tag: 'desktop-1440', width: 1440, height: 900, dsf: 1, isMobile: false, file: 'screenshot-desktop-1440.png' },
  ];

  let started;
  try {
    started = await startServer(siteDir);
  } catch (e) {
    return emit(false, {}, `could not start local static server for ${siteDir}: ${e?.message || e}`);
  }
  const baseHref = `http://127.0.0.1:${started.port}/`;

  const scorecard = [];
  const refineNotes = [];
  const screenshots = {};
  const pageHeights = {};
  let failure = null;
  try {
    for (const device of DEVICES) {
      const slot = device.isMobile ? 'mobile' : 'desktop';
      log(`rendering ${device.label} → full-page artifact + section crops`);
      const { domInv18, fullPagePath, crops, pageHeight } = await renderDevice(chromium, baseHref, device, assetsDir);
      screenshots[slot] = fullPagePath;
      pageHeights[slot] = pageHeight;
      log(`${device.label}: pageHeight=${pageHeight}px, crops=${crops.map((c) => c.key).join(',')}, DOM inv18 arch(present=${domInv18.architecturePresent},vis=${domInv18.architectureVisible}) flow(present=${domInv18.flowPresent},vis=${domInv18.flowVisible})`);

      log(`grading ${device.label} with ${model} from ${crops.length} full-res crops …`);
      const graded = await gradeCrops({ apiKey, model, baseUrl, crops, deviceLabel: device.label });
      const { scorecard: card, refineNotes: notes } = buildScorecard(device.label, graded, domInv18, fullPagePath, crops.map((c) => c.path), flowExpected);
      scorecard.push(card);
      refineNotes.push(...notes);
      log(`${device.label}: headline=${card.headlineScore} inv18=${card.inv18.passed ? 'ok' : 'FAIL'} passed=${card.passed}`);
    }
  } catch (e) {
    // Record the failure and report it only after the server has been shut down.
    failure = `quality grading failed: ${e?.message || e}`;
  } finally {
    // Single shutdown point for both the success and the failure path.
    started.server.close();
  }
  if (failure !== null) return emit(false, {}, failure);

  const passed = scorecard.length === DEVICES.length && scorecard.every((c) => c.passed);
  const exemplary = scorecard.length === DEVICES.length && scorecard.every((c) => c.exemplary);
  const prevIterations = Number.isInteger(ctx.quality?.iterations) ? ctx.quality.iterations : 0;

  const quality = {
    scorecard,
    passed,        // SHIP gate — every device's scorecard passed
    exemplary,     // world-class aspiration cleared on every device; gap reported otherwise
    iterations: prevIterations + 1,
    visionModel: model,
    screenshots,
    pageHeights,
    refineNotes,
    gradedAt: new Date().toISOString(),
  };

  // --- Merge ONLY the `quality` slot; every other slot is left intact. ---
  ctx.quality = quality;
  try { fs.writeFileSync(buildJsonPath, JSON.stringify(ctx, null, 2) + '\n'); }
  catch (e) { return emit(false, {}, `could not write build.json: ${e?.message || e}`); }

  return emit(true, {
    quality,
    screenshots,
    pageHeights,
    passed,
    headline: { mobile: scorecard[0]?.headlineScore, desktop: scorecard[1]?.headlineScore },
    refineNoteCount: refineNotes.length,
  }, null);
}
|
|
736
|
+
|
|
737
|
+
// Auto-run ONLY when invoked directly (node tools/quality-grade.mjs <build-dir>). When this
|
|
738
|
+
// module is IMPORTED (e.g. a calibration harness that reuses the verbatim rubric + grader to
|
|
739
|
+
// validate the bands against a known-good page — CONTRACT (c), individually testable), the
|
|
740
|
+
// exports below are available without firing main(). The brain's direct invocation is unchanged.
|
|
741
|
+
// Truthy only when a script path was given on the command line AND it is this very module.
const isDirectRun = process.argv[1] && pathToFileURL(process.argv[1]).href === import.meta.url;
if (isDirectRun) {
  // Last-resort handler: anything main() did not report itself is still emitted as a failure.
  main().catch((err) => emit(false, {}, `unexpected error: ${err?.stack || err?.message || err}`));
}
|
|
745
|
+
|
|
746
|
+
export { RUBRIC, RESPONSE_SPEC, CRITERIA_A, CRITERIA_B, gradeCrops, loadOpenAiKey };
|