launchframe 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,10 +10,14 @@ radii, shadows), and synthesizes an original design system as
10
10
  `tailwind.config.ts` + `globals.css` + `tokens.json` + a Markdown
11
11
  report and an AI-handoff file.
12
12
 
13
- It is **not** a website cloning tool. It does not store HTML, JS, CSS,
14
- brand assets, logos, illustrations, or copywriting. Proprietary type
15
- families are substituted with open-source equivalents. See the
16
- [anti-clone policy](./rules/anti-clone-policy.md).
13
+ It also crawls the rendered DOM into a typed `SiteLayout` and emits a
14
+ **layout-mirror page** per source: a Next.js component that reconstructs
15
+ the source's section tree, grid, and density from typed primitives, with
16
+ `<TextSlot>` / `<MediaSlot>` placeholders where the source had copy,
17
+ logos, illustrations, or product imagery. The mirror does **not** embed
18
+ the source's copy text, brand assets, or product screenshots — fill those
19
+ slots with your own content before shipping. Proprietary type families
20
+ are substituted with open-source equivalents.
17
21
 
18
22
  ---
19
23
 
@@ -35,21 +39,28 @@ cd path/to/your-app-or-empty-folder
35
39
  npx launchframe@latest https://site-a.example https://site-b.example
36
40
  ```
37
41
 
38
- When it finishes, open **`output/<runId>/FOR_AI.md`** — it tells you
39
- exactly how to attach the folder in **Cursor** or **Claude Code** so
40
- the model follows your tokens when building UI.
42
+ When it finishes, every source URL has produced a **layout-mirror
43
+ page** under `output/<runId>/mirror/<host>/page.tsx`, plus a synthesized
44
+ design system at the run root.
41
45
 
42
46
  ```txt
43
47
  output/<runId>/
44
48
  ├── FOR_AI.md ← paste / @-attach this for your AI (handoff instructions)
45
- ├── tokens.json ← every value, machine-readable
49
+ ├── tokens.json ← every aggregated value, machine-readable
46
50
  ├── tailwind.config.ts ← drop-in Tailwind theme
47
51
  ├── globals.css ← drop-in shadcn-compatible CSS variables
48
52
  ├── theme-preview.tsx ← render this to eyeball the system
49
53
  ├── REPORT.md ← what was extracted, from where, why
50
54
  ├── run.json ← full run metadata (sources, timing, status)
51
55
  ├── screenshots/ ← captured PNGs
52
- └── raw/ ← per-site raw token observations
56
+ ├── raw/ ← per-site raw token + SiteLayout JSON
57
+ └── mirror/
58
+ └── <host>/
59
+ ├── page.tsx ← Next.js page reconstructed from the source's
60
+ │ section tree, with <TextSlot> / <MediaSlot>
61
+ │ placeholders for your own copy and assets
62
+ ├── layout.json ← the typed SiteLayout the page was built from
63
+ └── MIRROR_NOTES.md ← what was extracted and how to fill slots
53
64
  ```
54
65
 
55
66
  ---
@@ -57,10 +68,22 @@ output/<runId>/
57
68
  ## Hand the output to your AI
58
69
 
59
70
  1. Run the command above so `output/<runId>/` exists.
60
- 2. Either:
61
- - **Cursor:** `@`-attach the folder (or `FOR_AI.md` + `REPORT.md` + `tokens.json`) and paste the instruction block from `FOR_AI.md` into Composer, or
62
- - **Claude Code:** copy the `output/<runId>/` folder into your project and attach it.
63
- 3. The AI's authority order is **REPORT.md → tokens.json → merge tailwind.config.ts and globals.css into the app**. It must use semantic tokens (`bg-background`, `text-muted-foreground`, `bg-primary`, …) and write **original copy only**.
71
+ 2. Pick the mirror folder that matches the source whose layout you want
72
+ to start from: `output/<runId>/mirror/<host>/`.
73
+ 3. Either:
74
+ - **Cursor:** `@`-attach the mirror folder along with `FOR_AI.md` and
75
+ `tokens.json`, then ask the agent to fill in `<TextSlot>` /
76
+ `<MediaSlot>` placeholders with copy for *your* product.
77
+ - **Claude Code:** copy the mirror folder into your project, then ask
78
+ the agent the same thing.
79
+ 4. The AI's authority order is **MIRROR_NOTES.md → page.tsx → tokens.json
80
+ → tailwind.config.ts + globals.css**. It must:
81
+ - Keep the section tree, grid composition, and density of `page.tsx`
82
+ intact (that is the source's layout grammar, which is the point).
83
+ - Replace every `<TextSlot kind="…" />` placeholder with original
84
+ copy written for *your* product — not paraphrased from the source.
85
+ - Replace every `<MediaSlot kind="…" />` with your own imagery, code
86
+ samples, or brand marks.
64
87
 
65
88
  ---
66
89
 
@@ -148,11 +171,11 @@ launchframe/
148
171
  │ ├── capture/ # Playwright screenshot capture (lower level)
149
172
  │ ├── analysis/ # Layout-tree extraction & section classifier
150
173
  │ ├── patterns/ # Typed pattern schemas + atlas registry loader
151
- │ ├── blocks/ # Original shadcn/ui blocks across families
152
- │ └── evaluation/ # Coherence / clone-risk / a11y evaluator
153
- ├── pattern-atlas/ # Formalized pattern catalog per category
174
+ │ ├── blocks/ # Shadcn/ui blocks + TextSlot / MediaSlot primitives
175
+ │ └── evaluation/ # Coherence + responsiveness/a11y evaluator
176
+ ├── pattern-atlas/ # Pattern catalog per category (block-composition mode)
154
177
  ├── prompts/ # Markdown prompts for AI agents
155
- ├── rules/ # Design / copy / anti-clone / a11y policy
178
+ ├── rules/ # Design / copy / a11y rules
156
179
  ├── registry/ # shadcn-compatible custom registry manifest
157
180
  └── output/ # ← every `extract` run lands here
158
181
  ```
@@ -164,7 +187,7 @@ npm run studio # Next.js dashboard at localhost:3000
164
187
  npm run capture # Lower-level Playwright capture pipeline
165
188
  npm run analyze # Run section classifier on captured screenshots
166
189
  npm run formalize # Validate the pattern-atlas/*.json files
167
- npm run evaluate # Grade a generated page (coherence/clone/a11y)
190
+ npm run evaluate # Grade a generated page (coherence + a11y)
168
191
  npm run typecheck # Project-wide TypeScript check
169
192
  ```
170
193
 
@@ -172,26 +195,30 @@ npm run typecheck # Project-wide TypeScript check
172
195
 
173
196
  ## What this is not
174
197
 
175
- - **Not a scraper.** It captures only what is publicly rendered, stores
176
- no HTML, and never republishes site content.
177
- - **Not a clone tool.** Anti-clone policy is enforced by capture-side
178
- policy and synthesis-side normalization.
198
+ - **Not a verbatim site downloader.** The crawler builds a typed
199
+ `SiteLayout` model from the rendered DOM — section tree, geometry,
200
+ computed style tokens, content kinds — and emits code generated from
201
+ that model. It does not save the source's HTML/CSS to disk.
202
+ - **Not a content lift.** Heading text, body copy, logos, illustrations,
203
+ and product imagery become `<TextSlot>` / `<MediaSlot>` placeholders in
204
+ the mirror page. You fill them with your own copy and assets before
205
+ shipping.
179
206
  - **Not a component library replacement.** It sits *on top* of
180
- shadcn/ui and produces theme files for it.
207
+ shadcn/ui and produces theme files plus slot-driven page templates.
181
208
 
182
209
  ---
183
210
 
184
- ## Anti-clone policy in one paragraph
185
-
186
- Launchframe captures publicly rendered pages, reads the **computed
187
- appearance** of those pages, and synthesizes an original design system
188
- from aggregate signals. It never stores HTML, JS, CSS, brand assets,
189
- illustrations, logos, or copy. Proprietary type families are
190
- substituted with open-source equivalents. Generated pages and design
191
- systems are checked against captured corpora for structural and
192
- token-level overlap; anything above the configured threshold fails the
193
- build. Full policy:
194
- [`rules/anti-clone-policy.md`](./rules/anti-clone-policy.md).
211
+ ## Responsible use in one paragraph
212
+
213
+ Launchframe is intended for layout research and design-system seeding
214
+ against pages you have permission to analyze (your own products, sites
215
+ where the operator has permission, or pages where structural analysis is
216
+ permitted by `robots.txt`). The crawler respects `robots.txt` by default
217
+ and rate-limits per domain. The output is generated code derived from a
218
+ normalized typed model and slot placeholders — not a verbatim copy of
219
+ the source's markup, copy, or assets. Operators are responsible for the
220
+ content they paste into those slots and for honoring third-party
221
+ trademarks, terms of service, and licenses.
195
222
 
196
223
  ---
197
224
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "launchframe",
3
- "version": "0.1.5",
3
+ "version": "0.1.6",
4
4
  "description": "Point Launchframe at SaaS sites you admire and get back a drop-in shadcn/ui design system (tokens, Tailwind theme, CSS variables, AI handoff) you can build your own UI on top of.",
5
5
  "license": "MIT",
6
6
  "author": "Evan Gruhlkey",
@@ -0,0 +1,521 @@
1
+ /**
2
+ * DOM layout crawler.
3
+ *
4
+ * Runs inside the rendered page via Playwright's `page.evaluate`. Walks the
5
+ * DOM, identifies top-level sections, classifies each section's role and
6
+ * composition, and counts the content slots it contains. Returns a
7
+ * `SiteLayout` structural model the emitter rebuilds into a Next.js page.
8
+ *
9
+ * What this records:
10
+ * - Section tree (geometry, role, composition, density)
11
+ * - Slot inventory per section: how many headings / body paragraphs /
12
+ * buttons / images / icons / logos / code blocks etc. it contains
13
+ * - Per-section style tokens: background, foreground, padding
14
+ * - Page-level tokens: fonts, primary surface colors, container width
15
+ *
16
+ * What this does NOT record:
17
+ * - Heading or body text content (slots are counts, not strings).
18
+ * - Raw HTML, CSS, or class names from the source.
19
+ * - Brand assets (logos, illustrations, product screenshots).
20
+ *
21
+ * The structural model is what the mirror emitter uses to reconstruct the
22
+ * page's section grammar with `<TextSlot>` / `<MediaSlot>` placeholders.
23
+ */
24
+
25
+ import type { Page } from "playwright";
26
+
27
+ import type {
28
+ Composition,
29
+ SectionLayout,
30
+ SectionRole,
31
+ SiteLayout,
32
+ SiteTokens,
33
+ SlotCount,
34
+ SlotKind,
35
+ } from "./types.js";
36
+
37
/**
 * Runs the in-page crawler against `page` and assembles the full `SiteLayout`.
 *
 * This function does not navigate; it evaluates against whatever document
 * `page` currently holds.
 *
 * @param page - Playwright page to evaluate the crawler in.
 * @param url - Source URL; stored as-is and parsed to derive `host`.
 * @param viewport - Viewport size, copied into the result unchanged.
 * @returns The in-page crawl result plus `url`, `host`, `capturedAt`
 *   (ISO timestamp taken after the crawl returns) and `viewport`.
 * @throws TypeError when `url` is not parseable by `new URL`.
 */
export async function crawlLayout(
  page: Page,
  url: string,
  viewport: { width: number; height: number },
): Promise<SiteLayout> {
  // Make sure a global `__name` exists in the page, defaulting to identity.
  // NOTE(review): presumably this covers a bundler-injected `__name(...)`
  // wrapper in the serialized crawler source — confirm against the build.
  await page.evaluate(() => {
    const scope = globalThis as unknown as { __name?: (fn: unknown) => unknown };
    if (typeof scope.__name !== "undefined") return;
    scope.__name = (fn: unknown) => fn;
  });

  const { host } = new URL(url);
  const layoutFields = await page.evaluate(crawlInPage);
  const capturedAt = new Date().toISOString();

  return { url, host, capturedAt, viewport, ...layoutFields };
}
58
+
59
+ /**
60
+ * Browser-context crawler. Dependency-free so Playwright can serialize it.
61
+ * Returns the layout-bearing fields of `SiteLayout` (url/host/capturedAt
62
+ * are added on the Node side).
63
+ */
64
+ function crawlInPage(): Pick<
65
+ SiteLayout,
66
+ "pageHeightPx" | "sections" | "tokens"
67
+ > {
68
+ const VIEWPORT_W = window.innerWidth;
69
+ const PAGE_H = Math.max(
70
+ document.documentElement.scrollHeight,
71
+ document.body.scrollHeight,
72
+ );
73
+
74
+ /* ----- helpers ----- */
75
+
76
/**
 * Converts a CSS `rgb()` / `rgba()` color string to a lowercase `#rrggbb` hex.
 *
 * Accepts both the legacy comma-separated form (`rgba(10, 20, 30, 0.5)`) and
 * the space/slash form (`rgb(10 20 30 / 50%)`). Channels may be numbers or
 * percentages; they are rounded and clamped to 0–255 so the result is always
 * exactly six hex digits.
 *
 * @param rgb - A color string, typically from `getComputedStyle`.
 * @returns The hex color, or `null` when the input is empty, `transparent`,
 *   not an `rgb()/rgba()` value, has a non-numeric channel, or has an alpha
 *   below 0.05 (treated as invisible). The alpha is otherwise discarded.
 */
function toHex(rgb: string): string | null {
  if (!rgb || rgb === "transparent") return null;
  const m = rgb.match(/rgba?\(([^)]+)\)/);
  if (!m) return null;

  // Split on commas, whitespace and the "/" alpha separator alike.
  const parts = m[1]!.split(/[\s,\/]+/).filter((s) => s.length > 0);
  if (parts.length < 3) return null;

  // Percentages are relative to 255 for color channels and to 1 for alpha.
  const parseChannel = (raw: string): number => {
    const n = parseFloat(raw);
    return raw.endsWith("%") ? (n / 100) * 255 : n;
  };
  const parseAlpha = (raw: string): number => {
    const n = parseFloat(raw);
    return raw.endsWith("%") ? n / 100 : n;
  };

  const a = parts[3] !== undefined ? parseAlpha(parts[3]) : 1;
  if (a < 0.05) return null;

  const channels = [parts[0]!, parts[1]!, parts[2]!].map(parseChannel);
  if (channels.some((n) => Number.isNaN(n))) return null;

  const h = (n: number) =>
    Math.min(255, Math.max(0, Math.round(n))).toString(16).padStart(2, "0");
  return `#${h(channels[0]!)}${h(channels[1]!)}${h(channels[2]!)}`;
}
90
+
91
/**
 * Reports whether an element is rendered in a way a viewer could see.
 *
 * An element is considered invisible when its computed style has
 * `visibility: hidden`, `display: none`, or an opacity below 0.05, or when
 * its bounding box has zero width or height.
 *
 * @param el - Element to test.
 * @returns `true` when none of the hiding conditions apply.
 */
function isVisible(el: HTMLElement): boolean {
  const { visibility, display, opacity } = getComputedStyle(el);
  const hiddenByCss =
    visibility === "hidden" || display === "none" || parseFloat(opacity) < 0.05;
  if (hiddenByCss) return false;

  const { width, height } = el.getBoundingClientRect();
  return width > 0 && height > 0;
}
98
+
99
/**
 * Sums the trimmed length of the text nodes that are direct children of `el`.
 *
 * Text inside nested elements is not included; whitespace-only text nodes
 * contribute nothing.
 *
 * @param el - Element whose immediate text children are measured.
 * @returns Total character count of the trimmed direct text nodes.
 */
function directTextLength(el: Element): number {
  return Array.from(el.childNodes)
    .filter((node) => node.nodeType === 3) // 3 === text node
    .reduce((sum, node) => sum + (node.nodeValue ?? "").trim().length, 0);
}
109
+
110
/**
 * Decides which kind of content slot, if any, a single element represents.
 *
 * Media and form controls are recognized by tag name. Buttons and CTA-styled
 * anchors are split into primary/secondary by their computed background.
 * Anything else must own direct text and is classified from its tag or, as a
 * fallback, from its computed font size, weight and text-transform.
 *
 * @param el - Element to classify (callers filter by visibility first).
 * @returns The slot kind, or `null` when the element is not a slot.
 */
function classifySlotForElement(el: HTMLElement): SlotKind | null {
  const tag = el.tagName;
  const style = getComputedStyle(el);
  const fontSize = parseFloat(style.fontSize) || 16;
  const fontWeight = parseInt(style.fontWeight, 10) || 400;

  // A <picture> is only a wrapper around the <img> that actually renders.
  // The <img> is visited on its own, so counting the wrapper too would
  // report one image twice.
  if (tag === "PICTURE" && el.querySelector("img") !== null) return null;

  if (tag === "IMG" || tag === "PICTURE") {
    // Heuristic: short, not-too-wide images read as logos; the rest as imagery.
    const r = el.getBoundingClientRect();
    const ratio = r.width > 0 && r.height > 0 ? r.width / r.height : 0;
    if (r.height > 0 && r.height < 56 && ratio < 6) return "logo-mono";
    return "image";
  }
  if (tag === "SVG" || tag === "svg") {
    // SVGs under 32x32 are treated as icons, larger ones as imagery.
    const r = el.getBoundingClientRect();
    if (r.width < 32 && r.height < 32) return "icon";
    return "image";
  }
  if (tag === "VIDEO") return "video";
  if (tag === "PRE" || tag === "CODE") return "code";
  if (tag === "INPUT" || tag === "TEXTAREA" || tag === "SELECT") return "input";

  if (tag === "BUTTON" || (tag === "A" && el.getAttribute("role") === "button")) {
    // Primary when the button has a visible background that differs from the
    // page (body) background; secondary otherwise.
    const bg = toHex(style.backgroundColor);
    if (bg && bg !== toHex(getComputedStyle(document.body).backgroundColor)) {
      return "button-primary";
    }
    return "button-secondary";
  }

  if (tag === "A" && directTextLength(el) > 0) {
    // Anchor that looks like a styled CTA: horizontally padded and either
    // backgrounded or bordered. Plain links fall through to the text rules.
    const padX = parseFloat(style.paddingLeft) + parseFloat(style.paddingRight);
    const hasBackdrop =
      toHex(style.backgroundColor) !== null ||
      parseFloat(style.borderTopWidth) > 0;
    if (padX > 16 && hasBackdrop) {
      return toHex(style.backgroundColor) ? "button-primary" : "button-secondary";
    }
  }

  // Everything below is a text slot and needs text of its own; pure wrappers
  // are skipped so their children are classified instead.
  if (directTextLength(el) === 0) return null;

  if (/^H[1-6]$/.test(tag)) {
    // H3–H6 all collapse into the "heading-3" bucket.
    const level = parseInt(tag.slice(1), 10);
    if (level === 1) return "heading-1";
    if (level === 2) return "heading-2";
    return "heading-3";
  }

  if (tag === "LI") return "bullet";

  // Non-heading tags styled like headings.
  if (fontSize >= 36 && fontWeight >= 600) return "heading-1";
  if (fontSize >= 24 && fontWeight >= 500) return "heading-2";
  if (fontSize >= 18 && fontWeight >= 500) return "heading-3";

  if (fontSize <= 12 && /uppercase/i.test(style.textTransform)) return "eyebrow";
  if (fontSize <= 13 && fontWeight >= 600) return "badge";

  if (tag === "P" || tag === "SPAN" || tag === "DIV") return "body";

  return null;
}
174
+
175
/**
 * Tallies the content slots found among the visible descendants of `root`.
 *
 * Each visible descendant is classified with `classifySlotForElement`. Text
 * fragments nested inside an already-counted `<h1>`–`<h6>` are skipped, so a
 * heading with styled inner spans counts once. Non-text slots (images,
 * buttons, icons, ...) inside a heading are still counted.
 *
 * @param root - Section element whose descendants are inspected.
 * @returns One `{ kind, count }` entry per kind seen, sorted by kind name.
 *   The `body` count is capped at 12 and the `bullet` count at 24.
 */
function countSlots(root: HTMLElement): SlotCount[] {
  const isTextKind = (kind: SlotKind): boolean =>
    kind === "heading-1" ||
    kind === "heading-2" ||
    kind === "heading-3" ||
    kind === "body" ||
    kind === "eyebrow" ||
    kind === "badge";

  const buckets = new Map<SlotKind, number>();
  // Heading elements that have contributed a count. querySelectorAll yields
  // document order, so a heading is always recorded before its descendants.
  const countedHeadings = new Set<Element>();

  for (const el of Array.from(root.querySelectorAll<HTMLElement>("*"))) {
    if (!isVisible(el)) continue;
    const kind = classifySlotForElement(el);
    if (!kind) continue;

    if (isTextKind(kind)) {
      // Without this, `<h1>Build <span>faster</span></h1>` yields two
      // heading-1 slots, because the span inherits the heading's font
      // metrics and carries direct text of its own.
      const enclosing = el.parentElement?.closest("h1, h2, h3, h4, h5, h6") ?? null;
      if (enclosing !== null && countedHeadings.has(enclosing)) continue;
    }
    if (/^H[1-6]$/.test(el.tagName)) countedHeadings.add(el);

    buckets.set(kind, (buckets.get(kind) ?? 0) + 1);
  }

  // Body and bullet counts are coarse (nested wrappers can inflate them),
  // so they are capped rather than de-duplicated precisely.
  if ((buckets.get("body") ?? 0) > 12) buckets.set("body", 12);
  if ((buckets.get("bullet") ?? 0) > 24) buckets.set("bullet", 24);

  const out: SlotCount[] = [];
  for (const [kind, count] of buckets) out.push({ kind, count });
  return out.sort((a, b) => a.kind.localeCompare(b.kind));
}
192
+
193
+ /* ----- section discovery ----- */
194
+
195
/**
 * Finds the page's top-level sections.
 *
 * Candidates are every <header>, <main>, <section>, <article>, <footer> and
 * <nav>, plus the direct children of <body> and of the first <main>. A
 * candidate is accepted when it is visible, at least 70% of the viewport
 * wide, and at least 80 px tall. Accepted candidates that sit inside another
 * accepted candidate are dropped, so only the outermost ones remain.
 *
 * NOTE(review): because only outermost candidates survive, an accepted
 * <main> (or a full-width wrapper directly under <body>) suppresses every
 * section inside it. Confirm that this is the intended granularity.
 *
 * @returns The surviving elements ordered by their document y position.
 */
function findSections(): HTMLElement[] {
  const candidates = new Set<HTMLElement>();
  const tagSet = ["HEADER", "MAIN", "SECTION", "ARTICLE", "FOOTER", "NAV"];
  for (const tag of tagSet) {
    for (const el of Array.from(document.getElementsByTagName(tag))) {
      candidates.add(el as HTMLElement);
    }
  }
  // Direct children of <body> and <main> act as a fallback for pages that
  // do not use semantic sectioning tags.
  for (const el of Array.from(document.body.children) as HTMLElement[]) {
    candidates.add(el);
  }
  const main = document.querySelector("main");
  if (main) {
    for (const el of Array.from(main.children) as HTMLElement[]) {
      candidates.add(el);
    }
  }

  const accepted = Array.from(candidates).filter((el) => {
    if (!isVisible(el)) return false;
    const r = el.getBoundingClientRect();
    return r.width >= VIEWPORT_W * 0.7 && r.height >= 80;
  });

  // Keep only elements with no accepted ancestor. Walking the parent chain
  // makes this independent of sort order: a wrapper and its first child often
  // share the same `top`, and comparing only against earlier-sorted elements
  // would then keep both the wrapper and the child.
  const acceptedSet = new Set<Element>(accepted);
  const outermost = accepted.filter((el) => {
    for (let p = el.parentElement; p !== null; p = p.parentElement) {
      if (acceptedSet.has(p)) return false;
    }
    return true;
  });

  // Order by document y position, measuring each element once.
  return outermost
    .map((el) => ({ el, top: el.getBoundingClientRect().top + window.scrollY }))
    .sort((a, b) => a.top - b.top)
    .map((entry) => entry.el);
}
245
+
246
+ /* ----- per-section classification ----- */
247
+
248
/**
 * Assigns a semantic role to a section from its tag, position, size and
 * slot inventory. Rules are evaluated top to bottom; the first match wins.
 *
 * @param el - The section element.
 * @param indexFromTop - Zero-based position counted from the first section.
 * @param indexFromBottom - Zero-based position counted from the last section.
 * @param slots - Slot counts for this section.
 * @returns The matched role, or `"other"` when no rule applies.
 */
function classifyRole(
  el: HTMLElement,
  indexFromTop: number,
  indexFromBottom: number,
  slots: SlotCount[],
): SectionRole {
  const r = el.getBoundingClientRect();
  const tag = el.tagName;
  const count = (k: SlotKind) => slots.find((s) => s.kind === k)?.count ?? 0;

  if (tag === "NAV") return "nav";
  if (tag === "FOOTER") return "footer";
  if (tag === "HEADER" && indexFromTop === 0 && r.height < 140) return "nav";
  // Positional footer fallback: the last section, when reasonably short.
  // It must not also be the first section, otherwise a page with a single
  // section (or a lone short header) would be labelled "footer".
  if (indexFromBottom === 0 && indexFromTop > 0 && r.height < 600) return "footer";

  const headingCount = count("heading-1") + count("heading-2") + count("heading-3");
  const buttons = count("button-primary") + count("button-secondary");
  const images = count("image");
  const logos = count("logo-mono");
  const bullets = count("bullet");

  // One of the first two sections, with an H1-level heading and a CTA → hero.
  if (indexFromTop <= 1 && count("heading-1") >= 1 && buttons >= 1) return "hero";

  // A wide, short band of small images → logo strip.
  if (logos >= 4 && headingCount <= 1 && r.height < r.width * 0.25) {
    return "proof-logos";
  }

  // Pricing tells: many bullets and several buttons in a tall section.
  if (bullets >= 6 && buttons >= 2 && r.height > 360) return "pricing";

  // Three or more sub-headings with little imagery → feature grid.
  if (count("heading-2") + count("heading-3") >= 3 && images <= 2) return "feature-grid";

  // A heading plus at least one image and few bullets → deep dive.
  if (headingCount >= 1 && images >= 1 && bullets <= 4 && r.height > 320) {
    return "feature-deep-dive";
  }

  // Short section with a button, within the last three → conversion band.
  if (headingCount <= 2 && buttons >= 1 && r.height < 480 && indexFromBottom <= 2) {
    return "conversion";
  }

  // Text-only section with several body slots → quotes.
  if (count("body") >= 3 && images === 0 && bullets === 0 && buttons === 0) {
    return "proof-quotes";
  }

  return "other";
}
299
+
300
/**
 * Classifies how a section arranges its content horizontally.
 *
 * Inspects up to the first 400 descendants (in document order) and records
 * the largest column count found on any visible CSS grid or row-direction
 * flex container. Separately, a flex row of four or more uniformly small
 * children is remembered as a logo row, which takes precedence.
 *
 * @param el - The section element.
 * @returns The composition label derived from the best column count.
 */
function classifyComposition(el: HTMLElement): Composition {
  const candidates = el.querySelectorAll<HTMLElement>("*");
  let bestCols = 1;
  let bestKind: "grid" | "flex" | "none" = "none";
  let logoRowCols = 0;
  for (const c of Array.from(candidates).slice(0, 400)) {
    if (!isVisible(c)) continue;
    const s = getComputedStyle(c);
    if (s.display === "grid") {
      // Count tracks only. The resolved value can carry `[line-name]` groups
      // between track sizes, and is `none` when there are no explicit
      // columns; neither is a column.
      const cols = s.gridTemplateColumns
        .replace(/\[[^\]]*\]/g, " ")
        .split(/\s+/)
        .filter((x) => x.length > 0 && x !== "none").length;
      if (cols > bestCols) {
        bestCols = cols;
        bestKind = "grid";
      }
    } else if (s.display === "flex" && s.flexDirection.startsWith("row")) {
      const kids = Array.from(c.children) as HTMLElement[];
      const visibleKids = kids.filter(isVisible);
      // Rows with more than 12 children are ignored as column evidence.
      if (visibleKids.length > bestCols && visibleKids.length <= 12) {
        bestCols = visibleKids.length;
        bestKind = "flex";
      }
      if (visibleKids.length >= 4) {
        const allSmall = visibleKids.every((k) => {
          const kr = k.getBoundingClientRect();
          return kr.height < 80 && kr.width < 200;
        });
        if (allSmall) logoRowCols = Math.max(logoRowCols, visibleKids.length);
      }
    }
  }

  if (logoRowCols >= 4) return "logo-row";
  if (bestKind === "none" || bestCols <= 1) return "single-column";
  if (bestCols === 2) return "split-2";
  if (bestCols === 3) return "grid-3";
  if (bestCols === 4) return "grid-4";
  if (bestCols >= 5) return "list";
  // Not reachable for integer column counts; kept as a safe default.
  return "unknown";
}
342
+
343
/**
 * Rates how tightly a section is packed with content.
 *
 * Density is the total slot count divided by the section height in units of
 * 100 px, where the divisor never drops below 1.
 *
 * @param el - The section element (only its height is read).
 * @param slots - Slot counts for the section.
 * @returns `"thin"` below 0.8, `"dense"` above 2.4, otherwise `"balanced"`.
 */
function classifyDensity(el: HTMLElement, slots: SlotCount[]): "thin" | "balanced" | "dense" {
  let slotTotal = 0;
  for (const { count } of slots) slotTotal += count;

  const hundredsOfPx = Math.max(1, el.getBoundingClientRect().height / 100);
  const perHundredPx = slotTotal / hundredsOfPx;

  if (perHundredPx > 2.4) return "dense";
  return perHundredPx < 0.8 ? "thin" : "balanced";
}
351
+
352
/**
 * Reads the style tokens recorded for a section from its computed style.
 *
 * @param el - The section element.
 * @returns Background and foreground as hex (or `null` when `toHex` rejects
 *   the color), and top/bottom padding rounded to whole pixels, with a
 *   padding of zero reported as `null`.
 */
function extractSectionStyles(el: HTMLElement): SectionLayout["styles"] {
  const computed = getComputedStyle(el);

  // Unparseable or zero padding both collapse to null.
  const wholePxOrNull = (raw: string): number | null => {
    const px = Math.round(parseFloat(raw) || 0);
    return px === 0 ? null : px;
  };

  const backgroundHex = toHex(computed.backgroundColor);
  const foregroundHex = toHex(computed.color);
  const paddingTopPx = wholePxOrNull(computed.paddingTop);
  const paddingBottomPx = wholePxOrNull(computed.paddingBottom);

  return { backgroundHex, foregroundHex, paddingTopPx, paddingBottomPx };
}
361
+
362
+ /* ----- page-level token extraction ----- */
363
+
364
/**
 * Derives page-level design tokens from computed styles.
 *
 * - Fonts: first family in the body's font stack, and in the first
 *   h1/h2/h3's stack (falling back to the body family).
 * - Background / foreground: the body's colors, defaulting to
 *   `#ffffff` / `#0a0a0a`.
 * - Primary: most frequent background among visible buttons and links that
 *   differs from the page background; defaults to the foreground.
 * - Muted: most frequent non-page, non-primary background on any visible
 *   element; defaults to the page background.
 * - Border: most frequent top-border color; defaults to `#e5e7eb`.
 * - Radius: most frequent top-left corner radius in (0, 64) px, rounded;
 *   defaults to 8.
 * - Container: width of the largest-area visible layout block that is
 *   720–1600 px wide and at least 240 px tall, or `null` if none.
 *
 * Ties are resolved in favor of the value seen first in document order.
 *
 * @returns The collected tokens.
 */
function extractPageTokens(): SiteTokens {
  // First family in a CSS font stack, with surrounding quotes removed.
  const firstFamily = (stack: string): string =>
    stack.split(",")[0]!.trim().replace(/^["']|["']$/g, "");

  const bump = <K>(tally: Map<K, number>, key: K): void => {
    tally.set(key, (tally.get(key) ?? 0) + 1);
  };

  // Key with the highest count; the earliest-inserted key wins ties, and
  // `fallback` is returned when nothing qualifies.
  const mostFrequent = <K>(tally: Map<K, number>, fallback: K, skip?: K): K => {
    let best = fallback;
    let bestCount = 0;
    for (const [key, count] of tally) {
      if (skip !== undefined && key === skip) continue;
      if (count > bestCount) {
        best = key;
        bestCount = count;
      }
    }
    return best;
  };

  const bodyStyle = getComputedStyle(document.body);
  const bodyFontFamily = firstFamily(bodyStyle.fontFamily) || "system-ui";

  const firstHeading = document.querySelector("h1, h2, h3");
  const headingFontFamily = firstHeading
    ? firstFamily(getComputedStyle(firstHeading).fontFamily) || bodyFontFamily
    : bodyFontFamily;

  const bg = toHex(bodyStyle.backgroundColor) ?? "#ffffff";
  const fg = toHex(bodyStyle.color) ?? "#0a0a0a";

  // Primary: tally backgrounds of button-like elements.
  const buttonBgCounts = new Map<string, number>();
  for (const b of Array.from(
    document.querySelectorAll<HTMLElement>("button, a, [role='button']"),
  )) {
    if (!isVisible(b)) continue;
    const sb = toHex(getComputedStyle(b).backgroundColor);
    if (!sb || sb === bg) continue;
    bump(buttonBgCounts, sb);
  }
  const primary = mostFrequent(buttonBgCounts, fg);

  // Surface, border and radius tallies share one traversal: each element's
  // visibility and computed style are resolved once instead of three times.
  const surfaceCounts = new Map<string, number>();
  const borderCounts = new Map<string, number>();
  const radiusCounts = new Map<number, number>();
  for (const el of Array.from(document.querySelectorAll<HTMLElement>("body *"))) {
    if (!isVisible(el)) continue;
    const s = getComputedStyle(el);

    const surface = toHex(s.backgroundColor);
    if (surface && surface !== bg) bump(surfaceCounts, surface);

    if (parseFloat(s.borderTopWidth) > 0) {
      const borderHex = toHex(s.borderTopColor);
      if (borderHex) bump(borderCounts, borderHex);
    }

    // Radii of 64 px and above are ignored.
    const r = parseFloat(s.borderTopLeftRadius);
    if (r > 0 && r < 64) bump(radiusCounts, Math.round(r));
  }
  const muted = mostFrequent(surfaceCounts, bg, primary);
  const border = mostFrequent(borderCounts, "#e5e7eb");
  const radius = mostFrequent(radiusCounts, 8);

  // Container: among plausible layout blocks, take the one covering the
  // largest area and record its width.
  let containerPx: number | null = null;
  let containerArea = 0;
  for (const el of Array.from(
    document.querySelectorAll<HTMLElement>("main, section, header, footer, div"),
  )) {
    if (!isVisible(el)) continue;
    const r = el.getBoundingClientRect();
    if (r.width < 720 || r.width > 1600) continue;
    if (r.height < 240) continue;
    const area = r.width * r.height;
    if (area > containerArea) {
      containerArea = area;
      containerPx = Math.round(r.width);
    }
  }

  return {
    bodyFontFamily,
    headingFontFamily,
    backgroundHex: bg,
    foregroundHex: fg,
    primaryHex: primary,
    mutedHex: muted,
    borderHex: border,
    radiusPx: radius,
    containerPx,
  };
}
480
+
481
+ /* ----- main pass ----- */
482
+
483
// Main pass: describe every discovered section, then attach page tokens.
const sectionEls = findSections();
const lastIndex = sectionEls.length - 1;
const clamp01 = (value: number): number => Math.max(0, Math.min(1, value));

const sections = sectionEls.map<SectionLayout>((el, i) => {
  const rect = el.getBoundingClientRect();
  // Viewport-relative top converted to a document-relative offset.
  const docTop = rect.top + window.scrollY;

  const slots = countSlots(el);
  const composition = classifyComposition(el);
  const density = classifyDensity(el, slots);
  const role = classifyRole(el, i, lastIndex - i, slots);
  const styles = extractSectionStyles(el);

  const notes: string[] = [];
  if (slots.length === 0) {
    notes.push("No content slots detected; rendering an empty wrapper.");
  }
  if (composition === "unknown") {
    notes.push("Composition was ambiguous; fell back to single-column.");
  }

  return {
    id: `s${i + 1}`,
    role,
    composition,
    density,
    // [left, top, width, height] as fractions of viewport width / page
    // height, each clamped to the 0..1 range.
    bbox: [
      clamp01(rect.left / VIEWPORT_W),
      clamp01(docTop / PAGE_H),
      clamp01(rect.width / VIEWPORT_W),
      clamp01(rect.height / PAGE_H),
    ],
    slots,
    styles,
    notes,
  };
});

const tokens = extractPageTokens();
return { pageHeightPx: PAGE_H, sections, tokens };
521
+ }