launchframe 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -34
- package/package.json +1 -1
- package/packages/extract/dom-crawler.ts +521 -0
- package/packages/extract/emit.ts +2 -2
- package/packages/extract/extract.ts +85 -16
- package/packages/extract/mirror-emit.ts +617 -0
- package/packages/extract/reference-dump.ts +230 -0
- package/packages/extract/types.ts +118 -1
package/README.md
CHANGED
|
@@ -10,10 +10,14 @@ radii, shadows), and synthesizes an original design system as
|
|
|
10
10
|
`tailwind.config.ts` + `globals.css` + `tokens.json` + a Markdown
|
|
11
11
|
report and an AI-handoff file.
|
|
12
12
|
|
|
13
|
-
It
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
It also crawls the rendered DOM into a typed `SiteLayout` and emits a
|
|
14
|
+
**layout-mirror page** per source: a Next.js component that reconstructs
|
|
15
|
+
the source's section tree, grid, and density from typed primitives, with
|
|
16
|
+
`<TextSlot>` / `<MediaSlot>` placeholders where the source had copy,
|
|
17
|
+
logos, illustrations, or product imagery. The mirror does **not** embed
|
|
18
|
+
the source's copy text, brand assets, or product screenshots — fill those
|
|
19
|
+
slots with your own content before shipping. Proprietary type families
|
|
20
|
+
are substituted with open-source equivalents.
|
|
17
21
|
|
|
18
22
|
---
|
|
19
23
|
|
|
@@ -35,21 +39,34 @@ cd path/to/your-app-or-empty-folder
|
|
|
35
39
|
npx launchframe@latest https://site-a.example https://site-b.example
|
|
36
40
|
```
|
|
37
41
|
|
|
38
|
-
When it finishes,
|
|
39
|
-
|
|
40
|
-
|
|
42
|
+
When it finishes, every source URL has produced a **layout-mirror
|
|
43
|
+
page** under `output/<runId>/mirror/<host>/page.tsx`, plus a synthesized
|
|
44
|
+
design system at the run root.
|
|
41
45
|
|
|
42
46
|
```txt
|
|
43
47
|
output/<runId>/
|
|
44
48
|
├── FOR_AI.md ← paste / @-attach this for your AI (handoff instructions)
|
|
45
|
-
├── tokens.json ← every value, machine-readable
|
|
49
|
+
├── tokens.json ← every aggregated value, machine-readable
|
|
46
50
|
├── tailwind.config.ts ← drop-in Tailwind theme
|
|
47
51
|
├── globals.css ← drop-in shadcn-compatible CSS variables
|
|
48
52
|
├── theme-preview.tsx ← render this to eyeball the system
|
|
49
53
|
├── REPORT.md ← what was extracted, from where, why
|
|
50
54
|
├── run.json ← full run metadata (sources, timing, status)
|
|
51
55
|
├── screenshots/ ← captured PNGs
|
|
52
|
-
|
|
56
|
+
├── raw/ ← per-site raw token + SiteLayout JSON
|
|
57
|
+
├── reference/ ← verbatim DOM + copy for AI (see below)
|
|
58
|
+
│ └── <host>/
|
|
59
|
+
│ ├── page.html ← full HTML after JavaScript
|
|
60
|
+
│ ├── visible-text.txt ← paste-friendly copy extraction
|
|
61
|
+
│ ├── visible-text.json ← structured headings / body / buttons
|
|
62
|
+
│ ├── media.json ← img + video URLs
|
|
63
|
+
│ ├── meta.json ← title, description, lang
|
|
64
|
+
│ └── FOR_AI_REFERENCE.md
|
|
65
|
+
└── mirror/
|
|
66
|
+
└── <host>/
|
|
67
|
+
├── page.tsx ← Next.js: Motion + Phosphor + image/video patterns
|
|
68
|
+
├── layout.json
|
|
69
|
+
└── MIRROR_NOTES.md
|
|
53
70
|
```
|
|
54
71
|
|
|
55
72
|
---
|
|
@@ -57,10 +74,18 @@ output/<runId>/
|
|
|
57
74
|
## Hand the output to your AI
|
|
58
75
|
|
|
59
76
|
1. Run the command above so `output/<runId>/` exists.
|
|
60
|
-
2.
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
77
|
+
2. Attach **`reference/<host>/`** (`visible-text.txt`, `page.html`, `media.json`) so the model sees **exact copy and structure** from the crawl.
|
|
78
|
+
3. Pick the mirror folder: `output/<runId>/mirror/<host>/`.
|
|
79
|
+
4. Either:
|
|
80
|
+
- **Cursor:** `@`-attach `reference/<host>/`, `mirror/<host>/`, `FOR_AI.md`, and
|
|
81
|
+
`tokens.json`, then ask the agent to port copy from `visible-text.txt` into
|
|
82
|
+
`page.tsx` and wire media from `media.json`.
|
|
83
|
+
- **Claude Code:** copy both folders into your project, then ask the same.
|
|
84
|
+
5. The AI's authority order is **reference/visible-text.txt & page.html →
|
|
85
|
+
MIRROR_NOTES.md → page.tsx → tokens.json → tailwind.config.ts + globals.css**. It must:
|
|
86
|
+
- Keep the section tree, grid composition, density, Motion, and Phosphor usage in `page.tsx`.
|
|
87
|
+
- Map strings from `visible-text.txt` into the right `<TextSlot>` slots (or replace slots with plain JSX).
|
|
88
|
+
- Use `media.json` for image/video `src` / `poster` (respect licensing; prefer your own assets).
|
|
64
89
|
|
|
65
90
|
---
|
|
66
91
|
|
|
@@ -148,11 +173,11 @@ launchframe/
|
|
|
148
173
|
│ ├── capture/ # Playwright screenshot capture (lower level)
|
|
149
174
|
│ ├── analysis/ # Layout-tree extraction & section classifier
|
|
150
175
|
│ ├── patterns/ # Typed pattern schemas + atlas registry loader
|
|
151
|
-
│ ├── blocks/ #
|
|
152
|
-
│ └── evaluation/ # Coherence
|
|
153
|
-
├── pattern-atlas/ #
|
|
176
|
+
│ ├── blocks/ # Shadcn/ui blocks + TextSlot / MediaSlot primitives
|
|
177
|
+
│ └── evaluation/ # Coherence + responsiveness/a11y evaluator
|
|
178
|
+
├── pattern-atlas/ # Pattern catalog per category (block-composition mode)
|
|
154
179
|
├── prompts/ # Markdown prompts for AI agents
|
|
155
|
-
├── rules/ # Design / copy /
|
|
180
|
+
├── rules/ # Design / copy / a11y rules
|
|
156
181
|
├── registry/ # shadcn-compatible custom registry manifest
|
|
157
182
|
└── output/ # ← every `extract` run lands here
|
|
158
183
|
```
|
|
@@ -164,7 +189,7 @@ npm run studio # Next.js dashboard at localhost:3000
|
|
|
164
189
|
npm run capture # Lower-level Playwright capture pipeline
|
|
165
190
|
npm run analyze # Run section classifier on captured screenshots
|
|
166
191
|
npm run formalize # Validate the pattern-atlas/*.json files
|
|
167
|
-
npm run evaluate # Grade a generated page (coherence
|
|
192
|
+
npm run evaluate # Grade a generated page (coherence + a11y)
|
|
168
193
|
npm run typecheck # Project-wide TypeScript check
|
|
169
194
|
```
|
|
170
195
|
|
|
@@ -172,26 +197,30 @@ npm run typecheck # Project-wide TypeScript check
|
|
|
172
197
|
|
|
173
198
|
## What this is not
|
|
174
199
|
|
|
175
|
-
- **Not a
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
200
|
+
- **Not a verbatim site downloader.** The crawler builds a typed
|
|
201
|
+
`SiteLayout` model from the rendered DOM — section tree, geometry,
|
|
202
|
+
computed style tokens, content kinds — and emits code generated from
|
|
203
|
+
that model. The mirror page itself does not embed the source's HTML/CSS; only the separate `reference/<host>/` dump saves the rendered HTML to disk.
|
|
204
|
+
- **Not a content lift.** Heading text, body copy, logos, illustrations,
|
|
205
|
+
and product imagery become `<TextSlot>` / `<MediaSlot>` placeholders in
|
|
206
|
+
the mirror page. You fill them with your own copy and assets before
|
|
207
|
+
shipping.
|
|
179
208
|
- **Not a component library replacement.** It sits *on top* of
|
|
180
|
-
shadcn/ui and produces theme files
|
|
209
|
+
shadcn/ui and produces theme files plus slot-driven page templates.
|
|
181
210
|
|
|
182
211
|
---
|
|
183
212
|
|
|
184
|
-
##
|
|
185
|
-
|
|
186
|
-
Launchframe
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
213
|
+
## Responsible use in one paragraph
|
|
214
|
+
|
|
215
|
+
Launchframe is intended for layout research and design-system seeding
|
|
216
|
+
against pages you have permission to analyze (your own products, sites
|
|
217
|
+
where the operator has permission, or pages where structural analysis is
|
|
218
|
+
permitted by `robots.txt`). The crawler respects `robots.txt` by default
|
|
219
|
+
and rate-limits per domain. The output is generated code derived from a
|
|
220
|
+
normalized typed model and slot placeholders — not a verbatim copy of
|
|
221
|
+
the source's markup, copy, or assets. Operators are responsible for the
|
|
222
|
+
content they paste into those slots and for honoring third-party
|
|
223
|
+
trademarks, terms of service, and licenses.
|
|
195
224
|
|
|
196
225
|
---
|
|
197
226
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "launchframe",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.7",
|
|
4
4
|
"description": "Point Launchframe at SaaS sites you admire and get back a drop-in shadcn/ui design system (tokens, Tailwind theme, CSS variables, AI handoff) you can build your own UI on top of.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Evan Gruhlkey",
|
|
@@ -0,0 +1,521 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOM layout crawler.
|
|
3
|
+
*
|
|
4
|
+
* Runs inside the rendered page via Playwright's `page.evaluate`. Walks the
|
|
5
|
+
* DOM, identifies top-level sections, classifies each section's role and
|
|
6
|
+
* composition, and counts the content slots it contains. Returns a
|
|
7
|
+
* `SiteLayout` structural model the emitter rebuilds into a Next.js page.
|
|
8
|
+
*
|
|
9
|
+
* What this records:
|
|
10
|
+
* - Section tree (geometry, role, composition, density)
|
|
11
|
+
* - Slot inventory per section: how many headings / body paragraphs /
|
|
12
|
+
* buttons / images / icons / logos / code blocks etc. it contains
|
|
13
|
+
* - Per-section style tokens: background, foreground, padding
|
|
14
|
+
* - Page-level tokens: fonts, primary surface colors, container width
|
|
15
|
+
*
|
|
16
|
+
* What this does NOT record:
|
|
17
|
+
* - Heading or body text content (slots are counts, not strings).
|
|
18
|
+
* - Raw HTML, CSS, or class names from the source.
|
|
19
|
+
* - Brand assets (logos, illustrations, product screenshots).
|
|
20
|
+
*
|
|
21
|
+
* The structural model is what the mirror emitter uses to reconstruct the
|
|
22
|
+
* page's section grammar with `<TextSlot>` / `<MediaSlot>` placeholders.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import type { Page } from "playwright";
|
|
26
|
+
|
|
27
|
+
import type {
|
|
28
|
+
Composition,
|
|
29
|
+
SectionLayout,
|
|
30
|
+
SectionRole,
|
|
31
|
+
SiteLayout,
|
|
32
|
+
SiteTokens,
|
|
33
|
+
SlotCount,
|
|
34
|
+
SlotKind,
|
|
35
|
+
} from "./types.js";
|
|
36
|
+
|
|
37
|
+
/**
 * Crawls the already-rendered `page` and assembles its `SiteLayout`.
 *
 * The structural fields (`pageHeightPx`, `sections`, `tokens`) are computed
 * inside the browser by `crawlInPage`; `url`, `host`, `capturedAt`, and
 * `viewport` are attached here on the Node side.
 *
 * @param page     Playwright page that has already navigated to `url`.
 * @param url      Source URL; must be parseable by `new URL`.
 * @param viewport Viewport size recorded verbatim in the result.
 * @returns The combined layout model for the page.
 * @throws TypeError when `url` cannot be parsed.
 */
export async function crawlLayout(
  page: Page,
  url: string,
  viewport: { width: number; height: number },
): Promise<SiteLayout> {
  // Make sure an identity `__name` exists on the page's global object before
  // the crawler is serialized into it.
  // NOTE(review): presumably guards against bundler-injected `__name(...)`
  // helper calls inside the serialized function — confirm against the build.
  await page.evaluate(() => {
    const scope = globalThis as unknown as { __name?: (fn: unknown) => unknown };
    if (typeof scope.__name !== "undefined") return;
    scope.__name = (fn: unknown) => fn;
  });

  const { host } = new URL(url);
  const inPageResult = await page.evaluate(crawlInPage);
  const capturedAt = new Date().toISOString();

  return { url, host, capturedAt, viewport, ...inPageResult };
}
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Browser-context crawler. Dependency-free so Playwright can serialize it.
|
|
61
|
+
* Returns the layout-bearing fields of `SiteLayout` (url/host/capturedAt
|
|
62
|
+
* are added on the Node side).
|
|
63
|
+
*/
|
|
64
|
+
function crawlInPage(): Pick<
|
|
65
|
+
SiteLayout,
|
|
66
|
+
"pageHeightPx" | "sections" | "tokens"
|
|
67
|
+
> {
|
|
68
|
+
const VIEWPORT_W = window.innerWidth;
|
|
69
|
+
const PAGE_H = Math.max(
|
|
70
|
+
document.documentElement.scrollHeight,
|
|
71
|
+
document.body.scrollHeight,
|
|
72
|
+
);
|
|
73
|
+
|
|
74
|
+
/* ----- helpers ----- */
|
|
75
|
+
|
|
76
|
+
/**
 * Converts a comma-separated `rgb(...)` / `rgba(...)` color string to
 * `#rrggbb`.
 *
 * Returns `null` for empty input, the keyword `transparent`, strings that
 * contain no `rgb(`/`rgba(` call, colors whose alpha is below 0.05, and
 * colors whose red/green/blue parts do not parse as integers.
 */
function toHex(rgb: string): string | null {
  if (!rgb || rgb === "transparent") return null;

  const match = /rgba?\(([^)]+)\)/.exec(rgb);
  if (match === null) return null;

  const [red, green, blue, alphaText] = match[1]!.split(",").map((piece) => piece.trim());

  // A missing alpha component means fully opaque.
  const alpha = alphaText === undefined ? 1 : parseFloat(alphaText);
  if (alpha < 0.05) return null;

  // A missing channel parses to NaN and is rejected below.
  const channels = [red, green, blue].map((text) => parseInt(text ?? "", 10));
  if (channels.some((value) => Number.isNaN(value))) return null;

  return "#" + channels.map((value) => value.toString(16).padStart(2, "0")).join("");
}
|
|
90
|
+
|
|
91
|
+
/**
 * Reports whether `el` is rendered: not `visibility: hidden`, not
 * `display: none`, opacity of at least 0.05, and a bounding box with
 * positive width and height.
 */
function isVisible(el: HTMLElement): boolean {
  const { visibility, display, opacity } = getComputedStyle(el);
  const hiddenByCss =
    visibility === "hidden" || display === "none" || parseFloat(opacity) < 0.05;
  if (hiddenByCss) return false;

  // Only measure the box once the cheaper style checks have passed.
  const { width, height } = el.getBoundingClientRect();
  return width > 0 && height > 0;
}
|
|
98
|
+
|
|
99
|
+
/**
 * Sums the trimmed length of the text nodes that are direct children of
 * `el`. Text inside child elements is not counted.
 */
function directTextLength(el: Element): number {
  const TEXT_NODE = 3; // numeric value of Node.TEXT_NODE
  return Array.from(el.childNodes)
    .filter((node) => node.nodeType === TEXT_NODE)
    .reduce((sum, node) => sum + (node.nodeValue ?? "").trim().length, 0);
}
|
|
109
|
+
|
|
110
|
+
/**
 * Maps one element to the kind of content slot it represents, or `null`
 * when it contributes no slot.
 *
 * Checks run in this order: media and form tags, buttons, CTA-styled
 * anchors, then text. Text is classified only when the element has direct
 * text of its own, first by tag (`H1`–`H6`, `LI`), then by computed font
 * size and weight.
 *
 * Wrapper/child pairs describing a single visual item are counted once:
 *  - a `<picture>` that contains an `<img>` yields to the `<img>`, which
 *    carries the measured size used by the logo heuristic;
 *  - a `<code>` inside a `<pre>` yields to the `<pre>`.
 *
 * @param el Element to classify; callers are expected to have checked
 *           visibility already.
 * @returns The slot kind, or `null` if the element should not be counted.
 */
function classifySlotForElement(el: HTMLElement): SlotKind | null {
  const tag = el.tagName;
  const style = getComputedStyle(el);
  const fontSize = parseFloat(style.fontSize) || 16;
  const fontWeight = parseInt(style.fontWeight, 10) || 400;

  // Avoid double-counting <picture><img/></picture>: the inner <img> is
  // visited separately and is the one that gets counted.
  if (tag === "PICTURE" && el.querySelector("img") !== null) return null;

  if (tag === "IMG" || tag === "PICTURE") {
    // Heuristic: short images that are not extremely wide are treated as
    // logos; everything else is a generic image.
    const r = el.getBoundingClientRect();
    const ratio = r.width > 0 && r.height > 0 ? r.width / r.height : 0;
    if (r.height > 0 && r.height < 56 && ratio < 6) return "logo-mono";
    return "image";
  }
  if (tag === "SVG" || tag === "svg") {
    // Small inline SVGs are icons; larger ones are counted as images.
    const r = el.getBoundingClientRect();
    if (r.width < 32 && r.height < 32) return "icon";
    return "image";
  }
  if (tag === "VIDEO") return "video";

  // Avoid double-counting <pre><code>…</code></pre>: the <pre> is counted.
  if (tag === "CODE" && el.closest("pre") !== null) return null;
  if (tag === "PRE" || tag === "CODE") return "code";
  if (tag === "INPUT" || tag === "TEXTAREA" || tag === "SELECT") return "input";

  if (tag === "BUTTON" || (tag === "A" && el.getAttribute("role") === "button")) {
    // Primary vs secondary: a button is primary when it has its own opaque
    // background that differs from the page (body) background.
    const bg = toHex(style.backgroundColor);
    if (bg && bg !== toHex(getComputedStyle(document.body).backgroundColor)) {
      return "button-primary";
    }
    return "button-secondary";
  }

  if (tag === "A" && directTextLength(el) > 0) {
    // Anchor styled like a CTA: horizontally padded AND backgrounded or bordered.
    const padX = parseFloat(style.paddingLeft) + parseFloat(style.paddingRight);
    const hasBackdrop =
      toHex(style.backgroundColor) !== null ||
      parseFloat(style.borderTopWidth) > 0;
    if (padX > 16 && hasBackdrop) {
      return toHex(style.backgroundColor) ? "button-primary" : "button-secondary";
    }
  }

  // Everything below is a text slot and requires direct text of its own.
  if (directTextLength(el) === 0) return null;

  if (/^H[1-6]$/.test(tag)) {
    const level = parseInt(tag.slice(1), 10);
    if (level === 1) return "heading-1";
    if (level === 2) return "heading-2";
    return "heading-3"; // H3–H6 collapse into one bucket
  }

  if (tag === "LI") return "bullet";

  // Non-heading tags that are visually styled as headings.
  if (fontSize >= 36 && fontWeight >= 600) return "heading-1";
  if (fontSize >= 24 && fontWeight >= 500) return "heading-2";
  if (fontSize >= 18 && fontWeight >= 500) return "heading-3";

  if (fontSize <= 12 && /uppercase/i.test(style.textTransform)) return "eyebrow";
  if (fontSize <= 13 && fontWeight >= 600) return "badge";

  if (tag === "P" || tag === "SPAN" || tag === "DIV") return "body";

  return null;
}
|
|
174
|
+
|
|
175
|
+
/**
 * Builds the slot inventory for a section: how many visible descendants of
 * `root` fall into each slot kind.
 *
 * Text nested inside an element already counted as a heading (for example a
 * styled `<span>` inside an `<h1>`) is not counted again; the outer heading
 * is the slot. Real `H1`–`H6` tags are always counted, and media found
 * inside a heading still counts.
 *
 * `body` is capped at 12 and `bullet` at 24, since those counts are coarse.
 *
 * @param root Section element whose descendants are inspected.
 * @returns One `{ kind, count }` entry per kind found, sorted by kind name.
 */
function countSlots(root: HTMLElement): SlotCount[] {
  const HEADING_KINDS = new Set<SlotKind>(["heading-1", "heading-2", "heading-3"]);
  const TEXT_KINDS = new Set<SlotKind>([
    "heading-1",
    "heading-2",
    "heading-3",
    "body",
    "eyebrow",
    "badge",
  ]);

  const buckets = new Map<SlotKind, number>();
  // Elements already counted as headings. querySelectorAll yields document
  // order, so an ancestor is always recorded before its descendants.
  const countedHeadings: HTMLElement[] = [];

  for (const el of Array.from(root.querySelectorAll<HTMLElement>("*"))) {
    if (!isVisible(el)) continue;
    const kind = classifySlotForElement(el);
    if (!kind) continue;

    const isHeadingTag = /^H[1-6]$/.test(el.tagName);
    if (
      !isHeadingTag &&
      TEXT_KINDS.has(kind) &&
      countedHeadings.some((heading) => heading.contains(el))
    ) {
      // Styled fragment of an outer heading: the heading already counted.
      continue;
    }
    if (HEADING_KINDS.has(kind)) countedHeadings.push(el);

    buckets.set(kind, (buckets.get(kind) ?? 0) + 1);
  }

  // Body/bullet counts are coarse (nested text containers still over-count),
  // so cap them.
  if ((buckets.get("body") ?? 0) > 12) buckets.set("body", 12);
  if ((buckets.get("bullet") ?? 0) > 24) buckets.set("bullet", 24);

  const out: SlotCount[] = [];
  for (const [kind, count] of buckets) out.push({ kind, count });
  return out.sort((a, b) => a.kind.localeCompare(b.kind));
}
|
|
192
|
+
|
|
193
|
+
/* ----- section discovery ----- */
|
|
194
|
+
|
|
195
|
+
/**
 * Discovers the page's top-level sections, ordered top to bottom.
 *
 * A candidate is a <header>, <main>, <section>, <article>, <footer> or
 * <nav>, or a direct child of <body> / <main>. A candidate qualifies when
 * it is visible, at least 70% of the viewport wide, and at least 80 px tall.
 *
 * Qualifying candidates form a tree by DOM containment. A candidate that
 * contains two or more sections is a wrapper (for example <main> or an
 * app-root <div>) and is replaced by those sections. Otherwise the outermost
 * candidate wins and anything nested in it is dropped. This stops <main> or
 * a single root wrapper from swallowing the whole page, and guarantees that
 * no returned element contains another.
 *
 * NOTE(review): content that sits directly inside an unwrapped wrapper, but
 * outside every qualifying child, is not part of any returned section.
 *
 * @returns Non-overlapping section elements sorted by document y position.
 */
function findSections(): HTMLElement[] {
  const candidates = new Set<HTMLElement>();
  for (const tag of ["HEADER", "MAIN", "SECTION", "ARTICLE", "FOOTER", "NAV"]) {
    for (const el of Array.from(document.getElementsByTagName(tag))) {
      candidates.add(el as HTMLElement);
    }
  }
  // Direct children of <body> and <main> are fallback candidates.
  for (const el of Array.from(document.body.children) as HTMLElement[]) {
    candidates.add(el);
  }
  const main = document.querySelector("main");
  if (main) {
    for (const el of Array.from(main.children) as HTMLElement[]) {
      candidates.add(el);
    }
  }

  // Keep only candidates that are visible and large enough.
  const qualifying = new Set<HTMLElement>();
  for (const el of candidates) {
    if (!isVisible(el)) continue;
    const r = el.getBoundingClientRect();
    if (r.width < VIEWPORT_W * 0.7 || r.height < 80) continue;
    qualifying.add(el);
  }

  // Link each qualifying element to its nearest qualifying ancestor.
  const nested = new Map<HTMLElement, HTMLElement[]>();
  const roots: HTMLElement[] = [];
  for (const el of qualifying) {
    let ancestor = el.parentElement;
    while (ancestor !== null && !qualifying.has(ancestor)) {
      ancestor = ancestor.parentElement;
    }
    if (ancestor === null) {
      roots.push(el);
    } else {
      nested.set(ancestor, [...(nested.get(ancestor) ?? []), el]);
    }
  }

  // Resolve an element to the sections it stands for: itself, unless it
  // wraps at least two sections, in which case those replace it.
  const unwrap = (el: HTMLElement): HTMLElement[] => {
    const inner = (nested.get(el) ?? []).flatMap(unwrap);
    return inner.length >= 2 ? inner : [el];
  };

  const documentTop = (el: HTMLElement): number =>
    el.getBoundingClientRect().top + window.scrollY;

  return roots.flatMap(unwrap).sort((a, b) => documentTop(a) - documentTop(b));
}
|
|
245
|
+
|
|
246
|
+
/* ----- per-section classification ----- */
|
|
247
|
+
|
|
248
|
+
/**
 * Assigns a semantic role to a section from its tag, its position in the
 * section list, its height, and its slot inventory. Rules are tried in
 * order and the first match wins; `"other"` is the fallback.
 *
 * @param el              Section element.
 * @param indexFromTop    0-based index counted from the first section.
 * @param indexFromBottom 0-based index counted from the last section.
 * @param slots           Slot inventory for the section.
 */
function classifyRole(
  el: HTMLElement,
  indexFromTop: number,
  indexFromBottom: number,
  slots: SlotCount[],
): SectionRole {
  const { width, height } = el.getBoundingClientRect();
  const tagName = el.tagName;

  // First entry per kind wins, matching a linear search over `slots`.
  const tally = new Map<SlotKind, number>();
  for (const slot of slots) {
    if (!tally.has(slot.kind)) tally.set(slot.kind, slot.count);
  }
  const countOf = (kind: SlotKind): number => tally.get(kind) ?? 0;

  // Structural roles decided by tag and position.
  if (tagName === "NAV") return "nav";
  const isShortLastSection = indexFromBottom === 0 && height < 600;
  if (tagName === "FOOTER" || isShortLastSection) return "footer";
  if (tagName === "HEADER" && indexFromTop === 0 && height < 140) return "nav";

  const h1 = countOf("heading-1");
  const subHeadings = countOf("heading-2") + countOf("heading-3");
  const headingTotal = h1 + subHeadings;
  const buttonTotal = countOf("button-primary") + countOf("button-secondary");
  const imageTotal = countOf("image");
  const logoTotal = countOf("logo-mono");
  const bulletTotal = countOf("bullet");

  // One of the first two sections with an H1 and at least one button.
  if (indexFromTop <= 1 && h1 >= 1 && buttonTotal >= 1) return "hero";

  // Wide, short band with several logo-sized images.
  const isShortBand = height < width * 0.25;
  if (logoTotal >= 4 && headingTotal <= 1 && isShortBand) return "proof-logos";

  // Many bullets plus several buttons in a tall section.
  if (bulletTotal >= 6 && buttonTotal >= 2 && height > 360) return "pricing";

  // Three or more sub-headings and little imagery.
  if (subHeadings >= 3 && imageTotal <= 2) return "feature-grid";

  // A heading with media and few bullets in a reasonably tall section.
  if (headingTotal >= 1 && imageTotal >= 1 && bulletTotal <= 4 && height > 320) {
    return "feature-deep-dive";
  }

  // Short section with a button, among the last three sections.
  const nearBottom = indexFromBottom <= 2;
  if (headingTotal <= 2 && buttonTotal >= 1 && height < 480 && nearBottom) {
    return "conversion";
  }

  // Body text only: no images, bullets, or buttons.
  const textOnly = imageTotal === 0 && bulletTotal === 0 && buttonTotal === 0;
  if (countOf("body") >= 3 && textOnly) return "proof-quotes";

  return "other";
}
|
|
299
|
+
|
|
300
|
+
/**
 * Classifies how a section arranges its content.
 *
 * Scans up to the first 400 descendants of `el` and records the largest
 * column count found on any visible CSS grid (number of resolved column
 * tracks) or row-direction flex container (number of visible children, at
 * most 12). A row-flex container with four or more children that are all
 * smaller than 200×80 px marks the section as a logo row, which takes
 * precedence over the column count.
 *
 * Grid tracks are counted from the resolved `grid-template-columns` value.
 * That value may include bracketed line names (`[full-start] 200px …`),
 * which are stripped before counting so they are not mistaken for columns.
 *
 * @param el Section element.
 * @returns The composition label for the section.
 */
function classifyComposition(el: HTMLElement): Composition {
  const candidates = el.querySelectorAll<HTMLElement>("*");
  let bestCols = 1;
  let bestKind: "grid" | "flex" | "none" = "none";
  let logoRowCols = 0;
  for (const c of Array.from(candidates).slice(0, 400)) {
    if (!isVisible(c)) continue;
    const s = getComputedStyle(c);
    if (s.display === "grid") {
      const cols = s.gridTemplateColumns
        .replace(/\[[^\]]*\]/g, " ") // drop [line-name] tokens
        .split(/\s+/)
        .filter((track) => track.length > 0).length;
      if (cols > bestCols) {
        bestCols = cols;
        bestKind = "grid";
      }
    } else if (s.display === "flex" && s.flexDirection.startsWith("row")) {
      const kids = Array.from(c.children) as HTMLElement[];
      const visibleKids = kids.filter(isVisible);
      if (visibleKids.length > bestCols && visibleKids.length <= 12) {
        bestCols = visibleKids.length;
        bestKind = "flex";
      }
      if (visibleKids.length >= 4) {
        // A row of uniformly small items reads as a logo strip.
        const allSmall = visibleKids.every((k) => {
          const kr = k.getBoundingClientRect();
          return kr.height < 80 && kr.width < 200;
        });
        if (allSmall) logoRowCols = Math.max(logoRowCols, visibleKids.length);
      }
    }
  }

  if (logoRowCols >= 4) return "logo-row";
  if (bestKind === "none" || bestCols <= 1) return "single-column";
  if (bestCols === 2) return "split-2";
  if (bestCols === 3) return "grid-3";
  if (bestCols === 4) return "grid-4";
  if (bestCols >= 5) return "list";
  // Defensive fallback: the integer column counts above cover every case.
  return "unknown";
}
|
|
342
|
+
|
|
343
|
+
/**
 * Rates how tightly a section is packed: total slot count per 100 px of
 * section height, with the height divisor floored at 1. Below 0.8 is
 * `"thin"`, above 2.4 is `"dense"`, anything else is `"balanced"`.
 */
function classifyDensity(el: HTMLElement, slots: SlotCount[]): "thin" | "balanced" | "dense" {
  let slotTotal = 0;
  for (const { count } of slots) slotTotal += count;

  const heightPx = el.getBoundingClientRect().height;
  const slotsPerHundredPx = slotTotal / Math.max(1, heightPx / 100);

  if (slotsPerHundredPx > 2.4) return "dense";
  return slotsPerHundredPx < 0.8 ? "thin" : "balanced";
}
|
|
351
|
+
|
|
352
|
+
/**
 * Reads a section's own computed style tokens: background and text color as
 * hex (or `null` when `toHex` rejects them) and vertical padding in whole
 * pixels. A padding that rounds to 0 is reported as `null`.
 */
function extractSectionStyles(el: HTMLElement): SectionLayout["styles"] {
  const computed = getComputedStyle(el);

  // Rounded pixel value; 0 and unparseable input both become null.
  const roundedPxOrNull = (cssLength: string): number | null => {
    const px = Math.round(parseFloat(cssLength) || 0);
    return px || null;
  };

  return {
    backgroundHex: toHex(computed.backgroundColor),
    foregroundHex: toHex(computed.color),
    paddingTopPx: roundedPxOrNull(computed.paddingTop),
    paddingBottomPx: roundedPxOrNull(computed.paddingBottom),
  };
}
|
|
361
|
+
|
|
362
|
+
/* ----- page-level token extraction ----- */
|
|
363
|
+
|
|
364
|
+
/**
 * Derives page-level design tokens from computed styles.
 *
 *  - Fonts: the first family in the body's font stack (fallback
 *    `system-ui`) and in the first `h1, h2, h3` (fallback: the body family).
 *  - Background / foreground: the body's colors (fallbacks `#ffffff` and
 *    `#0a0a0a`).
 *  - Primary: the most frequent non-page background among visible
 *    `button, a, [role='button']` elements (fallback: foreground).
 *  - Muted: the most frequent non-page background among all visible
 *    elements, excluding the primary color (fallback: background).
 *  - Border: the most frequent top border color (fallback `#e5e7eb`).
 *  - Radius: the most frequent rounded top-left radius strictly between 0
 *    and 64 px (fallback 8).
 *  - Container: the width of the largest-area visible
 *    `main, section, header, footer, div` that is 720–1600 px wide and at
 *    least 240 px tall, or `null` when none qualifies.
 *
 * All per-element statistics are gathered in a single pass over `body *`,
 * so visibility and computed style are resolved once per element. Ties are
 * won by the value seen first in document order.
 */
function extractPageTokens(): SiteTokens {
  /** First family of a CSS font stack with surrounding quotes removed. */
  const firstFamily = (stack: string): string =>
    stack.split(",")[0]!.trim().replace(/^["']|["']$/g, "");

  /** Increments the tally for `key`. */
  const bump = <K>(tally: Map<K, number>, key: K): void => {
    tally.set(key, (tally.get(key) ?? 0) + 1);
  };

  /** Key with the highest count (first seen wins ties), or `fallback`. */
  const mostFrequent = <K>(tally: Map<K, number>, fallback: K, skip?: K): K => {
    let winner = fallback;
    let winnerCount = 0;
    for (const [key, count] of tally) {
      if (skip !== undefined && key === skip) continue;
      if (count > winnerCount) {
        winner = key;
        winnerCount = count;
      }
    }
    return winner;
  };

  const bodyStyle = getComputedStyle(document.body);
  const bodyFontFamily = firstFamily(bodyStyle.fontFamily) || "system-ui";

  let headingFontFamily = bodyFontFamily;
  const h = document.querySelector("h1, h2, h3");
  if (h) {
    headingFontFamily = firstFamily(getComputedStyle(h).fontFamily) || bodyFontFamily;
  }

  const bg = toHex(bodyStyle.backgroundColor) ?? "#ffffff";
  const fg = toHex(bodyStyle.color) ?? "#0a0a0a";

  const buttonBgCounts = new Map<string, number>();
  const surfaceCounts = new Map<string, number>();
  const borderCounts = new Map<string, number>();
  const radiusCounts = new Map<number, number>();
  let containerPx: number | null = null;
  let containerArea = 0;

  for (const el of Array.from(document.querySelectorAll<HTMLElement>("body *"))) {
    if (!isVisible(el)) continue;
    const style = getComputedStyle(el);

    // Surface colors; button-like elements also feed the primary tally.
    const surface = toHex(style.backgroundColor);
    if (surface && surface !== bg) {
      bump(surfaceCounts, surface);
      if (el.matches("button, a, [role='button']")) bump(buttonBgCounts, surface);
    }

    // Border colors, only for elements that actually draw a top border.
    // Written as !(w <= 0) so an unparseable width is not filtered out.
    if (!(parseFloat(style.borderTopWidth) <= 0)) {
      const borderHex = toHex(style.borderTopColor);
      if (borderHex) bump(borderCounts, borderHex);
    }

    // Corner radii, ignoring square corners and pill/circle-sized values.
    const cornerPx = parseFloat(style.borderTopLeftRadius);
    if (cornerPx > 0 && cornerPx < 64) bump(radiusCounts, Math.round(cornerPx));

    // Container width: largest-area layout block within the size window.
    if (el.matches("main, section, header, footer, div")) {
      const r = el.getBoundingClientRect();
      if (r.width >= 720 && r.width <= 1600 && r.height >= 240) {
        const area = r.width * r.height;
        if (area > containerArea) {
          containerArea = area;
          containerPx = Math.round(r.width);
        }
      }
    }
  }

  const primary = mostFrequent(buttonBgCounts, fg);
  const muted = mostFrequent(surfaceCounts, bg, primary);
  const border = mostFrequent(borderCounts, "#e5e7eb");
  const radius = mostFrequent(radiusCounts, 8);

  return {
    bodyFontFamily,
    headingFontFamily,
    backgroundHex: bg,
    foregroundHex: fg,
    primaryHex: primary,
    mutedHex: muted,
    borderHex: border,
    radiusPx: radius,
    containerPx,
  };
}
|
|
480
|
+
|
|
481
|
+
/* ----- main pass ----- */
|
|
482
|
+
|
|
483
|
+
// Build one SectionLayout per discovered section, in top-to-bottom order.
const sectionEls = findSections();
const lastIndex = sectionEls.length - 1;

// Clamp a ratio into the [0, 1] range used by the normalized bbox.
const clampUnit = (ratio: number): number => Math.max(0, Math.min(1, ratio));

const sections: SectionLayout[] = sectionEls.map((el, index) => {
  const rect = el.getBoundingClientRect();
  const documentTop = rect.top + window.scrollY;

  const slots = countSlots(el);
  const composition = classifyComposition(el);
  const density = classifyDensity(el, slots);
  const role = classifyRole(el, index, lastIndex - index, slots);
  const styles = extractSectionStyles(el);

  const notes: string[] = [];
  if (slots.length === 0) {
    notes.push("No content slots detected; rendering an empty wrapper.");
  }
  if (composition === "unknown") {
    notes.push("Composition was ambiguous; fell back to single-column.");
  }

  return {
    id: `s${index + 1}`,
    role,
    composition,
    density,
    // [x, y, width, height]: x/width relative to the viewport width,
    // y/height relative to the full page height.
    bbox: [
      clampUnit(rect.left / VIEWPORT_W),
      clampUnit(documentTop / PAGE_H),
      clampUnit(rect.width / VIEWPORT_W),
      clampUnit(rect.height / PAGE_H),
    ],
    slots,
    styles,
    notes,
  };
});

return {
  pageHeightPx: PAGE_H,
  sections,
  tokens: extractPageTokens(),
};
|
|
521
|
+
}
|