launchframe 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +144 -183
- package/bin/launchframe.mjs +261 -28
- package/package.json +52 -67
- package/template/.aider.conf.yml +3 -0
- package/template/.amazonq/cli-agents/clone-website.json +9 -0
- package/template/.amazonq/rules/project.md +161 -0
- package/template/.augment/commands/clone-website.md +518 -0
- package/template/.claude/skills/clone-website/SKILL.md +517 -0
- package/template/.clinerules +161 -0
- package/template/.codex/skills/clone-website/SKILL.md +517 -0
- package/template/.continue/commands/clone-website.md +519 -0
- package/template/.continue/rules/project.md +165 -0
- package/template/.cursor/commands/clone-website.md +514 -0
- package/template/.cursor/rules/project.mdc +20 -0
- package/template/.dockerignore +60 -0
- package/template/.gemini/commands/clone-website.toml +520 -0
- package/template/.gitattributes +9 -0
- package/template/.github/ISSUE_TEMPLATE/bug_report.yml +86 -0
- package/template/.github/ISSUE_TEMPLATE/config.yml +5 -0
- package/template/.github/ISSUE_TEMPLATE/feature_request.yml +50 -0
- package/template/.github/PULL_REQUEST_TEMPLATE.md +19 -0
- package/template/.github/copilot-instructions.md +161 -0
- package/template/.github/copilot-setup-steps.yml +3 -0
- package/template/.github/skills/clone-website/SKILL.md +517 -0
- package/template/.github/workflows/ci.yml +36 -0
- package/template/.nvmrc +1 -0
- package/template/.opencode/commands/clone-website.md +517 -0
- package/template/.windsurf/workflows/clone-website.md +514 -0
- package/template/.windsurfrules +2 -0
- package/template/AGENTS.md +79 -0
- package/template/CHANGELOG.md +80 -0
- package/template/CLAUDE.md +1 -0
- package/template/Dockerfile +114 -0
- package/template/Dockerfile.dev +15 -0
- package/template/GEMINI.md +1 -0
- package/template/README.md +118 -0
- package/template/START_HERE.md +15 -0
- package/template/components.json +25 -0
- package/template/docker-compose.yml +53 -0
- package/template/docs/design-references/.gitkeep +0 -0
- package/template/docs/design-references/comparison.png +0 -0
- package/template/docs/research/INSPECTION_GUIDE.md +80 -0
- package/template/eslint.config.mjs +18 -0
- package/template/next.config.ts +8 -0
- package/template/package.json +59 -0
- package/template/postcss.config.mjs +7 -0
- package/template/public/images/.gitkeep +0 -0
- package/template/public/seo/.gitkeep +0 -0
- package/template/public/videos/.gitkeep +0 -0
- package/template/scripts/.gitkeep +0 -0
- package/template/scripts/sync-agent-rules.sh +88 -0
- package/template/scripts/sync-skills.mjs +111 -0
- package/template/src/app/favicon.ico +0 -0
- package/template/src/app/globals.css +130 -0
- package/template/src/app/layout.tsx +33 -0
- package/template/src/app/page.tsx +9 -0
- package/template/src/components/ui/button.tsx +60 -0
- package/template/src/hooks/.gitkeep +0 -0
- package/template/src/lib/utils.ts +6 -0
- package/template/src/types/.gitkeep +0 -0
- package/template/tsconfig.json +34 -0
- package/packages/extract/automated-clone-pass.ts +0 -353
- package/packages/extract/browser-extract.ts +0 -237
- package/packages/extract/cloner-research-emit.ts +0 -270
- package/packages/extract/dom-crawler.ts +0 -521
- package/packages/extract/emit.ts +0 -553
- package/packages/extract/extract.ts +0 -548
- package/packages/extract/host-slug.ts +0 -5
- package/packages/extract/mirror-emit.ts +0 -620
- package/packages/extract/package.json +0 -13
- package/packages/extract/reference-dump.ts +0 -431
- package/packages/extract/synthesize.ts +0 -551
- package/packages/extract/types.ts +0 -316
|
@@ -1,521 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* DOM layout crawler.
|
|
3
|
-
*
|
|
4
|
-
* Runs inside the rendered page via Playwright's `page.evaluate`. Walks the
|
|
5
|
-
* DOM, identifies top-level sections, classifies each section's role and
|
|
6
|
-
* composition, and counts the content slots it contains. Returns a
|
|
7
|
-
* `SiteLayout` structural model the emitter rebuilds into a Next.js page.
|
|
8
|
-
*
|
|
9
|
-
* What this records:
|
|
10
|
-
* - Section tree (geometry, role, composition, density)
|
|
11
|
-
* - Slot inventory per section: how many headings / body paragraphs /
|
|
12
|
-
* buttons / images / icons / logos / code blocks etc. it contains
|
|
13
|
-
* - Per-section style tokens: background, foreground, padding
|
|
14
|
-
* - Page-level tokens: fonts, primary surface colors, container width
|
|
15
|
-
*
|
|
16
|
-
* What this does NOT record:
|
|
17
|
-
* - Heading or body text content (slots are counts, not strings).
|
|
18
|
-
* - Raw HTML, CSS, or class names from the source.
|
|
19
|
-
* - Brand assets (logos, illustrations, product screenshots).
|
|
20
|
-
*
|
|
21
|
-
* The structural model is what the mirror emitter uses to reconstruct the
|
|
22
|
-
* page's section grammar with `<TextSlot>` / `<MediaSlot>` placeholders.
|
|
23
|
-
*/
|
|
24
|
-
|
|
25
|
-
import type { Page } from "playwright";
|
|
26
|
-
|
|
27
|
-
import type {
|
|
28
|
-
Composition,
|
|
29
|
-
SectionLayout,
|
|
30
|
-
SectionRole,
|
|
31
|
-
SiteLayout,
|
|
32
|
-
SiteTokens,
|
|
33
|
-
SlotCount,
|
|
34
|
-
SlotKind,
|
|
35
|
-
} from "./types.js";
|
|
36
|
-
|
|
37
|
-
/**
 * Crawl the page's rendered DOM and return the full `SiteLayout`.
 *
 * The layout-bearing fields (`pageHeightPx`, `sections`, `tokens`) are
 * computed inside the browser by `crawlInPage`; `url`, `host`, `capturedAt`
 * and `viewport` are attached here on the Node side.
 *
 * @param page - Playwright page whose current document is crawled.
 * @param url - URL recorded in the result; also parsed for its host.
 * @param viewport - Viewport size recorded verbatim in the result.
 * @returns The assembled structural model of the page.
 * @throws TypeError if `url` cannot be parsed by `new URL`.
 */
export async function crawlLayout(
  page: Page,
  url: string,
  viewport: { width: number; height: number },
): Promise<SiteLayout> {
  // Make sure a global `__name` exists in the page; when it is missing an
  // identity function is installed. NOTE(review): presumably this guards
  // against bundler-injected `__name(...)` calls inside the serialized
  // crawler function — confirm against the build setup.
  await page.evaluate(() => {
    const scope = globalThis as unknown as { __name?: (fn: unknown) => unknown };
    if (typeof scope.__name !== "undefined") return;
    scope.__name = (fn: unknown) => fn;
  });

  const { host } = new URL(url);
  const inPage = await page.evaluate(crawlInPage);
  const capturedAt = new Date().toISOString();

  return { url, host, capturedAt, viewport, ...inPage };
}
|
|
58
|
-
|
|
59
|
-
/**
|
|
60
|
-
* Browser-context crawler. Dependency-free so Playwright can serialize it.
|
|
61
|
-
* Returns the layout-bearing fields of `SiteLayout` (url/host/capturedAt
|
|
62
|
-
* are added on the Node side).
|
|
63
|
-
*/
|
|
64
|
-
function crawlInPage(): Pick<
|
|
65
|
-
SiteLayout,
|
|
66
|
-
"pageHeightPx" | "sections" | "tokens"
|
|
67
|
-
> {
|
|
68
|
-
const VIEWPORT_W = window.innerWidth;
|
|
69
|
-
const PAGE_H = Math.max(
|
|
70
|
-
document.documentElement.scrollHeight,
|
|
71
|
-
document.body.scrollHeight,
|
|
72
|
-
);
|
|
73
|
-
|
|
74
|
-
/* ----- helpers ----- */
|
|
75
|
-
|
|
76
|
-
/**
 * Convert a CSS `rgb()` / `rgba()` color string to a `#rrggbb` hex string.
 *
 * Accepts both the legacy comma-separated form (`rgba(1, 2, 3, 0.5)`) and
 * the space/slash form (`rgb(1 2 3 / 50%)`). Channels may be plain numbers
 * or percentages and are clamped to 0–255 so the result is always exactly
 * six hex digits.
 *
 * @param rgb - Color string, typically a computed-style value.
 * @returns The hex color, or `null` when the input is empty, `"transparent"`,
 *   not an rgb()/rgba() value, has an unparsable channel, or has an alpha
 *   below 0.05 (treated as effectively transparent).
 */
function toHex(rgb: string): string | null {
  if (!rgb || rgb === "transparent") return null;
  const m = rgb.match(/rgba?\(([^)]+)\)/);
  if (!m) return null;

  // Split on commas, whitespace and the "/" alpha separator alike.
  const parts = m[1]!.split(/[\s,\/]+/).filter((s) => s.length > 0);

  const channel = (raw: string | undefined): number => {
    if (raw === undefined) return Number.NaN;
    const value = raw.endsWith("%")
      ? Math.round((parseFloat(raw) * 255) / 100)
      : Math.trunc(parseFloat(raw)); // truncation matches the previous parseInt behavior
    if (Number.isNaN(value)) return Number.NaN;
    return Math.min(255, Math.max(0, value));
  };

  const r = channel(parts[0]);
  const g = channel(parts[1]);
  const b = channel(parts[2]);

  const rawAlpha = parts[3];
  let a = 1;
  if (rawAlpha !== undefined) {
    a = rawAlpha.endsWith("%") ? parseFloat(rawAlpha) / 100 : parseFloat(rawAlpha);
  }
  if (a < 0.05) return null;
  if ([r, g, b].some((n) => Number.isNaN(n))) return null;

  const h = (n: number) => n.toString(16).padStart(2, "0");
  return `#${h(r)}${h(g)}${h(b)}`;
}
|
|
90
|
-
|
|
91
|
-
/**
 * Report whether an element is rendered visibly.
 *
 * An element counts as visible when its computed style is neither
 * `display: none` nor `visibility: hidden`, its opacity is at least 0.05,
 * and its bounding box has a positive width and height.
 */
function isVisible(el: HTMLElement): boolean {
  const computed = getComputedStyle(el);

  const hiddenByCss = computed.display === "none" || computed.visibility === "hidden";
  if (hiddenByCss) return false;

  const nearlyTransparent = parseFloat(computed.opacity) < 0.05;
  if (nearlyTransparent) return false;

  const { width, height } = el.getBoundingClientRect();
  return width > 0 && height > 0;
}
|
|
98
|
-
|
|
99
|
-
/**
 * Sum the trimmed lengths of the element's own text nodes.
 *
 * Only direct children with `nodeType === 3` (text nodes) contribute; text
 * nested inside child elements is not counted.
 */
function directTextLength(el: Element): number {
  return Array.from(el.childNodes).reduce((sum, node) => {
    if (node.nodeType !== 3) return sum;
    return sum + (node.nodeValue ?? "").trim().length;
  }, 0);
}
|
|
109
|
-
|
|
110
|
-
/**
 * Map a single element to the kind of content slot it represents.
 *
 * Media, form and button elements are classified by tag; text-bearing
 * elements are classified by heading tag, list-item tag, or computed font
 * size / weight. Elements without direct text that are not media, form
 * controls or buttons yield `null`.
 *
 * Wrapper/inner pairs are counted once: `countSlots` visits every
 * descendant, so a `<picture>` that contains an `<img>` defers to the
 * `<img>`, and a `<code>` nested in a `<pre>` defers to the `<pre>`.
 *
 * @param el - Element to classify.
 * @returns The slot kind, or `null` when the element is not a slot.
 */
function classifySlotForElement(el: HTMLElement): SlotKind | null {
  const tag = el.tagName;
  const style = getComputedStyle(el);
  const fontSize = parseFloat(style.fontSize) || 16;
  const fontWeight = parseInt(style.fontWeight, 10) || 400;

  // The inner <img> is classified on its own; skip the wrapper so one
  // picture is not reported as two media slots.
  if (tag === "PICTURE" && el.querySelector("img") !== null) return null;

  if (tag === "IMG" || tag === "PICTURE") {
    // Heuristic: short images with a moderate aspect ratio are treated as
    // logos; everything else is a regular image.
    const r = el.getBoundingClientRect();
    const ratio = r.width > 0 && r.height > 0 ? r.width / r.height : 0;
    if (r.height > 0 && r.height < 56 && ratio < 6) return "logo-mono";
    return "image";
  }
  if (tag === "SVG" || tag === "svg") {
    // Small vector graphics are icons; larger ones count as images.
    const r = el.getBoundingClientRect();
    if (r.width < 32 && r.height < 32) return "icon";
    return "image";
  }
  if (tag === "VIDEO") return "video";

  // <pre><code>…</code></pre> is one code block: the <pre> is counted.
  if (tag === "CODE" && el.closest("pre") !== null) return null;
  if (tag === "PRE" || tag === "CODE") return "code";
  if (tag === "INPUT" || tag === "TEXTAREA" || tag === "SELECT") return "input";

  if (tag === "BUTTON" || (tag === "A" && el.getAttribute("role") === "button")) {
    // Primary when the button has an opaque background that differs from
    // the page (body) background; secondary otherwise.
    const bg = toHex(style.backgroundColor);
    if (bg && bg !== toHex(getComputedStyle(document.body).backgroundColor)) {
      return "button-primary";
    }
    return "button-secondary";
  }

  if (tag === "A" && directTextLength(el) > 0) {
    // Anchor styled like a CTA: more than 16px horizontal padding plus a
    // background or a top border.
    const padX = parseFloat(style.paddingLeft) + parseFloat(style.paddingRight);
    const hasBackdrop =
      toHex(style.backgroundColor) !== null ||
      parseFloat(style.borderTopWidth) > 0;
    if (padX > 16 && hasBackdrop) {
      return toHex(style.backgroundColor) ? "button-primary" : "button-secondary";
    }
  }

  // Everything below is a text slot and needs text of its own.
  if (directTextLength(el) === 0) return null;

  if (/^H[1-6]$/.test(tag)) {
    // H3–H6 all collapse into "heading-3".
    const level = parseInt(tag.slice(1), 10);
    if (level === 1) return "heading-1";
    if (level === 2) return "heading-2";
    return "heading-3";
  }

  if (tag === "LI") return "bullet";

  // Non-heading tags that are styled like headings.
  if (fontSize >= 36 && fontWeight >= 600) return "heading-1";
  if (fontSize >= 24 && fontWeight >= 500) return "heading-2";
  if (fontSize >= 18 && fontWeight >= 500) return "heading-3";

  if (fontSize <= 12 && /uppercase/i.test(style.textTransform)) return "eyebrow";
  if (fontSize <= 13 && fontWeight >= 600) return "badge";

  if (tag === "P" || tag === "SPAN" || tag === "DIV") return "body";

  return null;
}
|
|
174
|
-
|
|
175
|
-
/**
 * Tally the content slots found among the visible descendants of `root`.
 *
 * Every visible descendant is classified with `classifySlotForElement`;
 * elements that classify to `null` are ignored. The result holds one entry
 * per slot kind that occurred, sorted by kind name.
 *
 * @param root - Section element whose descendants are inspected.
 * @returns Slot kinds with their counts.
 */
function countSlots(root: HTMLElement): SlotCount[] {
  const tally = new Map<SlotKind, number>();

  root.querySelectorAll<HTMLElement>("*").forEach((node) => {
    if (!isVisible(node)) return;
    const kind = classifySlotForElement(node);
    if (!kind) return;
    tally.set(kind, (tally.get(kind) ?? 0) + 1);
  });

  // Nested text wrappers can each register as a slot, so these counts are
  // coarse; cap the two kinds most prone to over-counting.
  const caps: Array<[SlotKind, number]> = [
    ["body", 12],
    ["bullet", 24],
  ];
  for (const [kind, max] of caps) {
    if ((tally.get(kind) ?? 0) > max) tally.set(kind, max);
  }

  return Array.from(tally, ([kind, count]) => ({ kind, count })).sort((a, b) =>
    a.kind.localeCompare(b.kind),
  );
}
|
|
192
|
-
|
|
193
|
-
/* ----- section discovery ----- */
|
|
194
|
-
|
|
195
|
-
/**
 * Find the page's top-level sections.
 *
 * Candidates are every <header>, <main>, <section>, <article>, <footer>
 * and <nav>, plus the direct children of <body> and of the first <main>.
 * A candidate is accepted when it:
 *   - is visible,
 *   - is at least 70% of the viewport wide,
 *   - and is at least 80 px tall.
 *
 * Accepted candidates nested inside another accepted candidate are dropped,
 * so the returned sections never overlap. The containment check runs
 * against every accepted candidate regardless of sort position, because an
 * ancestor can sort after its descendant (e.g. both share the same top
 * offset).
 *
 * @returns Outermost accepted candidates, ordered by top offset.
 */
function findSections(): HTMLElement[] {
  const candidates = new Set<HTMLElement>();
  const tagSet = ["HEADER", "MAIN", "SECTION", "ARTICLE", "FOOTER", "NAV"];
  for (const tag of tagSet) {
    for (const el of Array.from(document.getElementsByTagName(tag))) {
      candidates.add(el as HTMLElement);
    }
  }

  // Fallback candidates: direct children of <body> and <main>.
  for (const el of Array.from(document.body.children) as HTMLElement[]) {
    candidates.add(el);
  }
  const main = document.querySelector("main");
  if (main) {
    for (const el of Array.from(main.children) as HTMLElement[]) {
      candidates.add(el);
    }
  }

  const accepted: HTMLElement[] = [];
  for (const el of candidates) {
    if (!isVisible(el)) continue;
    const r = el.getBoundingClientRect();
    if (r.width < VIEWPORT_W * 0.7) continue;
    if (r.height < 80) continue;
    accepted.push(el);
  }

  // Order by document y position.
  accepted.sort((a, b) => {
    const ay = a.getBoundingClientRect().top + window.scrollY;
    const by = b.getBoundingClientRect().top + window.scrollY;
    return ay - by;
  });

  // Keep only the outermost candidates.
  return accepted.filter(
    (el) => !accepted.some((other) => other !== el && other.contains(el)),
  );
}
|
|
245
|
-
|
|
246
|
-
/* ----- per-section classification ----- */
|
|
247
|
-
|
|
248
|
-
/**
 * Guess the role a section plays on the page.
 *
 * Rules are evaluated in a fixed order and the first match wins: nav,
 * footer, header-as-nav, hero, logo strip, pricing, feature grid, deep
 * dive, conversion band, quotes; anything else is "other".
 *
 * @param el - The section element.
 * @param indexFromTop - Zero-based position counted from the first section.
 * @param indexFromBottom - Zero-based position counted from the last section.
 * @param slots - Slot counts for this section.
 * @returns The inferred role.
 */
function classifyRole(
  el: HTMLElement,
  indexFromTop: number,
  indexFromBottom: number,
  slots: SlotCount[],
): SectionRole {
  const { width, height } = el.getBoundingClientRect();
  const tagName = el.tagName;

  // Count for one slot kind; the first matching entry is used, 0 if absent.
  const tally = (wanted: SlotKind): number => {
    for (const slot of slots) {
      if (slot.kind === wanted) return slot.count;
    }
    return 0;
  };

  if (tagName === "NAV") return "nav";

  // A <footer>, or the last section when it is shorter than 600 px.
  const isShortLastSection = indexFromBottom === 0 && height < 600;
  if (tagName === "FOOTER" || isShortLastSection) return "footer";

  // A short <header> at the very top acts as the navigation bar.
  if (tagName === "HEADER" && indexFromTop === 0 && height < 140) return "nav";

  const h1 = tally("heading-1");
  const h2 = tally("heading-2");
  const h3 = tally("heading-3");
  const headingTotal = h1 + h2 + h3;
  const buttonTotal = tally("button-primary") + tally("button-secondary");
  const imageTotal = tally("image");
  const logoTotal = tally("logo-mono");
  const bulletTotal = tally("bullet");

  // One of the first two sections with an H1 and a CTA.
  if (indexFromTop <= 1 && h1 >= 1 && buttonTotal >= 1) return "hero";

  // Wide, short band holding several logo-sized images.
  if (logoTotal >= 4 && headingTotal <= 1 && height < width * 0.25) {
    return "proof-logos";
  }

  // Many bullets plus multiple buttons in a tall section.
  if (bulletTotal >= 6 && buttonTotal >= 2 && height > 360) return "pricing";

  // Three or more sub-headings with few images.
  if (h2 + h3 >= 3 && imageTotal <= 2) return "feature-grid";

  // Heading with media and few bullets in a tall section.
  if (headingTotal >= 1 && imageTotal >= 1 && bulletTotal <= 4 && height > 320) {
    return "feature-deep-dive";
  }

  // Short band with a button within the last three sections.
  if (headingTotal <= 2 && buttonTotal >= 1 && height < 480 && indexFromBottom <= 2) {
    return "conversion";
  }

  // Body text only: no media, bullets or buttons.
  if (tally("body") >= 3 && imageTotal === 0 && bulletTotal === 0 && buttonTotal === 0) {
    return "proof-quotes";
  }

  return "other";
}
|
|
299
|
-
|
|
300
|
-
/**
 * Infer the column composition of a section.
 *
 * Inspects up to the first 400 descendants and records the largest column
 * count seen on a visible grid container (tracks in the computed
 * `grid-template-columns`) or a visible row-direction flex container
 * (number of visible children, at most 12). A row-direction flex container
 * with four or more visible children that are all under 80 px tall and
 * 200 px wide marks the section as a logo row, which takes precedence.
 *
 * @param el - The section element.
 * @returns The composition label.
 */
function classifyComposition(el: HTMLElement): Composition {
  const candidates = el.querySelectorAll<HTMLElement>("*");
  let bestCols = 1;
  let bestKind: "grid" | "flex" | "none" = "none";
  let logoRowCols = 0;

  for (const c of Array.from(candidates).slice(0, 400)) {
    if (!isVisible(c)) continue;
    const s = getComputedStyle(c);

    if (s.display === "grid") {
      // The resolved track list can carry bracketed line names
      // ("[start] 200px [end]"); strip them so only tracks are counted.
      const cols = s.gridTemplateColumns
        .replace(/\[[^\]]*\]/g, " ")
        .split(/\s+/)
        .filter((x) => x.length > 0).length;
      if (cols > bestCols) {
        bestCols = cols;
        bestKind = "grid";
      }
    } else if (s.display === "flex" && s.flexDirection.startsWith("row")) {
      const kids = Array.from(c.children) as HTMLElement[];
      const visibleKids = kids.filter(isVisible);
      if (visibleKids.length > bestCols && visibleKids.length <= 12) {
        bestCols = visibleKids.length;
        bestKind = "flex";
      }
      if (visibleKids.length >= 4) {
        const allSmall = visibleKids.every((k) => {
          const kr = k.getBoundingClientRect();
          return kr.height < 80 && kr.width < 200;
        });
        if (allSmall) logoRowCols = Math.max(logoRowCols, visibleKids.length);
      }
    }
  }

  if (logoRowCols >= 4) return "logo-row";
  if (bestKind === "none" || bestCols <= 1) return "single-column";
  if (bestCols === 2) return "split-2";
  if (bestCols === 3) return "grid-3";
  if (bestCols === 4) return "grid-4";
  if (bestCols >= 5) return "list";
  // Defensive fallback; the branches above cover every integer column count.
  return "unknown";
}
|
|
342
|
-
|
|
343
|
-
/**
 * Rate how tightly a section is packed with content slots.
 *
 * Density is the total slot count divided by the section height in units
 * of 100 px, where the divisor is never less than 1. Below 0.8 the section
 * is "thin", above 2.4 it is "dense", otherwise "balanced".
 */
function classifyDensity(el: HTMLElement, slots: SlotCount[]): "thin" | "balanced" | "dense" {
  let slotTotal = 0;
  for (const { count } of slots) slotTotal += count;

  const hundredPxRows = Math.max(1, el.getBoundingClientRect().height / 100);
  const perRow = slotTotal / hundredPxRows;

  if (perRow > 2.4) return "dense";
  return perRow < 0.8 ? "thin" : "balanced";
}
|
|
351
|
-
|
|
352
|
-
/**
 * Read the per-section style tokens from the element's computed style.
 *
 * Colors are converted with `toHex`. Vertical paddings are rounded to
 * whole pixels; a padding that is missing, unparsable or rounds to zero is
 * reported as `null`.
 */
function extractSectionStyles(el: HTMLElement): SectionLayout["styles"] {
  const computed = getComputedStyle(el);

  const pxOrNull = (raw: string): number | null => {
    const rounded = Math.round(parseFloat(raw) || 0);
    return rounded === 0 ? null : rounded;
  };

  const backgroundHex = toHex(computed.backgroundColor);
  const foregroundHex = toHex(computed.color);
  const paddingTopPx = pxOrNull(computed.paddingTop);
  const paddingBottomPx = pxOrNull(computed.paddingBottom);

  return { backgroundHex, foregroundHex, paddingTopPx, paddingBottomPx };
}
|
|
361
|
-
|
|
362
|
-
/* ----- page-level token extraction ----- */
|
|
363
|
-
|
|
364
|
-
/**
 * Derive page-level design tokens from computed styles.
 *
 * - Fonts: first family of the body's `font-family`, and of the first
 *   h1/h2/h3 in the document (falling back to the body font).
 * - Background / foreground: body colors, defaulting to `#ffffff` and
 *   `#0a0a0a`.
 * - Primary: most frequent background among visible buttons, links and
 *   `[role='button']` elements that differs from the page background;
 *   defaults to the foreground color.
 * - Muted: most frequent non-page background among all visible elements,
 *   excluding the primary color; defaults to the page background.
 * - Border: most frequent top-border color among visible elements with a
 *   top border; defaults to `#e5e7eb`.
 * - Radius: most frequent rounded top-left radius strictly between 0 and
 *   64 px; defaults to 8.
 * - Container: width of the largest-area visible layout block that is
 *   720–1600 px wide and at least 240 px tall, or `null` when none exists.
 *
 * Ties are broken in favor of the value encountered first in document
 * order.
 */
function extractPageTokens(): SiteTokens {
  /** First family of a `font-family` list with surrounding quotes removed. */
  const firstFamily = (fontFamily: string, fallback: string): string =>
    fontFamily.split(",")[0]!.trim().replace(/^["']|["']$/g, "") || fallback;

  /** Increment the tally for `key`. */
  const bump = <K>(counts: Map<K, number>, key: K): void => {
    counts.set(key, (counts.get(key) ?? 0) + 1);
  };

  /** Key with the highest count; first one wins ties; `skip` is ignored. */
  const mostFrequent = <K>(counts: Map<K, number>, fallback: K, skip?: K): K => {
    let best = fallback;
    let bestCount = 0;
    for (const [key, count] of counts) {
      if (skip !== undefined && key === skip) continue;
      if (count > bestCount) {
        best = key;
        bestCount = count;
      }
    }
    return best;
  };

  const bodyStyle = getComputedStyle(document.body);
  const bodyFontFamily = firstFamily(bodyStyle.fontFamily, "system-ui");

  let headingFontFamily = bodyFontFamily;
  const h = document.querySelector("h1, h2, h3");
  if (h) {
    headingFontFamily = firstFamily(getComputedStyle(h).fontFamily, bodyFontFamily);
  }

  const bg = toHex(bodyStyle.backgroundColor) ?? "#ffffff";
  const fg = toHex(bodyStyle.color) ?? "#0a0a0a";

  // Primary: backgrounds of button-like elements.
  const buttonBgCounts = new Map<string, number>();
  for (const b of Array.from(document.querySelectorAll<HTMLElement>("button, a, [role='button']"))) {
    if (!isVisible(b)) continue;
    const sb = toHex(getComputedStyle(b).backgroundColor);
    if (!sb || sb === bg) continue;
    bump(buttonBgCounts, sb);
  }
  const primary = mostFrequent(buttonBgCounts, fg);

  // Surface, border and radius tallies share one pass over the document so
  // visibility and computed style are resolved once per element.
  const surfaceCounts = new Map<string, number>();
  const borderCounts = new Map<string, number>();
  const radiusCounts = new Map<number, number>();
  for (const el of Array.from(document.querySelectorAll<HTMLElement>("body *"))) {
    if (!isVisible(el)) continue;
    const s = getComputedStyle(el);

    const surface = toHex(s.backgroundColor);
    if (surface && surface !== bg) bump(surfaceCounts, surface);

    if (parseFloat(s.borderTopWidth) > 0) {
      const borderHex = toHex(s.borderTopColor);
      if (borderHex) bump(borderCounts, borderHex);
    }

    const r = parseFloat(s.borderTopLeftRadius);
    if (r > 0 && r < 64) bump(radiusCounts, Math.round(r));
  }

  const muted = mostFrequent(surfaceCounts, bg, primary);
  const border = mostFrequent(borderCounts, "#e5e7eb");
  const radius = mostFrequent(radiusCounts, 8);

  // Container: largest-area layout block within the accepted size range.
  let containerPx: number | null = null;
  let containerArea = 0;
  for (const el of Array.from(
    document.querySelectorAll<HTMLElement>("main, section, header, footer, div"),
  )) {
    if (!isVisible(el)) continue;
    const r = el.getBoundingClientRect();
    if (r.width < 720 || r.width > 1600) continue;
    if (r.height < 240) continue;
    const area = r.width * r.height;
    if (area > containerArea) {
      containerArea = area;
      containerPx = Math.round(r.width);
    }
  }

  return {
    bodyFontFamily,
    headingFontFamily,
    backgroundHex: bg,
    foregroundHex: fg,
    primaryHex: primary,
    mutedHex: muted,
    borderHex: border,
    radiusPx: radius,
    containerPx,
  };
}
|
|
480
|
-
|
|
481
|
-
/* ----- main pass ----- */
|
|
482
|
-
|
|
483
|
-
// Build one SectionLayout per discovered section, in discovery order.
const sectionEls = findSections();
const lastIndex = sectionEls.length - 1;

// Clamp a ratio into the [0, 1] range.
const unit = (value: number): number => Math.max(0, Math.min(1, value));

const sections: SectionLayout[] = sectionEls.map((el, i): SectionLayout => {
  const rect = el.getBoundingClientRect();
  const docTop = rect.top + window.scrollY;

  const slots = countSlots(el);
  const composition = classifyComposition(el);
  const density = classifyDensity(el, slots);
  const role = classifyRole(el, i, lastIndex - i, slots);
  const styles = extractSectionStyles(el);

  const notes: string[] = [];
  if (slots.length === 0) {
    notes.push("No content slots detected; rendering an empty wrapper.");
  }
  if (composition === "unknown") {
    notes.push("Composition was ambiguous; fell back to single-column.");
  }

  return {
    id: `s${i + 1}`,
    role,
    composition,
    density,
    // [left, top, width, height]; horizontal values are fractions of the
    // viewport width, vertical values fractions of the page height.
    bbox: [
      unit(rect.left / VIEWPORT_W),
      unit(docTop / PAGE_H),
      unit(rect.width / VIEWPORT_W),
      unit(rect.height / PAGE_H),
    ],
    slots,
    styles,
    notes,
  };
});

const tokens = extractPageTokens();
return { pageHeightPx: PAGE_H, sections, tokens };
|
|
521
|
-
}
|