launchframe 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -34
- package/package.json +1 -1
- package/packages/extract/dom-crawler.ts +521 -0
- package/packages/extract/emit.ts +2 -2
- package/packages/extract/extract.ts +85 -16
- package/packages/extract/mirror-emit.ts +617 -0
- package/packages/extract/reference-dump.ts +230 -0
- package/packages/extract/types.ts +118 -1
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Verbatim reference dump for AI / human review.
|
|
3
|
+
*
|
|
4
|
+
* Writes everything under `output/<runId>/reference/<host>/`:
|
|
5
|
+
* - page.html — full document HTML after JS render (`page.content()`)
|
|
6
|
+
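* - visible-text.json — structured visible copy (headings, buttons, key blocks)
* - visible-text.txt — the same copy flattened to plain text, followed by a tab-separated link list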
|
|
7
|
+
* - media.json — img / video / source URLs and attributes
|
|
8
|
+
* - meta.json — title, description, canonical, lang
|
|
9
|
+
* - FOR_AI_REFERENCE.md — how to use these files with an AI
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { mkdirSync, writeFileSync } from "node:fs";
|
|
13
|
+
import { join } from "node:path";
|
|
14
|
+
|
|
15
|
+
import type { Page } from "playwright";
|
|
16
|
+
|
|
17
|
+
/**
 * Everything captured from one rendered page.
 *
 * `emitPageReference` serializes the whole object to `visible-text.json` and
 * re-uses subsets of it for `media.json` and `meta.json`.
 */
export interface ReferenceSnapshot {
  /** URL the caller passed to `emitPageReference` for this capture. */
  url: string;
  /** Capture time as produced by `Date.prototype.toISOString()`. */
  capturedAt: string;
  /** `document.title`, or `null` when it is empty. */
  title: string | null;
  /** Trimmed `content` of `<meta name="description">`, or `null` when missing/empty. */
  description: string | null;
  /** `href` of `<link rel="canonical">` exactly as written in the page, or `null`. */
  canonical: string | null;
  /** `lang` attribute of the root element, or `null` when missing/empty. */
  lang: string | null;
  /** Flattened visible strings, intended to be in DOM order (useful for grep / LLM context). */
  visibleTextBlocks: Array<{
    /** Tag name as reported by the DOM (e.g. `H1`, `P`); `BUTTON` for anything matched as a button. */
    tag: string;
    /** Value of the element's `role` attribute, or `null` when absent. */
    role: string | null;
    /** The element's `innerText`, trimmed, with whitespace runs collapsed to single spaces. */
    text: string;
  }>;
  /** Anchors with non-empty text; `href` is absolute when it could be resolved, `text` is cut at 500 chars. */
  links: Array<{ href: string; text: string }>;
  /** Media references found in the DOM, discriminated by `type`. */
  media: Array<
    | { type: "img"; src: string; alt: string; width: number | null; height: number | null }
    | { type: "video"; src: string | null; poster: string | null }
    | { type: "source"; src: string; kind: string | null }
  >;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Dumps a rendered page into `refDir` as reference files for AI / human review.
 *
 * Files are written in this order: `page.html`, `visible-text.json`,
 * `visible-text.txt`, `media.json`, `meta.json`, `FOR_AI_REFERENCE.md`.
 *
 * @param page Playwright page to read from. This function never navigates, so the
 *   caller is expected to have loaded and rendered the target already.
 * @param url URL recorded in the emitted files (it is not used to load anything here).
 * @param refDir Output directory; created recursively when missing.
 * @returns Paths of every file written, in write order.
 */
export async function emitPageReference(page: Page, url: string, refDir: string): Promise<string[]> {
  mkdirSync(refDir, { recursive: true });
  const capturedAt = new Date().toISOString();
  const written: string[] = [];

  // Single place that writes a file under `refDir` and records its path.
  const writeRef = (fileName: string, content: string): void => {
    const filePath = join(refDir, fileName);
    writeFileSync(filePath, content, "utf8");
    written.push(filePath);
  };
  const toJson = (value: unknown): string => JSON.stringify(value, null, 2) + "\n";

  // `collectSnapshot` is serialized into the page. NOTE(review): this shim presumably
  // exists because some TS runners/bundlers (e.g. esbuild with keepNames) wrap functions
  // in a `__name(...)` helper that is not defined in the browser — confirm against the
  // build setup before removing it.
  await page.evaluate(() => {
    const g = globalThis as unknown as { __name?: (fn: unknown) => unknown };
    if (typeof g.__name === "undefined") g.__name = (fn: unknown) => fn;
  });

  writeRef("page.html", await page.content());

  const snapshot: Omit<ReferenceSnapshot, "url" | "capturedAt"> = await page.evaluate(collectSnapshot);
  const full: ReferenceSnapshot = {
    url,
    capturedAt,
    ...snapshot,
  };
  writeRef("visible-text.json", toJson(full));

  // Plain-text flattening: title, one block per line, then "text<TAB>href" per link.
  const txtLines = [
    `# ${full.title ?? "Untitled"}`,
    "",
    ...full.visibleTextBlocks.map((b) => b.text),
    "",
    "--- links ---",
    ...full.links.map((l) => `${l.text}\t${l.href}`),
  ];
  // Trailing newline added so the file ends like the JSON siblings do.
  writeRef("visible-text.txt", txtLines.join("\n") + "\n");

  writeRef("media.json", toJson({ url, capturedAt, media: full.media }));

  writeRef(
    "meta.json",
    toJson({
      url,
      capturedAt,
      title: full.title,
      description: full.description,
      canonical: full.canonical,
      lang: full.lang,
    }),
  );

  writeRef("FOR_AI_REFERENCE.md", emitAiReadme(url, refDir));

  return written;
}
|
|
94
|
+
|
|
95
|
+
/**
 * Builds the Markdown body of `FOR_AI_REFERENCE.md`.
 *
 * The table lists every data file `emitPageReference` writes next to it.
 *
 * @param url URL of the captured page; shown in the heading.
 * @param refDir Directory the capture was written to; shown with forward slashes
 *   so Windows paths read the same as POSIX ones.
 * @returns Markdown text ending in a newline.
 */
function emitAiReadme(url: string, refDir: string): string {
  const base = refDir.replace(/\\/g, "/");
  return [
    `# Reference capture — ${url}`,
    "",
    "Use these files when rebuilding the page in React / Next.js:",
    "",
    "| File | Purpose |",
    "| ---- | ------- |",
    "| `page.html` | Full serialized DOM after JavaScript ran in Chromium. Layout, copy, and structure match what was crawled (not necessarily valid static HTML elsewhere). |",
    "| `visible-text.json` | Exact visible strings: headings, buttons, links, and block text — good for **verbatim copy** when rewriting `page.tsx`. |",
    "| `visible-text.txt` | The same copy flattened to plain text: title, one block per line, then a tab-separated link list — handy for grep. |",
    "| `media.json` | Every image / video / source URL from the DOM. Host your own assets or swap for placeholders; do not hotlink without permission. |",
    "| `meta.json` | Title, description, canonical URL, lang. |",
    "",
    `Sibling folder \`../mirror/<host>/\` has a typed \`page.tsx\` with Framer Motion, Phosphor icons, and slots — wire copy from \`visible-text.json\` and media from \`media.json\` into that file.`,
    "",
    `Captured path: \`${base}\``,
    "",
  ].join("\n");
}
|
|
115
|
+
|
|
116
|
+
/**
 * Collects page metadata, visible copy, links, and media from the live DOM.
 *
 * Runs in browser context: it is handed to `page.evaluate`, so it must stay
 * self-contained — every helper lives inside the function body and nothing
 * from module scope may be referenced at runtime.
 *
 * @returns All snapshot fields except `url` / `capturedAt`, which the caller adds.
 */
function collectSnapshot(): Omit<ReferenceSnapshot, "url" | "capturedAt"> {
  /** Trims and collapses whitespace runs to a single space. */
  const collapse = (raw: string | null | undefined): string => (raw ?? "").trim().replace(/\s+/g, " ");

  /** Resolves against the document base; returns the raw value when it cannot be parsed. */
  const toAbsolute = (raw: string): string => {
    try {
      return new URL(raw, document.baseURI).href;
    } catch {
      return raw;
    }
  };

  // `display` is not inherited, so an element inside a `display: none` subtree still
  // reports its own display value, and `innerText` falls back to `textContent` for
  // elements that are not rendered. Walk the ancestors (memoized) to keep hidden
  // copy out of the "visible" text.
  const displayNoneCache = new Map<Element, boolean>();
  const isDisplayNone = (node: Element | null): boolean => {
    if (node === null) return false;
    const known = displayNoneCache.get(node);
    if (known !== undefined) return known;
    const hidden = getComputedStyle(node).display === "none" || isDisplayNone(node.parentElement);
    displayNoneCache.set(node, hidden);
    return hidden;
  };
  // `visibility` is inherited, so checking the element itself is enough.
  const isVisible = (el: HTMLElement): boolean =>
    getComputedStyle(el).visibility !== "hidden" && !isDisplayNone(el);

  // --- meta -----------------------------------------------------------------
  const title = document.title || null;
  const description =
    document.querySelector('meta[name="description"]')?.getAttribute("content")?.trim() || null;
  const canonical = document.querySelector('link[rel="canonical"]')?.getAttribute("href") || null;
  const lang = document.documentElement.getAttribute("lang") || null;

  // --- visible text ---------------------------------------------------------
  const HEADINGS = "h1, h2, h3, h4, h5, h6";
  const COPY = "p, li, blockquote, figcaption, label";
  const BUTTONS = "button, [role='button']";
  const CHROME = "span, div";

  const visibleTextBlocks: ReferenceSnapshot["visibleTextBlocks"] = [];

  // One query over the union selector yields elements in document order, which is
  // the order `visibleTextBlocks` is documented to have. An element matching more
  // than one category (e.g. `<li role="button">`) is still emitted once per category.
  for (const el of Array.from(document.querySelectorAll([HEADINGS, COPY, BUTTONS, CHROME].join(", ")))) {
    if (!(el instanceof HTMLElement) || !isVisible(el)) continue;

    const text = collapse(el.innerText);
    // Global bounds: drop empty/1-char noise and oversized containers.
    if (text.length < 2 || text.length > 4000) continue;

    const role = el.getAttribute("role");

    if (el.matches(HEADINGS) || el.matches(COPY)) {
      visibleTextBlocks.push({ tag: el.tagName, role, text });
    }
    if (el.matches(BUTTONS)) {
      visibleTextBlocks.push({ tag: "BUTTON", role, text });
    }
    if (el.matches(CHROME)) {
      // Generic containers are only interesting as small UI chrome (badges, pills, eyebrows).
      if (text.length > 240) continue;
      const explicitlyMarked =
        role === "heading" || el.classList.contains("badge") || el.hasAttribute("data-slot");
      if (!explicitlyMarked) {
        // Unmarked: fall back to the ALL-CAPS-ish eyebrow-label heuristic.
        if (text.length < 8) continue;
        if (!/^[A-Z0-9\s&.,:]+$/.test(text)) continue;
      }
      visibleTextBlocks.push({ tag: el.tagName, role, text });
    }
  }

  // --- links ----------------------------------------------------------------
  const links: ReferenceSnapshot["links"] = [];
  for (const a of Array.from(document.querySelectorAll("a[href]"))) {
    const href = a.getAttribute("href") ?? "";
    if (!href || href.startsWith("javascript:")) continue;
    const text = collapse(a.textContent);
    if (!text) continue;
    links.push({ href: toAbsolute(href), text: text.slice(0, 500) });
  }

  // --- media ----------------------------------------------------------------
  // All URLs are emitted absolute so `media.json` is usable without the page's base URL.
  const media: ReferenceSnapshot["media"] = [];
  for (const img of Array.from(document.querySelectorAll("img"))) {
    const src = img.currentSrc || img.src;
    if (!src) continue;
    media.push({
      type: "img",
      src,
      alt: img.alt || "",
      // 0 means the image has not loaded (or has no intrinsic size): report null.
      width: img.naturalWidth || null,
      height: img.naturalHeight || null,
    });
  }
  for (const video of Array.from(document.querySelectorAll("video"))) {
    const poster = video.getAttribute("poster");
    // Prefer what the browser actually selected; otherwise the first declared <source>.
    const declared = video.querySelector("source[src]")?.getAttribute("src");
    const src = video.currentSrc || (declared ? toAbsolute(declared) : null);
    media.push({ type: "video", src, poster: poster ? toAbsolute(poster) : null });
  }
  for (const source of Array.from(document.querySelectorAll("source[src]"))) {
    const src = source.getAttribute("src");
    if (!src) continue;
    media.push({
      type: "source",
      src: toAbsolute(src),
      kind: source.getAttribute("type"),
    });
  }

  return {
    title,
    description,
    canonical,
    lang,
    visibleTextBlocks,
    links,
    media,
  };
}
|
|
@@ -170,6 +170,120 @@ export interface DesignSystem {
|
|
|
170
170
|
notes: string[];
|
|
171
171
|
}
|
|
172
172
|
|
|
173
|
+
/* -------------------------------------------------------------------------- */
|
|
174
|
+
/* Layout mirror (per-site) */
|
|
175
|
+
/* -------------------------------------------------------------------------- */
|
|
176
|
+
|
|
177
|
+
/**
 * Structural roles a top-level section can play. Inferred heuristically from
 * geometry, content kinds, and document position — not from the source's
 * class names.
 */
export type SectionRole =
  | "nav"
  | "hero"
  | "feature-grid"
  | "feature-deep-dive"
  | "proof-logos"
  | "proof-quotes"
  | "pricing"
  | "conversion"
  | "footer"
  | "other";
|
|
193
|
+
|
|
194
|
+
/**
 * Coarse composition shape used by the emitter to pick a wrapper layout.
 */
export type Composition =
  | "single-column"
  | "split-2"
  | "grid-2"
  | "grid-3"
  | "grid-4"
  | "list"
  | "logo-row"
  | "unknown";
|
|
206
|
+
|
|
207
|
+
/**
 * The kinds of content a section can hold. The emitter renders an
 * appropriately-styled placeholder (`<TextSlot>` / `<MediaSlot>`) per slot
 * so the user fills in their own copy and assets.
 */
export type SlotKind =
  | "heading-1"
  | "heading-2"
  | "heading-3"
  | "eyebrow"
  | "body"
  | "bullet"
  | "button-primary"
  | "button-secondary"
  | "image"
  | "logo-mono"
  | "icon"
  | "code"
  | "badge"
  | "input"
  | "video";
|
|
228
|
+
|
|
229
|
+
/**
 * One content kind paired with how many times it occurs; `SectionLayout.slots`
 * holds a list of these as the aggregated per-section counts.
 */
export interface SlotCount {
  /** Content kind being counted. */
  kind: SlotKind;
  /** Number of occurrences of `kind`. */
  count: number;
}
|
|
233
|
+
|
|
234
|
+
/**
 * A single top-level section in document order. Geometry is normalized to
 * [0, 1] over the rendered page so the emitter can compare relative weight.
 */
export interface SectionLayout {
  /** Stable id assigned in document order: s1, s2, ... */
  id: string;
  /** Heuristically inferred structural role of the section. */
  role: SectionRole;
  /** Coarse layout shape the emitter uses to pick a wrapper. */
  composition: Composition;
  /** Relative content density. NOTE(review): the thresholds are not defined in this file — confirm against the crawler. */
  density: "thin" | "balanced" | "dense";
  /** Bounding box [x, y, w, h] normalized to [0, 1] over the rendered page. */
  bbox: [number, number, number, number];
  /** Aggregated content-kind counts inside the section. */
  slots: SlotCount[];
  /** Per-section style hints; the emitter applies these as inline overrides. */
  styles: {
    backgroundHex: string | null;
    foregroundHex: string | null;
    paddingTopPx: number | null;
    paddingBottomPx: number | null;
  };
  /** Free-form notes the emitter surfaces in `MIRROR_NOTES.md`. */
  notes: string[];
}
|
|
258
|
+
|
|
259
|
+
/**
 * Page-level computed-style tokens. These complement the synthesized
 * `DesignSystem` so a mirror page can apply a site-specific theme without
 * the system having to reseed the cross-corpus palette.
 *
 * NOTE(review): how each token is sampled from the page is not visible in this
 * file; the field names are the only contract stated here — confirm details
 * against the crawler before relying on them.
 */
export interface SiteTokens {
  bodyFontFamily: string;
  headingFontFamily: string;
  backgroundHex: string;
  foregroundHex: string;
  primaryHex: string;
  mutedHex: string;
  borderHex: string;
  radiusPx: number;
  /** Nullable, unlike the other tokens — presumably `null` when no container width was detected; verify. */
  containerPx: number | null;
}
|
|
275
|
+
|
|
276
|
+
/**
 * Layout description of one captured site: where and when it was captured,
 * the viewport and page height, its top-level sections, and page-level tokens.
 */
export interface SiteLayout {
  url: string;
  host: string;
  capturedAt: string;
  /** Viewport size — presumably CSS pixels at capture time; verify against the crawler. */
  viewport: { width: number; height: number };
  /** Full rendered page height in CSS pixels. */
  pageHeightPx: number;
  /** Top-level sections; each `SectionLayout` is described as being in document order. */
  sections: SectionLayout[];
  /** Page-level computed-style tokens for this site. */
  tokens: SiteTokens;
}
|
|
286
|
+
|
|
173
287
|
/* -------------------------------------------------------------------------- */
|
|
174
288
|
/* Run summary */
|
|
175
289
|
/* -------------------------------------------------------------------------- */
|
|
@@ -179,7 +293,10 @@ export interface SiteCapture {
|
|
|
179
293
|
host: string;
|
|
180
294
|
capturedAt: string;
|
|
181
295
|
screenshotPath: string;
|
|
182
|
-
|
|
296
|
+
/** Verbatim HTML + copy + media listing for AI reference. */
|
|
297
|
+
referenceDir?: string;
|
|
298
|
+
/** Path to the per-site mirror page directory, if emission succeeded. */
|
|
299
|
+
mirrorDir?: string;
|
|
183
300
|
status: "ok" | "skipped" | "failed";
|
|
184
301
|
reason?: string;
|
|
185
302
|
}
|