launchframe 0.1.5 → 0.1.7
This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/README.md +63 -34
- package/package.json +1 -1
- package/packages/extract/dom-crawler.ts +521 -0
- package/packages/extract/emit.ts +2 -2
- package/packages/extract/extract.ts +85 -16
- package/packages/extract/mirror-emit.ts +617 -0
- package/packages/extract/reference-dump.ts +230 -0
- package/packages/extract/types.ts +118 -1
package/packages/extract/emit.ts
CHANGED
|
@@ -9,8 +9,8 @@
|
|
|
9
9
|
* globals.css — shadcn-compatible CSS variables (light + dark)
|
|
10
10
|
* theme-preview.tsx — a self-contained React component that renders
|
|
11
11
|
* every token so you can eyeball the system
|
|
12
|
-
* REPORT.md — what was extracted, from where,
|
|
13
|
-
*
|
|
12
|
+
* REPORT.md — what was extracted, from where, and how the
|
|
13
|
+
* output is meant to be used
|
|
14
14
|
*/
|
|
15
15
|
|
|
16
16
|
import { mkdirSync, writeFileSync } from "node:fs";
|
|
@@ -4,16 +4,23 @@
|
|
|
4
4
|
* npm run extract -- https://site-a.com https://site-b.com https://site-c.com
|
|
5
5
|
*
|
|
6
6
|
* For each URL: open in Chromium, screenshot, harvest computed design
|
|
7
|
-
* tokens via `browser-extract.ts
|
|
8
|
-
*
|
|
7
|
+
* tokens via `browser-extract.ts`, and crawl the rendered DOM into a
|
|
8
|
+
* typed `SiteLayout` model via `dom-crawler.ts`. After all sites:
|
|
9
|
+
* - Synthesize a drop-in shadcn-compatible design system from the
|
|
10
|
+
* aggregated tokens.
|
|
11
|
+
* - Emit a per-site **layout mirror**: a Next.js page that reconstructs
|
|
12
|
+
* the source's section structure from typed primitives, with
|
|
13
|
+
* `<TextSlot>` / `<MediaSlot>` placeholders for the user's copy and
|
|
14
|
+
* brand assets.
|
|
9
15
|
*
|
|
10
16
|
* Output goes to `output/<runId>/`.
|
|
11
17
|
*
|
|
12
|
-
*
|
|
13
|
-
* - Honor robots.txt
|
|
14
|
-
* - Per-domain rate limit defaults to 15 req/min.
|
|
15
|
-
* -
|
|
16
|
-
*
|
|
18
|
+
* Operational defaults (configurable via flags):
|
|
19
|
+
* - Honor robots.txt unless `--no-robots` is passed.
|
|
20
|
+
* - Per-domain rate limit defaults to 15 req/min (`--rate <n>`).
|
|
21
|
+
* - The crawler extracts a structured representation (section tree,
|
|
22
|
+
* computed style tokens, content kinds) and writes a verbatim
|
|
23
|
+
* `reference/<host>/` bundle (HTML + visible text + media URLs) for AI.
|
|
17
24
|
*/
|
|
18
25
|
|
|
19
26
|
import { mkdirSync, writeFileSync } from "node:fs";
|
|
@@ -23,9 +30,11 @@ import { fileURLToPath, pathToFileURL } from "node:url";
|
|
|
23
30
|
import { chromium, type Browser } from "playwright";
|
|
24
31
|
|
|
25
32
|
import { harvestTokens } from "./browser-extract.js";
|
|
33
|
+
import { crawlLayout } from "./dom-crawler.js";
|
|
26
34
|
import { emitAll } from "./emit.js";
|
|
35
|
+
import { emitPageReference } from "./reference-dump.js";
|
|
27
36
|
import { synthesize } from "./synthesize.js";
|
|
28
|
-
import type { ExtractionRun, RawTokens, SiteCapture } from "./types.js";
|
|
37
|
+
import type { ExtractionRun, RawTokens, SiteCapture, SiteLayout } from "./types.js";
|
|
29
38
|
|
|
30
39
|
const __filename = fileURLToPath(import.meta.url);
|
|
31
40
|
const __dirname = dirname(__filename);
|
|
@@ -90,9 +99,17 @@ function printHelp(): void {
|
|
|
90
99
|
"Writes to ./output/<runId>/ in your current working directory unless",
|
|
91
100
|
"you pass --out.",
|
|
92
101
|
"",
|
|
93
|
-
"
|
|
94
|
-
"
|
|
95
|
-
"
|
|
102
|
+
"For each URL the CLI:",
|
|
103
|
+
" 1. Renders the page at a desktop viewport in headless Chromium.",
|
|
104
|
+
" 2. Captures a full-page screenshot and harvests computed design tokens",
|
|
105
|
+
" (colors, type, spacing, radius, shadow) → raw/<host>.tokens.json.",
|
|
106
|
+
" 3. Writes a verbatim reference bundle → reference/<host>/ (page.html,",
|
|
107
|
+
" visible-text.json/.txt, media.json, meta.json, FOR_AI_REFERENCE.md).",
|
|
108
|
+
" 4. Crawls the DOM into SiteLayout → raw/<host>.layout.json and emits",
|
|
109
|
+
" mirror/<host>/page.tsx (Framer Motion + Phosphor + image/video slots).",
|
|
110
|
+
"",
|
|
111
|
+
"After every URL, a drop-in shadcn-compatible design system is",
|
|
112
|
+
"synthesized from the aggregated tokens and written to output/<runId>/.",
|
|
96
113
|
"",
|
|
97
114
|
"Options:",
|
|
98
115
|
" --out <dir> Output directory (default: output/<runId>)",
|
|
@@ -178,11 +195,14 @@ async function captureOne(
|
|
|
178
195
|
url: string,
|
|
179
196
|
viewport: { width: number; height: number },
|
|
180
197
|
outDir: string,
|
|
181
|
-
): Promise<{ raw: RawTokens; capture: SiteCapture } | null> {
|
|
198
|
+
): Promise<{ raw: RawTokens; layout: SiteLayout | null; capture: SiteCapture } | null> {
|
|
182
199
|
const host = new URL(url).host;
|
|
183
200
|
const stamp = `${host}.png`;
|
|
184
201
|
const screenshotPath = join(outDir, "screenshots", stamp);
|
|
185
202
|
const rawPath = join(outDir, "raw", `${host}.tokens.json`);
|
|
203
|
+
const layoutPath = join(outDir, "raw", `${host}.layout.json`);
|
|
204
|
+
const mirrorDir = join(outDir, "mirror", host);
|
|
205
|
+
const referenceDir = join(outDir, "reference", host);
|
|
186
206
|
|
|
187
207
|
const ctx = await browser.newContext({
|
|
188
208
|
userAgent: USER_AGENT,
|
|
@@ -215,18 +235,40 @@ async function captureOne(
|
|
|
215
235
|
mkdirSync(dirname(rawPath), { recursive: true });
|
|
216
236
|
writeFileSync(rawPath, JSON.stringify(raw, null, 2));
|
|
217
237
|
|
|
238
|
+
let referenceWritten: string[] = [];
|
|
239
|
+
try {
|
|
240
|
+
referenceWritten = await emitPageReference(page, url, referenceDir);
|
|
241
|
+
} catch (err) {
|
|
242
|
+
console.warn(` ! reference dump failed for ${url}: ${(err as Error).message}`);
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
let layout: SiteLayout | null = null;
|
|
246
|
+
let mirrorWritten: string[] = [];
|
|
247
|
+
try {
|
|
248
|
+
layout = await crawlLayout(page, url, viewport);
|
|
249
|
+
mkdirSync(dirname(layoutPath), { recursive: true });
|
|
250
|
+
writeFileSync(layoutPath, JSON.stringify(layout, null, 2));
|
|
251
|
+
mirrorWritten = emitMirror(layout, mirrorDir);
|
|
252
|
+
} catch (err) {
|
|
253
|
+
console.warn(` ! layout crawl failed for ${url}: ${(err as Error).message}`);
|
|
254
|
+
}
|
|
255
|
+
|
|
218
256
|
const capture: SiteCapture = {
|
|
219
257
|
url,
|
|
220
258
|
host,
|
|
221
259
|
capturedAt: raw.capturedAt,
|
|
222
260
|
screenshotPath,
|
|
223
261
|
rawTokensPath: rawPath,
|
|
262
|
+
...(referenceWritten.length > 0 ? { referenceDir } : {}),
|
|
263
|
+
...(layout ? { layoutPath } : {}),
|
|
264
|
+
...(mirrorWritten.length > 0 ? { mirrorDir } : {}),
|
|
224
265
|
status: "ok",
|
|
225
266
|
};
|
|
226
|
-
return { raw, capture };
|
|
267
|
+
return { raw, layout, capture };
|
|
227
268
|
} catch (err) {
|
|
228
269
|
return {
|
|
229
270
|
raw: emptyRaw(url, viewport),
|
|
271
|
+
layout: null,
|
|
230
272
|
capture: {
|
|
231
273
|
url,
|
|
232
274
|
host,
|
|
@@ -303,7 +345,11 @@ async function main(): Promise<void> {
|
|
|
303
345
|
captures.push(result.capture);
|
|
304
346
|
if (result.capture.status === "ok") {
|
|
305
347
|
rawList.push(result.raw);
|
|
306
|
-
|
|
348
|
+
const tag = result.layout ? "mirror" : "tokens-only";
|
|
349
|
+
const sectionCount = result.layout?.sections.length ?? 0;
|
|
350
|
+
console.log(
|
|
351
|
+
` ✓ ${url} → ${tag}${result.layout ? ` (${sectionCount} sections)` : ""}`,
|
|
352
|
+
);
|
|
307
353
|
} else {
|
|
308
354
|
console.log(` ✗ ${url} ${result.capture.reason ?? ""}`);
|
|
309
355
|
}
|
|
@@ -339,9 +385,32 @@ async function main(): Promise<void> {
|
|
|
339
385
|
console.log("[extract] wrote:");
|
|
340
386
|
for (const f of written) console.log(` → ${f}`);
|
|
341
387
|
console.log(` → ${join(outDir, "run.json")}`);
|
|
388
|
+
const mirrorDirs = captures.filter((c) => c.mirrorDir).map((c) => c.mirrorDir!);
|
|
389
|
+
const referenceDirs = captures.filter((c) => c.referenceDir).map((c) => c.referenceDir!);
|
|
390
|
+
if (mirrorDirs.length > 0) {
|
|
391
|
+
console.log("");
|
|
392
|
+
console.log("[extract] layout mirrors:");
|
|
393
|
+
for (const d of mirrorDirs) console.log(` → ${d}/page.tsx`);
|
|
394
|
+
}
|
|
395
|
+
if (referenceDirs.length > 0) {
|
|
396
|
+
console.log("");
|
|
397
|
+
console.log("[extract] AI reference (verbatim DOM + copy):");
|
|
398
|
+
for (const d of referenceDirs) console.log(` → ${d}/FOR_AI_REFERENCE.md`);
|
|
399
|
+
}
|
|
342
400
|
console.log("");
|
|
343
|
-
console.log(`[extract] done. Open ${join(outDir, "REPORT.md")} for the summary.`);
|
|
344
|
-
|
|
401
|
+
console.log(`[extract] done. Open ${join(outDir, "REPORT.md")} for the design-system summary.`);
|
|
402
|
+
if (mirrorDirs.length > 0) {
|
|
403
|
+
console.log(
|
|
404
|
+
`[extract] each mirror folder ships a Next.js page.tsx + MIRROR_NOTES.md.`,
|
|
405
|
+
);
|
|
406
|
+
console.log(`[extract] fill the <TextSlot> / <MediaSlot> placeholders with your own content.`);
|
|
407
|
+
}
|
|
408
|
+
if (referenceDirs.length > 0) {
|
|
409
|
+
console.log(
|
|
410
|
+
`[extract] paste reference/<host>/visible-text.txt or page.html into your AI for exact structure + copy.`,
|
|
411
|
+
);
|
|
412
|
+
}
|
|
413
|
+
console.log(`[extract] AI handoff: ${join(outDir, "FOR_AI.md")}`);
|
|
345
414
|
}
|
|
346
415
|
|
|
347
416
|
function makeRunId(startedAt: string, name: string | undefined): string {
|