launchframe 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -54,13 +54,19 @@ output/<runId>/
54
54
  ├── run.json ← full run metadata (sources, timing, status)
55
55
  ├── screenshots/ ← captured PNGs
56
56
  ├── raw/ ← per-site raw token + SiteLayout JSON
57
+ ├── reference/ ← verbatim DOM + copy for AI (see below)
58
+ │ └── <host>/
59
+ │ ├── page.html ← full HTML after JavaScript
60
+ │ ├── visible-text.txt ← paste-friendly copy extraction
61
+ │ ├── visible-text.json ← structured headings / body / buttons
62
+ │ ├── media.json ← img + video URLs
63
+ │ ├── meta.json ← title, description, lang
64
+ │ └── FOR_AI_REFERENCE.md
57
65
  └── mirror/
58
66
  └── <host>/
59
- ├── page.tsx ← Next.js page reconstructed from the source's
60
- │ section tree, with <TextSlot> / <MediaSlot>
61
- │ placeholders for your own copy and assets
62
- ├── layout.json ← the typed SiteLayout the page was built from
63
- └── MIRROR_NOTES.md ← what was extracted and how to fill slots
67
+ ├── page.tsx ← Next.js: Motion + Phosphor + image/video patterns
68
+ ├── layout.json
69
+ └── MIRROR_NOTES.md
64
70
  ```
65
71
 
66
72
  ---
@@ -68,22 +74,18 @@ output/<runId>/
68
74
  ## Hand the output to your AI
69
75
 
70
76
  1. Run the command above so `output/<runId>/` exists.
71
- 2. Pick the mirror folder that matches the source whose layout you want
72
- to start from: `output/<runId>/mirror/<host>/`.
73
- 3. Either:
74
- - **Cursor:** `@`-attach the mirror folder along with `FOR_AI.md` and
75
- `tokens.json`, then ask the agent to fill in `<TextSlot>` /
76
- `<MediaSlot>` placeholders with copy for *your* product.
77
- - **Claude Code:** copy the mirror folder into your project, then ask
78
- the agent the same thing.
79
- 4. The AI's authority order is **MIRROR_NOTES.md → page.tsx → tokens.json
80
- tailwind.config.ts + globals.css**. It must:
81
- - Keep the section tree, grid composition, and density of `page.tsx`
82
- intact (that is the source's layout grammar, which is the point).
83
- - Replace every `<TextSlot kind="…" />` placeholder with original
84
- copy written for *your* product — not paraphrased from the source.
85
- - Replace every `<MediaSlot kind="…" />` with your own imagery, code
86
- samples, or brand marks.
77
+ 2. Attach **`reference/<host>/`** (`visible-text.txt`, `page.html`, `media.json`) so the model sees **exact copy and structure** from the crawl.
78
+ 3. Pick the mirror folder: `output/<runId>/mirror/<host>/`.
79
+ 4. Either:
80
+ - **Cursor:** `@`-attach `reference/<host>/`, `mirror/<host>/`, `FOR_AI.md`, and
81
+ `tokens.json`, then ask the agent to port copy from `visible-text.txt` into
82
+ `page.tsx` and wire media from `media.json`.
83
+ - **Claude Code:** copy both folders into your project, then ask the same.
84
+ 5. The AI's authority order is **reference/visible-text.txt & page.html →
85
+ MIRROR_NOTES.md → page.tsx → tokens.json → tailwind.config.ts + globals.css**. It must:
86
+ - Keep the section tree, grid composition, density, Motion, and Phosphor usage in `page.tsx`.
87
+ - Map strings from `visible-text.txt` into the right `<TextSlot>` slots (or replace slots with plain JSX).
88
+ - Use `media.json` for image/video `src` / `poster` (respect licensing; prefer your own assets).
87
89
 
88
90
  ---
89
91
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "launchframe",
3
- "version": "0.1.6",
3
+ "version": "0.1.7",
4
4
  "description": "Point Launchframe at SaaS sites you admire and get back a drop-in shadcn/ui design system (tokens, Tailwind theme, CSS variables, AI handoff) you can build your own UI on top of.",
5
5
  "license": "MIT",
6
6
  "author": "Evan Gruhlkey",
@@ -19,8 +19,8 @@
19
19
  * - Honor robots.txt unless `--no-robots` is passed.
20
20
  * - Per-domain rate limit defaults to 15 req/min (`--rate <n>`).
21
21
  * - The crawler extracts a structured representation (section tree,
22
- * computed style tokens, content kinds); it does not store raw HTML,
23
- * copy text, or third-party assets in the output.
22
+ * computed style tokens, content kinds) and writes a verbatim
23
+ * `reference/<host>/` bundle (HTML + visible text + media URLs) for AI.
24
24
  */
25
25
 
26
26
  import { mkdirSync, writeFileSync } from "node:fs";
@@ -32,7 +32,7 @@ import { chromium, type Browser } from "playwright";
32
32
  import { harvestTokens } from "./browser-extract.js";
33
33
  import { crawlLayout } from "./dom-crawler.js";
34
34
  import { emitAll } from "./emit.js";
35
- import { emitMirror } from "./mirror-emit.js";
35
+ import { emitPageReference } from "./reference-dump.js";
36
36
  import { synthesize } from "./synthesize.js";
37
37
  import type { ExtractionRun, RawTokens, SiteCapture, SiteLayout } from "./types.js";
38
38
 
@@ -101,13 +101,12 @@ function printHelp(): void {
101
101
  "",
102
102
  "For each URL the CLI:",
103
103
  " 1. Renders the page at a desktop viewport in headless Chromium.",
104
- " 2. Harvests computed design tokens (colors, type, spacing, radius,",
105
- " shadow).",
106
- " 3. Crawls the rendered DOM into a typed SiteLayout (section tree,",
107
- " composition, slot counts, per-section style tokens).",
108
- " 4. Emits a layout-mirror Next.js page at",
109
- " output/<runId>/mirror/<host>/page.tsx with <TextSlot> /",
110
- " <MediaSlot> placeholders for your own copy and imagery.",
104
+ " 2. Captures a full-page screenshot and harvests computed design tokens",
105
+ " (colors, type, spacing, radius, shadow) → raw/<host>.tokens.json.",
106
+ " 3. Writes a verbatim reference bundle reference/<host>/ (page.html,",
107
+ " visible-text.json/.txt, media.json, meta.json, FOR_AI_REFERENCE.md).",
108
+ " 4. Crawls the DOM into SiteLayout → raw/<host>.layout.json and emits",
109
+ " mirror/<host>/page.tsx (Framer Motion + Phosphor + image/video slots).",
111
110
  "",
112
111
  "After every URL, a drop-in shadcn-compatible design system is",
113
112
  "synthesized from the aggregated tokens and written to output/<runId>/.",
@@ -203,6 +202,7 @@ async function captureOne(
203
202
  const rawPath = join(outDir, "raw", `${host}.tokens.json`);
204
203
  const layoutPath = join(outDir, "raw", `${host}.layout.json`);
205
204
  const mirrorDir = join(outDir, "mirror", host);
205
+ const referenceDir = join(outDir, "reference", host);
206
206
 
207
207
  const ctx = await browser.newContext({
208
208
  userAgent: USER_AGENT,
@@ -235,6 +235,13 @@ async function captureOne(
235
235
  mkdirSync(dirname(rawPath), { recursive: true });
236
236
  writeFileSync(rawPath, JSON.stringify(raw, null, 2));
237
237
 
238
+ let referenceWritten: string[] = [];
239
+ try {
240
+ referenceWritten = await emitPageReference(page, url, referenceDir);
241
+ } catch (err) {
242
+ console.warn(` ! reference dump failed for ${url}: ${(err as Error).message}`);
243
+ }
244
+
238
245
  let layout: SiteLayout | null = null;
239
246
  let mirrorWritten: string[] = [];
240
247
  try {
@@ -252,6 +259,7 @@ async function captureOne(
252
259
  capturedAt: raw.capturedAt,
253
260
  screenshotPath,
254
261
  rawTokensPath: rawPath,
262
+ ...(referenceWritten.length > 0 ? { referenceDir } : {}),
255
263
  ...(layout ? { layoutPath } : {}),
256
264
  ...(mirrorWritten.length > 0 ? { mirrorDir } : {}),
257
265
  status: "ok",
@@ -378,11 +386,17 @@ async function main(): Promise<void> {
378
386
  for (const f of written) console.log(` → ${f}`);
379
387
  console.log(` → ${join(outDir, "run.json")}`);
380
388
  const mirrorDirs = captures.filter((c) => c.mirrorDir).map((c) => c.mirrorDir!);
389
+ const referenceDirs = captures.filter((c) => c.referenceDir).map((c) => c.referenceDir!);
381
390
  if (mirrorDirs.length > 0) {
382
391
  console.log("");
383
392
  console.log("[extract] layout mirrors:");
384
393
  for (const d of mirrorDirs) console.log(` → ${d}/page.tsx`);
385
394
  }
395
+ if (referenceDirs.length > 0) {
396
+ console.log("");
397
+ console.log("[extract] AI reference (verbatim DOM + copy):");
398
+ for (const d of referenceDirs) console.log(` → ${d}/FOR_AI_REFERENCE.md`);
399
+ }
386
400
  console.log("");
387
401
  console.log(`[extract] done. Open ${join(outDir, "REPORT.md")} for the design-system summary.`);
388
402
  if (mirrorDirs.length > 0) {
@@ -391,6 +405,11 @@ async function main(): Promise<void> {
391
405
  );
392
406
  console.log(`[extract] fill the <TextSlot> / <MediaSlot> placeholders with your own content.`);
393
407
  }
408
+ if (referenceDirs.length > 0) {
409
+ console.log(
410
+ `[extract] paste reference/<host>/visible-text.txt or page.html into your AI for exact structure + copy.`,
411
+ );
412
+ }
394
413
  console.log(`[extract] AI handoff: ${join(outDir, "FOR_AI.md")}`);
395
414
  }
396
415
 
@@ -9,9 +9,9 @@
9
9
  * - `<dir>/layout.json` — the captured `SiteLayout` model for review.
10
10
  * - `<dir>/MIRROR_NOTES.md` — what was extracted, how slots are filled.
11
11
  *
12
- * The emitter never embeds the source's verbatim copy text or brand
13
- * assets. Headings, body copy, buttons, images, and logos are rendered as
14
- * slot placeholders that the operator fills in.
12
+ * The emitter renders **slot placeholders** in `page.tsx` for final copy;
13
+ * pair this with `reference/<host>/visible-text.txt` or `page.html` for
14
+ * verbatim source strings when prompting an AI.
15
15
  */
16
16
 
17
17
  import { mkdirSync, writeFileSync } from "node:fs";
@@ -58,6 +58,8 @@ function emitPage(layout: SiteLayout): string {
58
58
  );
59
59
 
60
60
  return [
61
+ '"use client";',
62
+ "",
61
63
  "/**",
62
64
  ` * Mirror page for ${layout.url}`,
63
65
  ` * Captured ${layout.capturedAt}`,
@@ -65,19 +67,30 @@ function emitPage(layout: SiteLayout): string {
65
67
  " *",
66
68
  " * Generated by launchframe — packages/extract/mirror-emit.ts",
67
69
  " *",
68
- " * Fill in <TextSlot> and <MediaSlot> placeholders with your own copy",
69
- " * and imagery before shipping. The visual tokens (color, type, radius,",
70
- ' * container width) reflect the source page and are scoped to the',
71
- ' * `.mirror-root` wrapper below no global CSS leakage.',
70
+ " * Reference for exact copy + DOM: ../../reference/<host>/",
71
+ " * (visible-text.txt, page.html, media.json).",
72
+ " *",
73
+ " * Framer Motion: MirrorEnter / MirrorEnterFromEnd / MirrorStaggerRow.",
74
+ " * Icons: @phosphor-icons/react via @framework/blocks.",
75
+ " * Swap <MediaSlot> for next/image or <video controls playsInline>.",
72
76
  " */",
73
77
  "",
74
78
  'import {',
75
- " FadeUp,",
79
+ " CaretRight,",
80
+ " Clock,",
81
+ " FileText,",
82
+ " ListBullets,",
83
+ " MirrorEnter,",
84
+ " MirrorEnterFromEnd,",
85
+ " MirrorStaggerRow,",
76
86
  " MediaSlot,",
87
+ " PlayCircle,",
88
+ " Queue,",
89
+ " Sparkle,",
77
90
  " Stagger,",
78
91
  " StaggerItem,",
79
92
  " TextSlot,",
80
- " cn,",
93
+ " VideoCamera,",
81
94
  "} from \"@framework/blocks\";",
82
95
  "",
83
96
  `export const meta = ${JSON.stringify(
@@ -85,6 +98,7 @@ function emitPage(layout: SiteLayout): string {
85
98
  kind: "mirror",
86
99
  source: layout.url,
87
100
  capturedAt: layout.capturedAt,
101
+ referencePath: `../../reference/${layout.host}`,
88
102
  sections: layout.sections.map((s) => ({
89
103
  id: s.id,
90
104
  role: s.role,
@@ -244,7 +258,7 @@ function emitSingleColumn(s: SectionLayout, labelId: string): string {
244
258
  if ((slots["button-primary"] ?? 0) > 0) buttons.push(textSlot("button-primary"));
245
259
  if ((slots["button-secondary"] ?? 0) > 0) buttons.push(textSlot("button-secondary"));
246
260
 
247
- const headBlock = head.length > 0 ? wrapFadeUp(head.join("\n")) : "";
261
+ const headBlock = head.length > 0 ? wrapMirrorEnter(head.join("\n")) : "";
248
262
  const buttonsBlock =
249
263
  buttons.length > 0
250
264
  ? `<div className="mt-8 flex flex-wrap items-center gap-3">\n${indent(buttons.join("\n"), 2)}\n</div>`
@@ -280,7 +294,11 @@ function emitSplitTwo(s: SectionLayout, labelId: string): string {
280
294
  }
281
295
 
282
296
  const buttons: string[] = [];
283
- if ((slots["button-primary"] ?? 0) > 0) buttons.push(textSlot("button-primary"));
297
+ if ((slots["button-primary"] ?? 0) > 0) {
298
+ buttons.push(
299
+ '<span className="inline-flex items-center gap-2 has-[button]:gap-0"><TextSlot kind="button-primary" /><CaretRight className="size-4 opacity-90" weight="bold" aria-hidden /></span>',
300
+ );
301
+ }
284
302
  if ((slots["button-secondary"] ?? 0) > 0) buttons.push(textSlot("button-secondary"));
285
303
  if (buttons.length > 0) {
286
304
  textChildren.push(
@@ -292,8 +310,11 @@ function emitSplitTwo(s: SectionLayout, labelId: string): string {
292
310
  );
293
311
  }
294
312
 
295
- const textCol = wrapFadeUp(textChildren.join("\n"));
296
- const mediaCol = wrapFadeUp(emitMediaSlots(slots, { aspect: "video" }) || mediaSlot("image"));
313
+ const textCol = wrapMirrorEnter(textChildren.join("\n"));
314
+ const mediaInner = emitHeroMediaColumn(slots);
315
+ const mediaCol = [`<MirrorEnterFromEnd>`, indent(mediaInner, 1), `</MirrorEnterFromEnd>`].join(
316
+ "\n",
317
+ );
297
318
 
298
319
  return [
299
320
  '<div className="grid items-center gap-12 md:grid-cols-2">',
@@ -303,6 +324,74 @@ function emitSplitTwo(s: SectionLayout, labelId: string): string {
303
324
  ].join("\n");
304
325
  }
305
326
 
327
+ /** Rich right column: product-style card + Phosphor + image/video slots. */
328
+ function emitHeroMediaColumn(slots: Partial<Record<SlotKind, number>>): string {
329
+ const hasVideo = (slots.video ?? 0) > 0;
330
+ const queueCard = [
331
+ '<div className="overflow-hidden rounded-xl border border-border bg-card shadow-lg ring-1 ring-foreground/[0.06]">',
332
+ ' <header className="flex items-center justify-between gap-3 border-b border-border px-4 py-3">',
333
+ ' <span className="flex items-center gap-2 min-w-0">',
334
+ ' <Queue className="size-5 shrink-0 text-[var(--mirror-primary)]" weight="duotone" aria-hidden />',
335
+ ' <TextSlot kind="eyebrow" />',
336
+ " </span>",
337
+ ' <TextSlot kind="badge" />',
338
+ " </header>",
339
+ ' <ul className="divide-y divide-border">',
340
+ " <MirrorStaggerRow index={0} className=\"flex gap-3 px-4 py-3\">",
341
+ ' <FileText className="size-5 shrink-0 text-[var(--mirror-primary)] mt-0.5" weight="duotone" aria-hidden />',
342
+ ' <div className="min-w-0 space-y-1">',
343
+ ' <TextSlot kind="heading-3" />',
344
+ ' <TextSlot kind="body" />',
345
+ " </div>",
346
+ " </MirrorStaggerRow>",
347
+ " <MirrorStaggerRow index={1} className=\"flex gap-3 px-4 py-3\">",
348
+ ' <Clock className="size-5 shrink-0 text-[var(--mirror-primary)] mt-0.5" weight="duotone" aria-hidden />',
349
+ ' <div className="min-w-0 space-y-1">',
350
+ ' <TextSlot kind="heading-3" />',
351
+ ' <TextSlot kind="body" />',
352
+ " </div>",
353
+ " </MirrorStaggerRow>",
354
+ " <MirrorStaggerRow index={2} className=\"flex gap-3 px-4 py-3\">",
355
+ ' <ListBullets className="size-5 shrink-0 text-[var(--mirror-primary)] mt-0.5" weight="duotone" aria-hidden />',
356
+ ' <div className="min-w-0 space-y-1">',
357
+ ' <TextSlot kind="heading-3" />',
358
+ ' <TextSlot kind="body" />',
359
+ " </div>",
360
+ " </MirrorStaggerRow>",
361
+ " </ul>",
362
+ "</div>",
363
+ ].join("\n");
364
+
365
+ const imageBlock = [
366
+ '<div className="mt-6 space-y-3">',
367
+ ' <MediaSlot kind="image" aspect="video" />',
368
+ " {/* Drop in when you have assets: import Image from 'next/image' */}",
369
+ ' {/* <Image src="/hero.jpg" alt="" fill className="object-cover rounded-xl" sizes="(max-width:768px) 100vw, 50vw" /> */}',
370
+ "</div>",
371
+ ].join("\n");
372
+
373
+ const videoBlock = hasVideo
374
+ ? [
375
+ '<div className="mt-6">',
376
+ ' <div className="relative overflow-hidden rounded-xl border border-border bg-muted/30 aspect-video">',
377
+ ' <VideoCamera className="absolute left-3 top-3 size-6 text-muted-foreground" weight="duotone" aria-hidden />',
378
+ ' <video className="h-full w-full object-cover" controls playsInline preload="metadata" poster="/poster-frame.jpg">',
379
+ ' <source src="/product-demo.mp4" type="video/mp4" />',
380
+ " </video>",
381
+ " </div>",
382
+ ' <p className="mt-2 text-xs text-muted-foreground">No autoplay — swap poster + src from reference/media.json.</p>',
383
+ "</div>",
384
+ ].join("\n")
385
+ : [
386
+ '<div className="mt-6 flex items-center gap-2 text-xs text-muted-foreground">',
387
+ ' <PlayCircle className="size-4 shrink-0" weight="regular" aria-hidden />',
388
+ ' <span>Optional: add a <code className="font-mono">video</code> block; see media.json from the crawl.</span>',
389
+ "</div>",
390
+ ].join("\n");
391
+
392
+ return [queueCard, imageBlock, videoBlock].join("\n");
393
+ }
394
+
306
395
  function emitGrid(s: SectionLayout, labelId: string): string {
307
396
  const slots = slotMap(s.slots);
308
397
  const cols = gridCols(s.composition);
@@ -319,22 +408,12 @@ function emitGrid(s: SectionLayout, labelId: string): string {
319
408
  const cardHeading: SlotKind =
320
409
  (slots["heading-2"] ?? 0) >= cols ? "heading-2" : "heading-3";
321
410
 
322
- const card = [
323
- '<li className="flex flex-col gap-3 rounded-lg border border-border bg-card p-6">',
324
- ` ${mediaSlot("icon", { aspect: "square", className: "size-10" })}`,
325
- ` ${textSlot(cardHeading)}`,
326
- ` ${textSlot("body")}`,
327
- "</li>",
328
- ].join("\n");
329
-
330
- const cards: string[] = [];
331
- for (let i = 0; i < cols; i++) cards.push(card);
332
411
  const grid = [
333
412
  `<Stagger as="ul" className=${JSON.stringify(`mt-12 grid gap-6 md:grid-cols-${cols}`)}>`,
334
413
  ...cards.map((c) =>
335
414
  [
336
415
  ' <StaggerItem as="li" className="flex flex-col gap-3 rounded-lg border border-border bg-card p-6">',
337
- ` ${mediaSlot("icon", { aspect: "square", className: "size-10" })}`,
416
+ ' <Sparkle className="size-10 text-[var(--mirror-primary)]" weight="duotone" aria-hidden />',
338
417
  ` ${textSlot(cardHeading)}`,
339
418
  ` ${textSlot("body")}`,
340
419
  " </StaggerItem>",
@@ -344,7 +423,7 @@ function emitGrid(s: SectionLayout, labelId: string): string {
344
423
  ];
345
424
 
346
425
  const head = [
347
- heading ? wrapFadeUp(heading) : "",
426
+ heading ? wrapMirrorEnter(heading) : "",
348
427
  introBody,
349
428
  ]
350
429
  .filter(Boolean)
@@ -384,7 +463,7 @@ function emitList(s: SectionLayout, labelId: string): string {
384
463
  for (let i = 0; i < count; i++) items.push(textSlot("bullet"));
385
464
 
386
465
  return [
387
- heading ? wrapFadeUp(heading) : "",
466
+ heading ? wrapMirrorEnter(heading) : "",
388
467
  `<ul className="mt-8 space-y-3 text-sm">`,
389
468
  ...items.map((it) => ` ${it}`),
390
469
  "</ul>",
@@ -426,7 +505,17 @@ function emitMediaSlots(
426
505
  const parts: string[] = [];
427
506
  if ((slots.image ?? 0) > 0) parts.push(mediaSlot("image", { aspect: opts.aspect ?? "video" }));
428
507
  if ((slots.code ?? 0) > 0) parts.push(mediaSlot("code", { aspect: "auto" }));
429
- if ((slots.video ?? 0) > 0) parts.push(mediaSlot("video", { aspect: opts.aspect ?? "video" }));
508
+ if ((slots.video ?? 0) > 0) {
509
+ parts.push(
510
+ [
511
+ '<div className="overflow-hidden rounded-xl border border-border aspect-video bg-muted/20">',
512
+ ' <video className="h-full w-full object-cover" controls playsInline preload="metadata" poster="/poster.jpg">',
513
+ ' <source src="/clip.mp4" type="video/mp4" />',
514
+ " </video>",
515
+ "</div>",
516
+ ].join("\n"),
517
+ );
518
+ }
430
519
  if (parts.length === 0) return "";
431
520
  if (parts.length === 1) return `<div className="mt-12">${parts[0]}</div>`;
432
521
  return [
@@ -436,11 +525,11 @@ function emitMediaSlots(
436
525
  ].join("\n");
437
526
  }
438
527
 
439
- function wrapFadeUp(children: string): string {
528
+ function wrapMirrorEnter(children: string): string {
440
529
  return [
441
- "<FadeUp>",
530
+ "<MirrorEnter>",
442
531
  indent(children, 1),
443
- "</FadeUp>",
532
+ "</MirrorEnter>",
444
533
  ].join("\n");
445
534
  }
446
535
 
@@ -481,16 +570,22 @@ function emitNotes(layout: SiteLayout): string {
481
570
  lines.push("- Slot inventory per section (counts of headings / body / buttons / images / icons / logos)");
482
571
  lines.push("- Per-section background and padding");
483
572
  lines.push("- Page-level tokens (fonts, primary/muted/border/foreground, radius, container width)");
573
+ lines.push("- **Polished UI shell** in `page.tsx`: Framer Motion (`MirrorEnter`, `MirrorEnterFromEnd`, `MirrorStaggerRow`), Phosphor icons, image + video placeholders");
484
574
  lines.push("");
485
- lines.push("## What was **not** extracted");
575
+ lines.push("## Verbatim reference (same crawl)");
486
576
  lines.push("");
487
- lines.push("- Source headlines, body copy, or microcopy text");
488
- lines.push("- Brand logos, illustrations, or product screenshots");
489
- lines.push("- Source HTML, CSS, or class names");
577
+ lines.push(`Open **../reference/${layout.host}/** alongside this folder:`);
490
578
  lines.push("");
491
- lines.push(
492
- "The `page.tsx` reconstructs the section grammar from typed primitives. Headings, body copy, buttons, images, and logos appear as `<TextSlot>` / `<MediaSlot>` placeholders. Fill them with your own content before shipping.",
493
- );
579
+ lines.push("- `page.html` — full serialized DOM after JS (exact structure for an AI)");
580
+ lines.push("- `visible-text.txt` / `visible-text.json` — visible headings, paragraphs, buttons");
581
+ lines.push("- `media.json` — image and video URLs from the page");
582
+ lines.push("- `FOR_AI_REFERENCE.md` — how to use the bundle");
583
+ lines.push("");
584
+ lines.push("Paste `visible-text.txt` or excerpts into your AI when filling `<TextSlot>` nodes.");
585
+ lines.push("");
586
+ lines.push("## What stays as slots in page.tsx");
587
+ lines.push("");
588
+ lines.push("`<TextSlot>` / `<MediaSlot>` keep the React tree clean until you substitute real strings and assets. Copy text from `visible-text.txt`; swap `poster` / `src` on `<video>` and `next/image` from `media.json`.");
494
589
  lines.push("");
495
590
  lines.push("## Section breakdown");
496
591
  lines.push("");
@@ -0,0 +1,230 @@
1
+ /**
2
+ * Verbatim reference dump for AI / human review.
3
+ *
4
+ * Writes everything under `output/<runId>/reference/<host>/`:
5
+ * - page.html — full document HTML after JS render (`page.content()`)
6
+ * - visible-text.json — structured visible copy (headings, buttons, key blocks); also flattened to visible-text.txt
7
+ * - media.json — img / video / source URLs and attributes
8
+ * - meta.json — title, description, canonical, lang
9
+ * - FOR_AI_REFERENCE.md — how to use these files with an AI
10
+ */
11
+
12
+ import { mkdirSync, writeFileSync } from "node:fs";
13
+ import { join } from "node:path";
14
+
15
+ import type { Page } from "playwright";
16
+
17
+ export interface ReferenceSnapshot {
18
+ url: string;
19
+ capturedAt: string;
20
+ title: string | null;
21
+ description: string | null;
22
+ canonical: string | null;
23
+ lang: string | null;
24
+ /** Flattened visible strings in DOM order (useful for grep / LLM context). */
25
+ visibleTextBlocks: Array<{
26
+ tag: string;
27
+ role: string | null;
28
+ text: string;
29
+ }>;
30
+ links: Array<{ href: string; text: string }>;
31
+ media: Array<
32
+ | { type: "img"; src: string; alt: string; width: number | null; height: number | null }
33
+ | { type: "video"; src: string | null; poster: string | null }
34
+ | { type: "source"; src: string; kind: string | null }
35
+ >;
36
+ }
37
+
38
+ export async function emitPageReference(page: Page, url: string, refDir: string): Promise<string[]> {
39
+ mkdirSync(refDir, { recursive: true });
40
+ const written: string[] = [];
41
+ const capturedAt = new Date().toISOString();
42
+
43
+ await page.evaluate(() => {
44
+ const g = globalThis as unknown as { __name?: (fn: unknown) => unknown };
45
+ if (typeof g.__name === "undefined") g.__name = (fn: unknown) => fn;
46
+ });
47
+
48
+ const html = await page.content();
49
+ const htmlPath = join(refDir, "page.html");
50
+ writeFileSync(htmlPath, html, "utf8");
51
+ written.push(htmlPath);
52
+
53
+ const snapshot = (await page.evaluate(collectSnapshot)) as Omit<ReferenceSnapshot, "url" | "capturedAt">;
54
+ const full: ReferenceSnapshot = {
55
+ url,
56
+ capturedAt,
57
+ ...snapshot,
58
+ };
59
+
60
+ writeFileSync(join(refDir, "visible-text.json"), JSON.stringify(full, null, 2) + "\n", "utf8");
61
+ written.push(join(refDir, "visible-text.json"));
62
+
63
+ const txtLines = [
64
+ `# ${full.title ?? "Untitled"}`,
65
+ "",
66
+ ...full.visibleTextBlocks.map((b) => b.text),
67
+ "",
68
+ "--- links ---",
69
+ ...full.links.map((l) => `${l.text}\t${l.href}`),
70
+ ];
71
+ writeFileSync(join(refDir, "visible-text.txt"), txtLines.join("\n"), "utf8");
72
+ written.push(join(refDir, "visible-text.txt"));
73
+
74
+ const mediaOnly = { url, capturedAt, media: full.media };
75
+ writeFileSync(join(refDir, "media.json"), JSON.stringify(mediaOnly, null, 2) + "\n", "utf8");
76
+ written.push(join(refDir, "media.json"));
77
+
78
+ const meta = {
79
+ url,
80
+ capturedAt,
81
+ title: full.title,
82
+ description: full.description,
83
+ canonical: full.canonical,
84
+ lang: full.lang,
85
+ };
86
+ writeFileSync(join(refDir, "meta.json"), JSON.stringify(meta, null, 2) + "\n", "utf8");
87
+ written.push(join(refDir, "meta.json"));
88
+
89
+ writeFileSync(join(refDir, "FOR_AI_REFERENCE.md"), emitAiReadme(url, refDir), "utf8");
90
+ written.push(join(refDir, "FOR_AI_REFERENCE.md"));
91
+
92
+ return written;
93
+ }
94
+
95
+ function emitAiReadme(url: string, refDir: string): string {
96
+ const base = refDir.replace(/\\/g, "/");
97
+ return [
98
+ `# Reference capture — ${url}`,
99
+ "",
100
+ "Use these files when rebuilding the page in React / Next.js:",
101
+ "",
102
+ "| File | Purpose |",
103
+ "| ---- | ------- |",
104
+ "| `page.html` | Full serialized DOM after JavaScript ran in Chromium. Layout, copy, and structure match what was crawled (not necessarily valid static HTML elsewhere). |",
105
+ "| `visible-text.json` | Exact visible strings: headings, buttons, links, and block text — good for **verbatim copy** when rewriting `page.tsx`. |",
106
+ "| `media.json` | Every image / video / source URL from the DOM. Host your own assets or swap for placeholders; do not hotlink without permission. |",
107
+ "| `meta.json` | Title, description, lang. |",
108
+ "",
109
+ `Sibling folder \`../mirror/<host>/\` has a typed \`page.tsx\` with Framer Motion, Phosphor icons, and slots — wire copy from \`visible-text.json\` and media from \`media.json\` into that file.`,
110
+ "",
111
+ `Captured path: \`${base}\``,
112
+ "",
113
+ ].join("\n");
114
+ }
115
+
116
+ /**
117
+ * Runs in browser context.
118
+ */
119
+ function collectSnapshot(): Omit<ReferenceSnapshot, "url" | "capturedAt"> {
120
+ const title = document.title || null;
121
+ const descEl = document.querySelector('meta[name="description"]');
122
+ const description = descEl?.getAttribute("content")?.trim() || null;
123
+ const canonicalEl = document.querySelector('link[rel="canonical"]');
124
+ const canonical = canonicalEl?.getAttribute("href") || null;
125
+ const lang = document.documentElement.getAttribute("lang");
126
+
127
+ const visibleTextBlocks: Array<{ tag: string; role: string | null; text: string }> = [];
128
+ const pushBlock = (tag: string, el: HTMLElement, role: string | null) => {
129
+ const text = el.innerText?.trim().replace(/\s+/g, " ") ?? "";
130
+ if (text.length < 2 || text.length > 4000) return;
131
+ visibleTextBlocks.push({ tag, role, text });
132
+ };
133
+
134
+ for (const tag of ["H1", "H2", "H3", "H4", "H5", "H6"] as const) {
135
+ for (const el of Array.from(document.querySelectorAll(tag))) {
136
+ if (!(el instanceof HTMLElement)) continue;
137
+ const style = getComputedStyle(el);
138
+ if (style.visibility === "hidden" || style.display === "none") continue;
139
+ pushBlock(tag, el, el.getAttribute("role"));
140
+ }
141
+ }
142
+
143
+ for (const el of Array.from(document.querySelectorAll("p, li, blockquote, figcaption, label"))) {
144
+ if (!(el instanceof HTMLElement)) continue;
145
+ const style = getComputedStyle(el);
146
+ if (style.visibility === "hidden" || style.display === "none") continue;
147
+ pushBlock(el.tagName, el, el.getAttribute("role"));
148
+ }
149
+
150
+ for (const el of Array.from(document.querySelectorAll("button, [role='button']"))) {
151
+ if (!(el instanceof HTMLElement)) continue;
152
+ const style = getComputedStyle(el);
153
+ if (style.visibility === "hidden" || style.display === "none") continue;
154
+ pushBlock("BUTTON", el, el.getAttribute("role"));
155
+ }
156
+
157
+ for (const el of Array.from(document.querySelectorAll("span, div"))) {
158
+ if (!(el instanceof HTMLElement)) continue;
159
+ const role = el.getAttribute("role");
160
+ if (
161
+ role !== "heading" &&
162
+ !el.classList.contains("badge") &&
163
+ el.getAttribute("data-slot") === null
164
+ ) {
165
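+ // Unlabeled span/div only: keep short ALL-CAPS-ish text (eyebrow-style labels). NOTE(review): elements with role="heading", the `badge` class, or a data-slot attribute skip this branch and are never captured here — confirm that is intended.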
166
+ const style = getComputedStyle(el);
167
+ if (style.visibility === "hidden" || style.display === "none") continue;
168
+ const text = el.innerText?.trim().replace(/\s+/g, " ") ?? "";
169
+ if (text.length < 8 || text.length > 240) continue;
170
+ if (!/^[A-Z0-9\s&.,:]+$/.test(text)) continue; // ALL-CAPS-ish eyebrow labels
171
+ pushBlock(el.tagName, el, role);
172
+ }
173
+ }
174
+
175
+ const links: Array<{ href: string; text: string }> = [];
176
+ for (const a of Array.from(document.querySelectorAll("a[href]"))) {
177
+ const href = a.getAttribute("href") ?? "";
178
+ if (!href || href.startsWith("javascript:")) continue;
179
+ const text = (a.textContent ?? "").trim().replace(/\s+/g, " ");
180
+ if (!text) continue;
181
+ try {
182
+ const abs = new URL(href, document.baseURI).href;
183
+ links.push({ href: abs, text: text.slice(0, 500) });
184
+ } catch {
185
+ links.push({ href, text: text.slice(0, 500) });
186
+ }
187
+ }
188
+
189
+ const media: ReferenceSnapshot["media"] = [];
190
+ for (const img of Array.from(document.querySelectorAll("img"))) {
191
+ const src = img.currentSrc || img.src;
192
+ if (!src) continue;
193
+ media.push({
194
+ type: "img",
195
+ src,
196
+ alt: img.alt || "",
197
+ width: img.naturalWidth || null,
198
+ height: img.naturalHeight || null,
199
+ });
200
+ }
201
+ for (const video of Array.from(document.querySelectorAll("video"))) {
202
+ const poster = video.getAttribute("poster");
203
+ let src: string | null = null;
204
+ if (video.currentSrc) src = video.currentSrc;
205
+ else {
206
+ const s = video.querySelector("source[src]");
207
+ src = s?.getAttribute("src") ?? null;
208
+ }
209
+ media.push({ type: "video", src, poster: poster || null });
210
+ }
211
+ for (const source of Array.from(document.querySelectorAll("source[src]"))) {
212
+ const src = source.getAttribute("src");
213
+ if (!src) continue;
214
+ media.push({
215
+ type: "source",
216
+ src,
217
+ kind: source.getAttribute("type"),
218
+ });
219
+ }
220
+
221
+ return {
222
+ title,
223
+ description,
224
+ canonical,
225
+ lang: lang || null,
226
+ visibleTextBlocks,
227
+ links,
228
+ media,
229
+ };
230
+ }
@@ -293,9 +293,8 @@ export interface SiteCapture {
293
293
  host: string;
294
294
  capturedAt: string;
295
295
  screenshotPath: string;
296
- rawTokensPath: string;
297
- /** Path to the per-site `SiteLayout` JSON, if the mirror crawl succeeded. */
298
- layoutPath?: string;
296
+ /** Verbatim HTML + copy + media listing for AI reference. */
297
+ referenceDir?: string;
299
298
  /** Path to the per-site mirror page directory, if emission succeeded. */
300
299
  mirrorDir?: string;
301
300
  status: "ok" | "skipped" | "failed";