launchframe 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +147 -144
- package/bin/launchframe.mjs +315 -279
- package/package.json +1 -1
- package/template/.amazonq/cli-agents/clone-website.json +1 -1
- package/template/.amazonq/rules/project.md +130 -80
- package/template/.augment/commands/clone-website.md +30 -13
- package/template/.claude/skills/clone-website/SKILL.md +534 -517
- package/template/.clinerules +130 -80
- package/template/.codex/skills/clone-website/SKILL.md +31 -14
- package/template/.continue/commands/clone-website.md +30 -13
- package/template/.continue/rules/project.md +130 -80
- package/template/.cursor/commands/clone-website.md +30 -13
- package/template/.cursor/rules/project.mdc +22 -20
- package/template/.gemini/commands/clone-website.toml +30 -13
- package/template/.github/copilot-instructions.md +130 -80
- package/template/.github/skills/clone-website/SKILL.md +31 -14
- package/template/.opencode/commands/clone-website.md +30 -13
- package/template/.windsurf/workflows/clone-website.md +30 -13
- package/template/AGENTS.md +100 -79
- package/template/README.md +121 -118
- package/template/START_HERE.md +15 -15
- package/template/docs/design-references/playwright-example.com-1440px.png +0 -0
- package/template/docs/design-references/playwright-example.com-390px.png +0 -0
- package/template/docs/research/INSPECTION_GUIDE.md +109 -80
- package/template/package.json +63 -59
- package/template/scripts/recon-playwright.mjs +323 -0
|
@@ -1,80 +1,109 @@
|
|
|
1
|
-
# Website Inspection Guide
|
|
2
|
-
|
|
3
|
-
##
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
- [ ]
|
|
11
|
-
- [ ]
|
|
12
|
-
- [ ]
|
|
13
|
-
- [ ]
|
|
14
|
-
- [ ]
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
###
|
|
19
|
-
|
|
20
|
-
- [ ] **
|
|
21
|
-
- [ ] **
|
|
22
|
-
- [ ] **
|
|
23
|
-
- [ ] **
|
|
24
|
-
- [ ] **
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
-
|
|
43
|
-
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
-
|
|
47
|
-
-
|
|
48
|
-
-
|
|
49
|
-
-
|
|
50
|
-
-
|
|
51
|
-
-
|
|
52
|
-
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
-
|
|
70
|
-
-
|
|
71
|
-
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
1
|
+
# Website Inspection Guide
|
|
2
|
+
|
|
3
|
+
## Priority (read first): media & motion
|
|
4
|
+
|
|
5
|
+
Launchframe clones live pages for a **visual** result. Two things most often separate a convincing build from a hollow one:
|
|
6
|
+
|
|
7
|
+
### 1. Images & video (do this before obsessing over utility classes)
|
|
8
|
+
|
|
9
|
+
- [ ] **Every `<img>`** — `src` / `srcset` / `currentSrc`, `sizes`, `loading`, `decoding`, `alt`, intrinsic dimensions
|
|
10
|
+
- [ ] **`<picture>` / `<source>`** — resolution switches, art direction, `type` (WebP/AVIF)
|
|
11
|
+
- [ ] **Every `<video>`** — `src` + nested `<source>`, **poster**, `autoplay`, `loop`, `muted`, `playsinline`, `controls`
|
|
12
|
+
- [ ] **Background images** — `background-image` on ancestors (hero stacks are often **layers** of img + gradient + PNG mockup)
|
|
13
|
+
- [ ] **Lazy / below-fold** — scroll the page once before asset discovery so `data-src` / lazy-loaded URLs resolve if the site uses them
|
|
14
|
+
- [ ] **Download** — mirror into `public/images/` and `public/videos/` with stable paths; list failures in `docs/research/EXTRACTION_LIMITATIONS.md`
|
|
15
|
+
|
|
16
|
+
If automation hits a bot wall, **do not pretend extraction succeeded** — capture what you can from successful fetches and document gaps.
|
|
17
|
+
|
|
18
|
+
### 2. Motion (prefer Framer Motion in this repo)
|
|
19
|
+
|
|
20
|
+
- [ ] **Entrance** — fade/slide/scale on mount or on **scroll into view** (note threshold / `margin`)
|
|
21
|
+
- [ ] **Stagger** — children animating in sequence (hero bullets, card grids)
|
|
22
|
+
- [ ] **Scroll-linked** — progress, parallax, pinned sections (may combine with CSS `animation-timeline` or libs)
|
|
23
|
+
- [ ] **Gestures** — drag, pan, hover follow (often Framer Motion)
|
|
24
|
+
- [ ] **Implementation rule** — use **`framer-motion`** for anything beyond trivial single-property CSS `transition`. Record **duration, easing, delay, stagger**, and **trigger** (scroll, hover, tap) in specs.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## How to Reverse-Engineer Any Website
|
|
29
|
+
|
|
30
|
+
This guide outlines what to capture when inspecting a target website via Chrome MCP or browser DevTools.
|
|
31
|
+
|
|
32
|
+
## Phase 1: Visual Audit
|
|
33
|
+
|
|
34
|
+
### Screenshots to Capture
|
|
35
|
+
- [ ] Every distinct page — desktop, tablet, mobile
|
|
36
|
+
- [ ] Dark mode variants (if applicable)
|
|
37
|
+
- [ ] Light mode variants (if applicable)
|
|
38
|
+
- [ ] Key interaction states (hover, active, open menus, modals)
|
|
39
|
+
- [ ] Loading/skeleton states
|
|
40
|
+
- [ ] Empty states
|
|
41
|
+
- [ ] Error states
|
|
42
|
+
- [ ] **Video frames** — capture a frame mid-play for reference if motion is subtle
|
|
43
|
+
- [ ] **Hero / full-bleed** — wide crops where raster layers are easy to miss
|
|
44
|
+
|
|
45
|
+
### Design Tokens to Extract
|
|
46
|
+
- [ ] **Colors** — background, text (primary/secondary/muted), accent, border, hover, error, success, warning
|
|
47
|
+
- [ ] **Typography** — font family, sizes (h1-h6, body, caption, label), weights, line heights, letter spacing
|
|
48
|
+
- [ ] **Spacing** — padding/margin patterns (look for a scale: 4px, 8px, 12px, 16px, 24px, 32px, etc.)
|
|
49
|
+
- [ ] **Border radius** — buttons, cards, avatars, inputs
|
|
50
|
+
- [ ] **Shadows/elevation** — card shadows, dropdown shadows, modal overlay
|
|
51
|
+
- [ ] **Breakpoints** — when does the layout shift? (inspect with DevTools responsive mode)
|
|
52
|
+
- [ ] **Icons** — which icon library? custom SVGs? sizes?
|
|
53
|
+
- [ ] **Avatars** — sizes, shapes, fallback behavior
|
|
54
|
+
- [ ] **Buttons** — all variants (primary, secondary, ghost, icon-only, danger)
|
|
55
|
+
- [ ] **Inputs** — text fields, textareas, selects, checkboxes, toggles
|
|
56
|
+
|
|
57
|
+
## Phase 2: Component Inventory
|
|
58
|
+
|
|
59
|
+
For each distinct UI component, document:
|
|
60
|
+
1. **Name** — what would you call this component?
|
|
61
|
+
2. **Structure** — what HTML elements / child components does it contain?
|
|
62
|
+
3. **Variants** — does it have different sizes, colors, or states?
|
|
63
|
+
4. **States** — default, hover, active, disabled, loading, error, empty
|
|
64
|
+
5. **Responsive behavior** — how does it change at different breakpoints?
|
|
65
|
+
6. **Interactions** — click, hover, focus, keyboard navigation
|
|
66
|
+
7. **Animations** — transitions, entrance/exit, micro-interactions — **`framer-motion` vs CSS** and exact timing
|
|
67
|
+
|
|
68
|
+
### Common Components to Look For
|
|
69
|
+
- Navigation (top bar, sidebar, bottom bar)
|
|
70
|
+
- Cards / list items
|
|
71
|
+
- Buttons and links
|
|
72
|
+
- Forms and inputs
|
|
73
|
+
- Modals and dialogs
|
|
74
|
+
- Dropdowns and menus
|
|
75
|
+
- Tabs and segmented controls
|
|
76
|
+
- Avatars and user badges
|
|
77
|
+
- Loading skeletons
|
|
78
|
+
- Toast notifications
|
|
79
|
+
- Tooltips and popovers
|
|
80
|
+
- **Video / Lottie / canvas** blocks (do not substitute with static mockups without documenting why)
|
|
81
|
+
|
|
82
|
+
## Phase 3: Layout Architecture
|
|
83
|
+
|
|
84
|
+
- [ ] **Grid system** — CSS Grid? Flexbox? Fixed widths?
|
|
85
|
+
- [ ] **Column layout** — how many columns at each breakpoint?
|
|
86
|
+
- [ ] **Max-width** — main content area max-width
|
|
87
|
+
- [ ] **Sticky elements** — header, sidebar, floating buttons
|
|
88
|
+
- [ ] **Z-index layers** — navigation, modals, tooltips, overlays
|
|
89
|
+
- [ ] **Scroll behavior** — infinite scroll, pagination, virtual scrolling
|
|
90
|
+
|
|
91
|
+
## Phase 4: Technical Stack Analysis
|
|
92
|
+
|
|
93
|
+
- [ ] **Framework** — React? Vue? Angular? Check `__NEXT_DATA__`, `__NUXT__`, `ng-version`
|
|
94
|
+
- [ ] **CSS approach** — Tailwind (utility classes), CSS Modules, Styled Components, Emotion, vanilla CSS
|
|
95
|
+
- [ ] **State management** — Redux (check DevTools), React Query, Zustand, Pinia
|
|
96
|
+
- [ ] **API patterns** — REST, GraphQL (check network tab for `/graphql` requests)
|
|
97
|
+
- [ ] **Font loading** — Google Fonts, self-hosted, system fonts
|
|
98
|
+
- [ ] **Image strategy** — CDN, lazy loading, srcset, WebP/AVIF — **mirror URLs you are allowed to fetch**
|
|
99
|
+
- [ ] **Animation library** — site may use GSAP, Lottie, Rive, or CSS only — **in the Next.js clone, default to Framer Motion** unless a different lib is required for parity
|
|
100
|
+
|
|
101
|
+
## Phase 5: Documentation Output
|
|
102
|
+
|
|
103
|
+
After inspection, create these files in `docs/research/`:
|
|
104
|
+
1. `DESIGN_TOKENS.md` — All extracted colors, typography, spacing
|
|
105
|
+
2. `COMPONENT_INVENTORY.md` — Every component with structure notes
|
|
106
|
+
3. **`MEDIA_MANIFEST.md`** — (recommended) Table of every image/video/poster URL → local `public/` path or “blocked”
|
|
107
|
+
4. `LAYOUT_ARCHITECTURE.md` — Page layouts, grid system, responsive behavior
|
|
108
|
+
5. `INTERACTION_PATTERNS.md` — Animations: **CSS vs Framer Motion**, transitions, hover states
|
|
109
|
+
6. `TECH_STACK_ANALYSIS.md` — What the site uses and our chosen equivalents (Framer Motion for React animation)
|
package/template/package.json
CHANGED
|
@@ -1,59 +1,63 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "ai-website-clone-template",
|
|
3
|
-
"version": "0.3.1",
|
|
4
|
-
"private": true,
|
|
5
|
-
"description": "Clone any website into a clean, modern Next.js codebase using AI coding agents",
|
|
6
|
-
"author": "JCodesMore",
|
|
7
|
-
"license": "MIT",
|
|
8
|
-
"repository": {
|
|
9
|
-
"type": "git",
|
|
10
|
-
"url": "https://github.com/JCodesMore/ai-website-cloner-template.git"
|
|
11
|
-
},
|
|
12
|
-
"homepage": "https://github.com/JCodesMore/ai-website-cloner-template",
|
|
13
|
-
"bugs": {
|
|
14
|
-
"url": "https://github.com/JCodesMore/ai-website-cloner-template/issues"
|
|
15
|
-
},
|
|
16
|
-
"keywords": [
|
|
17
|
-
"claude-code",
|
|
18
|
-
"website-clone",
|
|
19
|
-
"reverse-engineering",
|
|
20
|
-
"nextjs",
|
|
21
|
-
"ai",
|
|
22
|
-
"template",
|
|
23
|
-
"tailwindcss",
|
|
24
|
-
"shadcn-ui"
|
|
25
|
-
],
|
|
26
|
-
"engines": {
|
|
27
|
-
"node": ">=24"
|
|
28
|
-
},
|
|
29
|
-
"scripts": {
|
|
30
|
-
"dev": "next dev",
|
|
31
|
-
"build": "next build",
|
|
32
|
-
"start": "next start",
|
|
33
|
-
"lint": "eslint",
|
|
34
|
-
"typecheck": "tsc --noEmit",
|
|
35
|
-
"check": "npm run lint && npm run typecheck && npm run build"
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
"
|
|
41
|
-
"
|
|
42
|
-
"
|
|
43
|
-
"
|
|
44
|
-
"react
|
|
45
|
-
"
|
|
46
|
-
"
|
|
47
|
-
"
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
"
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
"@
|
|
54
|
-
"
|
|
55
|
-
"
|
|
56
|
-
"
|
|
57
|
-
"
|
|
58
|
-
|
|
59
|
-
|
|
1
|
+
{
|
|
2
|
+
"name": "ai-website-clone-template",
|
|
3
|
+
"version": "0.3.1",
|
|
4
|
+
"private": true,
|
|
5
|
+
"description": "Clone any website into a clean, modern Next.js codebase using AI coding agents",
|
|
6
|
+
"author": "JCodesMore",
|
|
7
|
+
"license": "MIT",
|
|
8
|
+
"repository": {
|
|
9
|
+
"type": "git",
|
|
10
|
+
"url": "https://github.com/JCodesMore/ai-website-cloner-template.git"
|
|
11
|
+
},
|
|
12
|
+
"homepage": "https://github.com/JCodesMore/ai-website-cloner-template",
|
|
13
|
+
"bugs": {
|
|
14
|
+
"url": "https://github.com/JCodesMore/ai-website-cloner-template/issues"
|
|
15
|
+
},
|
|
16
|
+
"keywords": [
|
|
17
|
+
"claude-code",
|
|
18
|
+
"website-clone",
|
|
19
|
+
"reverse-engineering",
|
|
20
|
+
"nextjs",
|
|
21
|
+
"ai",
|
|
22
|
+
"template",
|
|
23
|
+
"tailwindcss",
|
|
24
|
+
"shadcn-ui"
|
|
25
|
+
],
|
|
26
|
+
"engines": {
|
|
27
|
+
"node": ">=24"
|
|
28
|
+
},
|
|
29
|
+
"scripts": {
|
|
30
|
+
"dev": "next dev",
|
|
31
|
+
"build": "next build",
|
|
32
|
+
"start": "next start",
|
|
33
|
+
"lint": "eslint",
|
|
34
|
+
"typecheck": "tsc --noEmit",
|
|
35
|
+
"check": "npm run lint && npm run typecheck && npm run build",
|
|
36
|
+
"recon": "node scripts/recon-playwright.mjs",
|
|
37
|
+
"recon:headed": "node scripts/recon-playwright.mjs --headed"
|
|
38
|
+
},
|
|
39
|
+
"dependencies": {
|
|
40
|
+
"@base-ui/react": "^1.3.0",
|
|
41
|
+
"class-variance-authority": "^0.7.1",
|
|
42
|
+
"clsx": "^2.1.1",
|
|
43
|
+
"framer-motion": "^12.4.0",
|
|
44
|
+
"lucide-react": "^1.6.0",
|
|
45
|
+
"next": "16.2.1",
|
|
46
|
+
"react": "19.2.4",
|
|
47
|
+
"react-dom": "19.2.4",
|
|
48
|
+
"shadcn": "^4.1.0",
|
|
49
|
+
"tailwind-merge": "^3.5.0",
|
|
50
|
+
"tw-animate-css": "^1.4.0"
|
|
51
|
+
},
|
|
52
|
+
"devDependencies": {
|
|
53
|
+
"@tailwindcss/postcss": "^4",
|
|
54
|
+
"@types/node": "^24",
|
|
55
|
+
"@types/react": "^19",
|
|
56
|
+
"@types/react-dom": "^19",
|
|
57
|
+
"eslint": "^9",
|
|
58
|
+
"eslint-config-next": "16.2.1",
|
|
59
|
+
"playwright": "^1.49.1",
|
|
60
|
+
"tailwindcss": "^4",
|
|
61
|
+
"typescript": "^5"
|
|
62
|
+
}
|
|
63
|
+
}
|
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Playwright recon when Browser / Chrome DevTools MCP is unavailable or broken.
|
|
4
|
+
*
|
|
5
|
+
* Reads the target URL from --url when given, otherwise from launchframe.config.json. Writes:
|
|
6
|
+
* - docs/design-references/playwright-<host>-<w>px.png (full page)
|
|
7
|
+
* - docs/research/computed-snapshot.json (styles + asset inventory + bot-wall hint)
|
|
8
|
+
* - docs/research/MEDIA_MANIFEST.md (table of discovered media URLs)
* - docs/research/EXTRACTION_LIMITATIONS.md (capture metadata + bot-wall warning)
|
|
9
|
+
*
|
|
10
|
+
* Usage:
|
|
11
|
+
* npm run recon
|
|
12
|
+
* npm run recon:headed
|
|
13
|
+
* node scripts/recon-playwright.mjs --url https://example.com --headed
|
|
14
|
+
*
|
|
15
|
+
* First time: npx playwright install chromium
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync } from "node:fs";
|
|
19
|
+
import { dirname, join } from "node:path";
|
|
20
|
+
import { fileURLToPath } from "node:url";
|
|
21
|
+
|
|
22
|
+
// Project root: the parent of the directory that contains this script.
const scriptDir = dirname(fileURLToPath(import.meta.url));
const ROOT = join(scriptDir, "..");
|
|
23
|
+
|
|
24
|
+
/**
 * Parse the command-line flags understood by this script.
 *
 * Supported flags:
 *   --headed, -H            run the browser with a visible window
 *   --url <v>, -u <v>       target URL (also --url=<v>)
 *   --channel <v>, -c <v>   browser channel (also --channel=<v>);
 *                           defaults to the PW_CHANNEL environment variable
 *
 * A value-taking flag only consumes the next token when that token is not
 * itself a flag, so `--url --headed` leaves the URL unset and still enables
 * headed mode instead of treating "--headed" as the URL.
 *
 * @param {string[]} argv - arguments after the script name
 * @returns {{ url: string | null, headed: boolean, channel: string | undefined }}
 */
function parseArgs(argv) {
  const out = { url: null, headed: false, channel: process.env.PW_CHANNEL || undefined };

  // The token after index `i` when it is a usable value, otherwise undefined.
  const valueAfter = (i) => {
    const next = argv[i + 1];
    return next !== undefined && !next.startsWith("-") ? next : undefined;
  };

  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (a === "--headed" || a === "-H") {
      out.headed = true;
    } else if (a === "--url" || a === "-u") {
      const value = valueAfter(i);
      if (value !== undefined) {
        out.url = value;
        i++;
      }
    } else if (a.startsWith("--url=")) {
      out.url = a.slice("--url=".length) || null;
    } else if (a === "--channel" || a === "-c") {
      const value = valueAfter(i);
      if (value !== undefined) {
        out.channel = value;
        i++;
      }
    } else if (a.startsWith("--channel=")) {
      out.channel = a.slice("--channel=".length) || undefined;
    }
  }
  return out;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Read the target URL from launchframe.config.json in the project root.
 *
 * @returns {string | null} the trimmed `url` value, or null when the file is
 *   missing, unreadable, not valid JSON, or has no non-empty string `url`.
 */
function readLaunchframeUrl() {
  const p = join(ROOT, "launchframe.config.json");
  if (!existsSync(p)) return null;
  try {
    const j = JSON.parse(readFileSync(p, "utf8"));
    const url = j?.url;
    // Only a non-empty string is usable as a navigation target.
    return typeof url === "string" && url.trim() !== "" ? url.trim() : null;
  } catch (err) {
    // Still best-effort, but tell the user why the config was ignored so a
    // typo in the JSON is not mistaken for a missing URL.
    console.warn(`Ignoring launchframe.config.json: ${err.message}`);
    return null;
  }
}
|
|
46
|
+
|
|
47
|
+
/**
 * Derive a filename-safe host label from a URL.
 *
 * Every run of characters other than letters, digits, "." and "-" in the
 * hostname is replaced by a single "_".
 *
 * @param {string} url - URL to take the hostname from
 * @returns {string} the sanitized hostname, or "unknown-host" when the URL
 *   cannot be parsed or has no hostname (e.g. file: or about: URLs)
 */
function safeHost(url) {
  try {
    const sanitized = new URL(url).hostname.replace(/[^a-z0-9.-]+/gi, "_");
    // An empty label would produce output names like "playwright--1440px.png".
    return sanitized || "unknown-host";
  } catch {
    return "unknown-host";
  }
}
|
|
54
|
+
|
|
55
|
+
/**
 * Scroll to the bottom in viewport-sized steps to trigger lazy-loaded media,
 * then return to the top.
 *
 * The document height is re-read on every step because lazy content can make
 * the page taller while scrolling. The number of steps is bounded (the steps
 * needed for the initial height plus a fixed allowance) so an endlessly
 * growing page cannot keep the loop running forever.
 *
 * @param {{ evaluate: Function }} page - Playwright page (only `evaluate` is used)
 * @returns {Promise<void>}
 */
async function scrollFullPage(page) {
  // Everything inside the callback runs via page.evaluate, so it relies only
  // on page globals and names declared inside the callback itself.
  await page.evaluate(async () => {
    const delay = (ms) => new Promise((r) => setTimeout(r, ms));
    const step = Math.max(1, Math.floor(window.innerHeight * 0.85));
    const initialSteps = Math.ceil(document.documentElement.scrollHeight / step);
    // Extra steps granted for content that appears while scrolling.
    const maxSteps = initialSteps + 100;
    let y = 0;
    for (let i = 0; i < maxSteps; i++) {
      const max = document.documentElement.scrollHeight;
      if (y >= max) break;
      y = Math.min(y + step, max);
      window.scrollTo(0, y);
      await delay(200);
    }
    window.scrollTo(0, 0);
    await delay(300);
  });
}
|
|
71
|
+
|
|
72
|
+
/**
 * Collect a snapshot of the loaded page: title, a bot-wall heuristic, a text
 * sample, an inventory of media URLs, and computed styles for a few landmark
 * elements.
 *
 * @param {{ evaluate: Function }} page - Playwright page (only `evaluate` is used)
 * @returns {Promise<{
 *   title: string,
 *   likelyBotWall: boolean,
 *   textSample: string,
 *   assetInventory: { images: object[], videos: object[], backgroundImages: object[], svgCount: number },
 *   computedLandmarks: object
 * }>}
 */
async function gatherPageData(page) {
  // The callback is handed to page.evaluate, so it only uses page globals and
  // names declared inside the callback.
  return page.evaluate(() => {
    const title = document.title || "";
    const text = (document.body?.innerText || "").slice(0, 5000);
    // Heuristic: title, text, markup, or elements typical of a challenge page.
    const likelyBotWall =
      /just a moment/i.test(title) ||
      /checking your browser/i.test(text) ||
      /cf-browser-verification/i.test(document.documentElement?.innerHTML || "") ||
      document.querySelector("#challenge-running, .cf-browser-verification, #cf-challenge-running") !== null;

    const imgs = [...document.querySelectorAll("img")].map((img) => ({
      src: img.currentSrc || img.src || "",
      srcset: img.srcset || "",
      alt: img.alt || "",
      w: img.naturalWidth,
      h: img.naturalHeight,
    }));

    const videos = [...document.querySelectorAll("video")].map((v) => ({
      src: v.currentSrc || v.src || "",
      poster: v.poster || "",
      autoplay: v.autoplay,
      loop: v.loop,
      muted: v.muted,
    }));

    // Unique background-image URLs. A single declaration can stack several
    // layers (image + gradient + image), so every url(...) is collected, not
    // just the first one.
    const MAX_BG_URLS = 400;
    const bgUrlPattern = /url\(["']?([^"')]+)["']?\)/g;
    const bgUrls = [];
    const seen = new Set();
    for (const el of document.querySelectorAll("*")) {
      if (bgUrls.length >= MAX_BG_URLS) break;
      const bg = getComputedStyle(el).backgroundImage;
      if (!bg || bg === "none") continue;
      for (const [, layerUrl] of bg.matchAll(bgUrlPattern)) {
        if (bgUrls.length >= MAX_BG_URLS) break;
        if (seen.has(layerUrl)) continue;
        seen.add(layerUrl);
        bgUrls.push({ url: layerUrl, tag: el.tagName.toLowerCase() });
      }
    }

    const props = [
      "fontSize",
      "fontWeight",
      "fontFamily",
      "lineHeight",
      "letterSpacing",
      "color",
      "backgroundColor",
      "padding",
      "margin",
      "maxWidth",
      "display",
      "gap",
      "borderRadius",
      "boxShadow",
    ];

    // Computed values of `props` for one element; null when the element is absent.
    function pickStyles(el) {
      if (!el) return null;
      const cs = getComputedStyle(el);
      const o = {};
      for (const p of props) o[p] = cs[p];
      return o;
    }

    const landmarks = {
      html: pickStyles(document.documentElement),
      body: pickStyles(document.body),
      h1: pickStyles(document.querySelector("h1")),
      header: pickStyles(document.querySelector("header")),
      main: pickStyles(document.querySelector("main")),
      firstSection: pickStyles(document.querySelector("main section, section")),
    };

    return {
      title,
      likelyBotWall,
      textSample: text.slice(0, 1200),
      assetInventory: {
        // Images without any resolved source carry no usable URL.
        images: imgs.filter((i) => i.src),
        videos,
        backgroundImages: bgUrls,
        svgCount: document.querySelectorAll("svg").length,
      },
      computedLandmarks: landmarks,
    };
  });
}
|
|
159
|
+
|
|
160
|
+
/**
 * Write docs/research/MEDIA_MANIFEST.md: markdown tables of the discovered
 * image, video, and background-image URLs.
 *
 * At most 200 images and 150 background images are listed; when a list is
 * truncated, a note states how many entries were left out.
 *
 * @param {string} host - host label shown in the manifest header
 * @param {{ assetInventory: { images: object[], videos: object[], backgroundImages: object[] } }} data
 * @param {string} [limitationsPath] - when set, appended as a "See also" pointer
 * @returns {void}
 */
function writeMediaManifest(host, data, limitationsPath) {
  const { images, videos, backgroundImages } = data.assetInventory;

  // Make a value safe for a single markdown table cell: escape pipes and
  // collapse line breaks (alt text may span lines) so a row stays on one line.
  const cell = (value) =>
    String(value ?? "")
      .replace(/\|/g, "\\|")
      .replace(/\s*[\r\n]+\s*/g, " ");

  const lines = [
    "# MEDIA_MANIFEST (Playwright)",
    "",
    `Host: **${host}**`,
    "",
    "## Images (`<img>`)",
    "",
    "| src | alt | natural |",
    "|-----|-----|---------|",
  ];
  for (const i of images.slice(0, 200)) {
    lines.push(`| ${cell(i.src)} | ${cell(i.alt)} | ${i.w}×${i.h} |`);
  }
  if (images.length > 200) lines.push(`\n_…and ${images.length - 200} more_\n`);

  lines.push("", "## Video", "", "| src | poster |", "|-----|--------|");
  for (const v of videos) {
    lines.push(`| ${cell(v.src)} | ${cell(v.poster)} |`);
  }

  lines.push("", "## Background images (sample)", "", "| url |", "|-----|");
  for (const b of backgroundImages.slice(0, 150)) {
    lines.push(`| ${cell(b.url)} |`);
  }
  if (backgroundImages.length > 150) lines.push(`\n_…and ${backgroundImages.length - 150} more_\n`);

  lines.push("", "---", "", `_Generated by scripts/recon-playwright.mjs. Download with scripts/download-assets.mjs when ready._`);
  if (limitationsPath) lines.push("", `See also: **${limitationsPath}**`);

  writeFileSync(join(ROOT, "docs", "research", "MEDIA_MANIFEST.md"), lines.join("\n") + "\n", "utf8");
}
|
|
193
|
+
|
|
194
|
+
/**
 * Write docs/research/EXTRACTION_LIMITATIONS.md describing this capture.
 *
 * The file starts with the capture metadata, followed by either a bot-wall
 * warning (when `data.likelyBotWall` is truthy) or a general notes section.
 *
 * @param {string} host - host label for the report
 * @param {string} url - URL that was requested
 * @param {string | null} finalUrl - URL reported by the page after navigation
 * @param {{ likelyBotWall: boolean }} data
 * @returns {string} project-relative path of the written file
 */
function writeLimitations(host, url, finalUrl, data) {
  const target = join(ROOT, "docs", "research", "EXTRACTION_LIMITATIONS.md");

  const header = [
    "# Extraction limitations",
    "",
    `- **Requested URL:** ${url}`,
    `- **Final URL:** ${finalUrl}`,
    `- **Host:** ${host}`,
    `- **Captured at:** ${new Date().toISOString()}`,
    `- **Tool:** Playwright (\`npm run recon\`)`,
    "",
  ];

  const section = data.likelyBotWall
    ? [
        "## Bot / challenge page suspected",
        "",
        "Title or DOM matches common WAF/interstitial patterns. **Computed styles and screenshots may not reflect the real marketing page.** Prefer \`npm run recon:headed\` with real Chrome, or open the site manually and export assets.",
        "",
      ]
    : [
        "## Notes",
        "",
        "Snapshot-derived data is from Playwright Chromium, not interactive MCP. Re-run after layout shifts; verify against a real browser for pixel QA.",
        "",
      ];

  writeFileSync(target, [...header, ...section].join("\n"), "utf8");
  return "docs/research/EXTRACTION_LIMITATIONS.md";
}
|
|
224
|
+
|
|
225
|
+
/**
 * Entry point: resolve the target URL, capture full-page screenshots at
 * 1440px and 390px widths, gather page data at 1440px, and write the
 * snapshot JSON, limitations report, and media manifest under docs/.
 *
 * Exits the process with code 1 when no URL is available or Playwright
 * cannot be imported. Navigation or capture errors propagate to the caller
 * after the browser has been closed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  // An explicit --url wins over the config file.
  const url = args.url || readLaunchframeUrl();
  if (!url) {
    console.error("No URL: set launchframe.config.json or pass --url https://...");
    process.exit(1);
  }

  // Imported lazily so a missing dependency produces an install hint.
  let chromium;
  try {
    ({ chromium } = await import("playwright"));
  } catch {
    console.error("Install Playwright: npm i -D playwright && npx playwright install chromium");
    process.exit(1);
  }

  const host = safeHost(url);
  mkdirSync(join(ROOT, "docs", "design-references"), { recursive: true });
  mkdirSync(join(ROOT, "docs", "research"), { recursive: true });

  const browser = await chromium.launch({
    headless: !args.headed,
    channel: args.channel,
  });

  const snapshot = {
    fetchedAt: new Date().toISOString(),
    requestedUrl: url,
    finalUrl: null,
    viewports: {},
    title: null,
    likelyBotWall: false,
    assetInventory: null,
    computedLandmarks: null,
    textSample: null,
  };

  // Everything that uses the browser lives inside the try so the browser is
  // closed even when creating the context or page fails.
  try {
    const context = await browser.newContext({
      userAgent:
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
      locale: "en-US",
    });

    const page = await context.newPage();

    await page.goto(url, { waitUntil: "domcontentloaded", timeout: 90_000 });
    // Fixed pause after DOMContentLoaded before capturing.
    await new Promise((r) => setTimeout(r, 2500));
    snapshot.finalUrl = page.url();

    for (const w of [1440, 390]) {
      await page.setViewportSize({ width: w, height: w === 1440 ? 900 : 844 });
      // Scroll once, on the first (desktop) pass, to trigger lazy-loaded media.
      if (w === 1440) await scrollFullPage(page);

      const shotPath = join(ROOT, "docs", "design-references", `playwright-${host}-${w}px.png`);
      await page.screenshot({ path: shotPath, fullPage: true });
      snapshot.viewports[w] = { screenshot: `docs/design-references/playwright-${host}-${w}px.png` };

      // Page data is gathered only at the desktop width.
      if (w === 1440) {
        const data = await gatherPageData(page);
        snapshot.title = data.title;
        snapshot.likelyBotWall = data.likelyBotWall;
        snapshot.assetInventory = data.assetInventory;
        snapshot.computedLandmarks = data.computedLandmarks;
        snapshot.textSample = data.textSample;
      }
    }
  } finally {
    await browser.close();
  }

  const limRel = writeLimitations(host, url, snapshot.finalUrl, {
    likelyBotWall: snapshot.likelyBotWall,
  });

  writeFileSync(
    join(ROOT, "docs", "research", "computed-snapshot.json"),
    JSON.stringify(snapshot, null, 2) + "\n",
    "utf8"
  );

  if (snapshot.assetInventory) {
    writeMediaManifest(host, { assetInventory: snapshot.assetInventory }, limRel);
  }

  console.log("Playwright recon finished.");
  console.log(`  Screenshots: docs/design-references/playwright-${host}-1440px.png (+ 390px)`);
  console.log(`  Snapshot:    docs/research/computed-snapshot.json`);
  console.log(`  Media list:  docs/research/MEDIA_MANIFEST.md`);
  console.log(`  Limits:      ${limRel}`);
  if (snapshot.likelyBotWall) {
    console.warn("\n⚠ Possible bot wall / challenge page — check EXTRACTION_LIMITATIONS.md and try: npm run recon:headed\n");
  }
}
|
|
319
|
+
|
|
320
|
+
// Run the script; on any unhandled failure, print the error and exit non-zero.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
|