@su-record/vibe 2.9.40 → 2.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +28 -12
- package/README.en.md +2 -3
- package/README.md +33 -27
- package/agents/{teams/figma → figma}/figma-analyst.md +2 -2
- package/agents/research/{best-practices-agent.md → best-practices.md} +1 -1
- package/agents/research/{codebase-patterns-agent.md → codebase-patterns.md} +1 -1
- package/agents/research/{framework-docs-agent.md → framework-docs.md} +1 -1
- package/agents/research/{security-advisory-agent.md → security-advisory.md} +1 -1
- package/agents/teams/research-team.md +4 -4
- package/agents/teams/review-debate-team.md +2 -2
- package/agents/teams/security-team.md +4 -4
- package/dist/cli/auth.d.ts +0 -1
- package/dist/cli/auth.d.ts.map +1 -1
- package/dist/cli/auth.js +1 -18
- package/dist/cli/auth.js.map +1 -1
- package/dist/cli/collaborator.d.ts +3 -3
- package/dist/cli/collaborator.js +4 -4
- package/dist/cli/collaborator.js.map +1 -1
- package/dist/cli/commands/info.d.ts.map +1 -1
- package/dist/cli/commands/info.js +0 -1
- package/dist/cli/commands/info.js.map +1 -1
- package/dist/cli/commands/init.d.ts +3 -4
- package/dist/cli/commands/init.d.ts.map +1 -1
- package/dist/cli/commands/init.js +15 -20
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/remove.d.ts.map +1 -1
- package/dist/cli/commands/remove.js +2 -7
- package/dist/cli/commands/remove.js.map +1 -1
- package/dist/cli/commands/update.d.ts.map +1 -1
- package/dist/cli/commands/update.js +10 -10
- package/dist/cli/commands/update.js.map +1 -1
- package/dist/cli/index.js +1 -3
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/postinstall/claude-agents.d.ts +3 -1
- package/dist/cli/postinstall/claude-agents.d.ts.map +1 -1
- package/dist/cli/postinstall/claude-agents.js +47 -9
- package/dist/cli/postinstall/claude-agents.js.map +1 -1
- package/dist/cli/postinstall/constants.d.ts +5 -0
- package/dist/cli/postinstall/constants.d.ts.map +1 -1
- package/dist/cli/postinstall/constants.js +165 -23
- package/dist/cli/postinstall/constants.js.map +1 -1
- package/dist/cli/postinstall/cursor-skills.js +2 -2
- package/dist/cli/postinstall/main.d.ts.map +1 -1
- package/dist/cli/postinstall/main.js +26 -24
- package/dist/cli/postinstall/main.js.map +1 -1
- package/dist/cli/setup/LegacyMigration.d.ts +3 -3
- package/dist/cli/setup/LegacyMigration.d.ts.map +1 -1
- package/dist/cli/setup/LegacyMigration.js +3 -5
- package/dist/cli/setup/LegacyMigration.js.map +1 -1
- package/dist/cli/setup/ProjectSetup.d.ts +18 -8
- package/dist/cli/setup/ProjectSetup.d.ts.map +1 -1
- package/dist/cli/setup/ProjectSetup.js +70 -19
- package/dist/cli/setup/ProjectSetup.js.map +1 -1
- package/dist/cli/setup.d.ts +1 -1
- package/dist/cli/setup.d.ts.map +1 -1
- package/dist/cli/setup.js +1 -1
- package/dist/cli/setup.js.map +1 -1
- package/dist/cli/utils/cli-detector.d.ts +0 -7
- package/dist/cli/utils/cli-detector.d.ts.map +1 -1
- package/dist/cli/utils/cli-detector.js +0 -95
- package/dist/cli/utils/cli-detector.js.map +1 -1
- package/dist/cli/utils.d.ts +1 -1
- package/dist/cli/utils.d.ts.map +1 -1
- package/dist/cli/utils.js +1 -2
- package/dist/cli/utils.js.map +1 -1
- package/dist/infra/lib/OrchestrateWorkflow.js +1 -1
- package/dist/infra/lib/OrchestrateWorkflow.js.map +1 -1
- package/dist/infra/lib/memory/MemoryStorage.d.ts +1 -1
- package/dist/infra/lib/memory/MemoryStorage.d.ts.map +1 -1
- package/dist/infra/lib/memory/MemoryStorage.js +2 -3
- package/dist/infra/lib/memory/MemoryStorage.js.map +1 -1
- package/dist/infra/lib/telemetry/SkillTelemetry.test.js +4 -4
- package/dist/infra/lib/telemetry/SkillTelemetry.test.js.map +1 -1
- package/dist/infra/orchestrator/parallelResearch.js +4 -4
- package/dist/infra/orchestrator/parallelResearch.js.map +1 -1
- package/hooks/scripts/__tests__/pre-tool-guard.test.js +1 -1
- package/hooks/scripts/clone-extract.js +712 -0
- package/hooks/scripts/clone-refine.js +510 -0
- package/hooks/scripts/clone-to-scss.js +275 -0
- package/hooks/scripts/clone-validate.js +280 -0
- package/hooks/scripts/codex-notify.js +49 -0
- package/hooks/scripts/command-log.js +1 -1
- package/hooks/scripts/lib/dispatcher.js +2 -3
- package/hooks/scripts/lib/scope-from-spec.js +2 -4
- package/hooks/scripts/llm-orchestrate.js +2 -7
- package/hooks/scripts/prompt-dispatcher.js +3 -3
- package/hooks/scripts/step-counter.js +1 -1
- package/hooks/scripts/utils.js +5 -10
- package/package.json +2 -1
- package/skills/agents-md/SKILL.md +2 -0
- package/skills/arch-guard/SKILL.md +2 -0
- package/skills/brand-assets/SKILL.md +1 -0
- package/skills/capability-loop/SKILL.md +2 -0
- package/skills/characterization-test/SKILL.md +2 -0
- package/skills/chub-usage/SKILL.md +1 -0
- package/skills/claude-md-guide/SKILL.md +2 -0
- package/skills/clone/SKILL.md +361 -0
- package/skills/commerce-patterns/SKILL.md +1 -0
- package/skills/commit-push-pr/SKILL.md +1 -0
- package/skills/context7-usage/SKILL.md +1 -0
- package/skills/{vibe-contract → contract}/SKILL.md +7 -8
- package/skills/create-prd/SKILL.md +1 -0
- package/skills/design-audit/SKILL.md +1 -0
- package/skills/design-critique/SKILL.md +1 -0
- package/skills/design-distill/SKILL.md +1 -0
- package/skills/design-normalize/SKILL.md +1 -0
- package/skills/design-polish/SKILL.md +1 -0
- package/skills/design-teach/SKILL.md +2 -0
- package/skills/devlog/SKILL.md +1 -0
- package/skills/{vibe-docs → docs}/SKILL.md +8 -8
- package/skills/e2e-commerce/SKILL.md +1 -0
- package/skills/event-comms/SKILL.md +1 -0
- package/skills/event-ops/SKILL.md +1 -0
- package/skills/event-planning/SKILL.md +1 -0
- package/skills/exec-plan/SKILL.md +2 -0
- package/skills/{vibe-figma → figma}/SKILL.md +4 -3
- package/skills/{vibe-figma-convert → figma-convert}/SKILL.md +4 -3
- package/skills/{vibe-figma-extract → figma-extract}/SKILL.md +4 -3
- package/skills/git-worktree/SKILL.md +1 -0
- package/skills/handoff/SKILL.md +2 -0
- package/skills/{vibe-interview → interview}/SKILL.md +16 -16
- package/skills/parallel-research/SKILL.md +2 -0
- package/skills/{vibe-plan → plan}/SKILL.md +9 -9
- package/skills/prioritization-frameworks/SKILL.md +1 -0
- package/skills/priority-todos/SKILL.md +2 -0
- package/skills/{vibe-regress → regress}/SKILL.md +5 -6
- package/skills/rob-pike/SKILL.md +2 -0
- package/skills/seo-checklist/SKILL.md +1 -0
- package/skills/{vibe-spec → spec}/SKILL.md +14 -14
- package/skills/{vibe-spec-review → spec-review}/SKILL.md +8 -9
- package/skills/systematic-debugging/SKILL.md +2 -0
- package/skills/techdebt/SKILL.md +2 -0
- package/skills/{vibe-test → test}/SKILL.md +19 -19
- package/skills/tool-fallback/SKILL.md +1 -0
- package/skills/typescript-advanced-types/SKILL.md +1 -0
- package/skills/ui-ux-pro-max/SKILL.md +1 -0
- package/skills/user-personas/SKILL.md +1 -0
- package/skills/vercel-react-best-practices/SKILL.md +1 -0
- package/skills/vibe/SKILL.md +288 -0
- package/{commands/vibe.analyze.md → skills/vibe.analyze/SKILL.md} +2 -0
- package/skills/vibe.clone/SKILL.md +117 -0
- package/{commands/vibe.contract.md → skills/vibe.contract/SKILL.md} +3 -1
- package/{commands/vibe.docs.md → skills/vibe.docs/SKILL.md} +3 -1
- package/{commands/vibe.event.md → skills/vibe.event/SKILL.md} +2 -0
- package/{commands/vibe.figma.md → skills/vibe.figma/SKILL.md} +25 -23
- package/{commands/vibe.harness.md → skills/vibe.harness/SKILL.md} +2 -0
- package/{commands/vibe.reason.md → skills/vibe.reason/SKILL.md} +2 -0
- package/{commands/vibe.regress.md → skills/vibe.regress/SKILL.md} +5 -3
- package/{commands/vibe.review.md → skills/vibe.review/SKILL.md} +2 -0
- package/{commands/vibe.run.md → skills/vibe.run/SKILL.md} +5 -1
- package/{commands/vibe.scaffold.md → skills/vibe.scaffold/SKILL.md} +2 -0
- package/{commands/vibe.spec.md → skills/vibe.spec/SKILL.md} +36 -34
- package/{commands/vibe.test.md → skills/vibe.test/SKILL.md} +8 -6
- package/{commands/vibe.trace.md → skills/vibe.trace/SKILL.md} +7 -0
- package/{commands/vibe.utils.md → skills/vibe.utils/SKILL.md} +2 -0
- package/{commands/vibe.verify.md → skills/vibe.verify/SKILL.md} +4 -2
- package/skills/video-production/SKILL.md +1 -0
- package/vibe/rules/principles/dual-harness-doctrine.md +50 -0
- /package/agents/{teams/figma → figma}/figma-architect.md +0 -0
- /package/agents/{teams/figma → figma}/figma-auditor.md +0 -0
- /package/agents/{teams/figma → figma}/figma-builder.md +0 -0
- /package/skills/{vibe-docs → docs}/templates/architecture.md +0 -0
- /package/skills/{vibe-docs → docs}/templates/behavioral-principles.md +0 -0
- /package/skills/{vibe-docs → docs}/templates/readme.md +0 -0
- /package/skills/{vibe-docs → docs}/templates/release-notes.md +0 -0
- /package/skills/{vibe-figma → figma}/rubrics/extraction-checklist.md +0 -0
- /package/skills/{vibe-figma → figma}/templates/component-index.md +0 -0
- /package/skills/{vibe-figma → figma}/templates/component-spec.md +0 -0
- /package/skills/{vibe-figma → figma}/templates/figma-handoff.md +0 -0
- /package/skills/{vibe-figma → figma}/templates/remapped-tree.md +0 -0
- /package/skills/{vibe-figma-convert → figma-convert}/rubrics/conversion-rules.md +0 -0
- /package/skills/{vibe-figma-convert → figma-convert}/templates/component.md +0 -0
- /package/skills/{vibe-figma-extract → figma-extract}/rubrics/image-rules.md +0 -0
- /package/skills/{vibe-interview → interview}/checklists/api.md +0 -0
- /package/skills/{vibe-interview → interview}/checklists/feature.md +0 -0
- /package/skills/{vibe-interview → interview}/checklists/library.md +0 -0
- /package/skills/{vibe-interview → interview}/checklists/mobile.md +0 -0
- /package/skills/{vibe-interview → interview}/checklists/webapp.md +0 -0
- /package/skills/{vibe-interview → interview}/checklists/website.md +0 -0
- /package/skills/{vibe-regress → regress}/templates/bug.md +0 -0
- /package/skills/{vibe-regress → regress}/templates/test-jest.md +0 -0
- /package/skills/{vibe-regress → regress}/templates/test-vitest.md +0 -0
|
@@ -0,0 +1,712 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* clone-extract.js — website → precise extraction of the rendered DOM, CSS, and assets
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* node clone-extract.js capture <URL> --out=<dir> --viewport=375x812 --bp=mo [--stealth] [--ignore-robots]
|
|
8
|
+
*
|
|
9
|
+
* Output (in <dir>):
|
|
10
|
+
* rendered.html — sanitized full DOM after JS
|
|
11
|
+
* computed.json — per-element computed CSS + box + pseudo-elements + shadow DOM
|
|
12
|
+
* screenshot.png — full-page screenshot
|
|
13
|
+
* stylesheets.json — @font-face + @keyframes harvested from all sheets
|
|
14
|
+
* asset-map.json — remote URL → local path mapping
|
|
15
|
+
* assets/images/*, assets/fonts/*
|
|
16
|
+
*
|
|
17
|
+
* Requires: puppeteer (optional peer dep; `npm install puppeteer` if missing)
|
|
18
|
+
*
|
|
19
|
+
* Fidelity guarantees:
|
|
20
|
+
* 1. Pseudo-elements (::before/::after) captured separately
|
|
21
|
+
* 2. Shadow DOM traversed (open shadow roots)
|
|
22
|
+
* 3. Inline SVG preserved as-is in nodes
|
|
23
|
+
* 4. CSS custom properties (--vars) captured per element
|
|
24
|
+
* 5. @font-face downloaded with format() preference (woff2 > woff > ttf)
|
|
25
|
+
* 6. Node IDs stable via DOM path, not attribute mutation
|
|
26
|
+
* 7. <picture>/srcset resolved to currentSrc (matches viewport)
|
|
27
|
+
* 8. Gradients & multi-backgrounds preserved verbatim
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
import fs from 'fs';
|
|
31
|
+
import path from 'path';
|
|
32
|
+
import { URL } from 'url';
|
|
33
|
+
import http from 'http';
|
|
34
|
+
import https from 'https';
|
|
35
|
+
|
|
36
|
+
// ─── CSS property allow-list (captured per element) ─────────────────
|
|
37
|
+
// Kept narrow to keep computed.json sane; downstream scripts may add more.
|
|
38
|
+
/**
 * Allow-list of CSS property names whose values are read for every captured
 * element. The list order is the order in which properties are queried.
 */
const CSS_PROPS = [
  // ── layout / box ──
  'display', 'position',
  'top', 'right', 'bottom', 'left', 'inset',
  'float', 'clear',
  'flex-direction', 'flex-wrap',
  'justify-content', 'align-items', 'align-content',
  'align-self', 'justify-self', 'place-items', 'place-content',
  'gap', 'row-gap', 'column-gap',
  'order', 'flex-grow', 'flex-shrink', 'flex-basis',
  'grid-template-columns', 'grid-template-rows', 'grid-template-areas',
  'grid-column', 'grid-row', 'grid-area',
  'grid-auto-flow', 'grid-auto-rows', 'grid-auto-columns',
  'width', 'height',
  'min-width', 'min-height',
  'max-width', 'max-height',
  'aspect-ratio',
  'margin-top', 'margin-right', 'margin-bottom', 'margin-left',
  'padding-top', 'padding-right', 'padding-bottom', 'padding-left',
  'box-sizing',
  'overflow', 'overflow-x', 'overflow-y',
  'z-index',
  'pointer-events',

  // ── typography ──
  'font-family', 'font-size', 'font-weight', 'font-style', 'line-height',
  'letter-spacing', 'word-spacing',
  'text-align', 'text-decoration',
  'text-decoration-color', 'text-decoration-thickness', 'text-underline-offset',
  'text-transform', 'text-shadow', 'color',
  'white-space', 'word-break', 'overflow-wrap', 'text-overflow',
  '-webkit-line-clamp', '-webkit-box-orient',

  // ── decoration / paint ──
  'background-color', 'background-image', 'background-size', 'background-position',
  'background-repeat', 'background-attachment', 'background-clip', 'background-origin',
  'background-blend-mode',
  'border-top-width', 'border-right-width', 'border-bottom-width', 'border-left-width',
  'border-top-style', 'border-right-style', 'border-bottom-style', 'border-left-style',
  'border-top-color', 'border-right-color', 'border-bottom-color', 'border-left-color',
  'border-top-left-radius', 'border-top-right-radius',
  'border-bottom-left-radius', 'border-bottom-right-radius',
  'box-shadow', 'opacity', 'mix-blend-mode',
  'filter', 'backdrop-filter',
  'transform', 'transform-origin', 'perspective',
  'transition', 'animation', 'visibility',

  // ── content (needed for pseudo-elements) and misc ──
  'content', 'cursor', 'caret-color', 'list-style',
  'object-fit', 'object-position',
  'mask-image', 'mask-size', 'mask-position', 'clip-path',
];
|
|
76
|
+
|
|
77
|
+
// ─── CLI parse ──────────────────────────────────────────────────────
|
|
78
|
+
/**
 * Parse the process argument vector for the CLI.
 *
 * Positional layout: argv[2] is the sub-command and argv[3] the target URL;
 * every later argument is treated as a flag. Unknown flags are ignored.
 *
 * @param {string[]} argv - Full argument vector (e.g. process.argv).
 * @returns {{cmd: (string|undefined), url: (string|undefined), opts: object}}
 *   opts always has boolean `stealth` and `ignoreRobots`; `out`, `viewport`,
 *   `bp` and `wait` are present only when the matching flag was supplied.
 */
function parseArgs(argv) {
  const [, , cmd, urlArg, ...rest] = argv;
  const opts = { stealth: false, ignoreRobots: false };

  // Return the text after "--name=" or null when the argument is another flag.
  const valueOf = (arg, name) => {
    const prefix = `--${name}=`;
    return arg.startsWith(prefix) ? arg.slice(prefix.length) : null;
  };

  for (const arg of rest) {
    const out = valueOf(arg, 'out');
    const viewport = valueOf(arg, 'viewport');
    const bp = valueOf(arg, 'bp');
    const wait = valueOf(arg, 'wait');

    if (out !== null) opts.out = out;
    else if (viewport !== null) opts.viewport = viewport;
    else if (bp !== null) opts.bp = bp;
    else if (wait !== null) {
      // A non-numeric or negative value used to be stored as NaN / a negative
      // number; leave the option unset instead so a caller-side default applies.
      const ms = Number(wait);
      if (Number.isFinite(ms) && ms >= 0) opts.wait = ms;
    } else if (arg === '--stealth') opts.stealth = true;
    else if (arg === '--ignore-robots') opts.ignoreRobots = true;
  }

  return { cmd, url: urlArg, opts };
}
|
|
91
|
+
|
|
92
|
+
/**
 * Turn a viewport spec into its numeric parts.
 *
 * Accepted forms are "WxH" and "WxH@DPR" where W and H are integers and DPR
 * is an integer or decimal; DPR defaults to 1 when omitted.
 *
 * @param {string} v - Viewport spec, e.g. "375x812" or "375x812@2".
 * @returns {{width: number, height: number, deviceScaleFactor: number}}
 * @throws {Error} When the spec does not match either accepted form.
 */
function parseViewport(v) {
  const spec = v || '';
  const parts = /^(\d+)x(\d+)(?:@(\d+(?:\.\d+)?))?$/.exec(spec);
  if (parts === null) {
    throw new Error(`Invalid viewport: ${v}. Expected WxH or WxH@DPR (e.g. 375x812@2)`);
  }

  const [, rawWidth, rawHeight, rawDpr] = parts;
  const deviceScaleFactor = rawDpr ? Number(rawDpr) : 1;
  return { width: Number(rawWidth), height: Number(rawHeight), deviceScaleFactor };
}
|
|
101
|
+
|
|
102
|
+
// ─── Puppeteer dynamic loader (optional dep) ────────────────────────
|
|
103
|
+
/**
 * Dynamically import the optional `puppeteer` dependency.
 *
 * @returns {Promise<object>} The module namespace object of `puppeteer`.
 * @throws {Error} With an install hint when the import fails. The underlying
 *   import error is attached as `cause` so failures other than "module not
 *   found" (for example a broken install) remain diagnosable.
 */
async function loadPuppeteer() {
  try {
    return await import('puppeteer');
  } catch (err) {
    throw new Error(
      'puppeteer is not installed. Run: npm install puppeteer\n' +
        'Required for clone-extract.js (clone Phase 1).',
      { cause: err },
    );
  }
}
|
|
113
|
+
|
|
114
|
+
// ─── robots.txt check ───────────────────────────────────────────────
|
|
115
|
+
/**
 * Report whether the site's robots.txt disallows the target path for the
 * wildcard (`*`) user agent.
 *
 * Parsing notes:
 *   - `#` comments (whole-line or trailing) are stripped before matching.
 *   - Consecutive `User-agent` lines form one group, so a `*` line followed by
 *     another agent line still applies the group's rules to `*`.
 *   - Only `Disallow` prefixes are evaluated; `Allow` overrides and wildcard
 *     patterns are not interpreted.
 *
 * This is best-effort: any failure to fetch or parse robots.txt is treated as
 * "not disallowed".
 *
 * @param {string} targetUrl - Absolute URL of the page about to be captured.
 * @returns {Promise<boolean>} true when a `*`-group Disallow prefix matches
 *   the URL's pathname, false otherwise (including on fetch errors).
 * @throws {TypeError} When targetUrl is not a valid absolute URL.
 */
async function checkRobots(targetUrl) {
  const u = new URL(targetUrl);
  const robotsUrl = `${u.protocol}//${u.host}/robots.txt`;
  try {
    const body = await fetchText(robotsUrl, 5000);
    const disallows = [];
    let inStar = false;
    // True while we are still reading the run of User-agent lines that opens a group.
    let inAgentRun = false;

    for (const raw of body.split(/\r?\n/)) {
      const line = raw.replace(/#.*$/, '').trim();
      if (!line) continue;

      const agent = /^user-agent:\s*(.*)$/i.exec(line);
      if (agent) {
        // A User-agent line that follows rules starts a new group.
        if (!inAgentRun) inStar = false;
        if (agent[1].trim() === '*') inStar = true;
        inAgentRun = true;
        continue;
      }

      // Only rule lines close the agent run; other records (e.g. Sitemap) do not.
      if (/^(?:allow|disallow):/i.test(line)) inAgentRun = false;

      if (inStar && /^disallow:/i.test(line)) {
        const p = line.replace(/^disallow:\s*/i, '').trim();
        if (p) disallows.push(p); // an empty Disallow means "allow everything"
      }
    }

    const reqPath = u.pathname || '/';
    return disallows.some((p) => reqPath.startsWith(p));
  } catch {
    // Deliberate best-effort: an unreachable robots.txt does not block capture.
    return false;
  }
}
|
|
137
|
+
|
|
138
|
+
/**
 * GET a URL and resolve with its body decoded as UTF-8 text.
 *
 * Redirects (3xx with a Location header) are followed, with the Location
 * resolved against the current URL so relative redirects work, up to five
 * hops. Any final status other than 200 rejects.
 *
 * @param {string} url - Absolute http: or https: URL.
 * @param {number} [timeoutMs=10000] - Socket timeout passed to the request.
 * @param {number} [depth=0] - Redirect hops already taken (internal use).
 * @returns {Promise<string>} The response body.
 */
function fetchText(url, timeoutMs = 10000, depth = 0) {
  if (depth > 5) return Promise.reject(new Error('too many redirects'));
  return new Promise((resolve, reject) => {
    const client = url.startsWith('https:') ? https : http;
    const req = client.get(url, { timeout: timeoutMs }, (res) => {
      const { statusCode, headers } = res;

      if (statusCode && statusCode >= 300 && statusCode < 400 && headers.location) {
        res.resume(); // drain the discarded response so its socket is released
        let next;
        try {
          next = new URL(headers.location, url).href;
        } catch {
          return reject(new Error(`invalid redirect location: ${headers.location}`));
        }
        return resolve(fetchText(next, timeoutMs, depth + 1));
      }

      if (statusCode !== 200) {
        res.resume();
        return reject(new Error(`HTTP ${statusCode} for ${url}`));
      }

      let data = '';
      res.setEncoding('utf8');
      res.on('data', (c) => { data += c; });
      res.on('end', () => resolve(data));
      res.on('error', reject);
    });
    req.on('error', reject);
    // The 'timeout' event only signals idleness; the request must be destroyed explicitly.
    req.on('timeout', () => req.destroy(new Error(`timeout: ${url}`)));
  });
}
|
|
157
|
+
|
|
158
|
+
/**
 * GET a URL and resolve with its body as a Buffer.
 *
 * Redirects are followed (Location resolved against the current URL) up to
 * five hops. Any final status other than 200 rejects with "HTTP <status>".
 *
 * @param {string} url - Absolute http: or https: URL.
 * @param {number} [timeoutMs=30000] - Socket timeout passed to the request.
 * @param {number} [depth=0] - Redirect hops already taken (internal use).
 * @returns {Promise<Buffer>} The raw response bytes.
 */
function fetchBinary(url, timeoutMs = 30000, depth = 0) {
  if (depth > 5) return Promise.reject(new Error('too many redirects'));
  return new Promise((resolve, reject) => {
    const client = url.startsWith('https:') ? https : http;
    const options = {
      timeout: timeoutMs,
      headers: { 'User-Agent': 'Mozilla/5.0 clone' },
    };
    const req = client.get(url, options, (res) => {
      const { statusCode, headers } = res;

      if (statusCode && statusCode >= 300 && statusCode < 400 && headers.location) {
        res.resume(); // drain the discarded response so its socket is released
        let next;
        try {
          // A malformed Location must reject the promise; throwing here would
          // escape the callback as an uncaught exception.
          next = new URL(headers.location, url).href;
        } catch {
          return reject(new Error(`invalid redirect location: ${headers.location}`));
        }
        return resolve(fetchBinary(next, timeoutMs, depth + 1));
      }

      if (statusCode !== 200) {
        res.resume();
        return reject(new Error(`HTTP ${statusCode}`));
      }

      const chunks = [];
      res.on('data', (c) => chunks.push(c));
      res.on('end', () => resolve(Buffer.concat(chunks)));
      res.on('error', reject);
    });
    req.on('error', reject);
    // The 'timeout' event only signals idleness; the request must be destroyed explicitly.
    req.on('timeout', () => req.destroy(new Error('timeout')));
  });
}
|
|
181
|
+
|
|
182
|
+
// ─── Asset download with concurrency limit ──────────────────────────
|
|
183
|
+
/**
 * Download a set of remote assets into outDir using a small worker pool.
 *
 * Each distinct, non-empty URL is fetched once. A failed download never
 * aborts the batch; it is recorded in the result instead.
 *
 * @param {Iterable<string>} urls - Remote asset URLs (duplicates and empty
 *   entries are ignored).
 * @param {string} outDir - Directory the files are written into (created on demand).
 * @param {number} [concurrency=8] - Maximum simultaneous downloads; values
 *   below 1 are treated as 1.
 * @returns {Promise<Object<string, {local: (string|null), status: string, bytes?: number, error?: string}>>}
 *   Map from remote URL to `{local, status: 'ok', bytes}` on success or
 *   `{local: null, status: 'missing', error}` on failure.
 */
async function downloadAssets(urls, outDir, concurrency = 8) {
  // Dedupe so the same asset is not fetched and written twice, and drop empty
  // entries up front so one of them cannot end a worker early.
  const queue = [...new Set(urls)].filter(Boolean);
  const map = {};
  const seenNames = new Set();

  const requested = Math.floor(Number(concurrency)) || 1;
  const workerCount = Math.max(1, Math.min(requested, queue.length || 1));

  const worker = async () => {
    // shift() is synchronous, so workers never pick up the same URL.
    for (let u = queue.shift(); u !== undefined; u = queue.shift()) {
      try {
        const buf = await fetchBinary(u);
        const filename = uniqueFilename(u, outDir, seenNames);
        const full = path.join(outDir, filename);
        fs.mkdirSync(path.dirname(full), { recursive: true });
        fs.writeFileSync(full, buf);
        map[u] = { local: filename, status: 'ok', bytes: buf.length };
      } catch (e) {
        map[u] = { local: null, status: 'missing', error: String(e.message || e) };
      }
    }
  };

  await Promise.all(Array.from({ length: workerCount }, worker));
  return map;
}
|
|
206
|
+
|
|
207
|
+
/**
 * Derive a filesystem-safe file name for a remote asset and make it unique.
 *
 * The name starts from the last path segment of the URL ("asset" when there
 * is none or the URL cannot be parsed). Characters outside [a-zA-Z0-9._-]
 * become "_", and ".bin" is appended when no extension is present. If the
 * name is already reserved in `seen` or exists under `baseDir`, a "-1", "-2",
 * ... suffix is inserted before the extension until it is free.
 *
 * @param {string} rawUrl - Remote asset URL.
 * @param {string} baseDir - Directory checked for existing files.
 * @param {Set<string>} seen - Names already handed out; the result is added to it.
 * @returns {string} The reserved file name (relative to baseDir).
 */
function uniqueFilename(rawUrl, baseDir, seen) {
  let name = 'asset';
  try {
    name = path.basename(new URL(rawUrl).pathname) || 'asset';
  } catch {
    name = 'asset';
  }

  const withoutQuery = name.replace(/[?#].*$/, '') || 'asset';
  let safe = withoutQuery.replace(/[^a-zA-Z0-9._-]/g, '_');
  if (path.extname(safe) === '') safe = `${safe}.bin`;

  const ext = path.extname(safe);
  const stem = safe.slice(0, -ext.length);
  const isTaken = (n) => seen.has(n) || fs.existsSync(path.join(baseDir, n));

  let candidate = safe;
  for (let suffix = 1; isTaken(candidate); suffix += 1) {
    candidate = `${stem}-${suffix}${ext}`;
  }

  seen.add(candidate);
  return candidate;
}
|
|
229
|
+
|
|
230
|
+
// ─── In-page extraction (runs inside the page) ──────────────────────
|
|
231
|
+
// IMPORTANT: this function is serialized to a string. It must be self-contained.
|
|
232
|
+
/**
 * Source text of a function expression `(function (props) {...})`.
 *
 * `props` is the list of CSS property names to read per element. The function
 * returns `{ nodes, assets: { images, fonts }, stylesheets, title, html, docSize }`:
 *   - nodes: flat list of element records (plus ::before/::after records and
 *     whole-subtree records for inline <svg>), each with a DOM-path id.
 *   - assets: absolute URLs of images and @font-face sources found in the page.
 *   - stylesheets: harvested @font-face rules, @keyframes rules and the custom
 *     properties computed on the root element.
 *
 * The text relies only on DOM globals (document, window, location,
 * getComputedStyle, Element, Node, URL) so it can be evaluated in the page.
 * Backslashes in regex literals are doubled because this is a template literal.
 */
const PAGE_EXTRACT = `
(function (props) {
  const out = [];
  const stylesheets = { fontFaces: [], keyframes: [], cssVars: {} };
  const pathById = new Map();

  // Stable ID via DOM path (no attribute mutation)
  const pathFor = (el, parent) => {
    if (!parent) return '0';
    const siblings = Array.from(el.parentNode.children);
    const idx = siblings.indexOf(el);
    return parent + '.' + idx;
  };

  // Read the allow-listed properties, dropping common "empty" values
  const captureCss = (cs) => {
    const css = {};
    for (const p of props) {
      const v = cs.getPropertyValue(p);
      if (v && v !== 'normal' && v !== 'auto' && v !== 'none' && v !== '0px' && v !== '') {
        css[p] = v.trim();
      } else if (v === '0px' || v === 'auto') {
        // keep zeros/auto for box model props
        if (/^(margin|padding|border|width|height|inset|top|right|bottom|left|gap)/.test(p)) {
          css[p] = v.trim();
        }
      }
    }
    // CSS custom properties
    const vars = {};
    for (let i = 0; i < cs.length; i++) {
      const name = cs[i];
      if (name && name.startsWith('--')) {
        vars[name] = cs.getPropertyValue(name).trim();
      }
    }
    if (Object.keys(vars).length) css['--vars'] = vars;
    return css;
  };

  const capturePseudo = (el, kind, parentId) => {
    const cs = getComputedStyle(el, '::' + kind);
    const content = cs.getPropertyValue('content');
    // Real pseudo-element only if content is not 'none' or empty
    if (!content || content === 'none' || content === 'normal') return null;
    const css = captureCss(cs);
    if (Object.keys(css).length <= 1) return null;
    return {
      id: parentId + '::' + kind,
      parent: parentId,
      tag: '::' + kind,
      pseudo: true,
      content: content,
      css,
    };
  };

  const isSvgElement = (el) => el.namespaceURI === 'http://www.w3.org/2000/svg';

  // Box in document coordinates, rounded to 2 decimals
  const rectOf = (el) => {
    const r = el.getBoundingClientRect();
    // Adjust for current scroll (we capture mid-scroll)
    return {
      x: Math.round((r.x + window.scrollX) * 100) / 100,
      y: Math.round((r.y + window.scrollY) * 100) / 100,
      w: Math.round(r.width * 100) / 100,
      h: Math.round(r.height * 100) / 100,
    };
  };

  // Inline <svg> is stored whole (markup verbatim) instead of being walked
  const captureSvg = (svg, id, parentId) => {
    return {
      id,
      parent: parentId,
      tag: 'svg',
      isSvg: true,
      svgMarkup: svg.outerHTML,
      box: rectOf(svg),
      css: captureCss(getComputedStyle(svg)),
    };
  };

  const collectSrcset = (el) => {
    const ss = el.getAttribute('srcset') || el.getAttribute('data-srcset');
    if (!ss) return [];
    return ss.split(',').map((part) => {
      const bits = part.trim().split(/\\s+/);
      try { return new URL(bits[0], location.href).href; } catch { return null; }
    }).filter(Boolean);
  };

  const walk = (el, parentId, root) => {
    if (!(el instanceof Element)) return;

    const cs = getComputedStyle(el);
    if (cs.display === 'none') return;
    // Keep visibility:hidden — may matter for layout (placeholders)

    const id = pathFor(el, parentId);
    pathById.set(id, el);

    if (isSvgElement(el) && el.tagName.toLowerCase() === 'svg') {
      out.push(captureSvg(el, id, parentId));
      return;
    }

    const node = {
      id,
      parent: parentId,
      tag: el.tagName.toLowerCase(),
      classes: el.getAttribute('class') || '',
      attrs: {
        src: el.getAttribute('src') || el.getAttribute('data-src') || null,
        currentSrc: (el.tagName === 'IMG' || el.tagName === 'SOURCE') ? (el.currentSrc || null) : null,
        srcset: el.getAttribute('srcset') || el.getAttribute('data-srcset') || null,
        href: el.getAttribute('href') || null,
        alt: el.getAttribute('alt') || null,
        title: el.getAttribute('title') || null,
        role: el.getAttribute('role') || null,
        ariaLabel: el.getAttribute('aria-label') || null,
        type: el.getAttribute('type') || null,
        name: el.getAttribute('name') || null,
        placeholder: el.getAttribute('placeholder') || null,
        value: el.getAttribute('value') || null,
      },
      text: '',
      box: rectOf(el),
      css: captureCss(cs),
    };

    // Direct text only: concatenates this element's own text nodes (never
    // descendants' text). Skipped when the element has element children and
    // also contains a child that is neither text nor element (e.g. a comment).
    if (el.children.length === 0 || Array.from(el.childNodes).every((n) => n.nodeType === Node.TEXT_NODE || n.nodeType === Node.ELEMENT_NODE)) {
      let txt = '';
      for (const n of el.childNodes) {
        if (n.nodeType === Node.TEXT_NODE) txt += n.textContent;
      }
      txt = txt.trim();
      if (txt) node.text = txt.length > 2000 ? txt.slice(0, 2000) + '…' : txt;
    }

    out.push(node);

    // Pseudo elements
    const before = capturePseudo(el, 'before', id);
    if (before) out.push(before);
    const after = capturePseudo(el, 'after', id);
    if (after) out.push(after);

    // Shadow DOM (open shadow roots only)
    if (el.shadowRoot && el.shadowRoot.mode === 'open') {
      for (const child of el.shadowRoot.children) walk(child, id + '/shadow', root);
    }

    for (const child of el.children) walk(child, id, root);
  };

  walk(document.documentElement, null, document.documentElement);

  // Collect asset URLs (images + fonts) ─────────────────────────────
  const images = new Set();
  const fonts = new Set();

  document.querySelectorAll('img').forEach((img) => {
    if (img.currentSrc) images.add(img.currentSrc);
    else if (img.src) images.add(img.src);
    const ss = img.getAttribute('srcset') || img.getAttribute('data-srcset');
    if (ss) {
      ss.split(',').forEach((p) => {
        const u = p.trim().split(/\\s+/)[0];
        try { images.add(new URL(u, location.href).href); } catch {}
      });
    }
  });
  document.querySelectorAll('source').forEach((s) => {
    const ss = s.getAttribute('srcset');
    if (!ss) return;
    ss.split(',').forEach((p) => {
      const u = p.trim().split(/\\s+/)[0];
      try { images.add(new URL(u, location.href).href); } catch {}
    });
  });
  document.querySelectorAll('video, audio').forEach((el) => {
    if (!el.poster) return;
    // Guarded like the other URL parses so one bad value cannot abort extraction
    try { images.add(new URL(el.poster, location.href).href); } catch {}
  });

  const bgUrlRe = /url\\(['"]?([^'")]+)['"]?\\)/g;
  document.querySelectorAll('*').forEach((el) => {
    const cs = getComputedStyle(el);
    const bg = cs.backgroundImage;
    if (bg && bg !== 'none') {
      let m;
      const re = new RegExp(bgUrlRe.source, 'g');
      while ((m = re.exec(bg)) !== null) {
        try { images.add(new URL(m[1], location.href).href); } catch {}
      }
    }
    // also check pseudo bg
    for (const k of ['::before', '::after']) {
      const pcs = getComputedStyle(el, k);
      const pbg = pcs.backgroundImage;
      if (pbg && pbg !== 'none') {
        let m;
        const re = new RegExp(bgUrlRe.source, 'g');
        while ((m = re.exec(pbg)) !== null) {
          try { images.add(new URL(m[1], location.href).href); } catch {}
        }
      }
    }
  });

  // @font-face + @keyframes from all stylesheets.
  // baseHref is the URL of the stylesheet that owns the rule: relative url()
  // values in a stylesheet resolve against that sheet, not against the page.
  const harvestRules = (rules, baseHref) => {
    for (const rule of rules) {
      const kind = rule.constructor.name;
      if (kind === 'CSSFontFaceRule' || rule.type === 5) {
        const family = rule.style.getPropertyValue('font-family');
        const weight = rule.style.getPropertyValue('font-weight');
        const style = rule.style.getPropertyValue('font-style');
        const display = rule.style.getPropertyValue('font-display');
        const src = rule.style.getPropertyValue('src');
        const parsedSrcs = [];
        const re = /url\\(['"]?([^'")]+)['"]?\\)(?:\\s*format\\(['"]?([^'")]+)['"]?\\))?/g;
        let m;
        while ((m = re.exec(src)) !== null) {
          const abs = (() => { try { return new URL(m[1], baseHref).href; } catch { return m[1]; } })();
          parsedSrcs.push({ url: abs, format: m[2] || null });
          fonts.add(abs);
        }
        stylesheets.fontFaces.push({
          family: family.replace(/^['"]|['"]$/g, ''),
          weight, style, display,
          sources: parsedSrcs,
        });
      } else if (kind === 'CSSKeyframesRule' || rule.type === 7) {
        const frames = [];
        for (const fr of rule.cssRules) frames.push({ keyText: fr.keyText, css: fr.cssText });
        stylesheets.keyframes.push({ name: rule.name, frames });
      } else if (rule.styleSheet) {
        // @import: the imported sheet carries its own base URL
        let imported = null;
        try { imported = rule.styleSheet.cssRules; } catch { imported = null; }
        if (imported) harvestRules(imported, rule.styleSheet.href || baseHref);
      } else if (rule.cssRules && rule.cssRules.length) {
        // grouping rules such as @media / @supports / @layer
        harvestRules(rule.cssRules, baseHref);
      }
    }
  };

  for (const sheet of document.styleSheets) {
    let rules;
    // Reading cssRules of a cross-origin sheet throws; such sheets are skipped
    try { rules = sheet.cssRules; } catch { continue; }
    if (!rules) continue;
    harvestRules(rules, sheet.href || document.baseURI);
  }

  // :root CSS vars
  const rootCs = getComputedStyle(document.documentElement);
  for (let i = 0; i < rootCs.length; i++) {
    const name = rootCs[i];
    if (name && name.startsWith('--')) {
      stylesheets.cssVars[name] = rootCs.getPropertyValue(name).trim();
    }
  }

  return {
    nodes: out,
    assets: { images: Array.from(images), fonts: Array.from(fonts) },
    stylesheets,
    title: document.title,
    html: document.documentElement.outerHTML,
    docSize: {
      width: document.documentElement.scrollWidth,
      height: document.documentElement.scrollHeight,
    },
  };
})
`;
|
|
496
|
+
|
|
497
|
+
// ─── Progressive scroll to trigger lazy loading ─────────────────────
|
|
498
|
+
/**
 * Scroll the page down in fixed steps so content that loads on scroll gets a
 * chance to appear, then jump back to the top and wait briefly.
 *
 * The work runs inside the page via `page.evaluate` with a source string.
 *
 * @param {{ evaluate: (source: string) => Promise<unknown> }} page
 *   Object whose `evaluate` runs a script string in the page (capture() passes
 *   the page it opened).
 * @param {object} [options]
 * @param {number} [options.step=400] Pixels scrolled per tick; must be > 0.
 * @param {number} [options.intervalMs=80] Delay between ticks.
 * @param {number} [options.maxScroll=50000] Upper bound on the scrolled
 *   distance for pages that keep growing; the page's initial height is used
 *   instead when it is larger.
 * @param {number} [options.settleMs=600] Pause after returning to the top.
 * @returns {Promise<void>}
 * @throws {RangeError} When an option is not a non-negative finite number, or
 *   `step` is 0 (which would never terminate).
 */
async function scrollToBottom(
  page,
  { step = 400, intervalMs = 80, maxScroll = 50000, settleMs = 600 } = {},
) {
  const settings = { step, intervalMs, maxScroll, settleMs };
  for (const [name, value] of Object.entries(settings)) {
    if (!Number.isFinite(value) || value < 0) {
      throw new RangeError(`scrollToBottom: ${name} must be a non-negative finite number`);
    }
  }
  if (step === 0) {
    throw new RangeError('scrollToBottom: step must be greater than 0');
  }

  // Only validated numbers are serialized into the script, so the
  // interpolation cannot inject code.
  const config = JSON.stringify(settings);

  await page.evaluate(`(async () => {
    const cfg = ${config};
    await new Promise((resolve) => {
      // Documents without a <body> (e.g. standalone SVG) have document.body === null;
      // reading scrollHeight from it would throw on every tick and never resolve.
      const pageHeight = () => Math.max(
        document.documentElement ? document.documentElement.scrollHeight : 0,
        document.body ? document.body.scrollHeight : 0
      );
      let total = 0;
      const max = Math.max(pageHeight(), cfg.maxScroll);
      const timer = setInterval(() => {
        window.scrollBy(0, cfg.step);
        total += cfg.step;
        // pageHeight() is re-read each tick because lazy content can grow the page.
        if (total >= pageHeight() || total >= max) {
          clearInterval(timer);
          window.scrollTo(0, 0);
          setTimeout(resolve, cfg.settleMs);
        }
      }, cfg.intervalMs);
    });
  })()`);
}
|
|
516
|
+
|
|
517
|
+
// Freeze animations to a deterministic frame for stable capture
|
|
518
|
+
/**
 * Inject a stylesheet that zeroes animation/transition timing and pauses
 * animations on every element and its ::before/::after pseudo-elements.
 *
 * @param {{ addStyleTag: (options: { content: string }) => Promise<unknown> }} page
 *   Object whose `addStyleTag` receives the CSS text.
 * @returns {Promise<void>}
 */
async function freezeAnimations(page) {
  // Each declaration is !important so it overrides the page's own rules.
  const declarations = [
    'animation-duration: 0s !important;',
    'animation-delay: 0s !important;',
    'animation-play-state: paused !important;',
    'transition-duration: 0s !important;',
    'transition-delay: 0s !important;',
  ];
  const body = declarations.map((line) => `        ${line}`).join('\n');
  const content = `\n      *, *::before, *::after {\n${body}\n      }\n    `;

  await page.addStyleTag({ content });
}
|
|
531
|
+
|
|
532
|
+
// HTML sanitization: strip scripts/analytics before saving
|
|
533
|
+
/**
 * Remove executable content from captured markup before it is saved.
 *
 * Three passes:
 *   1. `<script>…</script>` elements (closing tag may carry whitespace, as in
 *      `</script >`).
 *   2. `<noscript>…</noscript>` elements.
 *   3. Inline `on*` event-handler attributes, with double-quoted,
 *      single-quoted or unquoted values.
 *
 * Handler removal is limited to attributes of real start tags. Text content
 * that merely looks like a handler (for example a code sample rendered as
 * `&lt;button onclick="go()"&gt;`) and handler-like text inside another
 * attribute's value are left untouched.
 *
 * This is regex-based clean-up, not a full HTML parser.
 *
 * @param {string} html Markup to clean.
 * @returns {string} Markup with scripts, noscript blocks and inline handlers removed.
 */
function sanitizeHtml(html) {
  // Walk a start tag one whole attribute at a time (name plus optional value),
  // so a match can never begin inside another attribute's quoted value.
  const dropHandlers = (tag) =>
    tag.replace(
      /\s+([^\s"'<>=/]+)(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'<>]+))?/g,
      (attribute, name) => (/^on[a-z]+$/i.test(name) ? '' : attribute),
    );

  return html
    .replace(/<script\b[^>]*>[\s\S]*?<\/script\b[^>]*>/gi, '')
    .replace(/<noscript\b[^>]*>[\s\S]*?<\/noscript\b[^>]*>/gi, '')
    // A start tag: '<' + letter, then unquoted runs interleaved with quoted
    // strings (which may themselves contain '<' or '>'), up to the closing '>'.
    .replace(/<[a-z][^<>"']*(?:(?:"[^"]*"|'[^']*')[^<>"']*)*>/gi, dropHandlers);
}
|
|
540
|
+
|
|
541
|
+
// ─── Main capture flow ──────────────────────────────────────────────
|
|
542
|
+
// Preference order for @font-face sources, most compact first. CSS `format()`
// hints use `truetype` / `opentype`; `ttf` / `otf` are kept as aliases so a
// file-extension fallback (and any non-standard hint) ranks the same way.
const FONT_FORMAT_RANK = new Map([
  ['woff2', 0],
  ['woff', 1],
  ['truetype', 2],
  ['ttf', 2],
  ['opentype', 3],
  ['otf', 3],
]);

/** Rank one `{ url, format }` font source; lower is better, unknown formats get 9. */
function rankFontSource(source) {
  let format = typeof source.format === 'string' ? source.format.trim().toLowerCase() : '';
  if (!format) {
    // No format() hint: fall back to the file extension in the URL.
    const ext = /\.(woff2|woff|ttf|otf)(?:[?#]|$)/i.exec(String(source.url));
    format = ext ? ext[1].toLowerCase() : '';
  }
  return FONT_FORMAT_RANK.get(format) ?? 9;
}

/** Collect the best source URL of every font face, then each face's runner-up. */
function selectFontDownloads(fontFaces) {
  const ranked = fontFaces.map((face) =>
    [...face.sources].sort((a, b) => rankFontSource(a) - rankFontSource(b)),
  );
  const urls = new Set();
  for (const sources of ranked) {
    if (sources[0]) urls.add(sources[0].url);
  }
  for (const sources of ranked) {
    if (sources[1]) urls.add(sources[1].url);
  }
  return urls;
}

/** Convert a download result map into asset-map entries tagged with `kind`. */
function toAssetEntries(downloads, kind, subdir) {
  const entries = {};
  for (const [remote, info] of Object.entries(downloads)) {
    entries[remote] = info.status === 'ok'
      ? { local: `assets/${subdir}/${info.local}`, status: 'ok', bytes: info.bytes, kind }
      : { ...info, kind };
  }
  return entries;
}

/** Replace every successfully downloaded remote URL in `html` with its local path. */
function rewriteHtmlAssetUrls(html, assetMap, orderedUrls) {
  let rewritten = html;
  for (const remote of orderedUrls) {
    const info = assetMap[remote];
    if (info.status !== 'ok') continue;
    // Literal split/join: no regex escaping needed, and `$` sequences in the
    // local path are not interpreted as replacement patterns.
    rewritten = rewritten.split(remote).join(info.local);
    // Serialized HTML writes `&` inside attribute values as `&amp;`, so a URL
    // with a multi-parameter query string appears in that escaped form.
    const escapedRemote = remote.replace(/&/g, '&amp;');
    if (escapedRemote !== remote) {
      rewritten = rewritten.split(escapedRemote).join(info.local);
    }
  }
  return rewritten;
}

/**
 * Capture one URL at one viewport: render it in a headless browser, extract
 * nodes/stylesheets, download images and fonts, and write the results.
 *
 * Files written under `opts.out`: `screenshot.png`, `rendered.html`,
 * `computed.json`, `stylesheets.json`, `asset-map.json`, plus downloaded
 * files under `assets/images` and `assets/fonts`.
 *
 * @param {object} args
 * @param {string} args.url Page to capture.
 * @param {object} args.opts Options read here: `out` (output directory,
 *   required), `viewport` (required, handed to parseViewport),
 *   `ignoreRobots`, `wait` (ms to pause after scrolling; 1200 when falsy),
 *   `bp` (label recorded in the log line and in computed.json).
 * @returns {Promise<void>}
 * @throws {Error} When `url`, `opts.out` or `opts.viewport` is missing, or
 *   when checkRobots reports the URL as blocked and `ignoreRobots` is not set.
 *   Errors from the browser, downloads or file writes propagate; the browser
 *   is closed in all cases once it has been launched.
 */
async function capture({ url, opts }) {
  if (!url) throw new Error('URL is required');
  if (!opts.out) throw new Error('--out=<dir> is required');
  if (!opts.viewport) throw new Error('--viewport=WxH is required');

  const viewport = parseViewport(opts.viewport);

  if (!opts.ignoreRobots) {
    const blocked = await checkRobots(url);
    if (blocked) {
      throw new Error(
        `robots.txt disallows ${url}. Pass --ignore-robots only with the site owner's permission.`,
      );
    }
  }

  fs.mkdirSync(opts.out, { recursive: true });
  const assetsDir = path.join(opts.out, 'assets');
  const imagesDir = path.join(assetsDir, 'images');
  const fontsDir = path.join(assetsDir, 'fonts');
  fs.mkdirSync(imagesDir, { recursive: true });
  fs.mkdirSync(fontsDir, { recursive: true });

  console.log(`[clone-extract] ${opts.bp || ''} ${viewport.width}x${viewport.height}@${viewport.deviceScaleFactor} ${url}`);

  const puppeteer = await loadPuppeteer();
  const browser = await puppeteer.launch({
    headless: 'new',
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--font-render-hinting=none',
    ],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport(viewport);
    await page.setUserAgent(
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 ' +
        '(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    );
    // disable cache to get fresh assets
    await page.setCacheEnabled(false);

    await page.goto(url, { waitUntil: 'networkidle2', timeout: 90000 });
    await scrollToBottom(page);
    const settleMs = opts.wait ? opts.wait : 1200;
    await new Promise((resolve) => setTimeout(resolve, settleMs));

    await freezeAnimations(page);

    // NOTE(review): PAGE_EXTRACT looks like a source string. Puppeteer evaluates
    // string arguments as expressions and, as far as I can tell, does not forward
    // extra arguments to them — confirm CSS_PROPS actually reaches the page.
    const extracted = await page.evaluate(PAGE_EXTRACT, CSS_PROPS);

    // Screenshot AFTER freezing animations
    const shotPath = path.join(opts.out, 'screenshot.png');
    await page.screenshot({ path: shotPath, fullPage: true });

    // Per @font-face rule keep the two best sources (woff2 > woff > ttf > otf)
    // instead of downloading every fallback format.
    const fontDownloadSet = selectFontDownloads(extracted.stylesheets.fontFaces);

    console.log(`[clone-extract] downloading ${extracted.assets.images.length} images, ${fontDownloadSet.size} fonts (filtered from ${extracted.assets.fonts.length})`);
    const imageMap = await downloadAssets(extracted.assets.images, imagesDir, 8);
    const fontMap = await downloadAssets(Array.from(fontDownloadSet), fontsDir, 4);

    const assetMap = {
      ...toAssetEntries(imageMap, 'image', 'images'),
      ...toAssetEntries(fontMap, 'font', 'fonts'),
    };

    // Rewrite HTML — replace remote URLs with local paths. Longest URLs go
    // first so a URL that is a prefix of another cannot clobber the longer one.
    const sortedUrls = Object.keys(assetMap).sort((a, b) => b.length - a.length);
    const html = rewriteHtmlAssetUrls(sanitizeHtml(extracted.html), assetMap, sortedUrls);

    // Rewrite computed CSS values that can carry url(...) references, per node
    const rewriteCssValue = (val) => {
      if (!val || typeof val !== 'string') return val;
      let out = val;
      for (const u of sortedUrls) {
        const info = assetMap[u];
        if (info.status !== 'ok') continue;
        if (out.includes(u)) out = out.split(u).join(info.local);
      }
      return out;
    };
    for (const node of extracted.nodes) {
      if (!node.css) continue;
      for (const prop of ['background-image', 'mask-image', 'content', 'border-image']) {
        if (node.css[prop]) node.css[prop] = rewriteCssValue(node.css[prop]);
      }
    }

    // Rewrite @font-face srcs in stylesheets.fontFaces; the remote URL is kept
    // as `original` on every source that was downloaded.
    for (const ff of extracted.stylesheets.fontFaces) {
      ff.sources = ff.sources.map((s) => {
        const info = assetMap[s.url];
        return info && info.status === 'ok'
          ? { ...s, url: info.local, original: s.url }
          : s;
      });
    }

    // Write outputs
    fs.writeFileSync(path.join(opts.out, 'rendered.html'), html);
    fs.writeFileSync(
      path.join(opts.out, 'computed.json'),
      JSON.stringify({
        meta: {
          url,
          viewport,
          bp: opts.bp || null,
          title: extracted.title,
          docSize: extracted.docSize,
          capturedAt: new Date().toISOString(),
        },
        nodes: extracted.nodes,
      }),
    );
    fs.writeFileSync(
      path.join(opts.out, 'stylesheets.json'),
      JSON.stringify(extracted.stylesheets, null, 2),
    );
    fs.writeFileSync(path.join(opts.out, 'asset-map.json'), JSON.stringify(assetMap, null, 2));

    const okCount = Object.values(assetMap).filter((a) => a.status === 'ok').length;
    console.log(`[clone-extract] done → ${opts.out}`);
    console.log(` nodes: ${extracted.nodes.length}, fontFaces: ${extracted.stylesheets.fontFaces.length}`);
    console.log(` assets: ${okCount}/${Object.keys(assetMap).length} downloaded`);
  } finally {
    await browser.close();
  }
}
|
|
699
|
+
|
|
700
|
+
// ─── Entry ──────────────────────────────────────────────────────────
|
|
701
|
+
// Parse the command line once; only the `capture` command is supported.
const { cmd, url, opts } = parseArgs(process.argv);

if (cmd === 'capture') {
  capture({ url, opts }).catch((failure) => {
    // Short message by default; the full stack only when DEBUG is set.
    console.error(`[clone-extract] FAIL: ${failure.message}`);
    if (process.env.DEBUG) {
      console.error(failure.stack);
    }
    process.exit(1);
  });
} else {
  // Any other (or missing) command: print usage and exit non-zero.
  console.error('Usage: node clone-extract.js capture <URL> --out=<dir> --viewport=WxH[@DPR] --bp=mo|pc [--stealth] [--ignore-robots] [--wait=ms]');
  process.exit(1);
}
|