design-clone 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/.env.example +14 -0
  2. package/LICENSE +21 -0
  3. package/README.md +166 -0
  4. package/SKILL.md +239 -0
  5. package/bin/cli.js +45 -0
  6. package/bin/commands/help.js +29 -0
  7. package/bin/commands/init.js +126 -0
  8. package/bin/commands/verify.js +99 -0
  9. package/bin/utils/copy.js +65 -0
  10. package/bin/utils/validate.js +122 -0
  11. package/docs/basic-clone.md +63 -0
  12. package/docs/cli-reference.md +94 -0
  13. package/docs/design-clone-architecture.md +247 -0
  14. package/docs/pixel-perfect.md +86 -0
  15. package/docs/troubleshooting.md +97 -0
  16. package/package.json +57 -0
  17. package/requirements.txt +5 -0
  18. package/src/ai/analyze-structure.py +305 -0
  19. package/src/ai/extract-design-tokens.py +439 -0
  20. package/src/ai/prompts/__init__.py +2 -0
  21. package/src/ai/prompts/design_tokens.py +183 -0
  22. package/src/ai/prompts/structure_analysis.py +273 -0
  23. package/src/core/cookie-handler.js +76 -0
  24. package/src/core/css-extractor.js +107 -0
  25. package/src/core/dimension-extractor.js +366 -0
  26. package/src/core/dimension-output.js +208 -0
  27. package/src/core/extract-assets.js +468 -0
  28. package/src/core/filter-css.js +499 -0
  29. package/src/core/html-extractor.js +102 -0
  30. package/src/core/lazy-loader.js +188 -0
  31. package/src/core/page-readiness.js +161 -0
  32. package/src/core/screenshot.js +380 -0
  33. package/src/post-process/enhance-assets.js +157 -0
  34. package/src/post-process/fetch-images.js +398 -0
  35. package/src/post-process/inject-icons.js +311 -0
  36. package/src/utils/__init__.py +16 -0
  37. package/src/utils/__pycache__/__init__.cpython-313.pyc +0 -0
  38. package/src/utils/__pycache__/env.cpython-313.pyc +0 -0
  39. package/src/utils/browser.js +103 -0
  40. package/src/utils/env.js +153 -0
  41. package/src/utils/env.py +134 -0
  42. package/src/utils/helpers.js +71 -0
  43. package/src/utils/puppeteer.js +281 -0
  44. package/src/verification/verify-layout.js +424 -0
  45. package/src/verification/verify-menu.js +422 -0
  46. package/templates/base.css +705 -0
  47. package/templates/base.html +293 -0
@@ -0,0 +1,468 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Asset Extraction Script for Pixel-Perfect Clone
4
+ *
5
+ * Downloads and organizes assets from source website:
6
+ * - Images (jpg, png, gif, webp, svg)
7
+ * - Fonts (woff, woff2, ttf, otf)
8
+ * - CSS-embedded images (background-url)
9
+ *
10
+ * Usage:
11
+ * node extract-assets.js --url <url> --output <dir> [--verbose]
12
+ *
13
+ * Options:
14
+ * --url Target website URL (required)
15
+ * --output Output directory (required)
16
+ * --verbose Show detailed progress
17
+ * --timeout Download timeout in ms (default: 30000)
18
+ */
19
+
20
import { createHash } from 'crypto';
import fs from 'fs/promises';
import http from 'http';
import https from 'https';
import path from 'path';
import { URL } from 'url';

// Import browser abstraction (auto-detects chrome-devtools or standalone)
import { getBrowser, getPage, closeBrowser, disconnectBrowser, parseArgs, outputJSON, outputError } from '../utils/browser.js';
28
+
29
/**
 * Asset type configurations.
 *
 * Each category lists the file extensions that belong to it and a folder
 * name, plus the DOM selectors or CSS patterns associated with it.
 * The whole structure is frozen so this shared configuration cannot be
 * mutated by accident at runtime.
 *
 * NOTE: `fonts.patterns` contains a regex with the `g` flag. Such a regex
 * keeps `lastIndex` state between `exec`/`test` calls, so reset it or use
 * `String.prototype.matchAll` when consuming it.
 */
const ASSET_TYPES = Object.freeze({
  images: Object.freeze({
    extensions: Object.freeze(['.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg', '.ico', '.avif']),
    folder: 'images',
    selectors: Object.freeze(['img[src]', 'picture source[srcset]', '[style*="background"]', 'link[rel="icon"]', 'link[rel="apple-touch-icon"]']),
  }),
  fonts: Object.freeze({
    extensions: Object.freeze(['.woff', '.woff2', '.ttf', '.otf', '.eot']),
    folder: 'fonts',
    patterns: Object.freeze([/@font-face\s*\{[^}]*url\s*\(\s*['"]?([^'")\s]+)['"]?\s*\)/gi]),
  }),
  icons: Object.freeze({
    extensions: Object.freeze(['.svg']),
    folder: 'icons',
    selectors: Object.freeze(['svg', 'use[href]', 'use[xlink\\:href]']),
  }),
});
47
+
48
/**
 * Rate limiting configuration.
 *
 * - maxConcurrent: number of downloads started together in one batch.
 * - delayBetweenBatches: pause in milliseconds between consecutive batches.
 *
 * Frozen so the shared limits cannot be changed by accident at runtime.
 */
const RATE_LIMIT = Object.freeze({
  maxConcurrent: 5,
  delayBetweenBatches: 200,
});
53
+
54
/**
 * Parse CSS for asset URLs.
 *
 * Scans every `url(...)` reference in the given CSS text (this includes the
 * `src` descriptors of `@font-face` rules) and resolves it against `baseUrl`.
 *
 * Skipped references:
 * - `data:` URIs (matched case-insensitively) - they carry their own payload.
 * - Fragment-only references such as `url(#mask)` - they point at an element
 *   inside the document, not at a downloadable file.
 * - Anything that cannot be resolved into a valid absolute URL.
 *
 * @param {string} cssContent - Raw CSS text; empty or missing input yields [].
 * @param {string} baseUrl - URL that relative references are resolved against.
 * @returns {string[]} Unique absolute URLs in order of first appearance.
 */
function extractCssUrls(cssContent, baseUrl) {
  if (!cssContent) {
    return [];
  }

  const urls = new Set();
  const urlPattern = /url\s*\(\s*['"]?([^'")\s]+)['"]?\s*\)/gi;

  for (const [, rawUrl] of cssContent.matchAll(urlPattern)) {
    if (rawUrl.startsWith('#') || /^data:/i.test(rawUrl)) {
      continue;
    }
    try {
      urls.add(new URL(rawUrl, baseUrl).href);
    } catch { /* ignore invalid URLs */ }
  }

  return Array.from(urls);
}
92
+
93
/**
 * Perform a single download attempt, following redirects.
 *
 * @param {string} url - Absolute http(s) URL to fetch.
 * @param {string} destPath - File path the response body is written to.
 * @param {number} timeout - Milliseconds allowed per request before aborting.
 * @param {number} redirectsLeft - Remaining redirects that may be followed.
 * @returns {Promise<{size: number}>} Resolves with the number of bytes written;
 *   rejects on timeout, network error, non-200 status or too many redirects.
 */
function downloadOnce(url, destPath, timeout, redirectsLeft) {
  return new Promise((resolve, reject) => {
    let target;
    try {
      target = new URL(url);
    } catch {
      reject(new Error(`Invalid URL: ${url}`));
      return;
    }

    const client = { 'https:': https, 'http:': http }[target.protocol];
    if (!client) {
      reject(new Error(`Unsupported protocol: ${target.protocol}`));
      return;
    }

    let settled = false;
    let timer = null;
    // Settle exactly once and always clear the timer so it cannot fire later.
    const settle = (callback, value) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      callback(value);
    };

    const request = client.get(target, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
        'Accept': '*/*'
      }
    }, (response) => {
      const status = response.statusCode;
      const location = response.headers.location;

      // Handle redirects
      if (status >= 300 && status < 400 && location) {
        response.resume(); // drain the body so the socket is released
        if (redirectsLeft <= 0) {
          settle(reject, new Error('Too many redirects'));
          return;
        }
        let nextUrl;
        try {
          // Location may be relative; resolve it against the current URL.
          nextUrl = new URL(location, target).href;
        } catch {
          settle(reject, new Error(`Invalid redirect location: ${location}`));
          return;
        }
        // Resolving with the chained promise propagates its failure as well.
        settle(resolve, downloadOnce(nextUrl, destPath, timeout, redirectsLeft - 1));
        return;
      }

      if (status !== 200) {
        response.resume();
        settle(reject, new Error(`HTTP ${status}`));
        return;
      }

      const chunks = [];
      response.on('data', (chunk) => chunks.push(chunk));
      response.on('error', (err) => settle(reject, err));
      response.on('end', () => {
        if (settled) return; // e.g. the timeout already fired
        const buffer = Buffer.concat(chunks);
        const written = fs.mkdir(path.dirname(destPath), { recursive: true })
          .then(() => fs.writeFile(destPath, buffer))
          .then(() => ({ size: buffer.length }));
        settle(resolve, written);
      });
    });

    request.on('error', (err) => settle(reject, err));

    timer = setTimeout(() => {
      settle(reject, new Error('Download timeout'));
      request.destroy(); // abort the in-flight request instead of leaking it
    }, timeout);
  });
}

/**
 * Download a file with timeout and retry.
 *
 * Never rejects: the outcome is reported through the returned object.
 * Redirects are followed (relative `Location` headers are resolved against
 * the requesting URL) up to `maxRedirects` hops, and a failure of the
 * redirect target is reported as a failure of the download.
 *
 * @param {string} url - Absolute http(s) URL to download.
 * @param {string} destPath - Destination file path; parent directories are created.
 * @param {number} [timeout=30000] - Per-request timeout in milliseconds.
 * @param {number} [retries=2] - Extra attempts after the first failure, with a
 *   growing delay (500ms, 1000ms, ...) between attempts.
 * @param {number} [maxRedirects=5] - Maximum redirects followed per attempt.
 * @returns {Promise<{success: true, size: number} | {success: false, error: string}>}
 */
async function downloadFile(url, destPath, timeout = 30000, retries = 2, maxRedirects = 5) {
  let lastError = new Error('No download attempt was made');

  for (let attempt = 0; attempt <= retries; attempt++) {
    try {
      const { size } = await downloadOnce(url, destPath, timeout, maxRedirects);
      return { success: true, size };
    } catch (err) {
      lastError = err;
      if (attempt < retries) {
        await new Promise((resolve) => setTimeout(resolve, 500 * (attempt + 1)));
      }
    }
  }

  return { success: false, error: lastError.message };
}
159
+
160
/**
 * Download files in batches with rate limiting.
 *
 * Downloads are started in groups of `RATE_LIMIT.maxConcurrent`, with a pause
 * of `RATE_LIMIT.delayBetweenBatches` milliseconds between groups. An entry
 * whose destination file already exists is counted as skipped and is not
 * downloaded again.
 *
 * @param {Array<{url: string, destPath: string, type: string}>} downloads
 * @param {boolean} [verbose=false] - Log one line per finished download to stderr.
 * @param {number} [timeout=30000] - Per-request timeout in milliseconds that is
 *   forwarded to downloadFile.
 * @returns {Promise<{success: number, failed: number, skipped: number,
 *   errors: Array<{url: string, error: string}>}>} Aggregated counters.
 */
async function downloadBatch(downloads, verbose = false, timeout = 30000) {
  const results = {
    success: 0,
    failed: 0,
    skipped: 0,
    errors: []
  };
  const { maxConcurrent, delayBetweenBatches } = RATE_LIMIT;

  const processOne = async ({ url, destPath, type }) => {
    // Check if file already exists
    try {
      await fs.access(destPath);
      results.skipped++;
      return { url, skipped: true };
    } catch { /* file doesn't exist, continue */ }

    const result = await downloadFile(url, destPath, timeout);
    if (result.success) {
      results.success++;
      if (verbose) console.error(` ✓ ${type}: ${path.basename(destPath)}`);
    } else {
      results.failed++;
      results.errors.push({ url, error: result.error });
      if (verbose) console.error(` ✗ ${type}: ${path.basename(url)} - ${result.error}`);
    }
    return { url, ...result };
  };

  for (let start = 0; start < downloads.length; start += maxConcurrent) {
    const batch = downloads.slice(start, start + maxConcurrent);
    await Promise.all(batch.map(processOne));

    // Pause only when another batch follows.
    if (start + maxConcurrent < downloads.length) {
      await new Promise((resolve) => setTimeout(resolve, delayBetweenBatches));
    }
  }

  return results;
}
203
+
204
/**
 * Generate safe filename from URL.
 *
 * The name is the last path segment of the URL, restricted to
 * `[a-zA-Z0-9._-]` (other characters become `_`), with `.bin` appended when
 * it has no extension. The result is deterministic for a given input.
 *
 * - A query string contributes an 8-character digest of the whole query, so
 *   URLs that differ only in their query map to different names.
 * - A URL without a last path segment (e.g. a site root) is named
 *   `asset-<digest>` from its origin and path instead of an empty name.
 * - Input that is not a valid absolute URL yields `asset-<digest>.bin`.
 *
 * @param {string} url - Absolute URL of the asset.
 * @returns {string} File name without any directory part.
 */
function getSafeFilename(url) {
  const shortHash = (text) => createHash('sha256').update(text).digest('hex').slice(0, 8);

  let urlObj;
  try {
    urlObj = new URL(url);
  } catch {
    return `asset-${shortHash(String(url))}.bin`;
  }

  const rawName = path.basename(urlObj.pathname);
  const ext = path.extname(rawName);
  let base = path.basename(rawName, ext);

  if (!base) {
    base = `asset-${shortHash(urlObj.origin + urlObj.pathname)}`;
  }

  // Handle query strings in filename
  if (urlObj.search) {
    base = `${base}-${shortHash(urlObj.search)}`;
  }

  // Sanitize filename
  let filename = `${base}${ext}`.replace(/[^a-zA-Z0-9._-]/g, '_');

  // Ensure extension
  if (!path.extname(filename)) {
    filename += '.bin';
  }

  return filename;
}
233
+
234
/**
 * Determine asset type from URL.
 *
 * Classification uses the lower-cased extension of the URL path (query
 * string and fragment are ignored) and the extension lists in ASSET_TYPES.
 * Fonts are checked first, then icons, then images, so an extension listed
 * under both icons and images is reported as 'icons'.
 *
 * @param {string} url - Absolute URL of the asset.
 * @returns {'fonts' | 'icons' | 'images' | 'other'} 'other' when the
 *   extension is unknown or the URL cannot be parsed.
 */
function getAssetType(url) {
  let ext;
  try {
    ext = path.extname(new URL(url).pathname).toLowerCase();
  } catch {
    // An unparseable URL has no usable extension.
    return 'other';
  }

  if (ASSET_TYPES.fonts.extensions.includes(ext)) return 'fonts';
  if (ASSET_TYPES.icons.extensions.includes(ext)) return 'icons';
  if (ASSET_TYPES.images.extensions.includes(ext)) return 'images';

  return 'other';
}
246
+
247
/**
 * Extract all assets from page.
 *
 * Runs inside the page through `page.evaluate` and collects:
 * - image URLs from `img[src]`, every `srcset` candidate, every `url(...)`
 *   in inline styles that mention "background", and `link[rel*="icon"]`;
 * - the markup of inline `<svg>` elements shorter than 50000 characters;
 * - the URLs of linked stylesheets.
 *
 * All URLs are resolved against `baseUrl`. `data:` URIs, fragment-only
 * references (such as the placeholder `src="#"`) and values that cannot be
 * resolved are skipped.
 *
 * @param {object} page - Page handle exposing `evaluate(fn, arg)`.
 * @param {string} baseUrl - URL that relative references are resolved against.
 * @returns {Promise<{images: string[], cssUrls: string[],
 *   inlineSvgs: Array<{id: string, content: string}>}>}
 */
async function extractAssetsFromPage(page, baseUrl) {
  return await page.evaluate((url) => {
    // Everything in this callback executes in the page, so helpers must be
    // defined here rather than at module level.
    const images = new Set();

    const addImage = (candidate) => {
      if (!candidate || candidate.startsWith('#') || /^data:/i.test(candidate)) {
        return;
      }
      try {
        images.add(new URL(candidate, url).href);
      } catch { /* ignore */ }
    };

    // Simplified srcset tokenizer: the URL is the first run of non-whitespace
    // characters; trailing commas end the candidate, otherwise descriptors
    // run up to the next comma. Splitting on every comma would cut data:
    // URIs (which contain commas) into bogus relative URLs.
    const parseSrcsetUrls = (srcset) => {
      const found = [];
      let rest = srcset;
      for (;;) {
        rest = rest.replace(/^[\s,]+/, '');
        if (!rest) break;
        const token = rest.match(/^\S+/)[0];
        rest = rest.slice(token.length);
        const candidate = token.replace(/,+$/, '');
        if (candidate.length === token.length) {
          const comma = rest.indexOf(',');
          rest = comma === -1 ? '' : rest.slice(comma + 1);
        }
        found.push(candidate);
      }
      return found;
    };

    // Images from img tags
    document.querySelectorAll('img[src]').forEach((img) => {
      addImage(img.getAttribute('src'));
    });

    // Images from srcset
    document.querySelectorAll('[srcset]').forEach((el) => {
      parseSrcsetUrls(el.getAttribute('srcset') || '').forEach(addImage);
    });

    // Background images from inline styles (a declaration may list several)
    document.querySelectorAll('[style*="background"]').forEach((el) => {
      const style = el.getAttribute('style') || '';
      for (const match of style.matchAll(/url\s*\(\s*['"]?([^'")\s]+)['"]?\s*\)/gi)) {
        addImage(match[1]);
      }
    });

    // Favicon and touch icons
    document.querySelectorAll('link[rel*="icon"]').forEach((link) => {
      addImage(link.getAttribute('href'));
    });

    // Inline SVGs - extract as string
    const inlineSvgs = [];
    document.querySelectorAll('svg').forEach((svg, index) => {
      const svgContent = svg.outerHTML;
      if (svgContent.length < 50000) { // Skip huge SVGs
        inlineSvgs.push({
          id: svg.id || `inline-svg-${index}`,
          content: svgContent
        });
      }
    });

    // CSS stylesheet URLs for font extraction
    const cssUrls = [];
    document.querySelectorAll('link[rel="stylesheet"]').forEach((link) => {
      const href = link.getAttribute('href');
      if (href) {
        try {
          cssUrls.push(new URL(href, url).href);
        } catch { /* ignore */ }
      }
    });

    return {
      images: Array.from(images),
      cssUrls,
      inlineSvgs
    };
  }, baseUrl);
}
334
+
335
/**
 * Main extraction function.
 *
 * Reads `--url`, `--output`, `--verbose`, `--timeout`, `--headless` and
 * `--close` from the command line, loads the page, collects asset URLs from
 * the DOM and from CSS, downloads them into `<output>/assets/{images,fonts,icons}`,
 * saves inline SVGs, writes `assets/url-mapping.json` (source URL -> path
 * relative to the output directory) and prints a JSON summary.
 *
 * The process exits with code 0 on success and 1 on any failure; the browser
 * is released in both cases.
 *
 * @returns {Promise<void>}
 */
async function extractAssets() {
  const args = parseArgs(process.argv.slice(2));

  if (!args.url) {
    outputError(new Error('--url is required'));
    process.exit(1);
  }
  if (!args.output) {
    outputError(new Error('--output directory is required'));
    process.exit(1);
  }

  const verbose = args.verbose === 'true';
  // Fall back to the default when --timeout is missing or not a positive number.
  const parsedTimeout = Number.parseInt(args.timeout, 10);
  const timeout = Number.isFinite(parsedTimeout) && parsedTimeout > 0 ? parsedTimeout : 30000;

  let browserAcquired = false;
  // Close or disconnect exactly once, on success and on failure alike.
  const releaseBrowser = async () => {
    if (!browserAcquired) return;
    browserAcquired = false;
    if (args.close === 'true') {
      await closeBrowser();
    } else {
      await disconnectBrowser();
    }
  };

  try {
    // Create output directories
    const assetsDir = path.join(args.output, 'assets');
    await Promise.all(
      ['images', 'fonts', 'icons'].map((folder) =>
        fs.mkdir(path.join(assetsDir, folder), { recursive: true })
      )
    );

    // Launch browser and navigate
    const browser = await getBrowser({ headless: args.headless !== 'false' });
    browserAcquired = true;
    const page = await getPage(browser);

    if (verbose) console.error(`\n📦 Extracting assets from: ${args.url}\n`);

    await page.goto(args.url, {
      waitUntil: 'networkidle2',
      timeout: 30000
    });

    // Extract assets from page
    const pageAssets = await extractAssetsFromPage(page, args.url);

    // Get inline styles
    const inlineCss = await page.evaluate(() => {
      return Array.from(document.querySelectorAll('style'))
        .map(style => style.textContent)
        .join('\n');
    });

    // Collect CSS content for font extraction
    let allCssContent = inlineCss;

    // Get external CSS content (already extracted by multi-screenshot if available)
    const sourceCssPath = path.join(args.output, 'analysis', 'source.css');
    try {
      const sourceCss = await fs.readFile(sourceCssPath, 'utf-8');
      allCssContent += '\n' + sourceCss;
    } catch { /* source.css not available */ }

    // Extract URLs from CSS
    const cssAssetUrls = extractCssUrls(allCssContent, args.url);

    // Combine all URLs and categorize
    const allUrls = new Set([...pageAssets.images, ...cssAssetUrls]);

    // Different URLs can share a file name (e.g. /a/logo.png and /b/logo.png).
    // Reserve each destination once and add a numeric suffix on a clash so
    // every URL maps to its own file. Keys are lower-cased so names that
    // differ only by case do not clash on case-insensitive file systems.
    const usedPaths = new Set();
    const claimPath = (dir, filename) => {
      const ext = path.extname(filename);
      const stem = path.basename(filename, ext);
      let candidate = path.join(dir, filename);
      for (let n = 1; usedPaths.has(candidate.toLowerCase()); n++) {
        candidate = path.join(dir, `${stem}-${n}${ext}`);
      }
      usedPaths.add(candidate.toLowerCase());
      return candidate;
    };

    const downloads = [];
    const urlMapping = {};

    for (const url of allUrls) {
      const type = getAssetType(url);
      const folder = type === 'other' ? 'images' : type;
      const destPath = claimPath(path.join(assetsDir, folder), getSafeFilename(url));

      downloads.push({ url, destPath, type });
      urlMapping[url] = path.relative(args.output, destPath);
    }

    if (verbose) {
      const countOf = (type) => downloads.filter(d => d.type === type).length;
      console.error(`Found ${downloads.length} assets to download:`);
      console.error(` - Images: ${countOf('images')}`);
      console.error(` - Fonts: ${countOf('fonts')}`);
      console.error(` - Icons: ${countOf('icons')}`);
      console.error('');
    }

    // Download assets. The timeout is passed as a third argument so the
    // batch downloader can honor --timeout.
    const downloadResults = await downloadBatch(downloads, verbose, timeout);

    // Save inline SVGs (best effort; names are reserved through claimPath so
    // they cannot overwrite a downloaded icon or each other)
    let savedSvgs = 0;
    for (const svg of pageAssets.inlineSvgs) {
      const filename = `${svg.id.replace(/[^a-zA-Z0-9-_]/g, '_')}.svg`;
      const svgPath = claimPath(path.join(assetsDir, 'icons'), filename);
      try {
        await fs.writeFile(svgPath, svg.content, 'utf-8');
        savedSvgs++;
      } catch (err) {
        if (verbose) console.error(` ✗ inline svg: ${filename} - ${err.message}`);
      }
    }

    // Save URL mapping for HTML rewriting
    const mappingPath = path.join(assetsDir, 'url-mapping.json');
    await fs.writeFile(mappingPath, JSON.stringify(urlMapping, null, 2));

    // Close browser
    await releaseBrowser();

    const result = {
      success: true,
      assetsDir: path.resolve(assetsDir),
      urlMapping: mappingPath,
      stats: {
        total: downloads.length,
        downloaded: downloadResults.success,
        failed: downloadResults.failed,
        skipped: downloadResults.skipped,
        inlineSvgs: savedSvgs
      },
      errors: downloadResults.errors.length > 0 ? downloadResults.errors.slice(0, 10) : undefined
    };

    outputJSON(result);
    process.exit(0);

  } catch (error) {
    // Release the browser before exiting; a cleanup failure must not hide
    // the original error.
    try {
      await releaseBrowser();
    } catch { /* best effort */ }
    outputError(error);
    process.exit(1);
  }
}
466
+
467
// Run. extractAssets() reports failures from its own try/catch, but argument
// parsing happens before that block, so anything thrown there would
// otherwise surface as an unhandled rejection.
extractAssets().catch((error) => {
  outputError(error);
  process.exit(1);
});