design-clone 1.0.2 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,377 @@
1
+ /**
2
+ * Multi-page Screenshot Capture
3
+ *
4
+ * Capture screenshots + extract HTML/CSS for multiple pages
5
+ * using a shared browser session for efficiency.
6
+ *
7
+ * Usage:
8
+ * import { captureMultiplePages } from './multi-page-screenshot.js';
9
+ * const result = await captureMultiplePages(pages, { outputDir: './output' });
10
+ */
11
+
12
+ import path from 'path';
13
+ import fs from 'fs/promises';
14
+
15
+ import { getBrowser, getPage, disconnectBrowser } from '../utils/browser.js';
16
+ import { captureViewport, VIEWPORTS, DEFAULT_SCROLL_DELAY } from './screenshot.js';
17
+ import { waitForDomStable, waitForPageReady } from './page-readiness.js';
18
+ import { dismissCookieBanner } from './cookie-handler.js';
19
+ import { extractCleanHtml, JS_FRAMEWORK_PATTERNS, MAX_HTML_SIZE } from './html-extractor.js';
20
+ import { extractAllCss, MAX_CSS_SIZE } from './css-extractor.js';
21
+ import { filterCssFile } from './filter-css.js';
22
+
/**
 * Baseline capture settings.
 *
 * captureMultiplePages spreads the caller's options over this object, so any
 * key supplied by the caller replaces the value listed here.
 */
const DEFAULT_OPTIONS = {
  // Viewport names to screenshot for every page (looked up in VIEWPORTS)
  viewports: ['desktop', 'tablet', 'mobile'],
  // Forwarded to captureViewport as its full-page flag
  fullPage: true,
  // Toggle the HTML / CSS extraction phases and the unused-CSS filter pass
  extractHtml: true,
  extractCss: true,
  filterUnused: true,
  maxSize: 5, // MB for screenshots
  scrollDelay: DEFAULT_SCROLL_DELAY,
  // Forwarded to page.goto as its navigation timeout
  timeout: 60000,
  onProgress: null // (current, total, pageInfo) => {}
};
35
+
/**
 * Turn a URL path into a lowercase, dash-separated file stem.
 *
 * An empty/missing path or the root path '/' maps to 'index'. Otherwise one
 * leading and one trailing slash are dropped, the remaining slashes and every
 * character outside [a-z0-9-] become dashes, and runs of dashes are collapsed.
 *
 * @param {string} pagePath - URL path (e.g., '/about', '/services/consulting')
 * @returns {string} Safe filename without extension (e.g., 'about', 'services-consulting')
 */
export function pathToFilename(pagePath) {
  const isRoot = !pagePath || pagePath === '/';
  if (isRoot) {
    return 'index';
  }

  const withoutLeadingSlash = pagePath.replace(/^\//, '');
  const withoutTrailingSlash = withoutLeadingSlash.replace(/\/$/, '');
  const dashed = withoutTrailingSlash
    .replace(/\//g, '-')
    .replace(/[^a-z0-9-]/gi, '-');
  const collapsed = dashed.replace(/-+/g, '-');

  return collapsed.toLowerCase();
}
51
+
/**
 * Ensure the output tree exists: the base directory, its `html` and `css`
 * subdirectories, and one `analysis/<viewport>` directory per viewport name.
 * Directories are created one after another with `recursive: true`, so
 * directories that already exist are not an error.
 *
 * @param {string} outputDir - Base output directory
 * @param {string[]} viewports - Viewport names
 * @returns {Promise<void>}
 */
async function createOutputStructure(outputDir, viewports) {
  const assetDirs = ['html', 'css'].map(name => path.join(outputDir, name));
  const screenshotDirs = viewports.map(vp => path.join(outputDir, 'analysis', vp));
  const allDirs = [outputDir, ...assetDirs, ...screenshotDirs];

  for (let i = 0; i < allDirs.length; i += 1) {
    await fs.mkdir(allDirs[i], { recursive: true });
  }
}
69
+
/**
 * Extract the page's cleaned HTML and write it to `<outputDir>/html/<filename>.html`.
 * Oversized output is skipped with a warning; any error is recorded on `result`
 * (warning + `result.html = { error, failed }`) instead of being thrown.
 */
async function savePageHtml(page, filename, outputDir, result) {
  try {
    const extracted = await extractCleanHtml(page, JS_FRAMEWORK_PATTERNS);
    const byteSize = Buffer.byteLength(extracted.html, 'utf-8');

    if (byteSize > MAX_HTML_SIZE) {
      result.warnings.push(`HTML size exceeds limit: ${(byteSize / 1024 / 1024).toFixed(1)}MB`);
      return;
    }

    const target = path.join(outputDir, 'html', `${filename}.html`);
    await fs.writeFile(target, extracted.html, 'utf-8');
    result.html = {
      path: target,
      size: byteSize,
      elementCount: extracted.elementCount
    };
    if (extracted.warnings.length > 0) {
      result.warnings.push(...extracted.warnings);
    }
  } catch (err) {
    result.warnings.push(`HTML extraction failed: ${err.message}`);
    result.html = { error: err.message, failed: true };
  }
}

/**
 * Extract all CSS blocks, concatenate them with a source comment per block and
 * write the result to `<outputDir>/css/<filename>-raw.css`.
 * Oversized output is skipped with a warning; any error is recorded on `result`.
 */
async function savePageCss(page, pageInfo, filename, outputDir, result) {
  try {
    const extracted = await extractAllCss(page, pageInfo.url);
    const sections = extracted.cssBlocks.map(b => `/* Source: ${b.source} */\n${b.css}`);
    const combined = sections.join('\n\n');
    const byteSize = Buffer.byteLength(combined, 'utf-8');

    if (byteSize > MAX_CSS_SIZE) {
      result.warnings.push(`CSS size exceeds limit: ${(byteSize / 1024 / 1024).toFixed(1)}MB`);
      return;
    }

    const target = path.join(outputDir, 'css', `${filename}-raw.css`);
    await fs.writeFile(target, combined, 'utf-8');
    result.css = {
      path: target,
      size: byteSize,
      ruleCount: extracted.totalRules,
      corsBlocked: extracted.corsBlocked.length
    };
    if (extracted.warnings.length > 0) {
      result.warnings.push(...extracted.warnings);
    }
  } catch (err) {
    result.warnings.push(`CSS extraction failed: ${err.message}`);
    result.css = { error: err.message, failed: true };
  }
}

/**
 * Run filterCssFile over the saved HTML and raw CSS, writing
 * `<outputDir>/css/<filename>.css`. A failure only adds a warning.
 */
async function filterPageCss(filename, outputDir, result) {
  try {
    const target = path.join(outputDir, 'css', `${filename}.css`);
    const filtered = await filterCssFile(
      result.html.path,
      result.css.path,
      target,
      false,
      outputDir
    );
    result.cssFiltered = {
      path: target,
      size: filtered.output.size,
      reduction: filtered.stats.reduction
    };
  } catch (err) {
    result.warnings.push(`CSS filtering failed: ${err.message}`);
  }
}

/**
 * Take one screenshot per requested viewport into
 * `<outputDir>/analysis/<viewport>/<filename>.png`. Unknown viewport names are
 * skipped with a warning; a failing viewport is recorded and the loop continues.
 */
async function capturePageViewports(page, filename, outputDir, options, result) {
  for (const viewport of options.viewports) {
    if (!VIEWPORTS[viewport]) {
      result.warnings.push(`Invalid viewport: ${viewport}`);
      continue;
    }

    try {
      const target = path.join(outputDir, 'analysis', viewport, `${filename}.png`);
      const shot = await captureViewport(
        page,
        viewport,
        target,
        options.fullPage,
        options.maxSize,
        options.scrollDelay
      );
      result.screenshots[viewport] = {
        path: shot.path,
        size: shot.size,
        compressed: shot.compressed
      };
    } catch (err) {
      result.warnings.push(`${viewport} capture failed: ${err.message}`);
      result.screenshots[viewport] = { error: err.message, failed: true };
    }
  }
}

/**
 * Capture a single page (all viewports + HTML/CSS extraction).
 *
 * Navigation or readiness failures mark the whole page as failed
 * (`success: false`, `error`). Failures inside the individual extraction and
 * screenshot phases are recorded as warnings / `failed` entries and do not
 * prevent `success: true`. This function does not throw for those cases.
 *
 * @param {Page} page - Puppeteer page instance
 * @param {Object} pageInfo - Page info { path, name, url }
 * @param {string} outputDir - Output directory
 * @param {Object} options - Capture options
 * @returns {Promise<Object>} Capture result for this page
 */
async function captureSinglePage(page, pageInfo, outputDir, options) {
  const filename = pathToFilename(pageInfo.path);
  const result = {
    path: pageInfo.path,
    name: pageInfo.name,
    url: pageInfo.url,
    filename,
    screenshots: {},
    html: null,
    css: null,
    warnings: []
  };

  try {
    await page.goto(pageInfo.url, {
      waitUntil: ['load', 'networkidle0'],
      timeout: options.timeout
    });
    await waitForPageReady(page);

    // Best effort: the banner may already be gone, so rejections are ignored
    await dismissCookieBanner(page).catch(() => {});

    // Extra stabilization before anything is read from the DOM
    await waitForDomStable(page, 300, 3000);

    if (options.extractHtml) {
      await savePageHtml(page, filename, outputDir, result);
    }

    if (options.extractCss) {
      await savePageCss(page, pageInfo, filename, outputDir, result);
    }

    // Filtering needs both files on disk and neither phase marked as failed
    const canFilter = options.filterUnused && result.html?.path && result.css?.path &&
      !result.html.failed && !result.css.failed;
    if (canFilter) {
      await filterPageCss(filename, outputDir, result);
    }

    await capturePageViewports(page, filename, outputDir, options, result);

    result.success = true;
  } catch (err) {
    result.success = false;
    result.error = err.message;
    result.warnings.push(`Page capture failed: ${err.message}`);
  }

  return result;
}
222
+
/**
 * Return the origin of `url`, or null when `url` is missing or cannot be
 * parsed. Used so that one malformed entry cannot abort a whole batch.
 */
function originOrNull(url) {
  if (!url) {
    return null;
  }
  try {
    return new URL(url).origin;
  } catch {
    return null;
  }
}

/**
 * Capture multiple pages with a shared browser session.
 *
 * Pages are processed one at a time, each in its own tab that is closed
 * afterwards. Per-page failures are recorded in the returned stats and do not
 * stop the run; an error outside a page capture (for example while launching
 * the browser or opening a tab) sets `success: false` and `error` on the
 * result. The result is also written to `<outputDir>/capture-results.json`.
 *
 * @param {Array} pages - Array of { path, name, url }
 * @param {Object} options - Capture options (merged over DEFAULT_OPTIONS);
 *   `outputDir` is required
 * @returns {Promise<Object>} Complete capture result
 * @throws {TypeError} If `pages` is not an array
 * @throws {Error} If `options.outputDir` is missing
 */
export async function captureMultiplePages(pages, options = {}) {
  // Validate before touching the filesystem so bad input leaves no directories behind
  if (!Array.isArray(pages)) {
    throw new TypeError('pages must be an array of { path, name, url }');
  }

  const opts = { ...DEFAULT_OPTIONS, ...options };
  const startTime = Date.now();

  if (!opts.outputDir) {
    throw new Error('outputDir is required');
  }

  // Create output directory structure
  await createOutputStructure(opts.outputDir, opts.viewports);

  let browser = null;
  const results = {
    success: true,
    // A malformed first URL yields null here; the page itself is then
    // reported as a normal per-page failure by captureSinglePage.
    baseUrl: originOrNull(pages[0]?.url),
    outputDir: path.resolve(opts.outputDir),
    pages: [],
    cssFiles: [], // Raw CSS paths
    cssFilesFiltered: [], // Filtered CSS paths
    stats: {
      totalPages: pages.length,
      successfulPages: 0,
      failedPages: 0,
      totalScreenshots: 0,
      totalWarnings: 0
    },
    capturedAt: new Date().toISOString()
  };

  // Invoke the optional progress callback; anything that is not a function is ignored
  const reportProgress = (index, pageInfo, status) => {
    if (typeof opts.onProgress === 'function') {
      opts.onProgress(index + 1, pages.length, {
        path: pageInfo.path,
        name: pageInfo.name,
        status
      });
    }
  };

  try {
    // Launch browser once
    browser = await getBrowser({ headless: true });

    for (let i = 0; i < pages.length; i++) {
      const pageInfo = pages[i];

      reportProgress(i, pageInfo, 'capturing');

      // Get a new page tab
      const page = await getPage(browser);

      try {
        const pageResult = await captureSinglePage(page, pageInfo, opts.outputDir, opts);
        results.pages.push(pageResult);

        // Track CSS files for merging
        if (pageResult.css?.path && !pageResult.css.failed) {
          results.cssFiles.push(pageResult.css.path);
        }

        // Track filtered CSS files
        if (pageResult.cssFiltered?.path) {
          results.cssFilesFiltered.push(pageResult.cssFiltered.path);
        }

        // Update stats; only screenshots not marked as failed are counted
        if (pageResult.success) {
          results.stats.successfulPages++;
          results.stats.totalScreenshots += Object.keys(pageResult.screenshots)
            .filter(vp => !pageResult.screenshots[vp].failed).length;
        } else {
          results.stats.failedPages++;
        }
        results.stats.totalWarnings += pageResult.warnings.length;

        reportProgress(i, pageInfo, 'done');
      } finally {
        // Close tab, keep browser
        await page.close().catch(() => {});
      }
    }
  } catch (err) {
    results.success = false;
    results.error = err.message;
  } finally {
    // Disconnect browser
    if (browser) {
      await disconnectBrowser().catch(() => {});
    }
  }

  // Calculate total time
  results.stats.totalTimeMs = Date.now() - startTime;

  // Write results JSON (resultsFile is added afterwards, so it is not part of the file)
  const resultsPath = path.join(opts.outputDir, 'capture-results.json');
  await fs.writeFile(resultsPath, JSON.stringify(results, null, 2));
  results.resultsFile = resultsPath;

  return results;
}
335
+
// CLI support

/**
 * Report whether `scriptPath` (normally process.argv[1]) names this file.
 *
 * Only the final path segment is compared, and it must equal the script name
 * exactly (with or without the `.js` extension). A substring test would also
 * fire for any importing script whose path merely contains the name — for
 * example a test file or a parent directory — and would then run the CLI and
 * call process.exit() inside that other program.
 *
 * @param {string|undefined} scriptPath - Path of the entry script
 * @returns {boolean} True when the entry script is this module
 */
function isMultiPageCliEntry(scriptPath) {
  if (!scriptPath) {
    return false;
  }
  // Split on both separators so Windows paths are handled too
  const baseName = scriptPath.split(/[\\/]/).pop();
  return baseName === 'multi-page-screenshot.js' || baseName === 'multi-page-screenshot';
}

const isMainModule = isMultiPageCliEntry(process.argv[1]);

if (isMainModule) {
  // Simple CLI: node multi-page-screenshot.js <url> <outputDir>
  const url = process.argv[2];
  const outputDir = process.argv[3] || './multi-capture-output';

  if (!url) {
    console.error('Usage: node multi-page-screenshot.js <url> [outputDir]');
    process.exit(1);
  }

  // discoverPages is only needed in CLI mode, so it is loaded lazily
  import('./discover-pages.js').then(async ({ discoverPages }) => {
    console.error(`[INFO] Discovering pages from ${url}...`);
    const discovery = await discoverPages(url, { maxPages: 5 });

    if (!discovery.success) {
      console.error(`[ERROR] Discovery failed: ${discovery.error}`);
      process.exit(1);
    }

    console.error(`[INFO] Found ${discovery.pages.length} pages`);

    // Progress goes to stderr so stdout carries only the JSON result
    const result = await captureMultiplePages(discovery.pages, {
      outputDir,
      onProgress: (current, total, info) => {
        console.error(`[${current}/${total}] ${info.status}: ${info.name} (${info.path})`);
      }
    });

    console.log(JSON.stringify(result, null, 2));
    process.exit(result.success ? 0 : 1);
  }).catch(err => {
    console.error(`[ERROR] ${err.message}`);
    process.exit(1);
  });
}
@@ -0,0 +1,226 @@
1
+ /**
2
+ * Link Rewriting Module
3
+ *
4
+ * Rewrites internal links in HTML to point to local .html files.
5
+ * Preserves external links unchanged.
6
+ *
7
+ * Usage:
8
+ * import { rewriteLinks, createPageManifest } from './rewrite-links.js';
9
+ * const rewritten = rewriteLinks(html, manifest, { baseUrl });
10
+ */
11
+
12
+ import { normalizeUrl } from './discover-pages.js';
13
+
/**
 * Map a URL path to the name of the local HTML file that stands in for it.
 *
 * A missing/empty path or '/' becomes 'index.html'. Any other path has one
 * leading and one trailing slash removed, remaining slashes and characters
 * outside [a-z0-9-] turned into dashes, dash runs collapsed, and the result
 * lowercased before '.html' is appended.
 *
 * @param {string} urlPath - URL path (e.g., '/about', '/services/consulting')
 * @returns {string} Local filename (e.g., 'about.html', 'services-consulting.html')
 */
export function pathToFilename(urlPath) {
  if (!urlPath || urlPath === '/') {
    return 'index.html';
  }

  // Applied in order; each entry is [pattern, replacement]
  const steps = [
    [/^\//, ''], // Remove leading slash
    [/\/$/, ''], // Remove trailing slash
    [/\//g, '-'], // Replace slashes with dashes
    [/[^a-z0-9-]/gi, '-'], // Replace special chars
    [/-+/g, '-'] // Collapse multiple dashes
  ];

  let slug = urlPath;
  for (const [pattern, replacement] of steps) {
    slug = slug.replace(pattern, replacement);
  }

  return `${slug.toLowerCase()}.html`;
}
34
+
/**
 * Build the manifest describing a captured site.
 *
 * `baseUrl` is the origin of the first page's URL (empty string when there is
 * no first page or it has no URL). Every page entry carries its path, name,
 * local filename (via pathToFilename) and original URL.
 *
 * @param {Array} pages - Array of { path, name, url }
 * @param {Object} options - Additional options: `hasTokens` (adds
 *   'tokens.css' to assets) and `stats` (copied into the manifest)
 * @returns {Object} Page manifest
 */
export function createPageManifest(pages, options = {}) {
  const firstUrl = pages[0]?.url;
  const baseUrl = firstUrl ? new URL(firstUrl).origin : '';
  const capturedAt = new Date().toISOString();

  const pageEntries = pages.map((entry) => {
    return {
      path: entry.path,
      name: entry.name,
      file: pathToFilename(entry.path),
      originalUrl: entry.url
    };
  });

  let tokens = null;
  if (options.hasTokens) {
    tokens = 'tokens.css';
  }

  return {
    baseUrl,
    capturedAt,
    pages: pageEntries,
    assets: { css: 'styles.css', tokens },
    stats: options.stats || {}
  };
}
62
+
/**
 * Build URL to filename mapping from manifest.
 *
 * Each page is registered under its original URL and its path, both as given
 * and with one trailing slash removed (the root path '/' is kept as is).
 * Later pages overwrite earlier ones that share a key.
 *
 * @param {Object} manifest - Page manifest
 * @returns {Map} URL -> filename mapping
 */
function buildUrlMap(manifest) {
  const urlMap = new Map();

  for (const page of manifest.pages) {
    // Map by full URL, with and without trailing slash
    if (page.originalUrl) {
      urlMap.set(page.originalUrl, page.file);
      urlMap.set(page.originalUrl.replace(/\/$/, ''), page.file);
    }

    // Map by path, with and without trailing slash
    if (page.path) {
      urlMap.set(page.path, page.file);
      if (page.path !== '/') {
        urlMap.set(page.path.replace(/\/$/, ''), page.file);
      }
    }
  }

  return urlMap;
}

/**
 * Find the local file for an href, or null when the href is not a known page.
 * Tries the href verbatim, then without its #fragment, then (only when a
 * baseUrl is available) the result of normalizeUrl.
 */
function resolveLocalFile(href, urlMap, baseUrl) {
  if (urlMap.has(href)) {
    return urlMap.get(href);
  }

  // '/about#team' must resolve like '/about' even when no baseUrl was given
  const withoutFragment = href.split('#')[0];
  if (withoutFragment && urlMap.has(withoutFragment)) {
    return urlMap.get(withoutFragment);
  }

  if (baseUrl) {
    const normalized = normalizeUrl(baseUrl, href);
    if (normalized && urlMap.has(normalized)) {
      return urlMap.get(normalized);
    }
  }

  return null;
}

/**
 * Rewrite links in HTML to point to local files.
 *
 * - `<a href>` values that match a manifest page are replaced by that page's
 *   local filename; a trailing `#fragment` is preserved. Empty, `javascript:`,
 *   `mailto:`, `tel:` and fragment-only hrefs, as well as unknown/external
 *   links, are left untouched.
 * - When `rewriteCss` is true, stylesheet `<link>` tags whose href ends in
 *   `.css` (optionally followed by a query string or fragment) are pointed at
 *   `../styles.css`, duplicates after the first are removed, and
 *   `../tokens.css` is inserted before it when `injectTokensCss` is true.
 *
 * @param {string} html - HTML content
 * @param {Object} manifest - Page manifest
 * @param {Object} options - Rewrite options: `baseUrl`, `rewriteCss`
 *   (default true), `injectTokensCss` (default false)
 * @returns {string} HTML with rewritten links
 */
export function rewriteLinks(html, manifest, options = {}) {
  const { baseUrl, rewriteCss = true, injectTokensCss = false } = options;
  const urlMap = buildUrlMap(manifest);

  // Rewrite <a href="..."> links
  let result = html.replace(
    /(<a\s[^>]*href=["'])([^"']+)(["'][^>]*>)/gi,
    (match, prefix, href, suffix) => {
      // Skip empty, javascript:, mailto:, tel:, and anchor-only links
      if (!href ||
          href.startsWith('javascript:') ||
          href.startsWith('mailto:') ||
          href.startsWith('tel:') ||
          href.startsWith('#')) {
        return match;
      }

      const filename = resolveLocalFile(href, urlMap, baseUrl);
      if (!filename) {
        // Keep original for external/unknown links
        return match;
      }

      // Preserve fragment if present
      const fragmentMatch = href.match(/#[^#]*$/);
      const fragment = fragmentMatch ? fragmentMatch[0] : '';
      return `${prefix}${filename}${fragment}${suffix}`;
    }
  );

  if (!rewriteCss) {
    return result;
  }

  // Rewrite CSS links to use shared styles.css. The optional group lets
  // cache-busted hrefs such as "main.css?v=3" match as well.
  result = result.replace(
    /<link([^>]*?)href=["'][^"']*\.css(?:[?#][^"']*)?["']([^>]*?)>/gi,
    (match, before, after) => {
      const hasStylesheetRel =
        match.includes('rel="stylesheet"') || match.includes("rel='stylesheet'");

      if (hasStylesheetRel) {
        // The tag already declares rel; adding it again would duplicate the attribute
        return `<link${before}href="../styles.css"${after}>`;
      }
      if (!match.includes('rel=')) {
        return `<link${before}href="../styles.css" rel="stylesheet"${after}>`;
      }
      // Some other rel (e.g. preload): not a stylesheet link, leave it alone
      return match;
    }
  );

  // Remove duplicate stylesheet links (keep first)
  let stylesheetSeen = false;
  result = result.replace(
    /<link[^>]*href=["']\.\.\/styles\.css["'][^>]*>/gi,
    (match) => {
      if (stylesheetSeen) {
        return ''; // Remove duplicate
      }
      stylesheetSeen = true;
      return match;
    }
  );

  // Inject tokens.css before styles.css if requested
  if (injectTokensCss) {
    result = result.replace(
      /(<link[^>]*href=["']\.\.\/styles\.css["'][^>]*>)/i,
      '<link href="../tokens.css" rel="stylesheet">\n $1'
    );
  }

  return result;
}
184
+
/**
 * Apply rewriteLinks in place to the HTML file of every manifest page.
 *
 * Files are handled one at a time. A file that cannot be read, rewritten or
 * written is reported in `errors` and does not stop the remaining files.
 *
 * @param {string} htmlDir - Directory containing HTML files
 * @param {Object} manifest - Page manifest
 * @param {Object} options - Rewrite options (passed through to rewriteLinks)
 * @returns {Promise<Object>} Rewrite results: { processed: string[], errors: Array<{ file, error }> }
 */
export async function rewriteAllLinks(htmlDir, manifest, options = {}) {
  const { readFile, writeFile } = await import('fs/promises');
  const { join } = await import('path');

  const processed = [];
  const errors = [];

  for (const page of manifest.pages) {
    const target = join(htmlDir, page.file);

    try {
      const original = await readFile(target, 'utf-8');
      await writeFile(target, rewriteLinks(original, manifest, options), 'utf-8');
      processed.push(page.file);
    } catch (err) {
      errors.push({ file: page.file, error: err.message });
    }
  }

  return { processed, errors };
}
216
+
// CLI support

/**
 * Report whether `scriptPath` (normally process.argv[1]) names this file.
 *
 * Only the final path segment is compared and it must match exactly (with or
 * without the `.js` extension). A substring test would also be true for any
 * importing program whose path merely contains "rewrite-links", which would
 * make a plain import print the notice below.
 *
 * @param {string|undefined} scriptPath - Path of the entry script
 * @returns {boolean} True when the entry script is this module
 */
function isRewriteLinksCliEntry(scriptPath) {
  if (!scriptPath) {
    return false;
  }
  // Split on both separators so Windows paths are handled too
  const baseName = scriptPath.split(/[\\/]/).pop();
  return baseName === 'rewrite-links.js' || baseName === 'rewrite-links';
}

const isMainModule = isRewriteLinksCliEntry(process.argv[1]);

if (isMainModule) {
  // There is no CLI for this module; just point the user at the exports
  console.log('rewrite-links.js - Use as module, not CLI');
  console.log('Exports: rewriteLinks, createPageManifest, pathToFilename, rewriteAllLinks');
}
+ }
@@ -377,4 +377,21 @@ async function captureMultiViewport() {
377
377
  }
378
378
  }
379
379
 
380
- captureMultiViewport();
380
+ // Export for module use
381
+ export {
382
+ captureViewport,
383
+ VIEWPORTS,
384
+ VIEWPORT_SETTLE_DELAY,
385
+ DEFAULT_SCROLL_DELAY,
386
+ compressIfNeeded
387
+ };
388
+
// Run if called directly (not imported as module)

/**
 * Report whether `scriptPath` (normally process.argv[1]) names this file.
 *
 * The final path segment must equal the script name exactly (with or without
 * the `.js` extension). Suffix or substring tests are not enough here:
 * "multi-page-screenshot.js" both ends with "screenshot.js" and contains
 * "screenshot", so running that script — which imports this module — would
 * otherwise start captureMultiViewport() as an import side effect.
 *
 * @param {string|undefined} scriptPath - Path of the entry script
 * @returns {boolean} True when the entry script is this module
 */
function isScreenshotCliEntry(scriptPath) {
  if (!scriptPath) {
    return false;
  }
  // Split on both separators so Windows paths are handled too
  const baseName = scriptPath.split(/[\\/]/).pop();
  return baseName === 'screenshot.js' || baseName === 'screenshot';
}

const isMainModule = isScreenshotCliEntry(process.argv[1]);

if (isMainModule) {
  captureMultiViewport();
}
+ }