design-clone 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/.env.example +14 -0
  2. package/LICENSE +21 -0
  3. package/README.md +166 -0
  4. package/SKILL.md +239 -0
  5. package/bin/cli.js +45 -0
  6. package/bin/commands/help.js +29 -0
  7. package/bin/commands/init.js +126 -0
  8. package/bin/commands/verify.js +99 -0
  9. package/bin/utils/copy.js +65 -0
  10. package/bin/utils/validate.js +122 -0
  11. package/docs/basic-clone.md +63 -0
  12. package/docs/cli-reference.md +94 -0
  13. package/docs/design-clone-architecture.md +247 -0
  14. package/docs/pixel-perfect.md +86 -0
  15. package/docs/troubleshooting.md +97 -0
  16. package/package.json +57 -0
  17. package/requirements.txt +5 -0
  18. package/src/ai/analyze-structure.py +305 -0
  19. package/src/ai/extract-design-tokens.py +439 -0
  20. package/src/ai/prompts/__init__.py +2 -0
  21. package/src/ai/prompts/design_tokens.py +183 -0
  22. package/src/ai/prompts/structure_analysis.py +273 -0
  23. package/src/core/cookie-handler.js +76 -0
  24. package/src/core/css-extractor.js +107 -0
  25. package/src/core/dimension-extractor.js +366 -0
  26. package/src/core/dimension-output.js +208 -0
  27. package/src/core/extract-assets.js +468 -0
  28. package/src/core/filter-css.js +499 -0
  29. package/src/core/html-extractor.js +102 -0
  30. package/src/core/lazy-loader.js +188 -0
  31. package/src/core/page-readiness.js +161 -0
  32. package/src/core/screenshot.js +380 -0
  33. package/src/post-process/enhance-assets.js +157 -0
  34. package/src/post-process/fetch-images.js +398 -0
  35. package/src/post-process/inject-icons.js +311 -0
  36. package/src/utils/__init__.py +16 -0
  37. package/src/utils/__pycache__/__init__.cpython-313.pyc +0 -0
  38. package/src/utils/__pycache__/env.cpython-313.pyc +0 -0
  39. package/src/utils/browser.js +103 -0
  40. package/src/utils/env.js +153 -0
  41. package/src/utils/env.py +134 -0
  42. package/src/utils/helpers.js +71 -0
  43. package/src/utils/puppeteer.js +281 -0
  44. package/src/verification/verify-layout.js +424 -0
  45. package/src/verification/verify-menu.js +422 -0
  46. package/templates/base.css +705 -0
  47. package/templates/base.html +293 -0
@@ -0,0 +1,499 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Filter CSS to remove unused selectors
4
+ *
5
+ * Usage:
6
+ * node filter-css.js --html source.html --css source-raw.css --output source.css
7
+ *
8
+ * Options:
9
+ * --html Path to cleaned HTML file (required)
10
+ * --css Path to raw CSS file (required)
11
+ * --output Path for filtered CSS output (required)
12
+ * --verbose Enable verbose logging
13
+ *
14
+ * Uses css-tree for AST parsing and selector analysis.
15
+ *
16
+ * Memory: Max 10MB CSS input. Large files may cause high memory usage during AST parsing.
17
+ * Reduction: Typical 20-30% reduction. Complex selectors (combinators, nth-child) kept conservatively.
18
+ */
19
+
20
+ import fs from 'fs/promises';
21
+ import path from 'path';
22
+
// Load css-tree dynamically so that a missing dependency is reported as a
// machine-readable JSON error instead of an unhandled module-resolution crash.
let csstree;
try {
  csstree = await import('css-tree');
} catch (loadError) {
  // Keep the original `error`/`hint` fields for existing consumers, but also
  // surface the underlying failure: the import can fail for reasons other
  // than the package being absent (e.g. a corrupted install).
  console.error(JSON.stringify({
    success: false,
    error: 'css-tree not installed',
    hint: 'Run: npm install css-tree',
    detail: loadError instanceof Error ? loadError.message : String(loadError)
  }, null, 2));
  process.exit(1);
}
35
+
// Upper bound for the CSS input, in bytes (10MB). Larger inputs are rejected
// before AST parsing to keep memory usage bounded.
const MAX_CSS_INPUT_SIZE = 10 * 1024 * 1024;

// Selector texts that are always kept (critical for layout). Each pattern is
// tested against the full, trimmed selector-list text.
const ALWAYS_KEEP_PATTERNS = [
  /^html$/i,
  /^body$/i,
  /^\*$/,
  /^:root$/i
];

// At-rules that should always be kept
const KEEP_AT_RULES = ['font-face', 'keyframes', 'import', 'charset', 'namespace'];

// CSS injection patterns to sanitize (XSS vectors).
// All patterns are global so that String.prototype.replace rewrites every hit.
const CSS_INJECTION_PATTERNS = [
  /expression\s*\(/gi,                    // IE expression()
  /-moz-binding\s*:/gi,                   // Firefox XBL binding
  /url\s*\(\s*["']?javascript:/gi,        // javascript: URLs
  /url\s*\(\s*["']?data:text\/html/gi,    // data: HTML URLs
  // IE `behavior` / `-ms-behavior` only. The lookbehind stops the pattern
  // from firing on standard properties that merely end in "behavior"
  // (scroll-behavior, overscroll-behavior, transition-behavior), which
  // would otherwise be corrupted in the sanitized output.
  /(?<![\w-])(?:-ms-)?behavior\s*:/gi,
  /@import\s+["']?javascript:/gi          // @import javascript:
];
59
+
/**
 * Validate file path is within allowed directory (prevents path traversal)
 *
 * The check is purely lexical: both paths are resolved to absolute form and
 * compared via their relative path. Symbolic links are not followed.
 *
 * @param {string} filePath - Path to validate
 * @param {string} allowedDir - Directory paths must be within (optional, defaults to cwd)
 * @returns {string} Resolved absolute path
 * @throws {Error} If path is outside allowed directory
 */
function validatePath(filePath, allowedDir = process.cwd()) {
  const resolved = path.resolve(filePath);
  const allowed = path.resolve(allowedDir);

  // A relative path that climbs ("..") or is absolute (different drive on
  // Windows) means `resolved` is not contained in `allowed`. Unlike a string
  // prefix test on `allowed + path.sep`, this also works when `allowed` is
  // the filesystem root, whose resolved form already ends with a separator.
  const relative = path.relative(allowed, resolved);
  const escapesAllowedDir =
    relative === '..' ||
    relative.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relative);

  if (escapesAllowedDir) {
    throw new Error(`Path "${filePath}" is outside allowed directory "${allowedDir}"`);
  }

  return resolved;
}
78
+
/**
 * Neutralize known CSS-based script injection vectors.
 *
 * Every occurrence of every pattern in CSS_INJECTION_PATTERNS is replaced,
 * in list order, with a placeholder comment.
 *
 * @param {string} css - CSS string to sanitize
 * @returns {string} Sanitized CSS
 */
function sanitizeCss(css) {
  return CSS_INJECTION_PATTERNS.reduce(
    (text, pattern) => text.replace(pattern, '/* [sanitized] */'),
    css
  );
}
91
+
/**
 * Simple argument parser
 *
 * Recognized forms:
 *   --key value   -> { key: 'value' }
 *   --key=value   -> { key: 'value' }
 *   --flag        -> { flag: true }   (no value, or next token is another option)
 *
 * Tokens that do not start with "--" and are not consumed as a value are ignored.
 *
 * @param {string[]} args - Raw CLI arguments (without node/script entries)
 * @returns {Object} Map of option name to string value or `true`
 */
function parseArgs(args) {
  const result = {};

  for (let i = 0; i < args.length; i++) {
    const token = args[i];
    if (!token.startsWith('--')) {
      continue;
    }

    const option = token.slice(2);

    // Inline form: split on the first "=" so values may themselves contain "=".
    const equalsAt = option.indexOf('=');
    if (equalsAt > 0) {
      result[option.slice(0, equalsAt)] = option.slice(equalsAt + 1);
      continue;
    }

    // Compare against undefined rather than truthiness so that an explicit
    // empty-string value is kept as a value instead of turning the option
    // into a boolean flag.
    const nextArg = args[i + 1];
    if (nextArg !== undefined && !nextArg.startsWith('--')) {
      result[option] = nextArg;
      i++;
    } else {
      result[option] = true;
    }
  }

  return result;
}
111
+
// The few character references that can appear in serialized attribute values
// and matter for class/id matching.
const HTML_ATTRIBUTE_ENTITIES = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', '\u00a0']
]);

/**
 * Decode numeric and the most common named character references.
 * Unknown references are left untouched.
 */
function decodeHtmlEntities(value) {
  if (!value.includes('&')) {
    return value;
  }
  return value.replace(/&(?:#x([0-9a-f]+)|#(\d+)|([a-z]+));/gi, (whole, hex, dec, named) => {
    if (named !== undefined) {
      return HTML_ATTRIBUTE_ENTITIES.get(named) ?? whole;
    }
    const codePoint = Number.parseInt(hex ?? dec, hex !== undefined ? 16 : 10);
    return codePoint > 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
  });
}

/**
 * Collect the decoded values captured by an attribute pattern whose three
 * groups are the double-quoted, single-quoted and unquoted value.
 */
function extractAttributeValues(html, pattern) {
  const values = [];
  for (const [, doubleQuoted, singleQuoted, unquoted] of html.matchAll(pattern)) {
    values.push(decodeHtmlEntities(doubleQuoted ?? singleQuoted ?? unquoted ?? ''));
  }
  return values;
}

/**
 * Parse HTML and build sets of all possible selector matches
 * Uses regex for speed (no DOM parser needed)
 *
 * The scan is deliberately over-inclusive (it also sees text that merely
 * looks like markup); extra entries only cause more CSS to be kept.
 *
 * @param {string} html - HTML source to scan
 * @returns {{ tags: Set, ids: Set, classes: Set, attributes: Set }}
 */
function analyzeHtml(html) {
  const tags = new Set();
  const ids = new Set();
  const classes = new Set();
  const attributes = new Set();

  // Tag names. Hyphens are part of the name so custom elements such as
  // <my-card> are recorded in full rather than truncated to "my".
  for (const [, tagName] of html.matchAll(/<([a-z][a-z0-9-]*)/gi)) {
    tags.add(tagName.toLowerCase());
  }

  // IDs: id="value", id='value' or id=value. Each quoting style is matched
  // separately so a value may contain the other kind of quote.
  const idPattern = /\bid\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))/gi;
  for (const id of extractAttributeValues(html, idPattern)) {
    if (id) ids.add(id);
  }

  // Classes: same value forms as IDs, split on ASCII whitespace.
  const classPattern = /\bclass\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))/gi;
  for (const classList of extractAttributeValues(html, classPattern)) {
    for (const className of classList.split(/[ \t\n\f\r]+/)) {
      if (className) classes.add(className);
    }
  }

  // Extract data attributes: data-foo="bar"
  for (const [, attrName] of html.matchAll(/\s(data-[a-z0-9-]+)/gi)) {
    attributes.add(attrName.toLowerCase());
  }

  // Add common attributes that are often used in selectors
  const commonAttrs = ['href', 'src', 'type', 'name', 'value', 'disabled', 'checked',
    'selected', 'readonly', 'required', 'placeholder', 'role',
    'aria-hidden', 'aria-label', 'aria-expanded', 'target', 'rel'];
  for (const attr of commonAttrs) {
    if (html.includes(`${attr}=`) || html.includes(`${attr} `) || html.includes(`${attr}>`)) {
      attributes.add(attr);
    }
  }

  return { tags, ids, classes, attributes };
}
163
+
/**
 * Resolve CSS escape sequences in an identifier (e.g. "md\:flex" -> "md:flex",
 * "\31 0" -> "10") so it can be compared with names taken from the HTML.
 * Identifiers without a backslash are returned unchanged.
 */
function unescapeCssIdentifier(name) {
  if (!name.includes('\\')) {
    return name;
  }
  return name.replace(/\\(?:([0-9a-f]{1,6})[ \t\n\f\r]?|([\s\S]))/gi, (whole, hex, literal) => {
    if (literal !== undefined) {
      return literal;
    }
    const codePoint = Number.parseInt(hex, 16);
    const isInvalid =
      codePoint === 0 ||
      codePoint > 0x10ffff ||
      (codePoint >= 0xd800 && codePoint <= 0xdfff);
    return isInvalid ? '\ufffd' : String.fromCodePoint(codePoint);
  });
}

/**
 * Check if a single CSS selector matches any element in the HTML
 *
 * The check is conservative: every tag, id and class named directly in the
 * selector must exist somewhere in the HTML (not necessarily on the same
 * element). Attribute selectors, pseudo-classes and pseudo-elements never
 * cause a mismatch, and a selector without any tag/id/class/attribute part
 * is always considered a match.
 *
 * @param {Object} selectorAst - css-tree selector AST node
 * @param {Object} htmlAnalysis - Result from analyzeHtml
 * @returns {boolean}
 */
function selectorMatches(selectorAst, htmlAnalysis) {
  const { tags, ids, classes } = htmlAnalysis;
  let matches = true;
  let hasSpecificSelector = false;

  // Number of pseudo selectors currently open in the walk. Anything nested
  // inside one (the arguments of :not(), :is(), :where(), :has(),
  // ::slotted(), ...) is not a requirement on the matched element, so it is
  // ignored; e.g. ".a:not(.missing)" must be kept when ".a" exists.
  let pseudoDepth = 0;
  const isPseudo = (node) =>
    node.type === 'PseudoClassSelector' || node.type === 'PseudoElementSelector';

  csstree.walk(selectorAst, {
    enter(node) {
      if (isPseudo(node)) {
        // Pseudo-classes are state-based and pseudo-elements are generated
        // content: always keep, and skip over their arguments.
        pseudoDepth++;
        return;
      }
      if (pseudoDepth > 0) {
        return;
      }

      switch (node.type) {
        case 'TypeSelector': {
          // Tag selector: div, span, header, etc. A namespace prefix
          // ("svg|circle", "*|*") is dropped before the lookup.
          hasSpecificSelector = true;
          const localName = unescapeCssIdentifier(
            node.name.slice(node.name.lastIndexOf('|') + 1)
          ).toLowerCase();
          if (localName !== '*' && !tags.has(localName)) {
            matches = false;
          }
          break;
        }

        case 'IdSelector':
          // ID selector: #main, #header
          hasSpecificSelector = true;
          if (!ids.has(unescapeCssIdentifier(node.name))) {
            matches = false;
          }
          break;

        case 'ClassSelector':
          // Class selector: .container, .btn, .md\:flex
          hasSpecificSelector = true;
          if (!classes.has(unescapeCssIdentifier(node.name))) {
            matches = false;
          }
          break;

        case 'AttributeSelector':
          // Attribute selector: [type="text"], [data-foo]
          // Be lenient with attribute selectors - hard to check accurately
          hasSpecificSelector = true;
          break;
      }
    },
    leave(node) {
      if (isPseudo(node)) {
        pseudoDepth--;
      }
    }
  });

  // If no specific selectors found, keep the rule
  if (!hasSpecificSelector) {
    return true;
  }

  return matches;
}
228
+
/**
 * Check if any selector in a selector list matches
 *
 * Only the list's own (comma-separated) selectors are considered
 * alternatives. Selectors nested deeper in the tree - the arguments of
 * :not(), :is(), etc. - are not evaluated on their own, so
 * ".missing:not(.present)" is not kept merely because ".present" exists.
 *
 * @param {Object} selectorList - css-tree SelectorList AST node
 * @param {Object} htmlAnalysis - Result from analyzeHtml
 * @returns {boolean}
 */
function selectorListMatches(selectorList, htmlAnalysis) {
  let anyMatch = false;

  // forEach is available on both css-tree's List and plain arrays.
  selectorList.children.forEach((selector) => {
    if (anyMatch || selector.type !== 'Selector') {
      return;
    }
    if (selectorMatches(selector, htmlAnalysis)) {
      anyMatch = true;
    }
  });

  return anyMatch;
}
249
+
/**
 * Tell whether a rule's selector text is on the always-keep list.
 *
 * @param {string} selectorText - Full selector-list text of a rule
 * @returns {boolean} True when the trimmed text satisfies any ALWAYS_KEEP_PATTERNS entry
 */
function shouldAlwaysKeep(selectorText) {
  const candidate = selectorText.trim();
  for (const pattern of ALWAYS_KEEP_PATTERNS) {
    if (pattern.test(candidate)) {
      return true;
    }
  }
  return false;
}
256
+
/**
 * Filter CSS rules based on HTML analysis
 *
 * Mutates `cssAst` in place: every style rule whose selector list matches
 * nothing in the HTML is removed from its parent list. Rules are collected
 * first and removed after the walk so the tree is not modified while it is
 * being traversed.
 *
 * @param {Object} cssAst - css-tree AST
 * @param {Object} htmlAnalysis - Result from analyzeHtml
 * @param {boolean} verbose - Enable verbose logging
 * @returns {Object} stats
 */
function filterCss(cssAst, htmlAnalysis, verbose) {
  const stats = {
    totalRules: 0,
    keptRules: 0,
    removedRules: 0,
    atRules: 0,
    mediaQueries: 0
  };

  const nodesToRemove = [];

  // True when the closest enclosing at-rule is listed in KEEP_AT_RULES
  // (vendor prefix ignored, e.g. -webkit-keyframes). Rules inside such
  // blocks are not element selectors: keyframe selectors like `from` / `to`
  // would otherwise be mistaken for unknown tag names and dropped.
  const isInsideKeptAtRule = (atrule) => {
    if (!atrule || typeof atrule.name !== 'string') {
      return false;
    }
    const baseName = atrule.name.toLowerCase().replace(/^-[a-z]+-/, '');
    return KEEP_AT_RULES.includes(baseName);
  };

  // Walk through all rules
  csstree.walk(cssAst, {
    visit: 'Rule',
    enter(node, item, list) {
      stats.totalRules++;

      // css-tree calls the handler with the walk context as `this`;
      // `this.atrule` is the closest enclosing Atrule node (or null).
      if (isInsideKeptAtRule(this?.atrule)) {
        stats.keptRules++;
        return;
      }

      // Keep rules without standard selectors (e.g. a raw, unparsed prelude)
      if (!node.prelude || node.prelude.type !== 'SelectorList') {
        stats.keptRules++;
        return;
      }

      // Get selector text for always-keep check
      const selectorText = csstree.generate(node.prelude);
      if (shouldAlwaysKeep(selectorText)) {
        stats.keptRules++;
        return;
      }

      // Check if selector matches HTML
      if (selectorListMatches(node.prelude, htmlAnalysis)) {
        stats.keptRules++;
      } else {
        nodesToRemove.push({ item, list });
        stats.removedRules++;
      }
    }
  });

  // Remove filtered rules
  for (const { item, list } of nodesToRemove) {
    if (list) {
      list.remove(item);
    }
  }

  // Count at-rules
  csstree.walk(cssAst, {
    visit: 'Atrule',
    enter(node) {
      stats.atRules++;
      if (node.name === 'media') {
        stats.mediaQueries++;
      }
    }
  });

  if (verbose) {
    // Avoid printing "NaN%" for a stylesheet without any style rules.
    const keptPercent = stats.totalRules > 0
      ? Math.round(stats.keptRules / stats.totalRules * 100)
      : 0;
    console.error(`[CSS Filter] Total rules: ${stats.totalRules}`);
    console.error(`[CSS Filter] Kept: ${stats.keptRules} (${keptPercent}%)`);
    console.error(`[CSS Filter] Removed: ${stats.removedRules}`);
    console.error(`[CSS Filter] At-rules: ${stats.atRules} (${stats.mediaQueries} media queries)`);
  }

  return stats;
}
331
+
/**
 * Main filtering function
 *
 * Reads the HTML and raw CSS, removes CSS rules whose selectors match
 * nothing in the HTML, sanitizes the result and writes it to `outputPath`.
 *
 * @param {string} htmlPath - Path to HTML file
 * @param {string} cssPath - Path to raw CSS file
 * @param {string} outputPath - Path for filtered CSS output
 * @param {boolean} verbose - Enable verbose logging
 * @param {string} allowedDir - Base directory for path validation (optional)
 * @returns {Promise<Object>} Result object
 * @throws {Error} If a path is outside `allowedDir`, a file cannot be read
 *   or written, the CSS exceeds MAX_CSS_INPUT_SIZE, or the CSS cannot be parsed
 */
async function filterCssFile(htmlPath, cssPath, outputPath, verbose = false, allowedDir = null) {
  const startTime = Date.now();

  // Validate paths if allowedDir specified (security: prevent path traversal)
  const resolvePath = (target) =>
    (allowedDir ? validatePath(target, allowedDir) : path.resolve(target));
  const resolvedHtml = resolvePath(htmlPath);
  const resolvedCss = resolvePath(cssPath);
  const resolvedOutput = resolvePath(outputPath);

  // Read input files with detailed error messages
  let html, css;
  try {
    [html, css] = await Promise.all([
      fs.readFile(resolvedHtml, 'utf-8'),
      fs.readFile(resolvedCss, 'utf-8')
    ]);
  } catch (readError) {
    const failedFile = readError.path || 'unknown';
    throw new Error(`Failed to read file "${failedFile}": ${readError.message}`, { cause: readError });
  }

  const inputSize = Buffer.byteLength(css, 'utf-8');

  // Size limit check with detailed message
  if (inputSize > MAX_CSS_INPUT_SIZE) {
    throw new Error(
      `CSS file "${resolvedCss}" (${(inputSize / 1024 / 1024).toFixed(1)}MB) ` +
      `exceeds ${MAX_CSS_INPUT_SIZE / 1024 / 1024}MB limit. ` +
      `Consider splitting the CSS file or increasing MAX_CSS_INPUT_SIZE.`
    );
  }

  if (verbose) {
    console.error(`[CSS Filter] Input CSS size: ${(inputSize / 1024).toFixed(1)}KB`);
  }

  // Analyze HTML
  const htmlAnalysis = analyzeHtml(html);
  if (verbose) {
    console.error(`[CSS Filter] HTML Analysis:`);
    console.error(` Tags: ${htmlAnalysis.tags.size}`);
    console.error(` IDs: ${htmlAnalysis.ids.size}`);
    console.error(` Classes: ${htmlAnalysis.classes.size}`);
    console.error(` Attributes: ${htmlAnalysis.attributes.size}`);
  }

  // Parse CSS with css-tree
  let ast;
  try {
    ast = csstree.parse(css, {
      parseRulePrelude: true,
      parseValue: false // Skip value parsing for speed
    });
  } catch (parseError) {
    if (verbose) {
      console.error(`[CSS Filter] Parse error: ${parseError.message}`);
      console.error(`[CSS Filter] Attempting lenient parse...`);
    }
    // Try lenient parse on error: preludes stay unparsed, so the filter
    // step keeps every rule.
    try {
      ast = csstree.parse(css, {
        parseRulePrelude: false,
        parseValue: false
      });
    } catch (lenientError) {
      throw new Error(`Failed to parse CSS: ${lenientError.message}`, { cause: lenientError });
    }
  }

  // Filter CSS
  const stats = filterCss(ast, htmlAnalysis, verbose);

  // Generate output CSS and sanitize for XSS vectors
  const filteredCss = sanitizeCss(csstree.generate(ast));
  const outputSize = Buffer.byteLength(filteredCss, 'utf-8');

  // Write output with detailed error message
  try {
    await fs.writeFile(resolvedOutput, filteredCss, 'utf-8');
  } catch (writeError) {
    throw new Error(`Failed to write output "${resolvedOutput}": ${writeError.message}`, { cause: writeError });
  }

  const duration = Date.now() - startTime;
  // An empty input file would otherwise divide by zero and report "NaN%".
  const reductionPercent = inputSize > 0
    ? Math.round((1 - outputSize / inputSize) * 100)
    : 0;

  if (verbose) {
    console.error(`[CSS Filter] Output CSS size: ${(outputSize / 1024).toFixed(1)}KB`);
    console.error(`[CSS Filter] Reduction: ${reductionPercent}%`);
    console.error(`[CSS Filter] Duration: ${duration}ms`);
  }

  return {
    success: true,
    input: {
      html: resolvedHtml,
      css: resolvedCss,
      cssSize: inputSize
    },
    output: {
      path: resolvedOutput,
      size: outputSize
    },
    htmlAnalysis: {
      tags: htmlAnalysis.tags.size,
      ids: htmlAnalysis.ids.size,
      classes: htmlAnalysis.classes.size
    },
    stats: {
      ...stats,
      reduction: `${reductionPercent}%`,
      durationMs: duration
    }
  };
}
456
+
/**
 * CLI entry point
 *
 * Parses --html / --css / --output / --verbose, runs the filter and prints
 * the JSON result to stdout (errors go to stderr as JSON).
 *
 * The exit status is communicated through `process.exitCode` rather than
 * `process.exit()`: exiting immediately can cut off output that has not been
 * flushed yet when stdout/stderr is a pipe.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));

  // Each required option must carry a value; a bare flag (parsed as `true`)
  // or an empty string is a usage error.
  const isMissing = (value) => typeof value !== 'string' || value === '';
  if (isMissing(args.html) || isMissing(args.css) || isMissing(args.output)) {
    console.error('Usage: node filter-css.js --html source.html --css source-raw.css --output source.css [--verbose]');
    process.exitCode = 1;
    return;
  }

  try {
    const result = await filterCssFile(
      args.html,
      args.css,
      args.output,
      args.verbose === 'true' || args.verbose === true
    );

    // Output JSON to stdout
    console.log(JSON.stringify(result, null, 2));
    process.exitCode = 0;
  } catch (error) {
    console.error(JSON.stringify({
      success: false,
      error: error.message
    }, null, 2));
    process.exitCode = 1;
  }
}
487
+
488
+ // Export for module use
489
+ export { filterCssFile, analyzeHtml, validatePath, sanitizeCss };
490
+
491
+ // Run if called directly (not imported as module)
492
+ const isMainModule = process.argv[1] && (
493
+ process.argv[1].endsWith('filter-css.js') ||
494
+ process.argv[1].includes('filter-css')
495
+ );
496
+
497
+ if (isMainModule) {
498
+ main();
499
+ }
@@ -0,0 +1,102 @@
1
+ /**
2
+ * HTML Extractor
3
+ *
4
+ * Extract and clean HTML from page, removing scripts,
5
+ * event handlers, and framework-specific attributes.
6
+ */
7
+
// Size limits
/** Maximum HTML size in bytes (10MB). */
export const MAX_HTML_SIZE = 10 * 1024 * 1024;
/** DOM element count above which a "large DOM" warning is issued. */
export const MAX_DOM_ELEMENTS = 50000;

/**
 * Attribute-name patterns of JS frameworks; attributes whose name matches
 * any of these are stripped from the extracted HTML.
 */
export const JS_FRAMEWORK_PATTERNS = [
  /^data-react/i,
  /^data-vue/i,
  /^data-ng/i,
  /^ng-/i,
  /^data-svelte/i,
  /^x-/i,
  /^hx-/i,
  /^v-/i,
  /^data-alpine/i,
  /^wire:/i,
  /^@/
];
18
+
/**
 * Extract and clean HTML from page
 *
 * Runs inside the page context on a clone of the document element, so the
 * live page is not modified. Removes scripts, script-like stylesheet
 * references, event-handler and framework attributes, hidden elements,
 * empty <style> tags and comments.
 *
 * @param {Page} page - Puppeteer page
 * @param {Array} frameworkPatterns - Patterns to remove
 * @returns {Promise<{html: string, warnings: string[], elementCount: number}>}
 */
export async function extractCleanHtml(page, frameworkPatterns = JS_FRAMEWORK_PATTERNS) {
  // RegExp objects cannot be passed into the page context as-is; send their
  // source/flags and rebuild them there. The element limit is passed the
  // same way because module constants are not visible inside evaluate().
  const serializedPatterns = frameworkPatterns.map(({ source, flags }) => ({ source, flags }));
  const limits = { maxDomElements: MAX_DOM_ELEMENTS };

  return await page.evaluate((patterns, { maxDomElements }) => {
    const warnings = [];

    // URL schemes are case-insensitive and may be preceded by whitespace.
    const hasScheme = (url, ...schemes) => {
      const normalized = url.trim().toLowerCase();
      return schemes.some(scheme => normalized.startsWith(scheme));
    };

    // Check DOM size
    const elementCount = document.querySelectorAll('*').length;
    if (elementCount > maxDomElements) {
      warnings.push(`Large DOM: ${elementCount} elements`);
    }

    // Clone document to avoid modifying live page
    const doc = document.documentElement.cloneNode(true);

    // Remove scripts and noscript
    doc.querySelectorAll('script, noscript').forEach(el => el.remove());
    doc.querySelectorAll('svg script').forEach(el => el.remove());
    doc.querySelectorAll('svg a[href]').forEach(anchor => {
      if (hasScheme(anchor.getAttribute('href') || '', 'javascript:')) {
        anchor.remove();
      }
    });

    // Sanitize CSS links
    doc.querySelectorAll('link[rel="stylesheet"]').forEach(link => {
      if (hasScheme(link.getAttribute('href') || '', 'javascript:', 'data:')) {
        link.remove();
      }
    });

    // Sanitize inline styles
    doc.querySelectorAll('style').forEach(style => {
      const content = style.textContent || '';
      if (content.match(/@import\s+url\s*\(\s*['"]?(javascript|data):/i)) {
        style.remove();
      }
    });

    // Convert patterns to regex
    const patternRegexes = patterns.map(p => new RegExp(p.source, p.flags));

    // Remove event handlers and framework attributes. The attribute list is
    // copied first because it is live and shrinks during removal.
    doc.querySelectorAll('*').forEach(el => {
      [...el.attributes].forEach(attr => {
        const isEventHandler = attr.name.toLowerCase().startsWith('on');
        if (isEventHandler || patternRegexes.some(p => p.test(attr.name))) {
          el.removeAttribute(attr.name);
        }
      });
    });

    // Remove hidden elements
    doc.querySelectorAll('[hidden], [style*="display: none"], [style*="display:none"]')
      .forEach(el => el.remove());

    // Remove empty style tags
    doc.querySelectorAll('style:empty').forEach(el => el.remove());

    // Remove HTML comments (nodeType 8), recursing into elements (nodeType 1)
    const removeComments = (node) => {
      [...node.childNodes].forEach(child => {
        if (child.nodeType === 8) {
          child.remove();
        } else if (child.nodeType === 1) {
          removeComments(child);
        }
      });
    };
    removeComments(doc);

    // Build clean HTML. The lang value is escaped because it is written
    // into a double-quoted attribute.
    const lang = (document.documentElement.lang || 'en')
      .replace(/&/g, '&amp;')
      .replace(/"/g, '&quot;');
    const html = `<!DOCTYPE html>\n<html lang="${lang}">\n${doc.innerHTML}\n</html>`;

    return { html, warnings, elementCount };
  }, serializedPatterns, limits);
}