design-clone 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +14 -0
- package/LICENSE +21 -0
- package/README.md +166 -0
- package/SKILL.md +239 -0
- package/bin/cli.js +45 -0
- package/bin/commands/help.js +29 -0
- package/bin/commands/init.js +126 -0
- package/bin/commands/verify.js +99 -0
- package/bin/utils/copy.js +65 -0
- package/bin/utils/validate.js +122 -0
- package/docs/basic-clone.md +63 -0
- package/docs/cli-reference.md +94 -0
- package/docs/design-clone-architecture.md +247 -0
- package/docs/pixel-perfect.md +86 -0
- package/docs/troubleshooting.md +97 -0
- package/package.json +57 -0
- package/requirements.txt +5 -0
- package/src/ai/analyze-structure.py +305 -0
- package/src/ai/extract-design-tokens.py +439 -0
- package/src/ai/prompts/__init__.py +2 -0
- package/src/ai/prompts/design_tokens.py +183 -0
- package/src/ai/prompts/structure_analysis.py +273 -0
- package/src/core/cookie-handler.js +76 -0
- package/src/core/css-extractor.js +107 -0
- package/src/core/dimension-extractor.js +366 -0
- package/src/core/dimension-output.js +208 -0
- package/src/core/extract-assets.js +468 -0
- package/src/core/filter-css.js +499 -0
- package/src/core/html-extractor.js +102 -0
- package/src/core/lazy-loader.js +188 -0
- package/src/core/page-readiness.js +161 -0
- package/src/core/screenshot.js +380 -0
- package/src/post-process/enhance-assets.js +157 -0
- package/src/post-process/fetch-images.js +398 -0
- package/src/post-process/inject-icons.js +311 -0
- package/src/utils/__init__.py +16 -0
- package/src/utils/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/utils/__pycache__/env.cpython-313.pyc +0 -0
- package/src/utils/browser.js +103 -0
- package/src/utils/env.js +153 -0
- package/src/utils/env.py +134 -0
- package/src/utils/helpers.js +71 -0
- package/src/utils/puppeteer.js +281 -0
- package/src/verification/verify-layout.js +424 -0
- package/src/verification/verify-menu.js +422 -0
- package/templates/base.css +705 -0
- package/templates/base.html +293 -0
|
@@ -0,0 +1,499 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Filter CSS to remove unused selectors
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* node filter-css.js --html source.html --css source-raw.css --output source.css
|
|
7
|
+
*
|
|
8
|
+
* Options:
|
|
9
|
+
* --html Path to cleaned HTML file (required)
|
|
10
|
+
* --css Path to raw CSS file (required)
|
|
11
|
+
* --output Path for filtered CSS output (required)
|
|
12
|
+
* --verbose Enable verbose logging
|
|
13
|
+
*
|
|
14
|
+
* Uses css-tree for AST parsing and selector analysis.
|
|
15
|
+
*
|
|
16
|
+
* Memory: Max 10MB CSS input. Large files may cause high memory usage during AST parsing.
|
|
17
|
+
* Reduction: Typical 20-30% reduction. Complex selectors (combinators, nth-child) kept conservatively.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import fs from 'fs/promises';
|
|
21
|
+
import path from 'path';
|
|
22
|
+
|
|
23
|
+
// Dependency check for css-tree.
// css-tree is loaded dynamically so that a missing or broken install produces a
// machine-readable JSON error on stderr instead of an unhandled module-resolution crash.
let csstree;
try {
  csstree = await import('css-tree');
} catch (importError) {
  console.error(JSON.stringify({
    success: false,
    error: 'css-tree not installed',
    hint: 'Run: npm install css-tree',
    // Surface the underlying failure: the import can also fail for reasons other
    // than the package being absent (e.g. a corrupted install).
    details: importError instanceof Error ? importError.message : String(importError)
  }, null, 2));
  process.exit(1);
}
|
|
35
|
+
|
|
36
|
+
// Constants - Memory limit for CSS input (prevents OOM on large files)
const MAX_CSS_INPUT_SIZE = 10 * 1024 * 1024; // 10MB max input

// Rules that should always be kept (critical for layout)
const ALWAYS_KEEP_PATTERNS = [
  /^html$/i,
  /^body$/i,
  /^\*$/,
  /^:root$/i
];

// At-rules that should always be kept
const KEEP_AT_RULES = ['font-face', 'keyframes', 'import', 'charset', 'namespace'];

// CSS injection patterns to sanitize (XSS vectors).
// All patterns carry the `g` flag because they are used with String.prototype.replace
// to strip every occurrence.
const CSS_INJECTION_PATTERNS = [
  /expression\s*\(/gi,                      // IE expression()
  /-moz-binding\s*:/gi,                     // Firefox XBL binding
  /url\s*\(\s*["']?javascript:/gi,          // javascript: URLs
  /url\s*\(\s*["']?data:text\/html/gi,      // data: HTML URLs
  // IE behavior (and its -ms- alias). The lookbehind keeps the pattern from
  // matching the tail of unrelated properties such as `scroll-behavior` or
  // `overscroll-behavior`, which must not be sanitized away.
  /(?<![\w-])(?:-ms-)?behavior\s*:/gi,
  /@import\s+["']?javascript:/gi            // @import javascript:
];
|
|
59
|
+
|
|
60
|
+
/**
 * Validate file path is within allowed directory (prevents path traversal)
 *
 * The check is purely lexical: both paths are normalised with `path.resolve`
 * and compared via `path.relative`. Symbolic links are NOT resolved, so a
 * symlink inside the allowed directory that points elsewhere is not detected.
 *
 * @param {string} filePath - Path to validate
 * @param {string} allowedDir - Directory paths must be within (optional, defaults to cwd)
 * @returns {string} Resolved absolute path
 * @throws {Error} If path is outside allowed directory
 */
function validatePath(filePath, allowedDir = process.cwd()) {
  const resolved = path.resolve(filePath);
  const allowed = path.resolve(allowedDir);

  // A relative path that climbs ("..") or is absolute (different drive on
  // Windows) means `resolved` is not contained in `allowed`. Unlike a
  // `startsWith(allowed + path.sep)` prefix test, this also works when the
  // allowed directory is a filesystem root, which already ends in a separator.
  const relative = path.relative(allowed, resolved);
  const escapesAllowedDir =
    relative === '..' ||
    relative.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relative);

  if (escapesAllowedDir) {
    throw new Error(`Path "${filePath}" is outside allowed directory "${allowedDir}"`);
  }

  return resolved;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Strip known CSS-based script-injection constructs from a stylesheet string.
 *
 * Every pattern in CSS_INJECTION_PATTERNS is applied in order, and each match
 * is replaced with a placeholder comment.
 *
 * @param {string} css - CSS string to sanitize
 * @returns {string} Sanitized CSS
 */
function sanitizeCss(css) {
  return CSS_INJECTION_PATTERNS.reduce(
    (cleaned, injectionPattern) => cleaned.replace(injectionPattern, '/* [sanitized] */'),
    css
  );
}
|
|
91
|
+
|
|
92
|
+
/**
 * Minimal `--key value` / `--flag` command-line parser.
 *
 * A token starting with `--` becomes a key. If the following token exists, is
 * non-empty and does not itself start with `--`, it is consumed as the key's
 * value; otherwise the key is set to `true`. Tokens that are not keys and were
 * not consumed as values are ignored.
 *
 * @param {string[]} args - Raw argument tokens
 * @returns {Object} Map of option name to string value or `true`
 */
function parseArgs(args) {
  const parsed = {};
  let cursor = 0;

  while (cursor < args.length) {
    const token = args[cursor];
    cursor += 1;

    if (!token.startsWith('--')) {
      continue;
    }

    const optionName = token.slice(2);
    const candidate = args[cursor];
    const hasValue = Boolean(candidate) && !candidate.startsWith('--');

    if (hasValue) {
      parsed[optionName] = candidate;
      cursor += 1; // the value token has been consumed
    } else {
      parsed[optionName] = true;
    }
  }

  return parsed;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Decode the character references that can appear in a serialized HTML
 * attribute value (the five predefined named entities plus numeric references).
 * Unknown or invalid references are left untouched.
 * @param {string} value - Raw attribute value as it appears in the markup
 * @returns {string} Decoded value
 */
function decodeHtmlAttributeValue(value) {
  const named = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };
  return value.replace(/&(?:#x([0-9a-f]+)|#(\d+)|(amp|lt|gt|quot|apos));/gi, (whole, hex, dec, name) => {
    if (name !== undefined) {
      return named[name.toLowerCase()];
    }
    const codePoint = hex !== undefined ? Number.parseInt(hex, 16) : Number.parseInt(dec, 10);
    // String.fromCodePoint throws on out-of-range input; keep the text as-is instead.
    return codePoint > 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
  });
}

/**
 * Parse HTML and build sets of all possible selector matches
 * Uses regex for speed (no DOM parser needed)
 *
 * The extraction is deliberately generous: anything that looks like a tag,
 * id, class or data attribute is recorded, including matches inside text
 * content. The result is only used to decide which CSS to keep, so over-collecting
 * merely keeps a few extra rules.
 *
 * @param {string} html - HTML markup to scan
 * @returns {{ tags: Set, ids: Set, classes: Set, attributes: Set }}
 */
function analyzeHtml(html) {
  const tags = new Set();
  const ids = new Set();
  const classes = new Set();
  const attributes = new Set();

  // Attribute value in any of the three HTML forms: "double", 'single', unquoted.
  // Each quoted form only stops at its own quote character, so a value such as
  // class="it's" is captured whole.
  const valueOf = (match) => decodeHtmlAttributeValue(match[1] ?? match[2] ?? match[3] ?? '');

  // Extract tag names: <tagname or <tagname>. Hyphens are allowed so that
  // custom elements (<my-card>) are recorded under their full name.
  for (const match of html.matchAll(/<([a-z][a-z0-9-]*)/gi)) {
    tags.add(match[1].toLowerCase());
  }

  // Extract IDs: id="value", id='value' or id=value.
  // `\b` also matches after a hyphen (e.g. data-id="x"); that only adds extra
  // candidates, which is harmless for filtering.
  for (const match of html.matchAll(/\bid\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))/gi)) {
    const id = valueOf(match);
    if (id) ids.add(id);
  }

  // Extract classes: class="value1 value2", class='value1 value2' or class=value
  for (const match of html.matchAll(/\bclass\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))/gi)) {
    for (const className of valueOf(match).split(/\s+/)) {
      if (className) classes.add(className);
    }
  }

  // Extract data attributes: data-foo="bar"
  for (const match of html.matchAll(/\s(data-[a-z0-9-]+)/gi)) {
    attributes.add(match[1].toLowerCase());
  }

  // Add common attributes that are often used in selectors
  const commonAttrs = ['href', 'src', 'type', 'name', 'value', 'disabled', 'checked',
    'selected', 'readonly', 'required', 'placeholder', 'role',
    'aria-hidden', 'aria-label', 'aria-expanded', 'target', 'rel'];
  for (const attr of commonAttrs) {
    // Cheap substring probe covering `attr=`, boolean `attr ` and trailing `attr>`.
    if (html.includes(`${attr}=`) || html.includes(`${attr} `) || html.includes(`${attr}>`)) {
      attributes.add(attr);
    }
  }

  return { tags, ids, classes, attributes };
}
|
|
163
|
+
|
|
164
|
+
/**
 * Decode CSS escape sequences in an identifier (e.g. `md\:flex` -> `md:flex`,
 * `\31 0` -> `10`) so it can be compared with names taken from HTML.
 * @param {string} name - Identifier as written in the stylesheet
 * @returns {string} Identifier with escapes resolved
 */
function decodeCssIdentifier(name) {
  return name.replace(/\\(?:([0-9a-f]{1,6})(?:\r\n|[ \t\n\r\f])?|([\s\S]))/gi, (whole, hex, literal) => {
    if (hex === undefined) {
      return literal;
    }
    const codePoint = Number.parseInt(hex, 16);
    const invalid = codePoint === 0 || codePoint > 0x10ffff || (codePoint >= 0xd800 && codePoint <= 0xdfff);
    return invalid ? '\uFFFD' : String.fromCodePoint(codePoint);
  });
}

/**
 * Check if a single CSS selector matches any element in the HTML
 *
 * This is a conservative, set-based approximation: every tag, id and class
 * named directly in the selector must exist somewhere in the HTML (not
 * necessarily on the same element). Attribute selectors, pseudo-classes and
 * pseudo-elements never cause a mismatch, and their arguments (`:not(...)`,
 * `:is(...)`, `::slotted(...)`) are not inspected, because a name inside
 * `:not()` is expected to be absent and the alternatives inside `:is()` do
 * not all have to be present.
 *
 * @param {Object} selectorAst - css-tree selector AST node
 * @param {Object} htmlAnalysis - Result from analyzeHtml
 * @returns {boolean}
 */
function selectorMatches(selectorAst, htmlAnalysis) {
  const { tags, ids, classes } = htmlAnalysis;
  let matches = true;
  let hasSpecificSelector = false;
  // > 0 while the walker is inside the argument list of a pseudo selector.
  let pseudoDepth = 0;

  const isPseudo = (node) =>
    node.type === 'PseudoClassSelector' || node.type === 'PseudoElementSelector';

  // Stylesheet identifiers may be escaped; accept either the raw or the decoded spelling.
  const known = (set, name) => set.has(name) || set.has(decodeCssIdentifier(name));

  csstree.walk(selectorAst, {
    enter(node) {
      if (isPseudo(node)) {
        // Pseudo selectors are state-based / generated content: always keep.
        pseudoDepth++;
        return;
      }
      if (pseudoDepth > 0) {
        return;
      }

      switch (node.type) {
        case 'TypeSelector':
          // Tag selector: div, span, header, etc.
          hasSpecificSelector = true;
          if (node.name !== '*' && !tags.has(node.name.toLowerCase())) {
            matches = false;
          }
          break;

        case 'IdSelector':
          // ID selector: #main, #header
          hasSpecificSelector = true;
          if (!known(ids, node.name)) {
            matches = false;
          }
          break;

        case 'ClassSelector':
          // Class selector: .container, .btn
          hasSpecificSelector = true;
          if (!known(classes, node.name)) {
            matches = false;
          }
          break;

        case 'AttributeSelector':
          // Attribute selector: [type="text"], [data-foo]
          // Be lenient with attribute selectors - hard to check accurately
          hasSpecificSelector = true;
          break;
      }
    },
    leave(node) {
      if (isPseudo(node)) {
        pseudoDepth--;
      }
    }
  });

  // If no specific selectors found, keep the rule
  if (!hasSpecificSelector) {
    return true;
  }

  return matches;
}
|
|
228
|
+
|
|
229
|
+
/**
 * Report whether at least one selector of a selector list matches the HTML.
 *
 * Every `Selector` node reachable from the list is evaluated with
 * `selectorMatches`; the list matches as soon as any of them does.
 *
 * @param {Object} selectorList - css-tree SelectorList AST node
 * @param {Object} htmlAnalysis - Result from analyzeHtml
 * @returns {boolean}
 */
function selectorListMatches(selectorList, htmlAnalysis) {
  const verdicts = [];

  csstree.walk(selectorList, {
    visit: 'Selector',
    enter: (selectorNode) => {
      verdicts.push(Boolean(selectorMatches(selectorNode, htmlAnalysis)));
    }
  });

  return verdicts.includes(true);
}
|
|
249
|
+
|
|
250
|
+
/**
 * Tell whether a rule's selector text is on the always-keep list.
 *
 * The text is trimmed and tested against each entry of ALWAYS_KEEP_PATTERNS.
 *
 * @param {string} selectorText - Selector text of a rule
 * @returns {boolean} true when any always-keep pattern matches
 */
function shouldAlwaysKeep(selectorText) {
  const candidate = selectorText.trim();

  for (const keepPattern of ALWAYS_KEEP_PATTERNS) {
    if (keepPattern.test(candidate)) {
      return true;
    }
  }

  return false;
}
|
|
256
|
+
|
|
257
|
+
/**
 * Filter CSS rules based on HTML analysis
 *
 * Walks every `Rule` in the AST and removes (in place) those whose selector
 * list matches nothing in the HTML. Rules are kept unconditionally when:
 *  - they sit inside an at-rule listed in KEEP_AT_RULES (e.g. the `from` /
 *    `to` / percentage steps of `@keyframes`, whose "selectors" are not
 *    element selectors at all),
 *  - their selector text matches ALWAYS_KEEP_PATTERNS, or
 *  - their prelude was not parsed into a SelectorList.
 *
 * @param {Object} cssAst - css-tree AST (mutated)
 * @param {Object} htmlAnalysis - Result from analyzeHtml
 * @param {boolean} verbose - Enable verbose logging
 * @returns {Object} stats - counts of total/kept/removed rules, at-rules and media queries
 */
function filterCss(cssAst, htmlAnalysis, verbose) {
  const stats = {
    totalRules: 0,
    keptRules: 0,
    removedRules: 0,
    atRules: 0,
    mediaQueries: 0
  };

  // Removal is deferred until the walk is finished so the lists being
  // traversed are not modified mid-iteration.
  const nodesToRemove = [];

  // Walk through all rules
  csstree.walk(cssAst, {
    visit: 'Rule',
    // Must stay a regular function: css-tree invokes handlers with the walk
    // context as `this`, which exposes the enclosing at-rule.
    enter(node, item, list) {
      stats.totalRules++;

      const enclosingAtRule = this?.atrule ?? null;
      if (enclosingAtRule) {
        // Compare without vendor prefix so `-webkit-keyframes` is treated like `keyframes`.
        const atRuleName = String(enclosingAtRule.name).toLowerCase().replace(/^-[a-z]+-/, '');
        if (KEEP_AT_RULES.includes(atRuleName)) {
          stats.keptRules++;
          return;
        }
      }

      // Keep rules without standard selectors
      if (!node.prelude || node.prelude.type !== 'SelectorList') {
        stats.keptRules++;
        return;
      }

      // Get selector text for always-keep check
      const selectorText = csstree.generate(node.prelude);
      if (shouldAlwaysKeep(selectorText)) {
        stats.keptRules++;
        return;
      }

      // Check if selector matches HTML
      if (selectorListMatches(node.prelude, htmlAnalysis)) {
        stats.keptRules++;
      } else {
        nodesToRemove.push({ item, list });
        stats.removedRules++;
      }
    }
  });

  // Remove filtered rules
  for (const { item, list } of nodesToRemove) {
    if (list) {
      list.remove(item);
    }
  }

  // Count at-rules
  csstree.walk(cssAst, {
    visit: 'Atrule',
    enter(node) {
      stats.atRules++;
      if (node.name === 'media') {
        stats.mediaQueries++;
      }
    }
  });

  if (verbose) {
    // Guard against 0/0 when the stylesheet contains no rules at all.
    const keptPercent = stats.totalRules > 0
      ? Math.round(stats.keptRules / stats.totalRules * 100)
      : 0;
    console.error(`[CSS Filter] Total rules: ${stats.totalRules}`);
    console.error(`[CSS Filter] Kept: ${stats.keptRules} (${keptPercent}%)`);
    console.error(`[CSS Filter] Removed: ${stats.removedRules}`);
    console.error(`[CSS Filter] At-rules: ${stats.atRules} (${stats.mediaQueries} media queries)`);
  }

  return stats;
}
|
|
331
|
+
|
|
332
|
+
/**
 * Throw the standard "file too large" error when a CSS input exceeds the limit.
 * @param {string} cssPath - Path shown in the error message
 * @param {number} sizeInBytes - Size to check against MAX_CSS_INPUT_SIZE
 * @throws {Error} If the size exceeds MAX_CSS_INPUT_SIZE
 */
function assertCssSizeWithinLimit(cssPath, sizeInBytes) {
  if (sizeInBytes > MAX_CSS_INPUT_SIZE) {
    throw new Error(
      `CSS file "${cssPath}" (${(sizeInBytes / 1024 / 1024).toFixed(1)}MB) ` +
      `exceeds ${MAX_CSS_INPUT_SIZE / 1024 / 1024}MB limit. ` +
      `Consider splitting the CSS file or increasing MAX_CSS_INPUT_SIZE.`
    );
  }
}

/**
 * Main filtering function
 *
 * Reads the HTML and raw CSS, drops CSS rules whose selectors match nothing in
 * the HTML, sanitizes the generated CSS and writes it to `outputPath`.
 *
 * @param {string} htmlPath - Path to HTML file
 * @param {string} cssPath - Path to raw CSS file
 * @param {string} outputPath - Path for filtered CSS output
 * @param {boolean} verbose - Enable verbose logging (to stderr)
 * @param {string} allowedDir - Base directory for path validation (optional)
 * @returns {Promise<Object>} Result object with `success`, `input`, `output`,
 *   `htmlAnalysis` and `stats` sections
 * @throws {Error} If a path is outside `allowedDir`, a file cannot be read or
 *   written, the CSS exceeds the size limit, or the CSS cannot be parsed
 */
async function filterCssFile(htmlPath, cssPath, outputPath, verbose = false, allowedDir = null) {
  const startTime = Date.now();

  // Validate paths if allowedDir specified (security: prevent path traversal)
  const resolvePath = (candidate) =>
    (allowedDir ? validatePath(candidate, allowedDir) : path.resolve(candidate));
  const resolvedHtml = resolvePath(htmlPath);
  const resolvedCss = resolvePath(cssPath);
  const resolvedOutput = resolvePath(outputPath);

  // Check the on-disk size BEFORE loading the file: the limit exists to avoid
  // exhausting memory, so an oversized file must be rejected without reading it.
  let cssSizeOnDisk;
  try {
    ({ size: cssSizeOnDisk } = await fs.stat(resolvedCss));
  } catch (statError) {
    const failedFile = statError.path || 'unknown';
    throw new Error(`Failed to read file "${failedFile}": ${statError.message}`, { cause: statError });
  }
  assertCssSizeWithinLimit(resolvedCss, cssSizeOnDisk);

  // Read input files with detailed error messages
  let html, css;
  try {
    [html, css] = await Promise.all([
      fs.readFile(resolvedHtml, 'utf-8'),
      fs.readFile(resolvedCss, 'utf-8')
    ]);
  } catch (readError) {
    const failedFile = readError.path || 'unknown';
    throw new Error(`Failed to read file "${failedFile}": ${readError.message}`, { cause: readError });
  }

  const inputSize = Buffer.byteLength(css, 'utf-8');

  // Re-check on the decoded content in case the file changed between stat and read.
  assertCssSizeWithinLimit(resolvedCss, inputSize);

  if (verbose) {
    console.error(`[CSS Filter] Input CSS size: ${(inputSize / 1024).toFixed(1)}KB`);
  }

  // Analyze HTML
  const htmlAnalysis = analyzeHtml(html);
  if (verbose) {
    console.error(`[CSS Filter] HTML Analysis:`);
    console.error(`  Tags: ${htmlAnalysis.tags.size}`);
    console.error(`  IDs: ${htmlAnalysis.ids.size}`);
    console.error(`  Classes: ${htmlAnalysis.classes.size}`);
    console.error(`  Attributes: ${htmlAnalysis.attributes.size}`);
  }

  // Parse CSS with css-tree
  let ast;
  try {
    ast = csstree.parse(css, {
      parseRulePrelude: true,
      parseValue: false // Skip value parsing for speed
    });
  } catch (parseError) {
    if (verbose) {
      console.error(`[CSS Filter] Parse error: ${parseError.message}`);
      console.error(`[CSS Filter] Attempting lenient parse...`);
    }
    // Try lenient parse on error. With unparsed preludes no rule has a
    // SelectorList, so filterCss keeps every rule.
    try {
      ast = csstree.parse(css, {
        parseRulePrelude: false,
        parseValue: false
      });
    } catch (lenientError) {
      throw new Error(`Failed to parse CSS: ${lenientError.message}`, { cause: lenientError });
    }
  }

  // Filter CSS
  const stats = filterCss(ast, htmlAnalysis, verbose);

  // Generate output CSS and sanitize for XSS vectors
  const filteredCss = sanitizeCss(csstree.generate(ast));
  const outputSize = Buffer.byteLength(filteredCss, 'utf-8');

  // Write output with detailed error message
  try {
    await fs.writeFile(resolvedOutput, filteredCss, 'utf-8');
  } catch (writeError) {
    throw new Error(`Failed to write output "${resolvedOutput}": ${writeError.message}`, { cause: writeError });
  }

  const duration = Date.now() - startTime;
  // An empty input would otherwise yield 0/0 -> NaN.
  const reductionPercent = inputSize > 0
    ? Math.round((1 - outputSize / inputSize) * 100)
    : 0;

  if (verbose) {
    console.error(`[CSS Filter] Output CSS size: ${(outputSize / 1024).toFixed(1)}KB`);
    console.error(`[CSS Filter] Reduction: ${reductionPercent}%`);
    console.error(`[CSS Filter] Duration: ${duration}ms`);
  }

  return {
    success: true,
    input: {
      html: resolvedHtml,
      css: resolvedCss,
      cssSize: inputSize
    },
    output: {
      path: resolvedOutput,
      size: outputSize
    },
    htmlAnalysis: {
      tags: htmlAnalysis.tags.size,
      ids: htmlAnalysis.ids.size,
      classes: htmlAnalysis.classes.size
    },
    stats: {
      ...stats,
      reduction: `${reductionPercent}%`,
      durationMs: duration
    }
  };
}
|
|
456
|
+
|
|
457
|
+
/**
 * CLI entry point
 *
 * Parses `--html`, `--css`, `--output` and `--verbose` from the command line,
 * runs the filter and prints the result as JSON on stdout (errors as JSON on
 * stderr). The exit status is reported through `process.exitCode` rather than
 * `process.exit()`, so pending writes to stdout/stderr are flushed before the
 * process ends (relevant when the output is piped).
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));

  // Each required option must carry a value; a bare `--html` is parsed as `true`.
  const hasRequiredPaths = [args.html, args.css, args.output]
    .every((value) => typeof value === 'string' && value.length > 0);

  if (!hasRequiredPaths) {
    console.error('Usage: node filter-css.js --html source.html --css source-raw.css --output source.css [--verbose]');
    process.exitCode = 1;
    return;
  }

  try {
    const result = await filterCssFile(
      args.html,
      args.css,
      args.output,
      args.verbose === 'true' || args.verbose === true
    );

    // Output JSON to stdout
    console.log(JSON.stringify(result, null, 2));
    process.exitCode = 0;
  } catch (error) {
    console.error(JSON.stringify({
      success: false,
      error: error.message
    }, null, 2));
    process.exitCode = 1;
  }
}
|
|
487
|
+
|
|
488
|
+
// Export for module use
|
|
489
|
+
export { filterCssFile, analyzeHtml, validatePath, sanitizeCss };
|
|
490
|
+
|
|
491
|
+
// Run if called directly (not imported as module)
|
|
492
|
+
const isMainModule = process.argv[1] && (
|
|
493
|
+
process.argv[1].endsWith('filter-css.js') ||
|
|
494
|
+
process.argv[1].includes('filter-css')
|
|
495
|
+
);
|
|
496
|
+
|
|
497
|
+
if (isMainModule) {
|
|
498
|
+
main();
|
|
499
|
+
}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML Extractor
|
|
3
|
+
*
|
|
4
|
+
* Extract and clean HTML from page, removing scripts,
|
|
5
|
+
* event handlers, and framework-specific attributes.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Size limits
export const MAX_HTML_SIZE = 10 * 1024 ** 2; // 10MB limit
export const MAX_DOM_ELEMENTS = 50_000; // Warn on large DOMs

// Attribute-name patterns identifying JS framework attributes; attributes
// whose name matches any of these are removed from the extracted HTML.
export const JS_FRAMEWORK_PATTERNS = [
  /^data-react/i,
  /^data-vue/i,
  /^data-ng/i,
  /^ng-/i,
  /^data-svelte/i,
  /^x-/i,
  /^hx-/i,
  /^v-/i,
  /^data-alpine/i,
  /^wire:/i,
  /^@/,
];
|
|
18
|
+
|
|
19
|
+
/**
 * Extract and clean HTML from page
 *
 * Runs inside the page: clones the document element, then removes scripts,
 * unsafe stylesheet links and `<style>` imports, inline event handlers,
 * framework attributes, hidden elements, empty `<style>` tags and comments
 * from the clone. The live page is not modified.
 *
 * @param {Page} page - Puppeteer page
 * @param {Array} frameworkPatterns - RegExp patterns; attributes whose name matches are removed
 * @returns {Promise<{html: string, warnings: string[], elementCount: number}>}
 */
export async function extractCleanHtml(page, frameworkPatterns = JS_FRAMEWORK_PATTERNS) {
  // RegExp objects do not survive serialization into the page, so they are
  // passed as { source, flags } and rebuilt there.
  const serializedPatterns = frameworkPatterns.map(({ source, flags }) => ({ source, flags }));

  return await page.evaluate((patterns, maxDomElements) => {
    const warnings = [];

    // Check DOM size
    const elementCount = document.querySelectorAll('*').length;
    if (elementCount > maxDomElements) {
      warnings.push(`Large DOM: ${elementCount} elements`);
    }

    // True for javascript: / data: URLs. Whitespace and control characters are
    // stripped and case is ignored so that spellings such as " JavaScript:" or
    // "java\tscript:" are still recognised.
    const hasUnsafeScheme = (url) =>
      /^(?:javascript|data):/i.test(url.replace(/[\u0000-\u0020]/g, ''));

    // Clone document to avoid modifying live page
    const doc = document.documentElement.cloneNode(true);

    // Remove scripts and noscript
    doc.querySelectorAll('script, noscript').forEach(el => el.remove());
    doc.querySelectorAll('svg script, svg a[href^="javascript:"]').forEach(el => el.remove());

    // Sanitize CSS links
    doc.querySelectorAll('link[rel="stylesheet"]').forEach(link => {
      if (hasUnsafeScheme(link.getAttribute('href') || '')) {
        link.remove();
      }
    });

    // Sanitize inline styles: drop <style> blocks importing javascript:/data:
    // URLs, in both the `@import url(...)` and the `@import "..."` form.
    const unsafeImport = /@import\s+(?:url\s*\(\s*)?['"]?\s*(?:javascript|data):/i;
    doc.querySelectorAll('style').forEach(style => {
      if (unsafeImport.test(style.textContent || '')) {
        style.remove();
      }
    });

    // Convert patterns to regex. The g/y flags are dropped because they make
    // .test() stateful (lastIndex), which would give inconsistent results when
    // the same pattern is tested against many attribute names.
    const patternRegexes = patterns.map(p => new RegExp(p.source, p.flags.replace(/[gy]/g, '')));

    // Remove event handlers and framework attributes
    doc.querySelectorAll('*').forEach(el => {
      // Copy first: removing attributes mutates the live NamedNodeMap.
      [...el.attributes].forEach(attr => {
        const isEventHandler = attr.name.toLowerCase().startsWith('on');
        if (isEventHandler || patternRegexes.some(p => p.test(attr.name))) {
          el.removeAttribute(attr.name);
        }
      });
    });

    // Remove hidden elements
    doc.querySelectorAll('[hidden], [style*="display: none"], [style*="display:none"]')
      .forEach(el => el.remove());

    // Remove empty style tags
    doc.querySelectorAll('style:empty').forEach(el => el.remove());

    // Remove HTML comments
    const removeComments = (node) => {
      [...node.childNodes].forEach(child => {
        if (child.nodeType === 8) {
          // 8 = Node.COMMENT_NODE
          child.remove();
        } else if (child.nodeType === 1) {
          // 1 = Node.ELEMENT_NODE
          removeComments(child);
        }
      });
    };
    removeComments(doc);

    // Build clean HTML. Only the `lang` attribute of <html> is carried over;
    // it is escaped because it is interpolated into an attribute value.
    const lang = (document.documentElement.lang || 'en')
      .replace(/&/g, '&amp;')
      .replace(/"/g, '&quot;');
    const html = '<!DOCTYPE html>\n<html lang="' + lang + '">\n' +
      doc.innerHTML + '\n</html>';

    return { html, warnings, elementCount };
  }, serializedPatterns, MAX_DOM_ELEMENTS);
}
|