design-clone 1.2.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -12
- package/bin/commands/clone-site.js +75 -10
- package/bin/commands/init.js +33 -1
- package/bin/commands/verify.js +5 -3
- package/bin/utils/validate.js +24 -8
- package/docs/cli-reference.md +200 -2
- package/docs/codebase-summary.md +309 -0
- package/docs/design-clone-architecture.md +259 -42
- package/docs/pixel-perfect.md +35 -4
- package/docs/project-roadmap.md +382 -0
- package/docs/troubleshooting.md +5 -4
- package/package.json +10 -8
- package/src/ai/__pycache__/analyze-structure.cpython-313.pyc +0 -0
- package/src/ai/__pycache__/extract-design-tokens.cpython-313.pyc +0 -0
- package/src/ai/analyze-structure.py +73 -3
- package/src/ai/extract-design-tokens.py +356 -13
- package/src/ai/prompts/__pycache__/design_tokens.cpython-313.pyc +0 -0
- package/src/ai/prompts/__pycache__/structure_analysis.cpython-313.pyc +0 -0
- package/src/ai/prompts/__pycache__/ux_audit.cpython-313.pyc +0 -0
- package/src/ai/prompts/design_tokens.py +133 -0
- package/src/ai/prompts/structure_analysis.py +329 -10
- package/src/ai/prompts/ux_audit.py +198 -0
- package/src/ai/ux-audit.js +596 -0
- package/src/core/app-state-snapshot.js +511 -0
- package/src/core/content-counter.js +342 -0
- package/src/core/cookie-handler.js +1 -1
- package/src/core/css-extractor.js +4 -4
- package/src/core/dimension-extractor.js +93 -21
- package/src/core/dimension-output.js +103 -6
- package/src/core/discover-pages.js +242 -14
- package/src/core/dom-tree-analyzer.js +298 -0
- package/src/core/extract-assets.js +1 -1
- package/src/core/framework-detector.js +538 -0
- package/src/core/html-extractor.js +45 -4
- package/src/core/lazy-loader.js +7 -7
- package/src/core/multi-page-screenshot.js +9 -6
- package/src/core/page-readiness.js +8 -8
- package/src/core/screenshot.js +138 -9
- package/src/core/section-cropper.js +209 -0
- package/src/core/section-detector.js +386 -0
- package/src/core/semantic-enhancer.js +492 -0
- package/src/core/state-capture.js +18 -22
- package/src/core/tests/test-section-cropper.js +177 -0
- package/src/core/tests/test-section-detector.js +55 -0
- package/src/core/video-capture.js +152 -146
- package/src/route-discoverers/angular-discoverer.js +157 -0
- package/src/route-discoverers/astro-discoverer.js +123 -0
- package/src/route-discoverers/base-discoverer.js +242 -0
- package/src/route-discoverers/index.js +106 -0
- package/src/route-discoverers/next-discoverer.js +130 -0
- package/src/route-discoverers/nuxt-discoverer.js +138 -0
- package/src/route-discoverers/react-discoverer.js +139 -0
- package/src/route-discoverers/svelte-discoverer.js +109 -0
- package/src/route-discoverers/universal-discoverer.js +227 -0
- package/src/route-discoverers/vue-discoverer.js +118 -0
- package/src/utils/__init__.py +1 -1
- package/src/utils/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/utils/browser.js +11 -37
- package/src/utils/playwright.js +213 -0
- package/src/verification/generate-audit-report.js +398 -0
- package/src/verification/verify-footer.js +493 -0
- package/src/verification/verify-header.js +486 -0
- package/src/verification/verify-layout.js +2 -2
- package/src/verification/verify-menu.js +4 -20
- package/src/verification/verify-slider.js +533 -0
- package/src/utils/puppeteer.js +0 -281
|
@@ -0,0 +1,538 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Framework Detector Module
|
|
3
|
+
*
|
|
4
|
+
* Detects JavaScript frameworks used on a page by checking:
|
|
5
|
+
* - Global objects (window.__NEXT_DATA__, etc.)
|
|
6
|
+
* - DOM attributes ([data-reactroot], [ng-version], etc.)
|
|
7
|
+
* - Script URL patterns (/_next/, /_nuxt/, etc.)
|
|
8
|
+
*
|
|
9
|
+
* Returns framework info with confidence scoring.
|
|
10
|
+
*
|
|
11
|
+
* Usage:
|
|
12
|
+
* import { detectFramework } from './framework-detector.js';
|
|
13
|
+
* const info = await detectFramework(page);
|
|
14
|
+
* // { framework: 'next', version: '14.0.0', confidence: 'high', ... }
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* @typedef {Object} FrameworkInfo
|
|
19
|
+
* @property {string|null} framework - 'next'|'nuxt'|'vue'|'react'|'angular'|'svelte'|'astro'|null
|
|
20
|
+
* @property {string|null} version - Framework version if detectable
|
|
21
|
+
* @property {'spa'|'ssr'|'ssg'|'unknown'} routingType - Routing/rendering strategy
|
|
22
|
+
* @property {'high'|'medium'|'low'} confidence - Detection confidence
|
|
23
|
+
* @property {string[]} signals - Matched detection signals
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
// Confidence thresholds: minimum summed signal weight required for each tier
const CONFIDENCE_HIGH_THRESHOLD = 5;
const CONFIDENCE_MEDIUM_THRESHOLD = 3;

/**
 * Detection signals for each framework, keyed by framework name.
 *
 * Every entry is a plain object with:
 *   - type:   'global' | 'dom' | 'script' | 'meta'
 *   - path / selector / pattern (and `name` for meta): what to look for
 *   - weight: contribution (1-3) to the framework's total score
 *   - signal: label reported back when the check matches
 *
 * The table is assembled through small constructors so that each row only
 * spells out what is specific to it; the resulting objects are plain data.
 */
const DETECTION_SIGNALS = (() => {
  // Global variable on window; the reported label is the variable name itself.
  const globalVar = (name, weight) => ({ type: 'global', path: [name], weight, signal: name });
  // DOM lookup; `signal` is the label reported for the selector.
  const dom = (selector, weight, signal) => ({ type: 'dom', selector, weight, signal });
  // Substring looked up in <script src>; the label is "script:" + pattern.
  const script = (pattern, weight) => ({ type: 'script', pattern, weight, signal: `script:${pattern}` });
  // <meta name=...> whose content contains `pattern`.
  const meta = (name, pattern, weight, signal) => ({ type: 'meta', name, pattern, weight, signal });

  return {
    next: [
      globalVar('__NEXT_DATA__', 3),
      globalVar('__NEXT_LOADED_PAGES__', 2),
      globalVar('__BUILD_MANIFEST', 2),
      dom('#__next', 2, '#__next'),
      script('/_next/', 1)
    ],
    nuxt: [
      globalVar('__NUXT__', 3),
      globalVar('$nuxt', 2),
      globalVar('__NUXT_PATHS__', 2),
      dom('#__nuxt', 2, '#__nuxt'),
      dom('#__layout', 1, '#__layout'),
      script('/_nuxt/', 1)
    ],
    vue: [
      globalVar('__VUE__', 3),
      globalVar('Vue', 2),
      globalVar('__VUE_DEVTOOLS_GLOBAL_HOOK__', 1),
      dom('[data-v-]', 2, 'data-v-*'),
      dom('[data-server-rendered]', 2, 'data-server-rendered')
    ],
    react: [
      globalVar('__REACT_DEVTOOLS_GLOBAL_HOOK__', 1),
      dom('[data-reactroot]', 3, 'data-reactroot'),
      dom('[data-reactid]', 2, 'data-reactid'),
      dom('#root[data-reactroot], #root > div', 1, '#root')
    ],
    angular: [
      globalVar('ng', 2),
      globalVar('getAllAngularRootElements', 3),
      dom('[ng-version]', 3, 'ng-version'),
      dom('app-root', 2, 'app-root'),
      dom('[_nghost-]', 2, '_nghost-*'),
      dom('[ng-app]', 2, 'ng-app')
    ],
    svelte: [
      globalVar('__svelte__', 2),
      globalVar('__sveltekit', 3),
      dom('[data-sveltekit-preload-data]', 3, 'data-sveltekit-preload-data'),
      dom('[data-sveltekit-reload]', 2, 'data-sveltekit-reload'),
      script('/@svelte/', 1)
    ],
    astro: [
      dom('astro-island', 3, 'astro-island'),
      dom('[data-astro-cid-]', 2, 'data-astro-cid-*'),
      dom('[data-astro-source-file]', 2, 'data-astro-source-file'),
      meta('generator', 'Astro', 3, 'meta:generator:Astro'),
      script('/@astrojs/', 1)
    ]
  };
})();
|
|
86
|
+
|
|
87
|
+
/**
 * Map a summed signal weight onto a confidence tier.
 * @param {number} totalWeight - Sum of the weights of all matched signals
 * @returns {'high'|'medium'|'low'} 'high' at or above CONFIDENCE_HIGH_THRESHOLD,
 *   'medium' at or above CONFIDENCE_MEDIUM_THRESHOLD, otherwise 'low'
 */
function calculateConfidence(totalWeight) {
  return totalWeight >= CONFIDENCE_HIGH_THRESHOLD
    ? 'high'
    : totalWeight >= CONFIDENCE_MEDIUM_THRESHOLD
      ? 'medium'
      : 'low';
}
|
|
97
|
+
|
|
98
|
+
/**
 * Read a nested property by following a list of keys, without eval().
 * @param {Object} obj - Root object to start from
 * @param {string[]} path - Keys to follow, outermost first
 * @returns {*} The value found at the end of the path, or undefined when a
 *   null/undefined value is reached before the path is exhausted
 */
function safeGet(obj, path) {
  let cursor = obj;
  for (const segment of path) {
    const reachable = cursor !== null && cursor !== undefined;
    if (!reachable) {
      return undefined;
    }
    cursor = cursor[segment];
  }
  return cursor;
}
|
|
112
|
+
|
|
113
|
+
/**
 * Tell whether an element carries at least one attribute whose name begins
 * with the given prefix.
 * @param {Element} el - DOM element whose `attributes` collection is inspected
 * @param {string} prefix - Attribute-name prefix to look for
 * @returns {boolean} true when a matching attribute name exists
 */
function hasAttributeWithPrefix(el, prefix) {
  for (const { name } of Array.from(el.attributes)) {
    if (name.startsWith(prefix)) {
      return true;
    }
  }
  return false;
}
|
|
122
|
+
|
|
123
|
+
/**
 * Framework detection routine meant to run in browser context via
 * page.evaluate(). It depends only on its argument and on the `window` /
 * `document` globals; every helper it needs is declared inside its body.
 *
 * For each framework it sums the weights of the checks that match, collects
 * their labels, and makes a best-effort attempt at reading a version string.
 *
 * @param {Object} signals - Map of framework name -> array of signal checks
 *   (same shape as DETECTION_SIGNALS)
 * @returns {Object} Map of framework name -> { weight, signals, version }
 */
function browserDetectionLogic(signals) {
  // Follow `keys` from `root`; gives up with undefined once a link is null/undefined.
  function readPath(root, keys) {
    let node = root;
    for (const key of keys) {
      if (node === undefined || node === null) return undefined;
      node = node[key];
    }
    return node;
  }

  // True when any element in the document has an attribute name starting with `prefix`.
  const anyAttributeStartsWith = (prefix) =>
    Array.from(document.querySelectorAll('*')).some((el) =>
      Array.from(el.attributes).some((attr) => attr.name.startsWith(prefix))
    );

  // Placeholder selectors that stand for "any attribute with this prefix";
  // they are resolved with an attribute scan instead of querySelector.
  const prefixSelectors = [
    ['[data-v-]', 'data-v-'],
    ['[data-astro-cid-]', 'data-astro-cid-'],
    ['[_nghost-]', '_nghost-']
  ];

  // One matcher per check type; an unknown type simply never matches.
  const matchers = new Map([
    ['global', (check) => readPath(window, check.path) !== undefined],
    ['dom', (check) => {
      const special = prefixSelectors.find(([token]) => check.selector.includes(token));
      return special
        ? anyAttributeStartsWith(special[1])
        : !!document.querySelector(check.selector);
    }],
    ['script', (check) =>
      Array.from(document.querySelectorAll('script[src]'))
        .some((s) => s.src.includes(check.pattern))],
    ['meta', (check) => {
      const tag = document.querySelector(`meta[name="${check.name}"]`);
      return tag && tag.content && tag.content.includes(check.pattern);
    }]
  ]);

  const results = {};

  Object.entries(signals).forEach(([framework, checks]) => {
    const matchedSignals = [];
    let totalWeight = 0;
    let version = null;

    for (const check of checks) {
      let matched;
      try {
        const matcher = matchers.get(check.type);
        matched = matcher ? matcher(check) : false;
      } catch (e) {
        // A failing probe (e.g. invalid selector) counts as "not matched".
        matched = false;
      }

      if (!matched) continue;
      totalWeight += check.weight;
      matchedSignals.push(check.signal);
    }

    // Only look for a version when at least one signal matched.
    if (totalWeight > 0) {
      try {
        if (framework === 'next') {
          const nextData = readPath(window, ['__NEXT_DATA__']);
          if (nextData) {
            version = nextData.nextExport ? 'export' : (nextData.buildId || null);
            // A version in the runtime config takes precedence when present.
            if (nextData.runtimeConfig?.version) {
              version = nextData.runtimeConfig.version;
            }
          }
        } else if (framework === 'nuxt') {
          const nuxtConfig = readPath(window, ['__NUXT__', 'config', 'app', 'buildId']);
          if (nuxtConfig) version = nuxtConfig;
        } else if (framework === 'vue') {
          version = readPath(window, ['Vue', 'version']) ||
            readPath(window, ['__VUE__', 'version']) || null;
        } else if (framework === 'react') {
          version = readPath(window, ['React', 'version']) || null;
        } else if (framework === 'angular') {
          const ngVersion = document.querySelector('[ng-version]');
          if (ngVersion) version = ngVersion.getAttribute('ng-version');
        } else if (framework === 'astro') {
          const astroMeta = document.querySelector('meta[name="generator"]');
          if (astroMeta && astroMeta.content.includes('Astro')) {
            const match = astroMeta.content.match(/Astro v?([\d.]+)/);
            if (match) version = match[1];
          }
        }
        // 'svelte': no version lookup is attempted.
      } catch (e) {
        // Ignore version extraction errors
      }
    }

    results[framework] = {
      weight: totalWeight,
      signals: matchedSignals,
      version
    };
  });

  return results;
}
|
|
252
|
+
|
|
253
|
+
/**
 * Guess the rendering/routing strategy of the page for an already-detected
 * framework by probing globals and DOM markers inside the page.
 * @param {import('playwright').Page} page - Playwright page object
 * @param {string} framework - Detected framework name (falsy means none)
 * @returns {Promise<'spa'|'ssr'|'ssg'|'unknown'>} Routing type; 'unknown' when
 *   no framework was given, the name is not recognised, or a probe throws
 */
async function inferRoutingType(page, framework) {
  if (!framework) return 'unknown';

  // Runs inside the page: must not reference anything from this module's scope.
  const classify = (fw) => {
    // Follow `keys` from `root`; gives up with undefined once a link is null/undefined.
    const lookup = (root, keys) => {
      let node = root;
      for (const key of keys) {
        if (node === undefined || node === null) return undefined;
        node = node[key];
      }
      return node;
    };
    const exists = (selector) => Boolean(document.querySelector(selector));

    const strategies = new Map([
      ['next', () => {
        const nextData = lookup(window, ['__NEXT_DATA__']);
        if (nextData) {
          if (nextData.nextExport) return 'ssg';
          if (nextData.isFallback === false) return 'ssr';
          if (exists('[data-nscript]')) return 'ssr';
        }
        return 'ssr';
      }],
      ['nuxt', () => {
        const nuxtData = lookup(window, ['__NUXT__']);
        // Only an explicit serverRendered === false means client-side rendering.
        return nuxtData?.serverRendered === false ? 'spa' : 'ssr';
      }],
      // window.$nuxt means the page is actually Nuxt.
      ['vue', () => (window.$nuxt || exists('[data-server-rendered="true"]') ? 'ssr' : 'spa')],
      ['react', () => {
        if (lookup(window, ['__NEXT_DATA__'])) return 'ssr';
        return window.___gatsby ? 'ssg' : 'spa';
      }],
      ['angular', () => (exists('[ng-server-context]') ? 'ssr' : 'spa')],
      ['svelte', () => (lookup(window, ['__sveltekit']) ? 'ssr' : 'spa')],
      ['astro', () => 'ssg']
    ]);

    try {
      const strategy = strategies.get(fw);
      return strategy ? strategy() : 'unknown';
    } catch (e) {
      return 'unknown';
    }
  };

  return await page.evaluate(classify, framework);
}
|
|
321
|
+
|
|
322
|
+
/**
 * Detect the framework used on the current page.
 *
 * Runs browserDetectionLogic inside the page with DETECTION_SIGNALS, picks the
 * framework with the highest total weight, then derives a confidence tier and
 * a routing type for it.
 *
 * @param {import('playwright').Page} page - Playwright page object
 * @returns {Promise<FrameworkInfo>} Framework detection result; `framework` is
 *   null (with confidence 'low' and no signals) when nothing matched
 */
export async function detectFramework(page) {
  // browserDetectionLogic declares all of its helpers internally, so it can be
  // handed to page.evaluate() as-is instead of keeping a second inline copy.
  const results = await page.evaluate(browserDetectionLogic, DETECTION_SIGNALS);

  // Meta-frameworks come before the base libraries they build on. The strict
  // `>` below means that on equal weight the earlier entry wins, so e.g. a
  // Next.js page is not reported as plain React on a tie.
  const priorityOrder = ['next', 'nuxt', 'astro', 'svelte', 'angular', 'vue', 'react'];

  let bestFramework = null;
  let bestWeight = 0;
  let bestSignals = [];
  let bestVersion = null;

  for (const framework of priorityOrder) {
    const result = results?.[framework];
    // Tolerate a framework that has no entry in the results.
    if (!result) continue;

    if (result.weight > bestWeight) {
      bestWeight = result.weight;
      bestFramework = framework;
      bestSignals = result.signals;
      bestVersion = result.version;
    }
  }

  const confidence = bestWeight > 0 ? calculateConfidence(bestWeight) : 'low';
  const routingType = await inferRoutingType(page, bestFramework);

  return {
    framework: bestFramework,
    version: bestVersion,
    routingType,
    confidence,
    signals: bestSignals
  };
}
|
|
481
|
+
|
|
482
|
+
/**
 * Render a detection result as a single line for CLI output.
 * @param {FrameworkInfo} info - Detection result
 * @returns {string} ' | '-separated summary; the version segment is omitted
 *   when no version is known, and a fixed message is returned when no
 *   framework was detected
 */
export function formatDetectionResult(info) {
  if (!info.framework) {
    return 'No framework detected (static HTML or unknown framework)';
  }

  const segments = [`Framework: ${info.framework}`];
  if (info.version) {
    segments.push(`Version: ${info.version}`);
  }
  segments.push(
    `Routing: ${info.routingType}`,
    `Confidence: ${info.confidence}`,
    `Signals: ${info.signals.join(', ')}`
  );

  return segments.join(' | ');
}
|
|
502
|
+
|
|
503
|
+
// CLI support - check if this is the main module being executed directly
|
|
504
|
+
// Use import.meta.url to compare with process.argv[1]
|
|
505
|
+
import { fileURLToPath } from 'url';
|
|
506
|
+
const __filename = fileURLToPath(import.meta.url);
const isMainModule = process.argv[1] === __filename;

// CLI entry point: `node framework-detector.js <url>`.
// Prints the detection result as JSON on stdout and a one-line summary on
// stderr, then exits with 0 on success or 1 on failure.
if (isMainModule) {
  const { getBrowser, getPage, disconnectBrowser } = await import('../utils/browser.js');

  const url = process.argv[2];
  if (!url) {
    console.error('Usage: node framework-detector.js <url>');
    process.exit(1);
  }

  let exitCode = 0;

  try {
    const browser = await getBrowser({ headless: true });
    const page = await getPage(browser);

    await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });

    // Wait for hydration
    await new Promise(r => setTimeout(r, 2000));

    const result = await detectFramework(page);

    console.log(JSON.stringify(result, null, 2));
    console.error('\n' + formatDetectionResult(result));
  } catch (error) {
    console.error(JSON.stringify({ error: error.message }));
    exitCode = 1;
  } finally {
    // Release the browser on the failure path too; previously this only ran
    // after a successful detection.
    try {
      await disconnectBrowser();
    } catch (cleanupError) {
      // Cleanup is best-effort: report it, but keep the exit code decided above.
      console.error(JSON.stringify({ warning: `Browser disconnect failed: ${cleanupError.message}` }));
    }
  }

  process.exit(exitCode);
}
|
|
@@ -3,9 +3,11 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Extract and clean HTML from page, removing scripts,
|
|
5
5
|
* event handlers, and framework-specific attributes.
|
|
6
|
+
* Optionally enhances with WordPress-compatible semantic structure.
|
|
6
7
|
*/
|
|
7
8
|
|
|
8
9
|
import { LAYOUT_PROPERTIES } from './css-extractor.js';
|
|
10
|
+
import { enhanceSemanticHTMLInPage } from './semantic-enhancer.js';
|
|
9
11
|
|
|
10
12
|
// Size limits
|
|
11
13
|
export const MAX_HTML_SIZE = 10 * 1024 * 1024; // 10MB limit
|
|
@@ -34,12 +36,12 @@ export const CRITICAL_POSITION = ['absolute', 'fixed'];
|
|
|
34
36
|
|
|
35
37
|
/**
|
|
36
38
|
* Extract and clean HTML from page
|
|
37
|
-
* @param {Page} page -
|
|
39
|
+
* @param {Page} page - Playwright page
|
|
38
40
|
* @param {Array} frameworkPatterns - Patterns to remove
|
|
39
41
|
* @returns {Promise<{html: string, warnings: string[], elementCount: number}>}
|
|
40
42
|
*/
|
|
41
43
|
export async function extractCleanHtml(page, frameworkPatterns = JS_FRAMEWORK_PATTERNS) {
|
|
42
|
-
return await page.evaluate((patterns, inlineProps, criticalDisplay, criticalPosition) => {
|
|
44
|
+
return await page.evaluate(({ patterns, inlineProps, criticalDisplay, criticalPosition }) => {
|
|
43
45
|
const warnings = [];
|
|
44
46
|
|
|
45
47
|
// Check DOM size
|
|
@@ -166,6 +168,45 @@ export async function extractCleanHtml(page, frameworkPatterns = JS_FRAMEWORK_PA
|
|
|
166
168
|
doc.innerHTML + '\n</html>';
|
|
167
169
|
|
|
168
170
|
return { html, warnings, elementCount, inlinedCount };
|
|
169
|
-
},
|
|
170
|
-
|
|
171
|
+
}, {
|
|
172
|
+
patterns: frameworkPatterns.map(r => ({ source: r.source, flags: r.flags })),
|
|
173
|
+
inlineProps: INLINE_LAYOUT_PROPS,
|
|
174
|
+
criticalDisplay: CRITICAL_DISPLAY,
|
|
175
|
+
criticalPosition: CRITICAL_POSITION
|
|
176
|
+
});
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
/**
 * Extract clean HTML from the page and, unless disabled, run it through the
 * semantic enhancer.
 * @param {Page} page - Playwright page
 * @param {Object} options - Configuration options
 * @param {boolean} [options.enhanceSemantic=true] - Add WordPress semantic IDs/classes/roles
 * @param {Array} [options.frameworkPatterns] - Custom framework patterns to remove
 *   (defaults to JS_FRAMEWORK_PATTERNS)
 * @returns {Promise<{html: string, warnings: string[], elementCount: number, semanticStats?: Object}>}
 *   The extraction result; `html` is replaced and `semanticStats` added when
 *   enhancement succeeds. If enhancement throws, the un-enhanced result is
 *   returned with an extra entry in `warnings`.
 */
export async function extractAndEnhanceHtml(page, options = {}) {
  const {
    enhanceSemantic: wantsSemantic = true,
    frameworkPatterns: patterns = JS_FRAMEWORK_PATTERNS
  } = options;

  // Step 1: plain extraction/cleanup
  const cleaned = await extractCleanHtml(page, patterns);

  if (!wantsSemantic) {
    return cleaned;
  }

  // Step 2: semantic enhancement; a failure degrades to the cleaned HTML
  try {
    const enhanced = await enhanceSemanticHTMLInPage(page, cleaned.html);
    return {
      ...cleaned,
      html: enhanced.html,
      semanticStats: enhanced.stats
    };
  } catch (err) {
    cleaned.warnings.push(`Semantic enhancement failed: ${err.message}`);
    return cleaned;
  }
}
|
package/src/core/lazy-loader.js
CHANGED
|
@@ -14,7 +14,7 @@ export const IMAGE_LOAD_TIMEOUT = 20000;
|
|
|
14
14
|
* - Sets loading="eager" on all images
|
|
15
15
|
* - Copies data-src to src if exists
|
|
16
16
|
* - Triggers IntersectionObserver by scrolling
|
|
17
|
-
* @param {Page} page -
|
|
17
|
+
* @param {Page} page - Playwright page
|
|
18
18
|
*/
|
|
19
19
|
export async function forceLazyImages(page) {
|
|
20
20
|
return await page.evaluate(async () => {
|
|
@@ -51,7 +51,7 @@ export async function forceLazyImages(page) {
|
|
|
51
51
|
|
|
52
52
|
/**
|
|
53
53
|
* Force all hidden animated elements to be visible
|
|
54
|
-
* @param {Page} page -
|
|
54
|
+
* @param {Page} page - Playwright page
|
|
55
55
|
*/
|
|
56
56
|
export async function forceAnimatedElementsVisible(page) {
|
|
57
57
|
return await page.evaluate(() => {
|
|
@@ -78,12 +78,12 @@ export async function forceAnimatedElementsVisible(page) {
|
|
|
78
78
|
|
|
79
79
|
/**
|
|
80
80
|
* Trigger lazy loading by scrolling through entire page
|
|
81
|
-
* @param {Page} page -
|
|
81
|
+
* @param {Page} page - Playwright page
|
|
82
82
|
* @param {number} maxIterations - Max scroll iterations
|
|
83
83
|
* @param {number} scrollDelay - Pause time between scrolls
|
|
84
84
|
*/
|
|
85
85
|
export async function triggerLazyLoad(page, maxIterations = 20, scrollDelay = 1500) {
|
|
86
|
-
return await page.evaluate(async (maxIter, pauseMs) => {
|
|
86
|
+
return await page.evaluate(async ({ maxIter, pauseMs }) => {
|
|
87
87
|
return new Promise(async (resolve) => {
|
|
88
88
|
const viewportHeight = window.innerHeight;
|
|
89
89
|
const totalHeight = document.body.scrollHeight;
|
|
@@ -128,12 +128,12 @@ export async function triggerLazyLoad(page, maxIterations = 20, scrollDelay = 15
|
|
|
128
128
|
stableAt: iterations
|
|
129
129
|
});
|
|
130
130
|
});
|
|
131
|
-
}, maxIterations, scrollDelay);
|
|
131
|
+
}, { maxIter: maxIterations, pauseMs: scrollDelay });
|
|
132
132
|
}
|
|
133
133
|
|
|
134
134
|
/**
|
|
135
135
|
* Wait for all images to finish loading
|
|
136
|
-
* @param {Page} page -
|
|
136
|
+
* @param {Page} page - Playwright page
|
|
137
137
|
* @param {number} timeout - Max wait time
|
|
138
138
|
*/
|
|
139
139
|
export async function waitForAllImages(page, timeout = IMAGE_LOAD_TIMEOUT) {
|
|
@@ -178,7 +178,7 @@ export async function waitForAllImages(page, timeout = IMAGE_LOAD_TIMEOUT) {
|
|
|
178
178
|
}, timeout);
|
|
179
179
|
|
|
180
180
|
try {
|
|
181
|
-
await page.
|
|
181
|
+
await page.waitForLoadState('networkidle', { timeout: Math.min(timeout, 10000) });
|
|
182
182
|
} catch {
|
|
183
183
|
// Network didn't become idle, continue anyway
|
|
184
184
|
}
|