design-clone 1.2.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -12
- package/bin/commands/clone-site.js +75 -10
- package/bin/commands/init.js +33 -1
- package/bin/commands/verify.js +5 -3
- package/bin/utils/validate.js +24 -8
- package/docs/cli-reference.md +200 -2
- package/docs/codebase-summary.md +309 -0
- package/docs/design-clone-architecture.md +259 -42
- package/docs/pixel-perfect.md +35 -4
- package/docs/project-roadmap.md +382 -0
- package/docs/troubleshooting.md +5 -4
- package/package.json +10 -8
- package/src/ai/__pycache__/analyze-structure.cpython-313.pyc +0 -0
- package/src/ai/__pycache__/extract-design-tokens.cpython-313.pyc +0 -0
- package/src/ai/analyze-structure.py +73 -3
- package/src/ai/extract-design-tokens.py +356 -13
- package/src/ai/prompts/__pycache__/design_tokens.cpython-313.pyc +0 -0
- package/src/ai/prompts/__pycache__/structure_analysis.cpython-313.pyc +0 -0
- package/src/ai/prompts/__pycache__/ux_audit.cpython-313.pyc +0 -0
- package/src/ai/prompts/design_tokens.py +133 -0
- package/src/ai/prompts/structure_analysis.py +329 -10
- package/src/ai/prompts/ux_audit.py +198 -0
- package/src/ai/ux-audit.js +596 -0
- package/src/core/app-state-snapshot.js +511 -0
- package/src/core/content-counter.js +342 -0
- package/src/core/cookie-handler.js +1 -1
- package/src/core/css-extractor.js +4 -4
- package/src/core/dimension-extractor.js +93 -21
- package/src/core/dimension-output.js +103 -6
- package/src/core/discover-pages.js +242 -14
- package/src/core/dom-tree-analyzer.js +298 -0
- package/src/core/extract-assets.js +1 -1
- package/src/core/framework-detector.js +538 -0
- package/src/core/html-extractor.js +45 -4
- package/src/core/lazy-loader.js +7 -7
- package/src/core/multi-page-screenshot.js +9 -6
- package/src/core/page-readiness.js +8 -8
- package/src/core/screenshot.js +138 -9
- package/src/core/section-cropper.js +209 -0
- package/src/core/section-detector.js +386 -0
- package/src/core/semantic-enhancer.js +492 -0
- package/src/core/state-capture.js +18 -22
- package/src/core/tests/test-section-cropper.js +177 -0
- package/src/core/tests/test-section-detector.js +55 -0
- package/src/core/video-capture.js +152 -146
- package/src/route-discoverers/angular-discoverer.js +157 -0
- package/src/route-discoverers/astro-discoverer.js +123 -0
- package/src/route-discoverers/base-discoverer.js +242 -0
- package/src/route-discoverers/index.js +106 -0
- package/src/route-discoverers/next-discoverer.js +130 -0
- package/src/route-discoverers/nuxt-discoverer.js +138 -0
- package/src/route-discoverers/react-discoverer.js +139 -0
- package/src/route-discoverers/svelte-discoverer.js +109 -0
- package/src/route-discoverers/universal-discoverer.js +227 -0
- package/src/route-discoverers/vue-discoverer.js +118 -0
- package/src/utils/__init__.py +1 -1
- package/src/utils/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/utils/browser.js +11 -37
- package/src/utils/playwright.js +213 -0
- package/src/verification/generate-audit-report.js +398 -0
- package/src/verification/verify-footer.js +493 -0
- package/src/verification/verify-header.js +486 -0
- package/src/verification/verify-layout.js +2 -2
- package/src/verification/verify-menu.js +4 -20
- package/src/verification/verify-slider.js +533 -0
- package/src/utils/puppeteer.js +0 -281
|
@@ -89,7 +89,18 @@ export function sanitizeViewportData(data, vpName) {
|
|
|
89
89
|
}
|
|
90
90
|
|
|
91
91
|
/**
|
|
92
|
-
* Build cross-viewport summary for AI consumption
|
|
92
|
+
* Build cross-viewport summary for AI consumption.
|
|
93
|
+
* Includes section-aware typography and container data.
|
|
94
|
+
*
|
|
95
|
+
* @param {Object} viewports - Viewport data keyed by name (desktop, tablet, mobile)
|
|
96
|
+
* @returns {Object} Summary with:
|
|
97
|
+
* - maxContainerWidth: Largest container width across all viewports
|
|
98
|
+
* - commonGap: Average gap from card patterns
|
|
99
|
+
* - breakpoints: Viewport width breakpoints
|
|
100
|
+
* - typography: Flat h1/h2/h3/body sizes by viewport (backward compat)
|
|
101
|
+
* - typographyBySection: Typography grouped by section context (hero h1 != content h1)
|
|
102
|
+
* - cardPatterns: Card group statistics
|
|
103
|
+
* - sections: Section detection summary (found flag + width/containerWidth)
|
|
93
104
|
*/
|
|
94
105
|
export function buildCrossViewportSummary(viewports) {
|
|
95
106
|
const summary = {
|
|
@@ -100,31 +111,83 @@ export function buildCrossViewportSummary(viewports) {
|
|
|
100
111
|
tablet: VIEWPORTS.tablet.width,
|
|
101
112
|
mobile: VIEWPORTS.mobile.width
|
|
102
113
|
},
|
|
114
|
+
// Flat typography for backward compatibility
|
|
103
115
|
typography: { h1: {}, h2: {}, h3: {}, body: {} },
|
|
104
|
-
|
|
116
|
+
// NEW: Typography by section context
|
|
117
|
+
typographyBySection: {
|
|
118
|
+
hero: {},
|
|
119
|
+
content: {},
|
|
120
|
+
header: {},
|
|
121
|
+
footer: {},
|
|
122
|
+
sidebar: {}
|
|
123
|
+
},
|
|
124
|
+
cardPatterns: { totalGroups: 0, avgCardSize: null },
|
|
125
|
+
// NEW: Section summary
|
|
126
|
+
sections: {
|
|
127
|
+
hero: { found: false, containerWidth: null },
|
|
128
|
+
content: { found: false, containerWidth: null },
|
|
129
|
+
header: { found: false, containerWidth: null },
|
|
130
|
+
footer: { found: false, containerWidth: null },
|
|
131
|
+
sidebar: { found: false, width: null }
|
|
132
|
+
}
|
|
105
133
|
};
|
|
106
134
|
|
|
107
135
|
for (const [vpName, vpData] of Object.entries(viewports)) {
|
|
108
136
|
if (!vpData) continue;
|
|
109
137
|
|
|
138
|
+
// Container section mapping
|
|
110
139
|
if (vpData.containers) {
|
|
111
140
|
for (const container of vpData.containers) {
|
|
112
141
|
if (container.width > summary.maxContainerWidth) {
|
|
113
142
|
summary.maxContainerWidth = container.width;
|
|
114
143
|
}
|
|
144
|
+
// Track section widths
|
|
145
|
+
const section = container.section || 'content';
|
|
146
|
+
if (summary.sections[section]) {
|
|
147
|
+
summary.sections[section].found = true;
|
|
148
|
+
// Sidebar uses 'width' field, others use 'containerWidth'
|
|
149
|
+
if (section === 'sidebar') {
|
|
150
|
+
if (!summary.sections[section].width ||
|
|
151
|
+
container.width > summary.sections[section].width) {
|
|
152
|
+
summary.sections[section].width = container.width;
|
|
153
|
+
}
|
|
154
|
+
} else {
|
|
155
|
+
if (!summary.sections[section].containerWidth ||
|
|
156
|
+
container.width > summary.sections[section].containerWidth) {
|
|
157
|
+
summary.sections[section].containerWidth = container.width;
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
}
|
|
115
161
|
}
|
|
116
162
|
}
|
|
117
163
|
|
|
164
|
+
// Typography by section
|
|
118
165
|
if (vpData.typography) {
|
|
119
166
|
for (const typo of vpData.typography) {
|
|
120
167
|
const tag = typo.selector?.toLowerCase();
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
if (tag === '
|
|
168
|
+
const section = typo.section || 'content';
|
|
169
|
+
|
|
170
|
+
// Flat typography (backward compat) - take first found
|
|
171
|
+
if (tag === 'h1' && !summary.typography.h1[vpName]) summary.typography.h1[vpName] = typo.fontSize;
|
|
172
|
+
if (tag === 'h2' && !summary.typography.h2[vpName]) summary.typography.h2[vpName] = typo.fontSize;
|
|
173
|
+
if (tag === 'h3' && !summary.typography.h3[vpName]) summary.typography.h3[vpName] = typo.fontSize;
|
|
174
|
+
if (tag === 'p' && !summary.typography.body[vpName]) summary.typography.body[vpName] = typo.fontSize;
|
|
175
|
+
|
|
176
|
+
// Typography by section
|
|
177
|
+
if (!summary.typographyBySection[section]) {
|
|
178
|
+
summary.typographyBySection[section] = {};
|
|
179
|
+
}
|
|
180
|
+
if (!summary.typographyBySection[section][tag]) {
|
|
181
|
+
summary.typographyBySection[section][tag] = {};
|
|
182
|
+
}
|
|
183
|
+
// Take first found per section/tag/viewport
|
|
184
|
+
if (!summary.typographyBySection[section][tag][vpName]) {
|
|
185
|
+
summary.typographyBySection[section][tag][vpName] = typo.fontSize;
|
|
186
|
+
}
|
|
125
187
|
}
|
|
126
188
|
}
|
|
127
189
|
|
|
190
|
+
// Card patterns (unchanged)
|
|
128
191
|
if (vpData.cards && vpData.cards.length > 0) {
|
|
129
192
|
summary.cardPatterns.totalGroups += vpData.cards.length;
|
|
130
193
|
if (vpName === 'desktop' && vpData.cards[0]?.avgDimensions) {
|
|
@@ -143,6 +206,7 @@ export function buildCrossViewportSummary(viewports) {
|
|
|
143
206
|
|
|
144
207
|
/**
|
|
145
208
|
* Generate AI-friendly summary (compact, <5KB)
|
|
209
|
+
* Includes section-aware typography for accurate reconstruction
|
|
146
210
|
* @param {Object} fullOutput - Full component-dimensions.json
|
|
147
211
|
* @returns {Object} Compact summary for AI prompts
|
|
148
212
|
*/
|
|
@@ -173,6 +237,29 @@ export function generateAISummary(fullOutput) {
|
|
|
173
237
|
};
|
|
174
238
|
}
|
|
175
239
|
|
|
240
|
+
/**
 * Convert typographyBySection to an AI-friendly format with px units.
 *
 * For every section/tag pair the first usable size is picked in viewport
 * priority order (desktop, then tablet, then mobile) and rendered as
 * "<size>px". Tags without a positive size, and sections left without any
 * tag, are omitted from the result.
 *
 * @param {Object|null|undefined} typographyBySection - Map of
 *   section -> tag -> { desktop?, tablet?, mobile? } font sizes
 * @returns {Object} Map of section -> tag -> "<size>px"
 */
function inferTypographyBySection(typographyBySection) {
  const viewportPriority = ['desktop', 'tablet', 'mobile'];
  const result = {};

  for (const [section, tags] of Object.entries(typographyBySection || {})) {
    if (!tags) continue;

    // Build the section locally so empty sections never reach the result
    // (avoids the create-then-delete dance).
    const sectionSizes = {};
    for (const [tag, sizes] of Object.entries(tags)) {
      // A missing size map means "no data" for this tag; skip it rather than throw.
      if (!sizes) continue;

      // Pick the first viewport holding a positive size, so an unusable
      // desktop value does not mask a valid tablet/mobile one.
      const size = viewportPriority
        .map((viewport) => sizes[viewport])
        .find((value) => value > 0);

      if (size !== undefined) {
        sectionSizes[tag] = `${size}px`;
      }
    }

    if (Object.keys(sectionSizes).length > 0) {
      result[section] = sectionSizes;
    }
  }

  return result;
}
|
|
262
|
+
|
|
176
263
|
return {
|
|
177
264
|
_comment: "USE THESE EXACT VALUES - DO NOT ESTIMATE",
|
|
178
265
|
EXACT_DIMENSIONS: {
|
|
@@ -187,6 +274,16 @@ export function generateAISummary(fullOutput) {
|
|
|
187
274
|
h3: (summary.typography.h3.desktop || 24) + "px",
|
|
188
275
|
body: (summary.typography.body.desktop || 16) + "px"
|
|
189
276
|
},
|
|
277
|
+
// NEW: Section-aware typography (hero h1 != content h1)
|
|
278
|
+
TYPOGRAPHY_BY_SECTION: inferTypographyBySection(summary.typographyBySection),
|
|
279
|
+
// NEW: Section info
|
|
280
|
+
SECTIONS: {
|
|
281
|
+
hero: summary.sections?.hero || { found: false },
|
|
282
|
+
content: summary.sections?.content || { found: false },
|
|
283
|
+
header: summary.sections?.header || { found: false },
|
|
284
|
+
footer: summary.sections?.footer || { found: false },
|
|
285
|
+
sidebar: summary.sections?.sidebar || { found: false }
|
|
286
|
+
},
|
|
190
287
|
RESPONSIVE: {
|
|
191
288
|
desktop_breakpoint: summary.breakpoints.desktop + "px",
|
|
192
289
|
tablet_breakpoint: summary.breakpoints.tablet + "px",
|
|
@@ -4,6 +4,11 @@
|
|
|
4
4
|
* Extracts navigation links from a website to discover cloneable pages.
|
|
5
5
|
* Handles SPA hydration, filters external links, and normalizes URLs.
|
|
6
6
|
*
|
|
7
|
+
* Enhanced with SPA/Framework support (v1.3):
|
|
8
|
+
* - Framework detection (Next.js, Nuxt, Vue, React, Angular, Svelte, Astro)
|
|
9
|
+
* - Framework-specific route discovery
|
|
10
|
+
* - App state capture (optional)
|
|
11
|
+
*
|
|
7
12
|
* Usage:
|
|
8
13
|
* import { discoverPages } from './discover-pages.js';
|
|
9
14
|
* const result = await discoverPages('https://example.com', { maxPages: 10 });
|
|
@@ -13,6 +18,11 @@ import { getBrowser, getPage, disconnectBrowser } from '../utils/browser.js';
|
|
|
13
18
|
import { waitForDomStable, waitForPageReady } from './page-readiness.js';
|
|
14
19
|
import { dismissCookieBanner } from './cookie-handler.js';
|
|
15
20
|
|
|
21
|
+
// SPA/Framework support imports
|
|
22
|
+
import { detectFramework, formatDetectionResult } from './framework-detector.js';
|
|
23
|
+
import { discoverRoutes as discoverFrameworkRoutes } from '../route-discoverers/index.js';
|
|
24
|
+
import { captureAppState, formatStateSnapshot } from './app-state-snapshot.js';
|
|
25
|
+
|
|
16
26
|
// Navigation selectors in priority order
|
|
17
27
|
const NAV_SELECTORS = [
|
|
18
28
|
'header nav a',
|
|
@@ -41,14 +51,47 @@ const EXCLUDE_PATTERNS = [
|
|
|
41
51
|
/tiktok\.com/i
|
|
42
52
|
];
|
|
43
53
|
|
|
54
|
+
// Valid framework names for validation
|
|
55
|
+
const VALID_FRAMEWORKS = ['next', 'nuxt', 'vue', 'react', 'angular', 'svelte', 'astro'];
|
|
56
|
+
|
|
44
57
|
// Default options
|
|
45
58
|
const DEFAULT_OPTIONS = {
|
|
46
59
|
maxPages: 10,
|
|
47
60
|
selectors: null, // Use default NAV_SELECTORS if null
|
|
48
61
|
includeSubdomains: false,
|
|
49
|
-
timeout: 30000
|
|
62
|
+
timeout: 30000,
|
|
63
|
+
// SPA/Framework options (v1.3)
|
|
64
|
+
spaMode: true, // Enable SPA detection and route discovery
|
|
65
|
+
framework: null, // Force specific framework (skip detection)
|
|
66
|
+
noSpaDetect: false, // Disable SPA/framework detection entirely
|
|
67
|
+
captureState: false // Capture app state (Redux/Vuex/Pinia/Zustand)
|
|
50
68
|
};
|
|
51
69
|
|
|
70
|
+
/**
 * Emit a warning on stderr, prefixed with the module tag.
 * Nothing is written unless stderr is attached to a TTY.
 *
 * @param {string} message - Warning text to report
 */
function logWarning(message) {
  const interactive = process.stderr.isTTY;
  if (!interactive) return;

  console.error(`[discover-pages] WARN: ${message}`);
}
|
|
79
|
+
|
|
80
|
+
/**
 * Check a user-supplied framework name against VALID_FRAMEWORKS.
 *
 * The value is stringified, lower-cased and trimmed before the lookup.
 * A warning is logged when a non-empty value is not recognised.
 *
 * @param {string|null} framework - Framework name to validate
 * @returns {string|null} The normalized name, or null when the value is
 *   empty or not a known framework
 */
function validateFramework(framework) {
  if (!framework) {
    return null;
  }

  const candidate = String(framework).toLowerCase().trim();
  const isKnown = VALID_FRAMEWORKS.includes(candidate);

  if (!isKnown) {
    logWarning(`Invalid framework "${framework}". Valid options: ${VALID_FRAMEWORKS.join(', ')}`);
    return null;
  }

  return candidate;
}
|
|
94
|
+
|
|
52
95
|
/**
|
|
53
96
|
* Normalize URL for comparison and deduplication
|
|
54
97
|
* @param {string} baseUrl - Base URL for resolving relative paths
|
|
@@ -140,10 +183,102 @@ function shouldExclude(href) {
|
|
|
140
183
|
return EXCLUDE_PATTERNS.some(pattern => pattern.test(href));
|
|
141
184
|
}
|
|
142
185
|
|
|
186
|
+
/**
 * Normalize a path by stripping trailing slashes (the root path stays '/').
 *
 * Every trailing slash is removed, not just the last one, so '/about',
 * '/about/' and '/about//' all normalize to the same value.
 *
 * @param {string} path - Path to normalize
 * @returns {string} Normalized path; '/' when `path` is empty or not a string
 */
function normalizePath(path) {
  if (!path || typeof path !== 'string') return '/';

  // Stop at index 1 so a path made only of slashes collapses to '/'.
  let end = path.length;
  while (end > 1 && path[end - 1] === '/') {
    end -= 1;
  }
  return path.slice(0, end);
}
|
|
195
|
+
|
|
196
|
+
/**
 * Combine framework-discovered routes with link-scraped pages into one list.
 *
 * Framework routes are processed first, so when both sources report the same
 * normalized path the framework entry is the one kept; link-scraped pages
 * only contribute paths that have not been seen yet.
 *
 * @param {Array|null} frameworkRoutes - Routes from framework discoverer
 * @param {Array|null} linkScrapedPages - Pages from link scraping
 * @param {string} baseDomain - Base domain (validated only; a warning is
 *   logged when it is missing or not a string)
 * @param {string} baseUrl - Base URL for resolving paths
 * @returns {Array} Merged, de-duplicated entries shaped as
 *   { path, name, url, source, dynamic }
 *
 * @example
 * const merged = mergeRoutes(
 *   [{ path: '/about', name: 'About' }],
 *   [{ path: '/contact', name: 'Contact' }],
 *   'example.com',
 *   'https://example.com'
 * );
 */
function mergeRoutes(frameworkRoutes, linkScrapedPages, baseDomain, baseUrl) {
  // Input validation: bad values are reported, then treated as empty strings
  const domainIsValid = Boolean(baseDomain) && typeof baseDomain === 'string';
  if (!domainIsValid) {
    logWarning('mergeRoutes: Invalid baseDomain');
  }

  const urlIsValid = Boolean(baseUrl) && typeof baseUrl === 'string';
  if (!urlIsValid) {
    logWarning('mergeRoutes: Invalid baseUrl');
  }
  const resolveBase = urlIsValid ? baseUrl : '';

  const claimedPaths = new Set();
  const combined = [];

  const isRecord = (value) => Boolean(value) && typeof value === 'object';

  // Returns the normalized path on first sight, or null for a duplicate.
  const claimPath = (rawPath) => {
    const key = normalizePath(rawPath || '/');
    if (claimedPaths.has(key)) return null;
    claimedPaths.add(key);
    return key;
  };

  // Framework routes go in first (higher quality, more accurate)
  const routes = Array.isArray(frameworkRoutes) ? frameworkRoutes : [];
  for (const route of routes) {
    if (!isRecord(route)) continue;

    const routePath = claimPath(route.path);
    if (routePath === null) continue;

    // Prefer the URL resolved from the base; fall back to the route's own.
    const resolvedUrl = normalizeUrl(resolveBase, routePath) || route.url || '';

    combined.push({
      path: routePath,
      name: route.name || extractPageName('', routePath),
      url: resolvedUrl,
      source: route.source || 'framework',
      dynamic: Boolean(route.dynamic)
    });
  }

  // Link-scraped pages fill whatever paths are still unclaimed
  const scraped = Array.isArray(linkScrapedPages) ? linkScrapedPages : [];
  for (const page of scraped) {
    if (!isRecord(page)) continue;

    const pagePath = claimPath(page.path);
    if (pagePath === null) continue;

    combined.push({
      path: pagePath,
      name: page.name || extractPageName('', pagePath),
      url: page.url || normalizeUrl(resolveBase, pagePath) || '',
      source: 'link-scrape',
      dynamic: false
    });
  }

  return combined;
}
|
|
270
|
+
|
|
143
271
|
/**
|
|
144
272
|
* Discover pages from a website by extracting navigation links
|
|
273
|
+
* Enhanced with SPA/Framework support (v1.3)
|
|
274
|
+
*
|
|
145
275
|
* @param {string} baseUrl - Starting URL to discover from
|
|
146
276
|
* @param {Object} options - Discovery options
|
|
277
|
+
* @param {number} [options.maxPages=10] - Maximum pages to discover
|
|
278
|
+
* @param {boolean} [options.spaMode=true] - Enable SPA detection
|
|
279
|
+
* @param {string} [options.framework] - Force specific framework
|
|
280
|
+
* @param {boolean} [options.noSpaDetect=false] - Disable SPA detection
|
|
281
|
+
* @param {boolean} [options.captureState=false] - Capture app state
|
|
147
282
|
* @returns {Promise<Object>} Discovery result
|
|
148
283
|
*/
|
|
149
284
|
export async function discoverPages(baseUrl, options = {}) {
|
|
@@ -164,7 +299,7 @@ export async function discoverPages(baseUrl, options = {}) {
|
|
|
164
299
|
|
|
165
300
|
// Navigate to page
|
|
166
301
|
await page.goto(baseUrl, {
|
|
167
|
-
waitUntil:
|
|
302
|
+
waitUntil: 'networkidle',
|
|
168
303
|
timeout: opts.timeout
|
|
169
304
|
});
|
|
170
305
|
|
|
@@ -182,7 +317,62 @@ export async function discoverPages(baseUrl, options = {}) {
|
|
|
182
317
|
// Wait a bit more for any dynamic content
|
|
183
318
|
await new Promise(r => setTimeout(r, 1000));
|
|
184
319
|
|
|
185
|
-
//
|
|
320
|
+
// =========================================
|
|
321
|
+
// SPA/Framework Detection (v1.3)
|
|
322
|
+
// =========================================
|
|
323
|
+
let frameworkInfo = null;
|
|
324
|
+
let frameworkRoutes = [];
|
|
325
|
+
let stateSnapshot = null;
|
|
326
|
+
|
|
327
|
+
if (!opts.noSpaDetect) {
|
|
328
|
+
// Framework detection
|
|
329
|
+
if (opts.framework) {
|
|
330
|
+
// User forced specific framework - validate it
|
|
331
|
+
const validatedFramework = validateFramework(opts.framework);
|
|
332
|
+
if (validatedFramework) {
|
|
333
|
+
frameworkInfo = {
|
|
334
|
+
framework: validatedFramework,
|
|
335
|
+
version: null,
|
|
336
|
+
routingType: 'spa',
|
|
337
|
+
confidence: 'forced',
|
|
338
|
+
signals: ['user-specified']
|
|
339
|
+
};
|
|
340
|
+
}
|
|
341
|
+
} else {
|
|
342
|
+
// Auto-detect framework
|
|
343
|
+
try {
|
|
344
|
+
frameworkInfo = await detectFramework(page);
|
|
345
|
+
} catch (e) {
|
|
346
|
+
logWarning(`Framework detection failed: ${e.message}`);
|
|
347
|
+
frameworkInfo = null;
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
// Framework-specific route discovery
|
|
352
|
+
if (frameworkInfo?.framework && opts.spaMode) {
|
|
353
|
+
try {
|
|
354
|
+
const discoveryResult = await discoverFrameworkRoutes(page, baseUrl, frameworkInfo);
|
|
355
|
+
frameworkRoutes = discoveryResult.routes || [];
|
|
356
|
+
} catch (e) {
|
|
357
|
+
logWarning(`Route discovery failed for ${frameworkInfo.framework}: ${e.message}`);
|
|
358
|
+
frameworkRoutes = [];
|
|
359
|
+
}
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
// Capture app state (optional)
|
|
363
|
+
if (opts.captureState && frameworkInfo) {
|
|
364
|
+
try {
|
|
365
|
+
stateSnapshot = await captureAppState(page, frameworkInfo);
|
|
366
|
+
} catch (e) {
|
|
367
|
+
logWarning(`State capture failed: ${e.message}`);
|
|
368
|
+
stateSnapshot = null;
|
|
369
|
+
}
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
// =========================================
|
|
374
|
+
// Traditional Link Scraping (existing logic)
|
|
375
|
+
// =========================================
|
|
186
376
|
const selectors = opts.selectors || NAV_SELECTORS;
|
|
187
377
|
const selectorString = selectors.join(', ');
|
|
188
378
|
|
|
@@ -196,13 +386,13 @@ export async function discoverPages(baseUrl, options = {}) {
|
|
|
196
386
|
|
|
197
387
|
// Process and filter links
|
|
198
388
|
const seenUrls = new Set();
|
|
199
|
-
const
|
|
389
|
+
const linkScrapedPages = [];
|
|
200
390
|
|
|
201
391
|
// Always include homepage first
|
|
202
392
|
const homeUrl = normalizeUrl(baseUrl, '/');
|
|
203
393
|
if (homeUrl) {
|
|
204
394
|
seenUrls.add(homeUrl);
|
|
205
|
-
|
|
395
|
+
linkScrapedPages.push({
|
|
206
396
|
path: '/',
|
|
207
397
|
name: 'Home',
|
|
208
398
|
url: homeUrl
|
|
@@ -232,14 +422,31 @@ export async function discoverPages(baseUrl, options = {}) {
|
|
|
232
422
|
|
|
233
423
|
// Add to results
|
|
234
424
|
seenUrls.add(normalized);
|
|
235
|
-
|
|
425
|
+
linkScrapedPages.push({
|
|
236
426
|
path,
|
|
237
427
|
name: extractPageName(link.text, path),
|
|
238
428
|
url: normalized
|
|
239
429
|
});
|
|
240
430
|
|
|
241
431
|
// Check max pages limit
|
|
242
|
-
if (
|
|
432
|
+
if (linkScrapedPages.length >= opts.maxPages) break;
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
// =========================================
|
|
436
|
+
// Merge Routes (v1.3)
|
|
437
|
+
// =========================================
|
|
438
|
+
let pages;
|
|
439
|
+
if (frameworkRoutes.length > 0) {
|
|
440
|
+
// Merge framework routes with link-scraped pages
|
|
441
|
+
pages = mergeRoutes(frameworkRoutes, linkScrapedPages, baseDomain, baseUrl);
|
|
442
|
+
} else {
|
|
443
|
+
// No framework routes, use link-scraped pages only
|
|
444
|
+
pages = linkScrapedPages.map(p => ({ ...p, source: 'link-scrape', dynamic: false }));
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
// Apply max pages limit to merged results
|
|
448
|
+
if (pages.length > opts.maxPages) {
|
|
449
|
+
pages = pages.slice(0, opts.maxPages);
|
|
243
450
|
}
|
|
244
451
|
|
|
245
452
|
// Sort by path depth (shallow first)
|
|
@@ -257,25 +464,47 @@ export async function discoverPages(baseUrl, options = {}) {
|
|
|
257
464
|
success: true,
|
|
258
465
|
baseUrl: baseUrlObj.origin,
|
|
259
466
|
baseDomain,
|
|
467
|
+
// SPA/Framework data (v1.3)
|
|
468
|
+
framework: frameworkInfo,
|
|
469
|
+
stateSnapshot: stateSnapshot,
|
|
470
|
+
// Page discovery results
|
|
260
471
|
pages,
|
|
261
472
|
stats: {
|
|
262
473
|
totalLinksFound: rawLinks.length,
|
|
474
|
+
frameworkRoutesFound: frameworkRoutes.length,
|
|
263
475
|
pagesDiscovered: pages.length,
|
|
264
476
|
durationMs: duration
|
|
265
477
|
}
|
|
266
478
|
};
|
|
267
479
|
} catch (error) {
|
|
480
|
+
// Normalize baseUrl in error case for consistency
|
|
481
|
+
let normalizedBaseUrl = baseUrl;
|
|
482
|
+
let errorBaseDomain = '';
|
|
483
|
+
try {
|
|
484
|
+
const urlObj = new URL(baseUrl);
|
|
485
|
+
normalizedBaseUrl = urlObj.origin;
|
|
486
|
+
errorBaseDomain = urlObj.hostname;
|
|
487
|
+
} catch {
|
|
488
|
+
// Keep original baseUrl if parsing fails
|
|
489
|
+
}
|
|
490
|
+
|
|
268
491
|
return {
|
|
269
492
|
success: false,
|
|
270
|
-
baseUrl,
|
|
493
|
+
baseUrl: normalizedBaseUrl,
|
|
494
|
+
baseDomain: errorBaseDomain,
|
|
495
|
+
framework: null,
|
|
496
|
+
stateSnapshot: null,
|
|
271
497
|
pages: [{
|
|
272
498
|
path: '/',
|
|
273
499
|
name: 'Home',
|
|
274
|
-
url: normalizeUrl(baseUrl, '/') || baseUrl
|
|
500
|
+
url: normalizeUrl(baseUrl, '/') || baseUrl,
|
|
501
|
+
source: 'fallback',
|
|
502
|
+
dynamic: false
|
|
275
503
|
}],
|
|
276
504
|
error: error.message,
|
|
277
505
|
stats: {
|
|
278
506
|
totalLinksFound: 0,
|
|
507
|
+
frameworkRoutesFound: 0,
|
|
279
508
|
pagesDiscovered: 1,
|
|
280
509
|
durationMs: Date.now() - startTime
|
|
281
510
|
}
|
|
@@ -287,11 +516,10 @@ export async function discoverPages(baseUrl, options = {}) {
|
|
|
287
516
|
}
|
|
288
517
|
}
|
|
289
518
|
|
|
290
|
-
// CLI support
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
);
|
|
519
|
+
// CLI support - use exact file match to avoid triggering when imported
|
|
520
|
+
import { fileURLToPath } from 'url';
|
|
521
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
522
|
+
const isMainModule = process.argv[1] === __filename;
|
|
295
523
|
|
|
296
524
|
if (isMainModule) {
|
|
297
525
|
const url = process.argv[2];
|