design-clone 1.1.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/README.md +42 -20
  2. package/SKILL.md +74 -0
  3. package/bin/commands/clone-site.js +75 -10
  4. package/bin/commands/init.js +33 -1
  5. package/bin/commands/verify.js +5 -3
  6. package/bin/utils/validate.js +24 -8
  7. package/docs/cli-reference.md +224 -2
  8. package/docs/codebase-summary.md +309 -0
  9. package/docs/design-clone-architecture.md +290 -45
  10. package/docs/pixel-perfect.md +35 -4
  11. package/docs/project-roadmap.md +382 -0
  12. package/docs/troubleshooting.md +5 -4
  13. package/package.json +12 -6
  14. package/src/ai/__pycache__/analyze-structure.cpython-313.pyc +0 -0
  15. package/src/ai/__pycache__/extract-design-tokens.cpython-313.pyc +0 -0
  16. package/src/ai/analyze-structure.py +73 -3
  17. package/src/ai/extract-design-tokens.py +356 -13
  18. package/src/ai/prompts/__pycache__/__init__.cpython-313.pyc +0 -0
  19. package/src/ai/prompts/__pycache__/design_tokens.cpython-313.pyc +0 -0
  20. package/src/ai/prompts/__pycache__/structure_analysis.cpython-313.pyc +0 -0
  21. package/src/ai/prompts/__pycache__/ux_audit.cpython-313.pyc +0 -0
  22. package/src/ai/prompts/design_tokens.py +133 -0
  23. package/src/ai/prompts/structure_analysis.py +329 -10
  24. package/src/ai/prompts/ux_audit.py +198 -0
  25. package/src/ai/ux-audit.js +596 -0
  26. package/src/core/animation-extractor.js +526 -0
  27. package/src/core/app-state-snapshot.js +511 -0
  28. package/src/core/content-counter.js +342 -0
  29. package/src/core/cookie-handler.js +1 -1
  30. package/src/core/css-extractor.js +4 -4
  31. package/src/core/dimension-extractor.js +93 -21
  32. package/src/core/dimension-output.js +103 -6
  33. package/src/core/discover-pages.js +242 -14
  34. package/src/core/dom-tree-analyzer.js +298 -0
  35. package/src/core/extract-assets.js +1 -1
  36. package/src/core/framework-detector.js +538 -0
  37. package/src/core/html-extractor.js +45 -4
  38. package/src/core/lazy-loader.js +7 -7
  39. package/src/core/multi-page-screenshot.js +9 -6
  40. package/src/core/page-readiness.js +8 -8
  41. package/src/core/screenshot.js +311 -7
  42. package/src/core/section-cropper.js +209 -0
  43. package/src/core/section-detector.js +386 -0
  44. package/src/core/semantic-enhancer.js +492 -0
  45. package/src/core/state-capture.js +598 -0
  46. package/src/core/tests/test-section-cropper.js +177 -0
  47. package/src/core/tests/test-section-detector.js +55 -0
  48. package/src/core/video-capture.js +546 -0
  49. package/src/route-discoverers/angular-discoverer.js +157 -0
  50. package/src/route-discoverers/astro-discoverer.js +123 -0
  51. package/src/route-discoverers/base-discoverer.js +242 -0
  52. package/src/route-discoverers/index.js +106 -0
  53. package/src/route-discoverers/next-discoverer.js +130 -0
  54. package/src/route-discoverers/nuxt-discoverer.js +138 -0
  55. package/src/route-discoverers/react-discoverer.js +139 -0
  56. package/src/route-discoverers/svelte-discoverer.js +109 -0
  57. package/src/route-discoverers/universal-discoverer.js +227 -0
  58. package/src/route-discoverers/vue-discoverer.js +118 -0
  59. package/src/utils/__init__.py +1 -1
  60. package/src/utils/__pycache__/__init__.cpython-313.pyc +0 -0
  61. package/src/utils/__pycache__/env.cpython-313.pyc +0 -0
  62. package/src/utils/browser.js +11 -37
  63. package/src/utils/playwright.js +213 -0
  64. package/src/verification/generate-audit-report.js +398 -0
  65. package/src/verification/verify-footer.js +493 -0
  66. package/src/verification/verify-header.js +486 -0
  67. package/src/verification/verify-layout.js +2 -2
  68. package/src/verification/verify-menu.js +4 -20
  69. package/src/verification/verify-slider.js +533 -0
  70. package/src/utils/puppeteer.js +0 -281
@@ -0,0 +1,138 @@
1
+ /**
2
+ * Nuxt Route Discoverer
3
+ *
4
+ * Extracts routes from Nuxt 2 and Nuxt 3 applications using:
5
+ * - window.__NUXT__ (Nuxt 2/3 state)
6
+ * - window.$nuxt.$router (Vue Router instance)
7
+ * - window.__NUXT_PATHS__ (Nuxt 3 prerendered paths)
8
+ */
9
+
10
+ import { BaseDiscoverer } from './base-discoverer.js';
11
+
12
export class NuxtDiscoverer extends BaseDiscoverer {
  /**
   * Discover routes from a Nuxt application.
   *
   * A single in-page script collects route candidates from, in order:
   *   1. window.__NUXT__            - path of the currently rendered route
   *   2. window.$nuxt.$router       - the router's route table, walked recursively
   *   3. window.__NUXT_PATHS__      - an array of path strings, when present
   *   4. <a href> elements          - links that look like NuxtLinks or sit in a
   *                                   nav / header / [role="navigation"] element
   *
   * The raw candidates are then named, normalized and deduplicated with
   * `extractPageName`, `normalizeRoute` and `deduplicateRoutes`, which are
   * called on `this` (presumably inherited from BaseDiscoverer).
   *
   * @returns {Promise<import('./base-discoverer.js').DiscoveredRoute[]>}
   */
  async discover() {
    const rawRoutes = await this.page.evaluate(() => {
      const routes = [];

      // Active-state classes: the Nuxt 2 names plus the vue-router names used by Nuxt 3
      const activeLinkClasses = [
        'nuxt-link-active',
        'nuxt-link-exact-active',
        'router-link-active',
        'router-link-exact-active'
      ];

      const hasRoute = path => routes.some(r => r.path === path);

      // Site-absolute path only: "/about" qualifies, "//cdn.example.com/x"
      // (protocol-relative, i.e. another host) does not.
      const isLocalPath = value =>
        typeof value === 'string' && value.startsWith('/') && !value.startsWith('//');

      // Vue marks elements with attributes named "data-v-<hash>" (and
      // "data-v-app" on the mount point). There is never an attribute named
      // exactly "data-v-", so the attribute NAMES have to be prefix-matched.
      const hasVueScopeAttr = el =>
        typeof el.getAttributeNames === 'function' &&
        el.getAttributeNames().some(attr => attr.startsWith('data-v-'));

      // True when the element or any ancestor carries a data-v-* attribute
      function isInsideVueScope(el) {
        for (let node = el; node; node = node.parentElement) {
          if (hasVueScopeAttr(node)) return true;
        }
        return false;
      }

      /**
       * Recursively extract routes from Vue Router config
       * @param {Array} routeList - Array of route objects
       * @param {string} prefix - Path prefix for nested routes
       */
      function extractRoutes(routeList, prefix = '') {
        if (!Array.isArray(routeList)) return;

        routeList.forEach(r => {
          // Tolerate holes / malformed records instead of aborting the whole scan
          if (!r || typeof r.path !== 'string' || !r.path) return;

          // Build full path: relative child paths are appended to the parent
          let path = r.path;
          if (!path.startsWith('/') && prefix) {
            path = prefix + (prefix.endsWith('/') ? '' : '/') + path;
          } else if (!path.startsWith('/')) {
            path = '/' + path;
          }

          // Internal ("/_...") and top-level dynamic ("/:...") routes are not
          // recorded themselves, but their children are still visited
          const isInternal = path.startsWith('/_') || path.startsWith('/:');
          if (!isInternal) {
            routes.push({
              path,
              name: r.name || '',
              component: r.name || r.component?.name || '',
              source: 'framework'
            });
          }

          if (r.children) {
            extractRoutes(r.children, path);
          }
        });
      }

      // Method 1: __NUXT__ state (both Nuxt 2 and 3)
      if (window.__NUXT__) {
        const nuxt = window.__NUXT__;

        // Current route path
        const statePath = nuxt.state?.route?.path;
        if (statePath && typeof statePath === 'string') {
          routes.push({
            path: statePath,
            name: nuxt.state.route.name || 'Current Page',
            source: 'framework'
          });
        }

        // Nuxt 3: route from payload
        const payloadPath = nuxt.data?.path || nuxt.path;
        if (payloadPath && typeof payloadPath === 'string') {
          routes.push({
            path: payloadPath,
            name: 'Current Page',
            source: 'framework'
          });
        }
      }

      // Method 2: $nuxt.$router (Vue Router instance)
      if (window.$nuxt?.$router?.options?.routes) {
        extractRoutes(window.$nuxt.$router.options.routes);
      }

      // Method 3: window.__NUXT_PATHS__ path list
      if (Array.isArray(window.__NUXT_PATHS__)) {
        window.__NUXT_PATHS__.forEach(path => {
          // Non-string entries cannot be normalized later, so drop them here
          if (typeof path !== 'string' || !path) return;
          if (!hasRoute(path)) {
            routes.push({
              path,
              source: 'framework'
            });
          }
        });
      }

      // Method 4: NuxtLink components in DOM
      document.querySelectorAll('a[href]').forEach(link => {
        const href = link.getAttribute('href');
        if (!isLocalPath(href) || href.startsWith('/_')) return;

        // Check for Nuxt/Vue-specific markers
        const isNuxtLink = isInsideVueScope(link) ||
          activeLinkClasses.some(cls => link.classList.contains(cls));

        if (!isNuxtLink && !link.closest('nav, header, [role="navigation"]')) return;
        if (hasRoute(href)) return;

        routes.push({
          path: href,
          name: link.textContent?.trim() || '',
          source: isNuxtLink ? 'framework' : 'link-scrape'
        });
      });

      return routes;
    });

    // Process and deduplicate
    const processedRoutes = rawRoutes.map(route => ({
      ...route,
      name: route.name || this.extractPageName(route.path, route.component),
      path: this.normalizeRoute(route.path)
    }));

    return this.deduplicateRoutes(processedRoutes);
  }
}
137
+
138
+ export default NuxtDiscoverer;
@@ -0,0 +1,139 @@
1
+ /**
2
+ * React Route Discoverer
3
+ *
4
+ * React Router doesn't expose routes globally, so we use:
5
+ * - Link component scraping from DOM
6
+ * - history.pushState interception
7
+ * - Navigation area link extraction
8
+ *
9
+ * This is the most challenging discoverer due to React's lack of global state.
10
+ */
11
+
12
+ import { BaseDiscoverer } from './base-discoverer.js';
13
+
14
export class ReactDiscoverer extends BaseDiscoverer {
  /**
   * Discover routes from a React application.
   *
   * Installs the history hooks (see injectInterception), then runs one
   * in-page script that collects candidates from:
   *   1. <a href> elements that are not opened in another browsing context
   *      and sit inside a navigation-like container or the React root
   *   2. URLs recorded by the history hooks
   *   3. links flagged as active (`a.active`, `a[aria-current="page"]`)
   *
   * The raw candidates are then named, normalized and deduplicated with
   * `extractPageName`, `normalizeRoute` and `deduplicateRoutes`, which are
   * called on `this` (presumably inherited from BaseDiscoverer).
   *
   * @returns {Promise<import('./base-discoverer.js').DiscoveredRoute[]>}
   */
  async discover() {
    // First, inject pushState interception
    await this.injectInterception();

    // Get routes from various sources
    const rawRoutes = await this.page.evaluate(() => {
      const routes = [];
      const hasRoute = path => routes.some(r => r.path === path);

      // Site-absolute path only: "/about" qualifies, "//cdn.example.com/x"
      // (protocol-relative, i.e. another host) does not.
      const isLocalPath = value =>
        typeof value === 'string' && value.startsWith('/') && !value.startsWith('//');

      // The mount point does not change while scanning, so look it up once
      const reactRoot = document.getElementById('root') ||
        document.querySelector('[data-reactroot]');

      // Method 1: React Router Link components (they render as <a> tags)
      document.querySelectorAll('a[href]').forEach(link => {
        const href = link.getAttribute('href');
        if (!isLocalPath(href)) return;

        // React Router Links typically don't have target="_blank"
        const staysInTab = !link.hasAttribute('target') ||
          link.getAttribute('target') === '_self';
        if (!staysInTab) return;

        const isInNav = link.closest('nav, header, [role="navigation"], [class*="nav"], [class*="menu"]');
        const isInsideReact = Boolean(reactRoot && reactRoot.contains(link));
        if (!isInNav && !isInsideReact) return;

        // No in-page dedupe here: every matching link is reported and the
        // final deduplicateRoutes() call decides which entry survives.
        routes.push({
          path: href,
          name: link.textContent?.trim() || '',
          source: isInsideReact ? 'framework' : 'link-scrape'
        });
      });

      // Method 2: Check for intercepted routes
      if (Array.isArray(window.__DISCOVERED_ROUTES__)) {
        window.__DISCOVERED_ROUTES__.forEach(url => {
          try {
            // Resolve against the current URL (not just the origin) so that
            // relative history URLs such as "settings" keep their directory
            const path = new URL(url, window.location.href).pathname;
            if (!hasRoute(path)) {
              routes.push({
                path,
                source: 'interception'
              });
            }
          } catch {
            // Invalid URL
          }
        });
      }

      // Method 3: Look for NavLink active classes (React Router specific)
      document.querySelectorAll('a.active, a[aria-current="page"]').forEach(link => {
        const href = link.getAttribute('href');
        if (!isLocalPath(href) || hasRoute(href)) return;

        routes.push({
          path: href,
          name: link.textContent?.trim() || '',
          source: 'framework'
        });
      });

      return routes;
    });

    const processedRoutes = rawRoutes.map(route => ({
      ...route,
      name: route.name || this.extractPageName(route.path),
      path: this.normalizeRoute(route.path)
    }));

    return this.deduplicateRoutes(processedRoutes);
  }

  /**
   * Inject history.pushState / replaceState interception into the page.
   *
   * Wraps both history methods so every URL passed to them is appended to
   * window.__DISCOVERED_ROUTES__, and records the pathname on each popstate
   * event. Guarded by window.__ROUTE_INTERCEPTION_ACTIVE__, so repeated calls
   * on the same document install the hooks only once. Only navigations that
   * happen after installation are recorded (the list starts empty).
   *
   * Failures are ignored on purpose: discovery continues without interception.
   *
   * @returns {Promise<void>}
   */
  async injectInterception() {
    try {
      await this.page.evaluate(() => {
        if (window.__ROUTE_INTERCEPTION_ACTIVE__) return;

        window.__DISCOVERED_ROUTES__ = [];
        window.__ROUTE_INTERCEPTION_ACTIVE__ = true;

        // Intercept pushState
        const originalPushState = history.pushState.bind(history);
        history.pushState = function(state, title, url) {
          if (url) {
            window.__DISCOVERED_ROUTES__.push(url.toString());
          }
          return originalPushState(state, title, url);
        };

        // Intercept replaceState
        const originalReplaceState = history.replaceState.bind(history);
        history.replaceState = function(state, title, url) {
          if (url) {
            window.__DISCOVERED_ROUTES__.push(url.toString());
          }
          return originalReplaceState(state, title, url);
        };

        // Listen for popstate
        window.addEventListener('popstate', () => {
          window.__DISCOVERED_ROUTES__.push(window.location.pathname);
        });
      });
    } catch {
      // Interception may fail in some browser contexts, continue without it
    }
  }
}
138
+
139
+ export default ReactDiscoverer;
@@ -0,0 +1,109 @@
1
+ /**
2
+ * Svelte/SvelteKit Route Discoverer
3
+ *
4
+ * Extracts routes from SvelteKit applications using:
5
+ * - SvelteKit internal routing state
6
+ * - data-sveltekit-* attributes
7
+ * - Standard link scraping for static Svelte apps
8
+ */
9
+
10
+ import { BaseDiscoverer } from './base-discoverer.js';
11
+
12
export class SvelteDiscoverer extends BaseDiscoverer {
  /**
   * Discover routes from a Svelte/SvelteKit application.
   *
   * A single in-page script collects candidates from, in order:
   *   1. the keys of window.__sveltekit_routes__, when that global exists
   *   2. links governed by a data-sveltekit-preload-data, -reload or -noscroll
   *      option, whether the attribute is on the <a> itself or on an ancestor
   *   3. links inside nav / header / [role="navigation"] elements
   *
   * Paths are deduplicated in-page (first occurrence wins; a later duplicate
   * only contributes its link text when the first entry had no name). The
   * result is then named, normalized and deduplicated with `extractPageName`,
   * `normalizeRoute` and `deduplicateRoutes`, which are called on `this`
   * (presumably inherited from BaseDiscoverer).
   *
   * @returns {Promise<import('./base-discoverer.js').DiscoveredRoute[]>}
   */
  async discover() {
    const rawRoutes = await this.page.evaluate(() => {
      const routes = [];
      const byPath = new Map();

      const linkOptionAttrs = [
        'data-sveltekit-preload-data',
        'data-sveltekit-reload',
        'data-sveltekit-noscroll'
      ];
      const anyLinkOption = linkOptionAttrs.map(attr => `[${attr}]`).join(', ');

      // Site-absolute path only: "/about" qualifies, "//cdn.example.com/x"
      // (protocol-relative, i.e. another host) does not.
      const isLocalPath = value =>
        typeof value === 'string' && value.startsWith('/') && !value.startsWith('//');

      /**
       * Record a route once per path; backfill the name of an unnamed entry
       */
      function addRoute(path, name, source) {
        const known = byPath.get(path);
        if (known) {
          if (!known.name && name) known.name = name;
          return;
        }
        const route = { path, name: name || '', source };
        byPath.set(path, route);
        routes.push(route);
      }

      // Method 1: SvelteKit internal state
      if (window.__sveltekit_routes__) {
        // This global may exist in dev mode
        Object.keys(window.__sveltekit_routes__).forEach(path => {
          addRoute(path, '', 'framework');
        });
      }

      // Method 2: links governed by SvelteKit link options. The options are
      // valid on the <a> or on any ancestor (commonly <body>), so both the
      // anchor itself and anchors nested under a marked element are matched.
      linkOptionAttrs.forEach(attr => {
        document.querySelectorAll(`a[${attr}], [${attr}] a`).forEach(link => {
          const href = link.getAttribute('href');
          if (isLocalPath(href)) {
            addRoute(href, link.textContent?.trim() || '', 'framework');
          }
        });
      });

      // Method 3: Standard navigation links (for all Svelte apps)
      document.querySelectorAll('nav a, header a, [role="navigation"] a').forEach(link => {
        const href = link.getAttribute('href');
        if (!isLocalPath(href)) return;

        // closest() also tests the link itself, so this covers both placements
        const isSvelteKitLink = Boolean(link.closest(anyLinkOption));
        addRoute(
          href,
          link.textContent?.trim() || '',
          isSvelteKitLink ? 'framework' : 'link-scrape'
        );
      });

      return routes;
    });

    const processedRoutes = rawRoutes.map(route => ({
      ...route,
      name: route.name || this.extractPageName(route.path),
      path: this.normalizeRoute(route.path)
    }));

    return this.deduplicateRoutes(processedRoutes);
  }
}
108
+
109
+ export default SvelteDiscoverer;
@@ -0,0 +1,227 @@
1
+ /**
2
+ * Universal Route Discoverer
3
+ *
4
+ * Fallback discoverer for unknown frameworks or static sites.
5
+ * Uses comprehensive techniques:
6
+ * - history.pushState/replaceState interception
7
+ * - Exhaustive link scraping from navigation elements
8
+ * - Sitemap.xml parsing if available
9
+ */
10
+
11
+ import { BaseDiscoverer } from './base-discoverer.js';
12
+
13
export class UniversalDiscoverer extends BaseDiscoverer {
  /**
   * Discover routes using universal techniques.
   *
   * Installs the history hooks (see injectHistoryInterception), then runs one
   * in-page script that collects candidates from:
   *   1. URLs recorded by the history hooks
   *   2. links inside navigation-like containers (nav, header, footer, ...)
   *   3. every `a[href^="/"]` link that is neither a download nor target="_blank"
   *   4. links inside the main content area
   * and finally appends same-origin entries from /sitemap.xml.
   *
   * In-page candidates are deduplicated by their raw href (first wins) and
   * filtered against asset / API / build-output patterns. Everything is then
   * named, normalized and deduplicated with `extractPageName`,
   * `normalizeRoute` and `deduplicateRoutes`, which are called on `this`
   * (presumably inherited from BaseDiscoverer).
   *
   * @returns {Promise<import('./base-discoverer.js').DiscoveredRoute[]>}
   */
  async discover() {
    // First, inject history interception
    await this.injectHistoryInterception();

    // Get routes from multiple sources
    const rawRoutes = await this.page.evaluate(() => {
      const routes = [];
      const seenPaths = new Set();

      // Common non-page paths; built once instead of on every addRoute call
      const skipPatterns = [
        /\.(js|css|png|jpg|jpeg|gif|svg|ico|woff|woff2|ttf|eot|map)$/i,
        /^\/api\//,
        /^\/_next\//,
        /^\/_nuxt\//,
        /^\/static\//,
        /^\/assets\//,
      ];

      /**
       * Add route if not already seen
       */
      function addRoute(path, name, source) {
        if (typeof path !== 'string' || !path || seenPaths.has(path)) return;
        // "//host/path" is protocol-relative (another host), not a local route
        if (!path.startsWith('/') || path.startsWith('//')) return;

        // Match the patterns against the path alone so a cache-busting query
        // ("/app.js?v=3") or a fragment cannot hide an asset extension
        const pathOnly = path.split(/[?#]/, 1)[0];
        if (skipPatterns.some(pattern => pattern.test(pathOnly))) return;

        seenPaths.add(path);
        routes.push({
          path,
          name: name || '',
          source
        });
      }

      // Method 1: History interception results
      if (Array.isArray(window.__UNIVERSAL_DISCOVERED_ROUTES__)) {
        window.__UNIVERSAL_DISCOVERED_ROUTES__.forEach(url => {
          try {
            // Resolve against the current URL so relative history URLs keep
            // their directory
            const path = new URL(url, window.location.href).pathname;
            addRoute(path, '', 'interception');
          } catch {
            // Invalid URL, skip
          }
        });
      }

      // Method 2: Navigation elements (high confidence)
      const navSelectors = [
        'nav a[href]',
        'header a[href]',
        '[role="navigation"] a[href]',
        '[class*="nav"] a[href]',
        '[class*="menu"] a[href]',
        '[class*="sidebar"] a[href]',
        'footer a[href]'
      ];

      navSelectors.forEach(selector => {
        document.querySelectorAll(selector).forEach(link => {
          addRoute(link.getAttribute('href'), link.textContent?.trim() || '', 'link-scrape');
        });
      });

      // Method 3: All internal links (lower confidence but comprehensive)
      document.querySelectorAll('a[href^="/"]').forEach(link => {
        // Skip if has target="_blank" or download attribute
        if (link.hasAttribute('download')) return;
        if (link.getAttribute('target') === '_blank') return;

        addRoute(link.getAttribute('href'), link.textContent?.trim() || '', 'link-scrape');
      });

      // Method 4: Links in main content area
      const mainSelectors = ['main', '[role="main"]', '#content', '.content', 'article'];
      mainSelectors.forEach(selector => {
        const main = document.querySelector(selector);
        if (!main) return;

        main.querySelectorAll('a[href^="/"]').forEach(link => {
          if (link.hasAttribute('download')) return;
          addRoute(link.getAttribute('href'), link.textContent?.trim() || '', 'link-scrape');
        });
      });

      return routes;
    });

    // Try to fetch sitemap
    const sitemapRoutes = await this.fetchSitemapRoutes();

    // Combine all routes
    const allRoutes = [...rawRoutes, ...sitemapRoutes];

    const processedRoutes = allRoutes.map(route => ({
      ...route,
      name: route.name || this.extractPageName(route.path),
      path: this.normalizeRoute(route.path)
    }));

    return this.deduplicateRoutes(processedRoutes);
  }

  /**
   * Inject history.pushState/replaceState interception into the page.
   *
   * Wraps both history methods so every URL passed to them is appended to
   * window.__UNIVERSAL_DISCOVERED_ROUTES__, records the pathname on popstate
   * and the full href on hashchange. Guarded by
   * window.__UNIVERSAL_INTERCEPTION_ACTIVE__, so repeated calls on the same
   * document install the hooks only once. Only navigations that happen after
   * installation are recorded (the list starts empty).
   *
   * Failures are ignored on purpose: discovery continues without interception.
   *
   * @returns {Promise<void>}
   */
  async injectHistoryInterception() {
    try {
      await this.page.evaluate(() => {
        if (window.__UNIVERSAL_INTERCEPTION_ACTIVE__) return;

        window.__UNIVERSAL_DISCOVERED_ROUTES__ = [];
        window.__UNIVERSAL_INTERCEPTION_ACTIVE__ = true;

        // Intercept pushState
        const originalPushState = history.pushState.bind(history);
        history.pushState = function(state, title, url) {
          if (url) {
            window.__UNIVERSAL_DISCOVERED_ROUTES__.push(url.toString());
          }
          return originalPushState(state, title, url);
        };

        // Intercept replaceState
        const originalReplaceState = history.replaceState.bind(history);
        history.replaceState = function(state, title, url) {
          if (url) {
            window.__UNIVERSAL_DISCOVERED_ROUTES__.push(url.toString());
          }
          return originalReplaceState(state, title, url);
        };

        // Listen for popstate
        window.addEventListener('popstate', () => {
          window.__UNIVERSAL_DISCOVERED_ROUTES__.push(window.location.pathname);
        });

        // Listen for hashchange (for hash-based routing)
        window.addEventListener('hashchange', () => {
          window.__UNIVERSAL_DISCOVERED_ROUTES__.push(window.location.href);
        });
      });
    } catch {
      // Interception may fail in some browser contexts, continue without it
    }
  }

  /**
   * Try to fetch and parse sitemap.xml.
   *
   * The request is issued from inside the page with a 5 second abort timer
   * that covers both the request and reading the body. Every <loc> value
   * (plain or CDATA-wrapped) on the same origin as `this.baseUrl` yields one
   * route with source 'sitemap'. Entries pointing at other sitemap files
   * (*.xml, *.xml.gz - as found in a sitemap index) are not pages and are
   * skipped; nested sitemaps are not followed.
   *
   * Never throws: any failure results in an empty (or partial) list.
   *
   * @returns {Promise<import('./base-discoverer.js').DiscoveredRoute[]>}
   */
  async fetchSitemapRoutes() {
    const routes = [];

    try {
      const baseOrigin = new URL(this.baseUrl).origin;
      const sitemapUrl = new URL('/sitemap.xml', this.baseUrl).href;

      const response = await this.page.evaluate(async (url) => {
        // Add timeout using AbortController
        const controller = new AbortController();
        const timeoutId = setTimeout(() => controller.abort(), 5000);

        try {
          const res = await fetch(url, { signal: controller.signal });
          if (!res.ok) return null;
          return await res.text();
        } catch {
          return null;
        } finally {
          // Cleared on every exit path, and only after the body was read
          clearTimeout(timeoutId);
        }
      }, sitemapUrl);

      if (typeof response === 'string' && response) {
        const seen = new Set();
        const locPattern = /<loc>\s*(?:<!\[CDATA\[)?([^<]+?)(?:\]\]>)?\s*<\/loc>/gi;

        for (const match of response.matchAll(locPattern)) {
          try {
            // Sitemap values are XML-escaped; "&amp;" is the escape that
            // matters for URLs
            const url = new URL(match[1].trim().replace(/&amp;/g, '&'));

            // Only include paths from same origin
            if (url.origin !== baseOrigin) continue;
            // Sitemap-index entries reference further sitemap files, not pages
            if (/\.xml(\.gz)?$/i.test(url.pathname)) continue;
            if (seen.has(url.pathname)) continue;

            seen.add(url.pathname);
            routes.push({
              path: url.pathname,
              name: '',
              source: 'sitemap'
            });
          } catch {
            // Invalid URL in sitemap
          }
        }
      }
    } catch {
      // Sitemap fetch failed, continue without it
    }

    return routes;
  }
}
226
+
227
+ export default UniversalDiscoverer;