afterburn-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +281 -0
  3. package/dist/ai/gemini-client.d.ts +21 -0
  4. package/dist/ai/gemini-client.js +105 -0
  5. package/dist/ai/gemini-client.js.map +1 -0
  6. package/dist/ai/index.d.ts +1 -0
  7. package/dist/ai/index.js +3 -0
  8. package/dist/ai/index.js.map +1 -0
  9. package/dist/analysis/diagnosis-schema.d.ts +106 -0
  10. package/dist/analysis/diagnosis-schema.js +54 -0
  11. package/dist/analysis/diagnosis-schema.js.map +1 -0
  12. package/dist/analysis/error-analyzer.d.ts +9 -0
  13. package/dist/analysis/error-analyzer.js +573 -0
  14. package/dist/analysis/error-analyzer.js.map +1 -0
  15. package/dist/analysis/index.d.ts +4 -0
  16. package/dist/analysis/index.js +6 -0
  17. package/dist/analysis/index.js.map +1 -0
  18. package/dist/analysis/source-mapper.d.ts +19 -0
  19. package/dist/analysis/source-mapper.js +329 -0
  20. package/dist/analysis/source-mapper.js.map +1 -0
  21. package/dist/analysis/ui-auditor.d.ts +9 -0
  22. package/dist/analysis/ui-auditor.js +104 -0
  23. package/dist/analysis/ui-auditor.js.map +1 -0
  24. package/dist/artifacts/artifact-storage.d.ts +44 -0
  25. package/dist/artifacts/artifact-storage.js +99 -0
  26. package/dist/artifacts/artifact-storage.js.map +1 -0
  27. package/dist/artifacts/index.d.ts +1 -0
  28. package/dist/artifacts/index.js +3 -0
  29. package/dist/artifacts/index.js.map +1 -0
  30. package/dist/browser/browser-manager.d.ts +45 -0
  31. package/dist/browser/browser-manager.js +88 -0
  32. package/dist/browser/browser-manager.js.map +1 -0
  33. package/dist/browser/challenge-detector.d.ts +10 -0
  34. package/dist/browser/challenge-detector.js +58 -0
  35. package/dist/browser/challenge-detector.js.map +1 -0
  36. package/dist/browser/cookie-dismisser.d.ts +18 -0
  37. package/dist/browser/cookie-dismisser.js +76 -0
  38. package/dist/browser/cookie-dismisser.js.map +1 -0
  39. package/dist/browser/index.d.ts +4 -0
  40. package/dist/browser/index.js +6 -0
  41. package/dist/browser/index.js.map +1 -0
  42. package/dist/browser/stealth-browser.d.ts +13 -0
  43. package/dist/browser/stealth-browser.js +59 -0
  44. package/dist/browser/stealth-browser.js.map +1 -0
  45. package/dist/cli/commander-cli.d.ts +2 -0
  46. package/dist/cli/commander-cli.js +150 -0
  47. package/dist/cli/commander-cli.js.map +1 -0
  48. package/dist/cli/doctor.d.ts +34 -0
  49. package/dist/cli/doctor.js +124 -0
  50. package/dist/cli/doctor.js.map +1 -0
  51. package/dist/cli/first-run.d.ts +6 -0
  52. package/dist/cli/first-run.js +58 -0
  53. package/dist/cli/first-run.js.map +1 -0
  54. package/dist/cli/index.d.ts +3 -0
  55. package/dist/cli/index.js +5 -0
  56. package/dist/cli/index.js.map +1 -0
  57. package/dist/cli/progress.d.ts +11 -0
  58. package/dist/cli/progress.js +30 -0
  59. package/dist/cli/progress.js.map +1 -0
  60. package/dist/core/engine.d.ts +33 -0
  61. package/dist/core/engine.js +269 -0
  62. package/dist/core/engine.js.map +1 -0
  63. package/dist/core/index.d.ts +3 -0
  64. package/dist/core/index.js +4 -0
  65. package/dist/core/index.js.map +1 -0
  66. package/dist/core/validation.d.ts +52 -0
  67. package/dist/core/validation.js +228 -0
  68. package/dist/core/validation.js.map +1 -0
  69. package/dist/discovery/crawler.d.ts +58 -0
  70. package/dist/discovery/crawler.js +240 -0
  71. package/dist/discovery/crawler.js.map +1 -0
  72. package/dist/discovery/discovery-pipeline.d.ts +22 -0
  73. package/dist/discovery/discovery-pipeline.js +256 -0
  74. package/dist/discovery/discovery-pipeline.js.map +1 -0
  75. package/dist/discovery/element-mapper.d.ts +21 -0
  76. package/dist/discovery/element-mapper.js +422 -0
  77. package/dist/discovery/element-mapper.js.map +1 -0
  78. package/dist/discovery/index.d.ts +8 -0
  79. package/dist/discovery/index.js +8 -0
  80. package/dist/discovery/index.js.map +1 -0
  81. package/dist/discovery/link-validator.d.ts +15 -0
  82. package/dist/discovery/link-validator.js +137 -0
  83. package/dist/discovery/link-validator.js.map +1 -0
  84. package/dist/discovery/sitemap-builder.d.ts +19 -0
  85. package/dist/discovery/sitemap-builder.js +166 -0
  86. package/dist/discovery/sitemap-builder.js.map +1 -0
  87. package/dist/discovery/spa-detector.d.ts +12 -0
  88. package/dist/discovery/spa-detector.js +271 -0
  89. package/dist/discovery/spa-detector.js.map +1 -0
  90. package/dist/execution/error-detector.d.ts +10 -0
  91. package/dist/execution/error-detector.js +87 -0
  92. package/dist/execution/error-detector.js.map +1 -0
  93. package/dist/execution/evidence-capture.d.ts +8 -0
  94. package/dist/execution/evidence-capture.js +37 -0
  95. package/dist/execution/evidence-capture.js.map +1 -0
  96. package/dist/execution/index.d.ts +5 -0
  97. package/dist/execution/index.js +7 -0
  98. package/dist/execution/index.js.map +1 -0
  99. package/dist/execution/step-handlers.d.ts +48 -0
  100. package/dist/execution/step-handlers.js +349 -0
  101. package/dist/execution/step-handlers.js.map +1 -0
  102. package/dist/execution/test-data.d.ts +50 -0
  103. package/dist/execution/test-data.js +160 -0
  104. package/dist/execution/test-data.js.map +1 -0
  105. package/dist/execution/workflow-executor.d.ts +56 -0
  106. package/dist/execution/workflow-executor.js +331 -0
  107. package/dist/execution/workflow-executor.js.map +1 -0
  108. package/dist/index.d.ts +2 -0
  109. package/dist/index.js +5 -0
  110. package/dist/index.js.map +1 -0
  111. package/dist/mcp/entry.d.ts +2 -0
  112. package/dist/mcp/entry.js +5 -0
  113. package/dist/mcp/entry.js.map +1 -0
  114. package/dist/mcp/index.d.ts +2 -0
  115. package/dist/mcp/index.js +4 -0
  116. package/dist/mcp/index.js.map +1 -0
  117. package/dist/mcp/server.d.ts +3 -0
  118. package/dist/mcp/server.js +19 -0
  119. package/dist/mcp/server.js.map +1 -0
  120. package/dist/mcp/tools.d.ts +2 -0
  121. package/dist/mcp/tools.js +162 -0
  122. package/dist/mcp/tools.js.map +1 -0
  123. package/dist/planning/heuristic-planner.d.ts +7 -0
  124. package/dist/planning/heuristic-planner.js +238 -0
  125. package/dist/planning/heuristic-planner.js.map +1 -0
  126. package/dist/planning/index.d.ts +3 -0
  127. package/dist/planning/index.js +5 -0
  128. package/dist/planning/index.js.map +1 -0
  129. package/dist/planning/plan-schema.d.ts +74 -0
  130. package/dist/planning/plan-schema.js +39 -0
  131. package/dist/planning/plan-schema.js.map +1 -0
  132. package/dist/planning/workflow-planner.d.ts +39 -0
  133. package/dist/planning/workflow-planner.js +211 -0
  134. package/dist/planning/workflow-planner.js.map +1 -0
  135. package/dist/reports/health-scorer.d.ts +14 -0
  136. package/dist/reports/health-scorer.js +88 -0
  137. package/dist/reports/health-scorer.js.map +1 -0
  138. package/dist/reports/html-generator.d.ts +10 -0
  139. package/dist/reports/html-generator.js +155 -0
  140. package/dist/reports/html-generator.js.map +1 -0
  141. package/dist/reports/index.d.ts +4 -0
  142. package/dist/reports/index.js +6 -0
  143. package/dist/reports/index.js.map +1 -0
  144. package/dist/reports/markdown-generator.d.ts +10 -0
  145. package/dist/reports/markdown-generator.js +334 -0
  146. package/dist/reports/markdown-generator.js.map +1 -0
  147. package/dist/reports/priority-ranker.d.ts +22 -0
  148. package/dist/reports/priority-ranker.js +608 -0
  149. package/dist/reports/priority-ranker.js.map +1 -0
  150. package/dist/screenshots/dual-format.d.ts +14 -0
  151. package/dist/screenshots/dual-format.js +59 -0
  152. package/dist/screenshots/dual-format.js.map +1 -0
  153. package/dist/screenshots/index.d.ts +2 -0
  154. package/dist/screenshots/index.js +4 -0
  155. package/dist/screenshots/index.js.map +1 -0
  156. package/dist/screenshots/screenshot-manager.d.ts +33 -0
  157. package/dist/screenshots/screenshot-manager.js +86 -0
  158. package/dist/screenshots/screenshot-manager.js.map +1 -0
  159. package/dist/testing/accessibility-auditor.d.ts +23 -0
  160. package/dist/testing/accessibility-auditor.js +44 -0
  161. package/dist/testing/accessibility-auditor.js.map +1 -0
  162. package/dist/testing/index.d.ts +4 -0
  163. package/dist/testing/index.js +5 -0
  164. package/dist/testing/index.js.map +1 -0
  165. package/dist/testing/meta-auditor.d.ts +16 -0
  166. package/dist/testing/meta-auditor.js +268 -0
  167. package/dist/testing/meta-auditor.js.map +1 -0
  168. package/dist/testing/performance-monitor.d.ts +15 -0
  169. package/dist/testing/performance-monitor.js +64 -0
  170. package/dist/testing/performance-monitor.js.map +1 -0
  171. package/dist/types/artifacts.d.ts +58 -0
  172. package/dist/types/artifacts.js +3 -0
  173. package/dist/types/artifacts.js.map +1 -0
  174. package/dist/types/discovery.d.ts +124 -0
  175. package/dist/types/discovery.js +3 -0
  176. package/dist/types/discovery.js.map +1 -0
  177. package/dist/types/execution.d.ts +154 -0
  178. package/dist/types/execution.js +3 -0
  179. package/dist/types/execution.js.map +1 -0
  180. package/dist/types/index.d.ts +2 -0
  181. package/dist/types/index.js +4 -0
  182. package/dist/types/index.js.map +1 -0
  183. package/dist/utils/sanitizer.d.ts +25 -0
  184. package/dist/utils/sanitizer.js +98 -0
  185. package/dist/utils/sanitizer.js.map +1 -0
  186. package/package.json +86 -0
  187. package/templates/report.hbs +202 -0
  188. package/templates/styles/report.css +607 -0
@@ -0,0 +1,52 @@
1
+ /**
2
+ * Check if an IP address is private or reserved (loopback, link-local, private ranges)
3
+ * Blocks: 127.0.0.0/8, 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, 169.254.0.0/16, ::1, fc00::/7
4
+ */
5
+ export declare function isPrivateOrReservedIP(ip: string): boolean;
6
+ /**
7
+ * Validate that a URL uses http:// or https:// scheme.
8
+ * Rejects file://, javascript:, data:, and other dangerous schemes.
9
+ * Also rejects URLs whose host is a private/loopback IP literal (SSRF protection). Hostnames are not DNS-resolved here; use validatePublicUrl for that.
10
+ */
11
+ export declare function validateUrl(url: string): string;
12
+ export type HostLookupFn = (hostname: string) => Promise<string[]>;
13
+ /**
14
+ * Enforce that a hostname resolves only to public IP addresses.
15
+ * Rejects localhost names, private ranges, and DNS failures.
16
+ */
17
+ export declare function ensurePublicHostname(hostname: string, lookup?: HostLookupFn): Promise<void>;
18
+ /**
19
+ * Validate URL format and ensure hostname resolves to public IP space.
20
+ */
21
+ export declare function validatePublicUrl(url: string, lookup?: HostLookupFn): Promise<string>;
22
+ /**
23
+ * Validate a filesystem path has no path traversal sequences.
24
+ * Rejects any input containing ".." and resolves the rest to an absolute path; when workspaceRoot is given, the result must stay inside it.
25
+ */
26
+ export declare function validatePath(inputPath: string, label: string, workspaceRoot?: string): string;
27
+ /**
28
+ * Validate that a navigation URL stays on the base URL's hostname (scheme and port are not compared).
29
+ * Allows same hostname or subdomains of the base hostname.
30
+ */
31
+ export declare function validateNavigationUrl(navigationUrl: string, baseUrl: string): string;
32
+ /**
33
+ * Validate and clamp maxPages to a safe range.
34
+ * Returns a safe integer between 1 and 500, defaulting to 50.
35
+ * Special case: 0 means "unlimited" but still capped at 500.
36
+ */
37
+ export declare function validateMaxPages(value: number | undefined): number;
38
+ /**
39
+ * Validate selector string length to prevent abuse.
40
+ * Returns the selector if valid, throws if too long.
41
+ */
42
+ export declare function validateSelector(selector: string): string;
43
+ /**
44
+ * Sanitize a string value by stripping potential script injection patterns.
45
+ * Removes <script> tags and javascript: URIs from step values.
46
+ */
47
+ export declare function sanitizeValue(value: string): string;
48
+ /**
49
+ * Sanitize session ID for filesystem usage
50
+ * Replaces invalid filename characters with underscores
51
+ */
52
+ export declare function sanitizeSessionId(sessionId: string): string;
@@ -0,0 +1,228 @@
1
+ // Input validation helpers for security hardening (URL, path, and numeric inputs)
2
+ import path from 'node:path';
3
+ import dns from 'node:dns/promises';
4
+ const LOCALHOST_HOSTNAMES = new Set([
5
+ 'localhost',
6
+ 'localhost.localdomain',
7
+ ]);
8
/**
 * Report whether the private-URL escape hatch is switched on.
 * Only the exact value '1' in AFTERBURN_ALLOW_PRIVATE_URLS enables it.
 */
function allowPrivateUrls() {
    const { AFTERBURN_ALLOW_PRIVATE_URLS: flag } = process.env;
    return flag === '1';
}
11
/**
 * Tell whether a string has the dotted-quad shape of an IPv4 literal
 * (four runs of decimal digits separated by dots). Octet ranges are not checked.
 */
function isIPv4Literal(value) {
    const dottedQuad = /^\d+\.\d+\.\d+\.\d+$/;
    return dottedQuad.test(value);
}
14
/**
 * Tell whether a string looks like an IPv6 literal.
 * The only test applied is the presence of a colon.
 */
function isIPv6Literal(value) {
    return value.indexOf(':') !== -1;
}
17
/**
 * Normalize an IP literal so the range checks can compare it reliably.
 *
 * - Trims whitespace and lowercases the input.
 * - Removes the square brackets that URL.hostname keeps around IPv6 literals
 *   (e.g. "[::1]"), which would otherwise defeat every IPv6 comparison.
 * - Unwraps IPv4-mapped IPv6 addresses to dotted-quad IPv4, both in the
 *   "::ffff:127.0.0.1" form and in the hex form "::ffff:7f00:1" that the URL
 *   parser produces when serializing such hosts.
 *
 * @param ip - Raw IP literal, possibly bracketed or IPv4-mapped.
 * @returns The lowercased literal, reduced to plain IPv4 when it was IPv4-mapped.
 */
function normalizeIpLiteral(ip) {
    const MAPPED_PREFIX = '::ffff:';
    let literal = ip.trim().toLowerCase();
    // URL.hostname returns IPv6 hosts wrapped in brackets; strip them first.
    if (literal.startsWith('[') && literal.endsWith(']')) {
        literal = literal.slice(1, -1);
    }
    if (!literal.startsWith(MAPPED_PREFIX)) {
        return literal;
    }
    const embedded = literal.slice(MAPPED_PREFIX.length);
    // Two trailing hex groups carry the 32 bits of the mapped IPv4 address.
    const hexGroups = /^([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(embedded);
    if (hexGroups) {
        const high = parseInt(hexGroups[1], 16);
        const low = parseInt(hexGroups[2], 16);
        return `${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`;
    }
    return embedded;
}
24
/**
 * Check whether an IP address is loopback, link-local, private, or otherwise
 * not a public destination.
 *
 * Blocks IPv4 0.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 127.0.0.0/8,
 * 169.254.0.0/16, 172.16.0.0/12 and 192.168.0.0/16, and IPv6 ::, ::1,
 * fe80::/10 and fc00::/7. The input is passed through normalizeIpLiteral
 * before any comparison.
 *
 * @param ip - IP literal to classify.
 * @returns true when the address falls in one of the blocked ranges.
 */
export function isPrivateOrReservedIP(ip) {
    const normalized = normalizeIpLiteral(ip);
    // Unspecified / loopback IPv6 (compressed forms and fully expanded loopback)
    if (normalized === '::' || normalized === '::1' || normalized === '0000:0000:0000:0000:0000:0000:0000:0001') {
        return true;
    }
    // IPv6 link-local is fe80::/10, i.e. first group fe80 through febf
    if (/^fe[89ab][0-9a-f]:/.test(normalized)) {
        return true;
    }
    // IPv6 unique-local (fc00::/7)
    if (normalized.startsWith('fc') || normalized.startsWith('fd')) {
        return true;
    }
    // IPv4 patterns
    const octets = normalized.split('.');
    if (octets.length !== 4) {
        return false;
    }
    const first = parseInt(octets[0], 10);
    const second = parseInt(octets[1], 10);
    // 0.0.0.0/8 ("this network", includes the unspecified address 0.0.0.0)
    if (first === 0)
        return true;
    // 127.0.0.0/8 (loopback)
    if (first === 127)
        return true;
    // 10.0.0.0/8 (private)
    if (first === 10)
        return true;
    // 100.64.0.0/10 (carrier-grade NAT shared address space)
    if (first === 100 && second >= 64 && second <= 127)
        return true;
    // 172.16.0.0/12 (private)
    if (first === 172 && second >= 16 && second <= 31)
        return true;
    // 192.168.0.0/16 (private)
    if (first === 192 && second === 168)
        return true;
    // 169.254.0.0/16 (link-local)
    if (first === 169 && second === 254)
        return true;
    return false;
}
65
/**
 * Validate that a URL parses and uses the http: or https: scheme.
 * Any other scheme (file:, javascript:, data:, ...) is rejected.
 *
 * Unless private URLs are allowed, a host written as an IP literal is also
 * checked against the private/reserved ranges. This check is synchronous and
 * performs no DNS resolution, so ordinary hostnames pass through untested.
 *
 * Returns the trimmed input string; throws Error on any violation.
 */
export function validateUrl(url) {
    const candidate = url.trim();
    let parsedUrl;
    try {
        parsedUrl = new URL(candidate);
    }
    catch {
        throw new Error(`Invalid URL: "${candidate}". Must be a valid http:// or https:// URL.`);
    }
    const { protocol, hostname } = parsedUrl;
    if (protocol !== 'http:' && protocol !== 'https:') {
        throw new Error(`Unsafe URL scheme "${protocol}" in "${candidate}". Only http:// and https:// are allowed.`);
    }
    // The escape hatch disables the IP-literal check entirely.
    if (allowPrivateUrls()) {
        return candidate;
    }
    const hostIsIpLiteral = isIPv4Literal(hostname) || isIPv6Literal(hostname);
    if (hostIsIpLiteral && isPrivateOrReservedIP(hostname)) {
        throw new Error(`SSRF protection: URL "${candidate}" resolves to private/loopback address "${hostname}". Only public URLs are allowed.`);
    }
    return candidate;
}
94
/**
 * Default hostname resolver: asks dns.lookup for every address record of the
 * host (in the order the resolver returned them) and yields the address strings.
 */
const defaultHostLookup = async (hostname) => {
    const lookupOptions = { all: true, verbatim: true };
    const entries = await dns.lookup(hostname, lookupOptions);
    const addresses = [];
    for (const entry of entries) {
        addresses.push(entry.address);
    }
    return addresses;
};
98
/**
 * Enforce that a hostname points only at public IP addresses.
 *
 * Does nothing when private URLs are allowed. Otherwise throws for localhost
 * names, for private/reserved IP literals, when the lookup fails or returns
 * no addresses, and when any resolved address is private/reserved.
 * Public IP literals are accepted without a lookup.
 */
export async function ensurePublicHostname(hostname, lookup = defaultHostLookup) {
    if (allowPrivateUrls()) {
        return;
    }
    const host = hostname.trim().toLowerCase();
    const isLocalName = LOCALHOST_HOSTNAMES.has(host) || host.endsWith('.localhost');
    if (isLocalName) {
        throw new Error(`SSRF protection: Hostname "${hostname}" resolves to localhost.`);
    }
    // IP literals are judged directly; there is nothing to resolve.
    if (isIPv4Literal(host) || isIPv6Literal(host)) {
        if (isPrivateOrReservedIP(host)) {
            throw new Error(`SSRF protection: Hostname "${hostname}" resolves to private/loopback address.`);
        }
        return;
    }
    let addresses;
    try {
        addresses = await lookup(host);
    }
    catch (cause) {
        const reason = cause instanceof Error ? cause.message : String(cause);
        throw new Error(`DNS resolution failed for "${hostname}": ${reason}`);
    }
    if (addresses.length === 0) {
        throw new Error(`DNS resolution failed for "${hostname}": no addresses returned`);
    }
    // A single private record is enough to reject the host.
    const offending = addresses.find((candidate) => isPrivateOrReservedIP(candidate));
    if (offending !== undefined) {
        throw new Error(`SSRF protection: Hostname "${hostname}" resolves to private IP "${offending}".`);
    }
}
132
/**
 * Run the synchronous URL checks, then confirm the URL's hostname resolves
 * to public address space. Returns the validated URL string.
 */
export async function validatePublicUrl(url, lookup = defaultHostLookup) {
    const safeUrl = validateUrl(url);
    const { hostname } = new URL(safeUrl);
    await ensurePublicHostname(hostname, lookup);
    return safeUrl;
}
141
/**
 * Validate a filesystem path against traversal and return it as an absolute path.
 *
 * Any input containing ".." is rejected outright. The remaining input is
 * resolved with path.resolve; when workspaceRoot is supplied, the resolved
 * path must lie inside the resolved root. The containment test is purely
 * lexical (path.relative) and does not consult the filesystem.
 */
export function validatePath(inputPath, label, workspaceRoot) {
    const candidate = inputPath.trim();
    // Refuse traversal markers before any resolution takes place.
    if (candidate.indexOf('..') !== -1) {
        throw new Error(`Path traversal detected in ${label}: "${candidate}". Paths must not contain ".." sequences.`);
    }
    const absolutePath = path.resolve(candidate);
    if (!workspaceRoot) {
        return absolutePath;
    }
    const rootPath = path.resolve(workspaceRoot);
    // A relative path that climbs upward or is absolute points outside the root.
    const fromRoot = path.relative(rootPath, absolutePath);
    const escapesRoot = fromRoot.startsWith('..') || path.isAbsolute(fromRoot);
    if (escapesRoot) {
        throw new Error(`${label} escapes workspace root "${rootPath}". Got: "${absolutePath}"`);
    }
    return absolutePath;
}
165
/**
 * Validate that a navigation target stays on the base URL's host.
 * The target passes when its hostname equals the base hostname or is a
 * subdomain of it; scheme and port are not compared.
 * Returns the validated navigation URL string; throws otherwise.
 */
export function validateNavigationUrl(navigationUrl, baseUrl) {
    const safeUrl = validateUrl(navigationUrl);
    const targetHost = new URL(safeUrl).hostname;
    const baseHost = new URL(baseUrl).hostname;
    const isSameHost = targetHost === baseHost;
    const isSubdomain = targetHost.endsWith('.' + baseHost);
    if (isSameHost || isSubdomain) {
        return safeUrl;
    }
    throw new Error(`Navigation to "${targetHost}" blocked. Only same-origin navigation allowed (base: "${baseHost}").`);
}
180
/**
 * Turn a requested page limit into a safe integer between 1 and 500.
 * Missing, NaN, or negative input yields the default of 50.
 * Zero is treated as "unlimited", which still means the ceiling of 500.
 * Fractions are rounded down before clamping.
 */
export function validateMaxPages(value) {
    const FALLBACK = 50;
    const CEILING = 500;
    const unusable = value === undefined || value === null || isNaN(value) || value < 0;
    if (unusable) {
        return FALLBACK;
    }
    if (value === 0) {
        return CEILING;
    }
    const whole = Math.floor(value);
    if (whole < 1) {
        return 1;
    }
    return whole > CEILING ? CEILING : whole;
}
196
/**
 * Guard against oversized selector strings.
 * Selectors of up to 500 characters are returned unchanged; longer ones throw.
 */
export function validateSelector(selector) {
    const MAX_SELECTOR_LENGTH = 500;
    const tooLong = selector.length > MAX_SELECTOR_LENGTH;
    if (!tooLong) {
        return selector;
    }
    throw new Error(`Selector too long (${selector.length} chars, max ${MAX_SELECTOR_LENGTH}). Possible injection attempt.`);
}
207
/**
 * Sanitize a string value by stripping script-injection patterns:
 * <script>...</script> elements, "javascript:" URI prefixes, and inline
 * event-handler attributes (onclick=, onerror=, ...).
 *
 * The patterns are re-applied until the string stops changing. A single pass
 * is not enough, because deleting one match can splice the surrounding text
 * into a new one (e.g. "javajavascript:script:" collapses to "javascript:").
 *
 * @param value - Raw step value.
 * @returns The value with every listed pattern removed.
 */
export function sanitizeValue(value) {
    const strippedPatterns = [
        // <script> elements (case-insensitive, with attributes, tolerant of "</script >")
        /<script\b[^>]*>[\s\S]*?<\/script\s*>/gi,
        // javascript: URIs
        /javascript\s*:/gi,
        // Event handlers in attributes (onclick=, onerror=, etc.)
        /\bon\w+\s*=/gi,
    ];
    let sanitized = value;
    let previous;
    // Every replacement shortens the string, so this loop always terminates.
    do {
        previous = sanitized;
        for (const pattern of strippedPatterns) {
            sanitized = sanitized.replace(pattern, '');
        }
    } while (sanitized !== previous);
    return sanitized;
}
220
/**
 * Make a session ID safe to use as a file or directory name.
 * Every character other than a letter, digit, dash, or underscore
 * is replaced with an underscore.
 */
export function sanitizeSessionId(sessionId) {
    const unsafeCharacters = /[^a-zA-Z0-9_-]/g;
    return sessionId.replace(unsafeCharacters, '_');
}
228
+ //# sourceMappingURL=validation.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"validation.js","sourceRoot":"","sources":["../../src/core/validation.ts"],"names":[],"mappings":"AAAA,kFAAkF;AAElF,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,GAAG,MAAM,mBAAmB,CAAC;AAEpC,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAC;IAClC,WAAW;IACX,uBAAuB;CACxB,CAAC,CAAC;AAEH,SAAS,gBAAgB;IACvB,OAAO,OAAO,CAAC,GAAG,CAAC,4BAA4B,KAAK,GAAG,CAAC;AAC1D,CAAC;AAED,SAAS,aAAa,CAAC,KAAa;IAClC,OAAO,sBAAsB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AAC5C,CAAC;AAED,SAAS,aAAa,CAAC,KAAa;IAClC,OAAO,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;AAC7B,CAAC;AAED,SAAS,kBAAkB,CAAC,EAAU;IACpC,MAAM,KAAK,GAAG,EAAE,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACtC,IAAI,KAAK,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAChC,OAAO,KAAK,CAAC,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;IACvC,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,qBAAqB,CAAC,EAAU;IAC9C,MAAM,UAAU,GAAG,kBAAkB,CAAC,EAAE,CAAC,CAAC;IAE1C,2CAA2C;IAC3C,IAAI,UAAU,KAAK,IAAI,IAAI,UAAU,KAAK,KAAK,IAAI,UAAU,KAAK,yCAAyC,EAAE,CAAC;QAC5G,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,UAAU,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QACnC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,0BAA0B;IAC1B,IAAI,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QAC/D,OAAO,IAAI,CAAC;IACd,CAAC;IAED,gBAAgB;IAChB,gBAAgB;IAChB,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACpC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACrC,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAEtC,yBAAyB;QACzB,IAAI,KAAK,KAAK,GAAG;YAAE,OAAO,IAAI,CAAC;QAE/B,uBAAuB;QACvB,IAAI,KAAK,KAAK,EAAE;YAAE,OAAO,IAAI,CAAC;QAE9B,0BAA0B;QAC1B,IAAI,KAAK,KAAK,GAAG,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,IAAI,EAAE;YAAE,OAAO,IAAI,CAAC;QAE/D,2BAA2B;QAC3B,IAAI,KAAK,KAAK,GAAG,IAAI,MAAM,KAAK,GAAG;YAAE,OAAO,IAAI,CAAC;QAEjD,8BAA8B;QAC9B,IAAI,KAAK,KAAK,GAAG,IAAI,MAAM,KAAK,GAAG;YAAE,OAAO,IAAI,CAAC;IACnD,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,WAAW,CAAC,GAAW;IACrC,MAAM,OAAO,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;IAC3B,IAAI,MAAW,CAAC;IAChB,IAAI,CAAC;QACH,MAAM
,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,iBAAiB,OAAO,6CAA6C,CAAC,CAAC;IACzF,CAAC;IAED,MAAM,gBAAgB,GAAG,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;IAC7C,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;QAChD,MAAM,IAAI,KAAK,CACb,sBAAsB,MAAM,CAAC,QAAQ,SAAS,OAAO,2CAA2C,CACjG,CAAC;IACJ,CAAC;IAED,4DAA4D;IAC5D,4GAA4G;IAC5G,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;IAEjC,6CAA6C;IAC7C,IAAI,CAAC,gBAAgB,EAAE,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;QAChF,IAAI,qBAAqB,CAAC,QAAQ,CAAC,EAAE,CAAC;YACpC,MAAM,IAAI,KAAK,CACb,yBAAyB,OAAO,2CAA2C,QAAQ,kCAAkC,CACtH,CAAC;QACJ,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAID,MAAM,iBAAiB,GAAiB,KAAK,EAAE,QAAgB,EAAE,EAAE;IACjE,MAAM,OAAO,GAAG,MAAM,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;IAC1E,OAAO,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;AAC/C,CAAC,CAAC;AAEF;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,QAAgB,EAChB,SAAuB,iBAAiB;IAExC,IAAI,gBAAgB,EAAE,EAAE,CAAC;QACvB,OAAO;IACT,CAAC;IAED,MAAM,cAAc,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAErD,IAAI,mBAAmB,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,cAAc,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;QACrF,MAAM,IAAI,KAAK,CAAC,8BAA8B,QAAQ,0BAA0B,CAAC,CAAC;IACpF,CAAC;IAED,IAAI,aAAa,CAAC,cAAc,CAAC,IAAI,aAAa,CAAC,cAAc,CAAC,EAAE,CAAC;QACnE,IAAI,qBAAqB,CAAC,cAAc,CAAC,EAAE,CAAC;YAC1C,MAAM,IAAI,KAAK,CAAC,8BAA8B,QAAQ,yCAAyC,CAAC,CAAC;QACnG,CAAC;QACD,OAAO;IACT,CAAC;IAED,IAAI,iBAA2B,CAAC;IAChC,IAAI,CAAC;QACH,iBAAiB,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAC;IACnD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,8BAA8B,QAAQ,MAAM,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IACxH,CAAC;IAED,IAAI,iBAAiB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACnC,MAAM,IAAI,KAAK,CAAC,8BAA8B,QAAQ,0BAA0B,CAAC,CAAC;IACpF,CAAC;IAED,KAAK,MAAM,OAAO,IAAI,iBAAiB,EAAE,CAAC;QACxC,IAAI,qBAAqB,CAAC,OAAO,CAAC,EAAE,CAAC;YACnC,MAAM,IAAI,KAAK,CAAC,8BAA8B,QAAQ,6BAA6B,OAAO,IAAI,CAAC,CAAC;QAClG,C
AAC;IACH,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,GAAW,EACX,SAAuB,iBAAiB;IAExC,MAAM,YAAY,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IACtC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC,QAAQ,CAAC;IAChD,MAAM,oBAAoB,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAC7C,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,SAAiB,EAAE,KAAa,EAAE,aAAsB;IACnF,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;IAEjC,qDAAqD;IACrD,2DAA2D;IAC3D,IAAI,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,KAAK,CACb,8BAA8B,KAAK,MAAM,OAAO,2CAA2C,CAC5F,CAAC;IACJ,CAAC;IAED,2BAA2B;IAC3B,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IAEvC,mDAAmD;IACnD,IAAI,aAAa,EAAE,CAAC;QAClB,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;QACjD,8FAA8F;QAC9F,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;QAC3D,IAAI,YAAY,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;YACnE,MAAM,IAAI,KAAK,CAAC,GAAG,KAAK,4BAA4B,YAAY,YAAY,QAAQ,GAAG,CAAC,CAAC;QAC3F,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,qBAAqB,CAAC,aAAqB,EAAE,OAAe;IAC1E,MAAM,SAAS,GAAG,WAAW,CAAC,aAAa,CAAC,CAAC;IAC7C,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC;IACrC,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;IAEpC,oCAAoC;IACpC,IAAI,SAAS,CAAC,QAAQ,KAAK,UAAU,CAAC,QAAQ;QAC1C,CAAC,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC5D,MAAM,IAAI,KAAK,CACb,kBAAkB,SAAS,CAAC,QAAQ,0DAA0D,UAAU,CAAC,QAAQ,KAAK,CACvH,CAAC;IACJ,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,gBAAgB,CAAC,KAAyB;IACxD,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,CAAC,KAAK,CAAC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;QACvE,OAAO,EAAE,CAAC,CAAC,eAAe;IAC5B,CAAC;IACD,uEAAuE;IACvE,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;QAChB,OAAO,GAAG,CAAC;IACb,CAAC;IACD,yDAAyD;IACzD,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;AACvD,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,QAAgB;IAC/C,MAAM,mBAAmB,GAAG,GAAG,CAAC;IAChC,IAAI,QAAQ,CAAC,MAAM,GAAG,mBAAmB,EAAE,CAA
C;QAC1C,MAAM,IAAI,KAAK,CACb,sBAAsB,QAAQ,CAAC,MAAM,eAAe,mBAAmB,gCAAgC,CACxG,CAAC;IACJ,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,KAAa;IACzC,6DAA6D;IAC7D,IAAI,SAAS,GAAG,KAAK,CAAC,OAAO,CAAC,qCAAqC,EAAE,EAAE,CAAC,CAAC;IACzE,yBAAyB;IACzB,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC;IACtD,gEAAgE;IAChE,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC;IACnD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,SAAiB;IACjD,gDAAgD;IAChD,OAAO,SAAS,CAAC,OAAO,CAAC,iBAAiB,EAAE,GAAG,CAAC,CAAC;AACnD,CAAC"}
@@ -0,0 +1,58 @@
1
+ import type { Page } from 'playwright';
2
+ import type { BrowserManager } from '../browser/browser-manager.js';
3
+ import type { CrawlResult, PageData } from '../types/discovery.js';
4
+ /**
5
+ * Options for SiteCrawler configuration
6
+ */
7
+ export interface CrawlerOptions {
8
+ maxConcurrency?: number;
9
+ maxPages?: number;
10
+ excludePatterns?: string[];
11
+ onPageCrawled?: (url: string, count: number) => void;
12
+ pageProcessor?: (page: Page, url: string) => Promise<Partial<PageData>>;
13
+ additionalUrls?: string[];
14
+ }
15
+ /**
16
+ * Recursive web crawler that visits all same-hostname pages
17
+ * Uses BrowserManager for stealth and cookie dismissal
18
+ */
19
+ export declare class SiteCrawler {
20
+ private browserManager;
21
+ private visited;
22
+ private queue;
23
+ private pages;
24
+ private hostname;
25
+ private maxConcurrency;
26
+ private maxPages;
27
+ private excludePatterns;
28
+ private onPageCrawled?;
29
+ private pageProcessor?;
30
+ constructor(browserManager: BrowserManager, options?: CrawlerOptions);
31
+ /**
32
+ * Crawls all same-hostname pages starting from seedUrl
33
+ * @param seedUrl - Starting URL to crawl from
34
+ * @returns CrawlResult with all discovered pages
35
+ */
36
+ crawl(seedUrl: string, additionalUrls?: string[]): Promise<CrawlResult>;
37
+ /**
38
+ * Crawls a single page: navigate, extract data, discover links
39
+ */
40
+ private crawlPage;
41
+ /**
42
+ * Extracts all links from a page
43
+ */
44
+ private extractLinks;
45
+ /**
46
+ * Normalizes a URL for deduplication
47
+ * - Removes fragment (#...)
48
+ * - Removes trailing slash (except root /)
49
+ * - Lowercases hostname
50
+ * - Sorts query parameters
51
+ * - Removes default ports
52
+ */
53
+ private normalizeUrl;
54
+ /**
55
+ * Checks if URL matches any exclude pattern
56
+ */
57
+ private shouldExclude;
58
+ }
@@ -0,0 +1,240 @@
1
+ // Recursive web crawler with URL queue, deduplication, and pageProcessor callback
2
/**
 * Recursive web crawler that visits all same-hostname pages.
 * Uses BrowserManager for stealth and cookie dismissal.
 *
 * URLs are normalized before they are queued so that equivalent spellings
 * (fragment, trailing slash, query-parameter order, default port) are
 * visited only once. Pages are processed in batches of `maxConcurrency`.
 */
export class SiteCrawler {
    browserManager;
    visited; // normalized URLs already handed to crawlPage
    queue; // normalized URLs waiting to be visited
    pages; // page data collected by the current crawl
    hostname = ''; // seed URL hostname used for same-site filtering
    maxConcurrency;
    maxPages;
    excludePatterns;
    onPageCrawled;
    pageProcessor;
    additionalUrls; // extra seed URLs supplied through the options object
    /**
     * @param browserManager - object exposing `newPage(url)`, which resolves to the opened page
     * @param options - optional settings:
     *   - `maxConcurrency`: batch size, default 3; values below 1 or non-numeric become 1
     *   - `maxPages`: maximum number of stored pages; 0, negative or non-numeric means unlimited
     *   - `excludePatterns`: see `shouldExclude`
     *   - `onPageCrawled(url, count)`: called after each page is stored
     *   - `pageProcessor(page, url)`: its resolved value is merged into the page data
     *   - `additionalUrls`: extra URLs queued next to the seed on every crawl
     */
    constructor(browserManager, options) {
        this.browserManager = browserManager;
        this.visited = new Set();
        this.queue = [];
        this.pages = [];
        // A batch size below 1 would make splice() return an empty batch
        // forever, so the crawl loop could never drain the queue.
        const concurrency = Math.floor(Number(options?.maxConcurrency ?? 3));
        this.maxConcurrency = concurrency >= 1 ? concurrency : 1;
        // Rounded up to a whole page count so the remaining budget used for
        // batch sizing is always an integer.
        const pageLimit = Math.ceil(Number(options?.maxPages ?? 0));
        this.maxPages = pageLimit > 0 ? pageLimit : 0;
        this.excludePatterns = options?.excludePatterns ?? [];
        this.onPageCrawled = options?.onPageCrawled;
        this.pageProcessor = options?.pageProcessor;
        this.additionalUrls = Array.isArray(options?.additionalUrls)
            ? [...options.additionalUrls]
            : [];
    }
    /**
     * Crawls all same-hostname pages starting from seedUrl.
     *
     * Each call starts from a clean state, so results returned by an earlier
     * call are never mutated by a later one.
     *
     * @param seedUrl - Starting URL to crawl from (must be an absolute URL)
     * @param additionalUrls - Optional extra URLs; relative values are resolved
     *   against seedUrl and only same-hostname URLs are queued
     * @returns CrawlResult with all discovered pages
     * @throws TypeError if seedUrl cannot be parsed as a URL
     */
    async crawl(seedUrl, additionalUrls) {
        const startTime = Date.now();
        // Extract hostname from seed URL (throws on an invalid seed)
        this.hostname = new URL(seedUrl).hostname;
        // Fresh state for this crawl
        this.visited = new Set();
        this.queue = [];
        this.pages = [];
        // The seed is queued unconditionally; crawlPage still applies the
        // exclude patterns to it.
        this.queue.push(this.normalizeUrl(seedUrl));
        // Extra seeds from the options object and from the call (e.g. SPA routes)
        for (const url of [...this.additionalUrls, ...(additionalUrls ?? [])]) {
            try {
                const urlObj = new URL(url, seedUrl); // Resolve relative URLs
                if (urlObj.hostname === this.hostname) {
                    this.enqueue(urlObj.href);
                }
            }
            catch {
                // Skip invalid URLs
            }
        }
        let warnedLargeSite = false;
        // Process queue in batches
        while (this.queue.length > 0) {
            // Remaining page budget; Infinity when no limit is configured
            const remaining = this.maxPages > 0 ? this.maxPages - this.pages.length : Infinity;
            if (remaining <= 0) {
                break;
            }
            // Warn once when the crawl grows past 50 pages. The page count
            // advances in batch-sized steps, so an equality test could miss it.
            if (!warnedLargeSite && this.pages.length >= 50) {
                console.warn('⚠️ Discovered 50+ pages. Crawl continuing...');
                warnedLargeSite = true;
            }
            // Never take more URLs than the page budget allows, otherwise a
            // batch could push the result past maxPages.
            const batch = this.queue.splice(0, Math.min(this.maxConcurrency, remaining));
            // Process batch concurrently
            const results = await Promise.allSettled(batch.map(url => this.crawlPage(url)));
            // Log errors without failing entire crawl
            results.forEach((result, index) => {
                if (result.status === 'rejected') {
                    console.error(`Error crawling ${batch[index]}: ${result.reason}`);
                }
            });
        }
        const duration = Date.now() - startTime;
        return {
            pages: this.pages,
            brokenLinks: [], // Populated by link validator in later plan
            totalPagesDiscovered: this.pages.length,
            totalLinksChecked: 0, // Populated by link validator in later plan
            crawlDuration: duration,
            spaDetected: { framework: 'none' } // Populated by SPA detector in later plan
        };
    }
    /**
     * Crawls a single page: navigate, extract data, discover links.
     * Resolves without doing anything when the URL was already visited or is
     * excluded. Rejects when navigation, extraction or the pageProcessor fails;
     * the page is closed in either case.
     *
     * @param url - normalized URL to visit
     */
    async crawlPage(url) {
        if (this.visited.has(url) || this.shouldExclude(url)) {
            return;
        }
        // Marked before the first await so that concurrent tasks of the same
        // batch cannot start the same URL twice.
        this.visited.add(url);
        let page = null;
        try {
            // Open page via BrowserManager (handles stealth + cookie dismissal)
            page = await this.browserManager.newPage(url);
            const title = await page.title();
            const links = await this.extractLinks(page, url);
            // Minimal PageData; pageProcessor may fill in the empty collections
            const pageData = {
                url,
                title,
                forms: [],
                buttons: [],
                links,
                menus: [],
                otherInteractive: [],
                crawledAt: new Date().toISOString()
            };
            if (this.pageProcessor) {
                const extraData = await this.pageProcessor(page, url);
                Object.assign(pageData, extraData);
            }
            this.pages.push(pageData);
            // Notify progress callback
            if (this.onPageCrawled) {
                this.onPageCrawled(url, this.pages.length);
            }
            // Queue newly discovered internal links
            for (const link of links) {
                if (link.isInternal) {
                    this.enqueue(link.href);
                }
            }
        }
        finally {
            // Always close page
            if (page) {
                await page.close();
            }
        }
    }
    /**
     * Normalizes a URL and appends it to the queue unless it is already
     * visited, already queued, or matches an exclude pattern.
     *
     * @param url - absolute URL to queue
     */
    enqueue(url) {
        const normalized = this.normalizeUrl(url);
        if (this.visited.has(normalized) ||
            this.queue.includes(normalized) ||
            this.shouldExclude(normalized)) {
            return;
        }
        this.queue.push(normalized);
    }
    /**
     * Extracts all links from a page.
     *
     * @param page - opened page exposing `$$eval`
     * @param baseUrl - URL of the page (unused; hrefs are already absolute)
     * @returns one `{ href, text, isInternal }` entry per `a[href]` element
     */
    async extractLinks(page, baseUrl) {
        const links = await page.$$eval('a[href]', (anchors) => {
            return anchors.map((a) => {
                const anchor = a;
                return {
                    href: anchor.href, // Already resolved by browser
                    text: anchor.textContent?.trim() || ''
                };
            });
        });
        // Classify links as internal/external
        return links.map(link => {
            let isInternal = false;
            try {
                isInternal = new URL(link.href).hostname === this.hostname;
            }
            catch {
                // Invalid URL: treated as external
            }
            return { href: link.href, text: link.text, isInternal };
        });
    }
    /**
     * Normalizes a URL for deduplication
     * - Removes fragment (#...)
     * - Removes trailing slash (except root /), also when a query string follows
     * - Lowercases hostname
     * - Sorts query parameters by name
     * - Removes default ports
     *
     * @param url - absolute URL
     * @returns the normalized URL, or the input unchanged if it cannot be parsed
     */
    normalizeUrl(url) {
        try {
            const urlObj = new URL(url);
            // Remove fragment
            urlObj.hash = '';
            // Lowercase hostname
            urlObj.hostname = urlObj.hostname.toLowerCase();
            // Remove default ports
            if ((urlObj.protocol === 'http:' && urlObj.port === '80') ||
                (urlObj.protocol === 'https:' && urlObj.port === '443')) {
                urlObj.port = '';
            }
            // Sort query parameters
            const params = Array.from(urlObj.searchParams.entries()).sort((a, b) => a[0].localeCompare(b[0]));
            urlObj.search = '';
            for (const [key, value] of params) {
                urlObj.searchParams.append(key, value);
            }
            // Remove trailing slash (except root). Done on the path itself:
            // checking the end of the full href misses "/foo/?a=1".
            if (urlObj.pathname.length > 1 && urlObj.pathname.endsWith('/')) {
                urlObj.pathname = urlObj.pathname.slice(0, -1);
            }
            return urlObj.href;
        }
        catch {
            return url; // Return as-is if parsing fails
        }
    }
    /**
     * Checks if URL matches any exclude pattern.
     *
     * Pattern forms: `*text*` (contains), `*text` (ends with), `text*`
     * (starts with); a pattern without `*` at either end matches as a
     * substring. Empty patterns are ignored, because an empty string is
     * contained in every URL and would exclude the whole site.
     *
     * @param url - URL to test
     * @returns true when the URL must not be crawled
     */
    shouldExclude(url) {
        return this.excludePatterns.some(pattern => {
            if (!pattern) {
                return false;
            }
            const leadingStar = pattern.startsWith('*');
            const trailingStar = pattern.endsWith('*');
            if (leadingStar && trailingStar) {
                // *pattern* - contains
                return url.includes(pattern.slice(1, -1));
            }
            if (leadingStar) {
                // *.ext - ends with
                return url.endsWith(pattern.slice(1));
            }
            if (trailingStar) {
                // prefix* - starts with
                return url.startsWith(pattern.slice(0, -1));
            }
            // no wildcard - substring match
            return url.includes(pattern);
        });
    }
}
240
+ //# sourceMappingURL=crawler.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"crawler.js","sourceRoot":"","sources":["../../src/discovery/crawler.ts"],"names":[],"mappings":"AAAA,kFAAkF;AAkBlF;;;GAGG;AACH,MAAM,OAAO,WAAW;IACd,cAAc,CAAiB;IAC/B,OAAO,CAAc,CAAS,kCAAkC;IAChE,KAAK,CAAW,CAAc,gBAAgB;IAC9C,KAAK,CAAa,CAAY,sBAAsB;IACpD,QAAQ,GAAW,EAAE,CAAC,CAAQ,kCAAkC;IAChE,cAAc,CAAS;IACvB,QAAQ,CAAS;IACjB,eAAe,CAAW;IAC1B,aAAa,CAAwC;IACrD,aAAa,CAA2D;IAEhF,YAAY,cAA8B,EAAE,OAAwB;QAClE,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;QACrC,IAAI,CAAC,OAAO,GAAG,IAAI,GAAG,EAAE,CAAC;QACzB,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC;QAChB,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC;QAChB,IAAI,CAAC,cAAc,GAAG,OAAO,EAAE,cAAc,IAAI,CAAC,CAAC;QACnD,IAAI,CAAC,QAAQ,GAAG,OAAO,EAAE,QAAQ,IAAI,CAAC,CAAC;QACvC,IAAI,CAAC,eAAe,GAAG,OAAO,EAAE,eAAe,IAAI,EAAE,CAAC;QACtD,IAAI,CAAC,aAAa,GAAG,OAAO,EAAE,aAAa,CAAC;QAC5C,IAAI,CAAC,aAAa,GAAG,OAAO,EAAE,aAAa,CAAC;IAC9C,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,KAAK,CAAC,OAAe,EAAE,cAAyB;QACpD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,iCAAiC;QACjC,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;QACpC,IAAI,CAAC,QAAQ,GAAG,UAAU,CAAC,QAAQ,CAAC;QAEpC,wBAAwB;QACxB,MAAM,cAAc,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;QAClD,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAEhC,uDAAuD;QACvD,IAAI,cAAc,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAChD,KAAK,MAAM,GAAG,IAAI,cAAc,EAAE,CAAC;gBACjC,IAAI,CAAC;oBACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,CAAC,wBAAwB;oBAC9D,IAAI,MAAM,CAAC,QAAQ,KAAK,IAAI,CAAC,QAAQ,EAAE,CAAC;wBACtC,MAAM,UAAU,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;wBAClD,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;4BACtE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;wBAC9B,CAAC;oBACH,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,oBAAoB;gBACtB,CAAC;YACH,CAAC;QACH,CAAC;QAED,2BAA2B;QAC3B,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,uBAAuB;YACvB,IAAI,IAAI,CAAC,QAAQ,GAAG,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAC5D,MAAM;YACR,CAAC;YAED,sBAAsB;YACtB,IAAI,IAAI,CAAC,KAAK,CAAC,MA
AM,KAAK,EAAE,EAAE,CAAC;gBAC7B,OAAO,CAAC,IAAI,CAAC,+CAA+C,CAAC,CAAC;YAChE,CAAC;YAED,qBAAqB;YACrB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;YAExD,6BAA6B;YAC7B,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CACtC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CACtC,CAAC;YAEF,0CAA0C;YAC1C,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE;gBAChC,IAAI,MAAM,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;oBACjC,OAAO,CAAC,KAAK,CAAC,kBAAkB,KAAK,CAAC,KAAK,CAAC,KAAK,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;gBACpE,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;QAED,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QAExC,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,WAAW,EAAE,EAAE,EAAG,4CAA4C;YAC9D,oBAAoB,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM;YACvC,iBAAiB,EAAE,CAAC,EAAG,4CAA4C;YACnE,aAAa,EAAE,QAAQ;YACvB,WAAW,EAAE,EAAE,SAAS,EAAE,MAAM,EAAE,CAAE,0CAA0C;SAC/E,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,SAAS,CAAC,GAAW;QACjC,qDAAqD;QACrD,IAAI,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YAC1B,OAAO;QACT,CAAC;QAED,IAAI,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,EAAE,CAAC;YAC5B,OAAO;QACT,CAAC;QAED,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAEtB,IAAI,IAAI,GAAgB,IAAI,CAAC;QAE7B,IAAI,CAAC;YACH,oEAAoE;YACpE,IAAI,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YAE9C,gBAAgB;YAChB,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;YAEjC,gBAAgB;YAChB,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;YAEjD,0BAA0B;YAC1B,MAAM,QAAQ,GAAa;gBACzB,GAAG;gBACH,KAAK;gBACL,KAAK,EAAE,EAAE;gBACT,OAAO,EAAE,EAAE;gBACX,KAAK;gBACL,KAAK,EAAE,EAAE;gBACT,gBAAgB,EAAE,EAAE;gBACpB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACpC,CAAC;YAEF,uDAAuD;YACvD,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACvB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;gBACtD,MAAM,CAAC,MAAM,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;YACrC,CAAC;YAED,kBAAkB;YAClB,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAE1B,2BAA2B;YAC3B,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACvB,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YAC7C,CAAC;YAED,gCAA
gC;YAChC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,IAAI,IAAI,CAAC,UAAU,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;oBACpD,MAAM,UAAU,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBAChD,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;wBACtE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;oBAC9B,CAAC;gBACH,CAAC;YACH,CAAC;QAEH,CAAC;gBAAS,CAAC;YACT,oBAAoB;YACpB,IAAI,IAAI,EAAE,CAAC;gBACT,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;YACrB,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,YAAY,CAAC,IAAU,EAAE,OAAe;QACpD,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,OAAO,EAAE,EAAE;YACrD,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;gBACvB,MAAM,MAAM,GAAG,CAAsB,CAAC;gBACtC,OAAO;oBACL,IAAI,EAAE,MAAM,CAAC,IAAI,EAAG,8BAA8B;oBAClD,IAAI,EAAE,MAAM,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE;iBACvC,CAAC;YACJ,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,sCAAsC;QACtC,OAAO,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;YACtB,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACnC,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,UAAU,EAAE,OAAO,CAAC,QAAQ,KAAK,IAAI,CAAC,QAAQ;iBAC/C,CAAC;YACJ,CAAC;YAAC,MAAM,CAAC;gBACP,cAAc;gBACd,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,UAAU,EAAE,KAAK;iBAClB,CAAC;YACJ,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;OAOG;IACK,YAAY,CAAC,GAAW;QAC9B,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;YAE5B,kBAAkB;YAClB,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;YAEjB,qBAAqB;YACrB,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;YAEhD,uBAAuB;YACvB,IAAI,CAAC,MAAM,CAAC,QAAQ,KAAK,OAAO,IAAI,MAAM,CAAC,IAAI,KAAK,IAAI,CAAC;gBACrD,CAAC,MAAM,CAAC,QAAQ,KAAK,QAAQ,IAAI,MAAM,CAAC,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;gBAC5D,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;YACnB,CAAC;YAED,wBAAwB;YACxB,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACrE,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CA
AC,CAAC,CACzB,CAAC;YACF,MAAM,CAAC,MAAM,GAAG,EAAE,CAAC;YACnB,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC;YAEzE,sCAAsC;YACtC,IAAI,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC;YAC7B,IAAI,MAAM,CAAC,QAAQ,KAAK,GAAG,IAAI,UAAU,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBACxD,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YACvC,CAAC;YAED,OAAO,UAAU,CAAC;QACpB,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,GAAG,CAAC,CAAE,gCAAgC;QAC/C,CAAC;IACH,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,GAAW;QAC/B,OAAO,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE;YACzC,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBACrD,uBAAuB;gBACvB,OAAO,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;YAC5C,CAAC;iBAAM,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;gBACnC,oBAAoB;gBACpB,OAAO,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACxC,CAAC;iBAAM,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBACjC,wBAAwB;gBACxB,OAAO,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9C,CAAC;iBAAM,CAAC;gBACN,cAAc;gBACd,OAAO,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YAC/B,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC;CACF"}
@@ -0,0 +1,22 @@
1
+ import type { DiscoveryArtifact, SitemapNode, WorkflowPlan } from '../types/discovery.js';
2
+ export interface DiscoveryOptions {
3
+ targetUrl: string;
4
+ sessionId: string;
5
+ userHints?: string[];
6
+ maxPages?: number;
7
+ headless?: boolean;
8
+ onProgress?: (message: string) => void;
9
+ }
10
+ export interface WorkflowPlanResolution {
11
+ workflowPlans: WorkflowPlan[];
12
+ usedHeuristicFallback: boolean;
13
+ }
14
+ export declare function resolveWorkflowPlans(planPromise: Promise<WorkflowPlan[]> | null, sitemap: SitemapNode): Promise<WorkflowPlanResolution>;
15
+ /**
16
+ * Runs complete Phase 2 discovery pipeline: crawl, discover elements, detect SPA,
17
+ * validate links, build sitemap, generate workflow plans.
18
+ *
19
+ * @param options Discovery configuration
20
+ * @returns Complete discovery artifact with sitemap and workflow plans
21
+ */
22
+ export declare function runDiscovery(options: DiscoveryOptions): Promise<DiscoveryArtifact>;