afterburn-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +281 -0
- package/dist/ai/gemini-client.d.ts +21 -0
- package/dist/ai/gemini-client.js +105 -0
- package/dist/ai/gemini-client.js.map +1 -0
- package/dist/ai/index.d.ts +1 -0
- package/dist/ai/index.js +3 -0
- package/dist/ai/index.js.map +1 -0
- package/dist/analysis/diagnosis-schema.d.ts +106 -0
- package/dist/analysis/diagnosis-schema.js +54 -0
- package/dist/analysis/diagnosis-schema.js.map +1 -0
- package/dist/analysis/error-analyzer.d.ts +9 -0
- package/dist/analysis/error-analyzer.js +573 -0
- package/dist/analysis/error-analyzer.js.map +1 -0
- package/dist/analysis/index.d.ts +4 -0
- package/dist/analysis/index.js +6 -0
- package/dist/analysis/index.js.map +1 -0
- package/dist/analysis/source-mapper.d.ts +19 -0
- package/dist/analysis/source-mapper.js +329 -0
- package/dist/analysis/source-mapper.js.map +1 -0
- package/dist/analysis/ui-auditor.d.ts +9 -0
- package/dist/analysis/ui-auditor.js +104 -0
- package/dist/analysis/ui-auditor.js.map +1 -0
- package/dist/artifacts/artifact-storage.d.ts +44 -0
- package/dist/artifacts/artifact-storage.js +99 -0
- package/dist/artifacts/artifact-storage.js.map +1 -0
- package/dist/artifacts/index.d.ts +1 -0
- package/dist/artifacts/index.js +3 -0
- package/dist/artifacts/index.js.map +1 -0
- package/dist/browser/browser-manager.d.ts +45 -0
- package/dist/browser/browser-manager.js +88 -0
- package/dist/browser/browser-manager.js.map +1 -0
- package/dist/browser/challenge-detector.d.ts +10 -0
- package/dist/browser/challenge-detector.js +58 -0
- package/dist/browser/challenge-detector.js.map +1 -0
- package/dist/browser/cookie-dismisser.d.ts +18 -0
- package/dist/browser/cookie-dismisser.js +76 -0
- package/dist/browser/cookie-dismisser.js.map +1 -0
- package/dist/browser/index.d.ts +4 -0
- package/dist/browser/index.js +6 -0
- package/dist/browser/index.js.map +1 -0
- package/dist/browser/stealth-browser.d.ts +13 -0
- package/dist/browser/stealth-browser.js +59 -0
- package/dist/browser/stealth-browser.js.map +1 -0
- package/dist/cli/commander-cli.d.ts +2 -0
- package/dist/cli/commander-cli.js +150 -0
- package/dist/cli/commander-cli.js.map +1 -0
- package/dist/cli/doctor.d.ts +34 -0
- package/dist/cli/doctor.js +124 -0
- package/dist/cli/doctor.js.map +1 -0
- package/dist/cli/first-run.d.ts +6 -0
- package/dist/cli/first-run.js +58 -0
- package/dist/cli/first-run.js.map +1 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.js +5 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/progress.d.ts +11 -0
- package/dist/cli/progress.js +30 -0
- package/dist/cli/progress.js.map +1 -0
- package/dist/core/engine.d.ts +33 -0
- package/dist/core/engine.js +269 -0
- package/dist/core/engine.js.map +1 -0
- package/dist/core/index.d.ts +3 -0
- package/dist/core/index.js +4 -0
- package/dist/core/index.js.map +1 -0
- package/dist/core/validation.d.ts +52 -0
- package/dist/core/validation.js +228 -0
- package/dist/core/validation.js.map +1 -0
- package/dist/discovery/crawler.d.ts +58 -0
- package/dist/discovery/crawler.js +240 -0
- package/dist/discovery/crawler.js.map +1 -0
- package/dist/discovery/discovery-pipeline.d.ts +22 -0
- package/dist/discovery/discovery-pipeline.js +256 -0
- package/dist/discovery/discovery-pipeline.js.map +1 -0
- package/dist/discovery/element-mapper.d.ts +21 -0
- package/dist/discovery/element-mapper.js +422 -0
- package/dist/discovery/element-mapper.js.map +1 -0
- package/dist/discovery/index.d.ts +8 -0
- package/dist/discovery/index.js +8 -0
- package/dist/discovery/index.js.map +1 -0
- package/dist/discovery/link-validator.d.ts +15 -0
- package/dist/discovery/link-validator.js +137 -0
- package/dist/discovery/link-validator.js.map +1 -0
- package/dist/discovery/sitemap-builder.d.ts +19 -0
- package/dist/discovery/sitemap-builder.js +166 -0
- package/dist/discovery/sitemap-builder.js.map +1 -0
- package/dist/discovery/spa-detector.d.ts +12 -0
- package/dist/discovery/spa-detector.js +271 -0
- package/dist/discovery/spa-detector.js.map +1 -0
- package/dist/execution/error-detector.d.ts +10 -0
- package/dist/execution/error-detector.js +87 -0
- package/dist/execution/error-detector.js.map +1 -0
- package/dist/execution/evidence-capture.d.ts +8 -0
- package/dist/execution/evidence-capture.js +37 -0
- package/dist/execution/evidence-capture.js.map +1 -0
- package/dist/execution/index.d.ts +5 -0
- package/dist/execution/index.js +7 -0
- package/dist/execution/index.js.map +1 -0
- package/dist/execution/step-handlers.d.ts +48 -0
- package/dist/execution/step-handlers.js +349 -0
- package/dist/execution/step-handlers.js.map +1 -0
- package/dist/execution/test-data.d.ts +50 -0
- package/dist/execution/test-data.js +160 -0
- package/dist/execution/test-data.js.map +1 -0
- package/dist/execution/workflow-executor.d.ts +56 -0
- package/dist/execution/workflow-executor.js +331 -0
- package/dist/execution/workflow-executor.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +5 -0
- package/dist/index.js.map +1 -0
- package/dist/mcp/entry.d.ts +2 -0
- package/dist/mcp/entry.js +5 -0
- package/dist/mcp/entry.js.map +1 -0
- package/dist/mcp/index.d.ts +2 -0
- package/dist/mcp/index.js +4 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/mcp/server.d.ts +3 -0
- package/dist/mcp/server.js +19 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/mcp/tools.d.ts +2 -0
- package/dist/mcp/tools.js +162 -0
- package/dist/mcp/tools.js.map +1 -0
- package/dist/planning/heuristic-planner.d.ts +7 -0
- package/dist/planning/heuristic-planner.js +238 -0
- package/dist/planning/heuristic-planner.js.map +1 -0
- package/dist/planning/index.d.ts +3 -0
- package/dist/planning/index.js +5 -0
- package/dist/planning/index.js.map +1 -0
- package/dist/planning/plan-schema.d.ts +74 -0
- package/dist/planning/plan-schema.js +39 -0
- package/dist/planning/plan-schema.js.map +1 -0
- package/dist/planning/workflow-planner.d.ts +39 -0
- package/dist/planning/workflow-planner.js +211 -0
- package/dist/planning/workflow-planner.js.map +1 -0
- package/dist/reports/health-scorer.d.ts +14 -0
- package/dist/reports/health-scorer.js +88 -0
- package/dist/reports/health-scorer.js.map +1 -0
- package/dist/reports/html-generator.d.ts +10 -0
- package/dist/reports/html-generator.js +155 -0
- package/dist/reports/html-generator.js.map +1 -0
- package/dist/reports/index.d.ts +4 -0
- package/dist/reports/index.js +6 -0
- package/dist/reports/index.js.map +1 -0
- package/dist/reports/markdown-generator.d.ts +10 -0
- package/dist/reports/markdown-generator.js +334 -0
- package/dist/reports/markdown-generator.js.map +1 -0
- package/dist/reports/priority-ranker.d.ts +22 -0
- package/dist/reports/priority-ranker.js +608 -0
- package/dist/reports/priority-ranker.js.map +1 -0
- package/dist/screenshots/dual-format.d.ts +14 -0
- package/dist/screenshots/dual-format.js +59 -0
- package/dist/screenshots/dual-format.js.map +1 -0
- package/dist/screenshots/index.d.ts +2 -0
- package/dist/screenshots/index.js +4 -0
- package/dist/screenshots/index.js.map +1 -0
- package/dist/screenshots/screenshot-manager.d.ts +33 -0
- package/dist/screenshots/screenshot-manager.js +86 -0
- package/dist/screenshots/screenshot-manager.js.map +1 -0
- package/dist/testing/accessibility-auditor.d.ts +23 -0
- package/dist/testing/accessibility-auditor.js +44 -0
- package/dist/testing/accessibility-auditor.js.map +1 -0
- package/dist/testing/index.d.ts +4 -0
- package/dist/testing/index.js +5 -0
- package/dist/testing/index.js.map +1 -0
- package/dist/testing/meta-auditor.d.ts +16 -0
- package/dist/testing/meta-auditor.js +268 -0
- package/dist/testing/meta-auditor.js.map +1 -0
- package/dist/testing/performance-monitor.d.ts +15 -0
- package/dist/testing/performance-monitor.js +64 -0
- package/dist/testing/performance-monitor.js.map +1 -0
- package/dist/types/artifacts.d.ts +58 -0
- package/dist/types/artifacts.js +3 -0
- package/dist/types/artifacts.js.map +1 -0
- package/dist/types/discovery.d.ts +124 -0
- package/dist/types/discovery.js +3 -0
- package/dist/types/discovery.js.map +1 -0
- package/dist/types/execution.d.ts +154 -0
- package/dist/types/execution.js +3 -0
- package/dist/types/execution.js.map +1 -0
- package/dist/types/index.d.ts +2 -0
- package/dist/types/index.js +4 -0
- package/dist/types/index.js.map +1 -0
- package/dist/utils/sanitizer.d.ts +25 -0
- package/dist/utils/sanitizer.js +98 -0
- package/dist/utils/sanitizer.js.map +1 -0
- package/package.json +86 -0
- package/templates/report.hbs +202 -0
- package/templates/styles/report.css +607 -0
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
 * Report whether an IP literal falls in a loopback, link-local, or private range.
 * An `::ffff:` prefix is stripped first so IPv4-mapped dotted addresses are checked as IPv4.
 * Covers 127.0.0.0/8, 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, 169.254.0.0/16,
 * `::`, `::1`, `fe80:` prefixes, and fc00::/7.
 */
export declare function isPrivateOrReservedIP(ip: string): boolean;

/**
 * Parse a URL and require the http: or https: scheme.
 * file:, javascript:, data:, and every other scheme are rejected with an Error.
 * When the host is an IP literal it is additionally checked against the private/loopback
 * ranges; hostnames are NOT resolved here - use validatePublicUrl for the DNS-backed check.
 * @returns the trimmed input URL
 */
export declare function validateUrl(url: string): string;

/** Resolver signature: maps a hostname to the list of addresses it resolves to. */
export type HostLookupFn = (hostname: string) => Promise<string[]>;

/**
 * Reject hostnames that name or resolve to non-public addresses.
 * Throws for localhost names, private IP literals, private resolved addresses,
 * lookup failures, and empty lookup results.
 * @param lookup optional resolver override; defaults to a DNS lookup
 */
export declare function ensurePublicHostname(hostname: string, lookup?: HostLookupFn): Promise<void>;

/**
 * Run validateUrl and then ensurePublicHostname on the URL's hostname.
 * @returns the validated URL
 */
export declare function validatePublicUrl(url: string, lookup?: HostLookupFn): Promise<string>;

/**
 * Validate a filesystem path and return it resolved to an absolute path.
 * Any input containing ".." is rejected; when workspaceRoot is given the resolved
 * path must also lie inside that root.
 * @param label name used for the path in error messages
 */
export declare function validatePath(inputPath: string, label: string, workspaceRoot?: string): string;

/**
 * Validate a navigation target against a base URL.
 * The target must pass validateUrl and its hostname must equal the base hostname
 * or be a subdomain of it.
 * @returns the validated navigation URL
 */
export declare function validateNavigationUrl(navigationUrl: string, baseUrl: string): string;

/**
 * Normalise a page limit to a safe integer.
 * undefined, NaN, and negative values yield 50; 0 ("unlimited") yields 500;
 * anything else is floored and clamped to the range 1..500.
 */
export declare function validateMaxPages(value: number | undefined): number;

/**
 * Guard against oversized selectors.
 * @returns the selector unchanged
 * @throws Error when the selector is longer than 500 characters
 */
export declare function validateSelector(selector: string): string;

/**
 * Strip script-injection patterns from a step value:
 * <script> elements, javascript: URIs, and inline on*= event-handler attributes.
 */
export declare function sanitizeValue(value: string): string;

/**
 * Make a session ID safe to use in a filename.
 * Every character other than letters, digits, dash, and underscore becomes an underscore.
 */
export declare function sanitizeSessionId(sessionId: string): string;
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
// Input validation helpers for security hardening (URL, path, and numeric inputs)
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import dns from 'node:dns/promises';
|
|
4
|
+
// Hostnames treated as the local machine without consulting DNS.
const LOCALHOST_HOSTNAMES = new Set(['localhost', 'localhost.localdomain']);
|
|
8
|
+
/**
 * Report whether the private-URL opt-out is active.
 * Only the exact value '1' in AFTERBURN_ALLOW_PRIVATE_URLS enables it.
 */
function allowPrivateUrls() {
    const flag = process.env.AFTERBURN_ALLOW_PRIVATE_URLS;
    return flag === '1';
}
|
|
11
|
+
/**
 * Shape check for a dotted-quad string: four runs of digits separated by dots.
 * Octet ranges are not validated.
 */
function isIPv4Literal(value) {
    const dottedQuad = /^\d+\.\d+\.\d+\.\d+$/;
    return dottedQuad.test(value);
}
|
|
14
|
+
/**
 * Heuristic IPv6 check: any string containing a colon is treated as an IPv6 literal.
 */
function isIPv6Literal(value) {
    return value.indexOf(':') !== -1;
}
|
|
17
|
+
/**
 * Reduce an IP literal to the form the range checks expect.
 *
 * - trims and lowercases the input
 * - removes the square brackets that URL.hostname puts around IPv6 literals ("[::1]")
 * - unwraps IPv4-mapped IPv6 addresses to dotted IPv4, in both the dotted form
 *   ("::ffff:10.0.0.1") and the hex form produced by URL parsing ("::ffff:a00:1")
 */
function normalizeIpLiteral(ip) {
    let literal = ip.trim().toLowerCase();
    // URL.hostname serialises IPv6 hosts with surrounding brackets; strip them so the
    // prefix comparisons below see the bare address.
    if (literal.startsWith('[') && literal.endsWith(']')) {
        literal = literal.slice(1, -1);
    }
    if (literal.startsWith('::ffff:')) {
        const embedded = literal.slice('::ffff:'.length);
        // The URL parser rewrites the embedded IPv4 address as two hex groups
        // (127.0.0.1 -> 7f00:1); convert those back to dotted decimal.
        const hexGroups = /^([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(embedded);
        if (hexGroups) {
            const high = parseInt(hexGroups[1], 16);
            const low = parseInt(hexGroups[2], 16);
            return [high >> 8, high & 0xff, low >> 8, low & 0xff].join('.');
        }
        return embedded;
    }
    return literal;
}
/**
 * Check if an IP address is private or reserved (unspecified, loopback, link-local, private ranges).
 * Accepts bare literals as well as the bracketed IPv6 form returned by URL.hostname.
 * Blocks: 0.0.0.0/8, 127.0.0.0/8, 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, 169.254.0.0/16,
 * ::, ::1, fe80::/10, fc00::/7, and the IPv4-mapped IPv6 forms of the IPv4 ranges.
 *
 * @param ip IP literal to classify
 * @returns true when the address must not be reached from a public-only context
 */
export function isPrivateOrReservedIP(ip) {
    const normalized = normalizeIpLiteral(ip);
    // Unspecified / loopback IPv6
    if (normalized === '::' || normalized === '::1' || normalized === '0000:0000:0000:0000:0000:0000:0000:0001') {
        return true;
    }
    // IPv6 link-local (fe80::/10 spans first groups fe80 through febf)
    if (/^fe[89ab][0-9a-f]:/.test(normalized)) {
        return true;
    }
    // IPv6 unique-local (fc00::/7)
    if (normalized.startsWith('fc') || normalized.startsWith('fd')) {
        return true;
    }
    // IPv4 patterns
    const parts = normalized.split('.');
    if (parts.length === 4) {
        const first = parseInt(parts[0], 10);
        const second = parseInt(parts[1], 10);
        // 0.0.0.0/8 ("this host"; commonly routed to the local machine)
        if (first === 0)
            return true;
        // 127.0.0.0/8 (loopback)
        if (first === 127)
            return true;
        // 10.0.0.0/8 (private)
        if (first === 10)
            return true;
        // 172.16.0.0/12 (private)
        if (first === 172 && second >= 16 && second <= 31)
            return true;
        // 192.168.0.0/16 (private)
        if (first === 192 && second === 168)
            return true;
        // 169.254.0.0/16 (link-local)
        if (first === 169 && second === 254)
            return true;
    }
    return false;
}
|
|
65
|
+
/**
 * Parse a URL and require the http: or https: scheme.
 * file:, javascript:, data:, and every other scheme are rejected.
 * Unless private URLs are explicitly allowed, a host written as an IP literal is also
 * checked against the private/loopback ranges. Hostnames are not resolved here; the
 * DNS-backed check is asynchronous and lives in ensurePublicHostname.
 *
 * @param url candidate URL (surrounding whitespace is ignored)
 * @returns the trimmed URL
 * @throws Error when the URL is unparsable, uses another scheme, or targets a private IP literal
 */
export function validateUrl(url) {
    const candidate = url.trim();
    let target;
    try {
        target = new URL(candidate);
    }
    catch {
        throw new Error(`Invalid URL: "${candidate}". Must be a valid http:// or https:// URL.`);
    }
    const { protocol, hostname } = target;
    if (protocol !== 'http:' && protocol !== 'https:') {
        throw new Error(`Unsafe URL scheme "${protocol}" in "${candidate}". Only http:// and https:// are allowed.`);
    }
    // The opt-out disables the SSRF guard entirely.
    if (allowPrivateUrls()) {
        return candidate;
    }
    // Synchronous guard: only hosts that are already IP literals can be judged here.
    const hostIsIpLiteral = isIPv4Literal(hostname) || isIPv6Literal(hostname);
    if (hostIsIpLiteral && isPrivateOrReservedIP(hostname)) {
        throw new Error(`SSRF protection: URL "${candidate}" resolves to private/loopback address "${hostname}". Only public URLs are allowed.`);
    }
    return candidate;
}
|
|
94
|
+
/**
 * Default resolver used when callers do not inject their own lookup.
 * Asks dns.lookup for all addresses of the hostname, in the order the resolver
 * returned them, and yields just the address strings.
 */
const defaultHostLookup = async (hostname) => {
    const entries = await dns.lookup(hostname, { all: true, verbatim: true });
    const addresses = [];
    for (const entry of entries) {
        addresses.push(entry.address);
    }
    return addresses;
};
|
|
98
|
+
/**
 * Enforce that a hostname refers only to public IP addresses.
 * Does nothing when private URLs are explicitly allowed.
 *
 * @param hostname host to check; compared after trimming and lowercasing
 * @param lookup resolver returning the addresses for a hostname
 * @throws Error for localhost names, private IP literals, lookup failures,
 *         empty lookup results, or any resolved address in a private range
 */
export async function ensurePublicHostname(hostname, lookup = defaultHostLookup) {
    if (allowPrivateUrls()) {
        return;
    }
    const host = hostname.trim().toLowerCase();
    const isLocalName = LOCALHOST_HOSTNAMES.has(host) || host.endsWith('.localhost');
    if (isLocalName) {
        throw new Error(`SSRF protection: Hostname "${hostname}" resolves to localhost.`);
    }
    // IP literals need no DNS round-trip: classify them directly.
    const isLiteral = isIPv4Literal(host) || isIPv6Literal(host);
    if (isLiteral) {
        if (isPrivateOrReservedIP(host)) {
            throw new Error(`SSRF protection: Hostname "${hostname}" resolves to private/loopback address.`);
        }
        return;
    }
    let addresses;
    try {
        addresses = await lookup(host);
    }
    catch (cause) {
        const reason = cause instanceof Error ? cause.message : String(cause);
        throw new Error(`DNS resolution failed for "${hostname}": ${reason}`);
    }
    if (addresses.length === 0) {
        throw new Error(`DNS resolution failed for "${hostname}": no addresses returned`);
    }
    // A single private record is enough to reject the hostname.
    const offending = addresses.find((address) => isPrivateOrReservedIP(address));
    if (offending !== undefined) {
        throw new Error(`SSRF protection: Hostname "${hostname}" resolves to private IP "${offending}".`);
    }
}
|
|
132
|
+
/**
 * Full URL gate: syntactic/scheme validation followed by the public-hostname check.
 *
 * @param url candidate URL
 * @param lookup resolver forwarded to ensurePublicHostname
 * @returns the validated URL
 */
export async function validatePublicUrl(url, lookup = defaultHostLookup) {
    const safeUrl = validateUrl(url);
    const { hostname } = new URL(safeUrl);
    await ensurePublicHostname(hostname, lookup);
    return safeUrl;
}
|
|
141
|
+
/**
 * Validate a filesystem path and return it as an absolute path.
 * Any input containing ".." is rejected outright. When a workspace root is supplied,
 * the resolved path must also sit inside it. The containment test is purely lexical
 * (path.resolve / path.relative); the filesystem is not consulted.
 *
 * @param inputPath path to validate (surrounding whitespace is ignored)
 * @param label name used for the path in error messages
 * @param workspaceRoot optional directory the path must stay within
 * @returns the resolved absolute path
 * @throws Error on ".." sequences or when the path falls outside workspaceRoot
 */
export function validatePath(inputPath, label, workspaceRoot) {
    const candidate = inputPath.trim();
    // Cheap pre-check before any resolution takes place.
    if (candidate.indexOf('..') !== -1) {
        throw new Error(`Path traversal detected in ${label}: "${candidate}". Paths must not contain ".." sequences.`);
    }
    const absolute = path.resolve(candidate);
    if (!workspaceRoot) {
        return absolute;
    }
    const root = path.resolve(workspaceRoot);
    // A relative path that climbs upward, or is still absolute (e.g. another drive),
    // means the target is outside the root.
    const fromRoot = path.relative(root, absolute);
    const escapesRoot = fromRoot.startsWith('..') || path.isAbsolute(fromRoot);
    if (escapesRoot) {
        throw new Error(`${label} escapes workspace root "${root}". Got: "${absolute}"`);
    }
    return absolute;
}
|
|
165
|
+
/**
 * Validate a navigation target against a base URL.
 * The target must pass validateUrl, and its hostname must equal the base hostname
 * or be a subdomain of it. Scheme and port are not compared.
 *
 * @param navigationUrl URL about to be navigated to
 * @param baseUrl URL whose hostname defines the allowed site
 * @returns the validated navigation URL
 * @throws Error when the target hostname is outside the base hostname
 */
export function validateNavigationUrl(navigationUrl, baseUrl) {
    const safeUrl = validateUrl(navigationUrl);
    const targetHost = new URL(safeUrl).hostname;
    const baseHost = new URL(baseUrl).hostname;
    const sameHost = targetHost === baseHost;
    const isSubdomain = targetHost.endsWith('.' + baseHost);
    if (!sameHost && !isSubdomain) {
        throw new Error(`Navigation to "${targetHost}" blocked. Only same-origin navigation allowed (base: "${baseHost}").`);
    }
    return safeUrl;
}
|
|
180
|
+
/**
 * Normalise a page limit to a safe integer.
 * - undefined, null, NaN, or negative: 50 (default)
 * - 0 ("unlimited"): 500, the hard cap
 * - anything else: floored, then clamped to the range 1..500
 */
export function validateMaxPages(value) {
    const DEFAULT_PAGES = 50;
    const PAGE_CAP = 500;
    const unusable = value === undefined || value === null || isNaN(value) || value < 0;
    if (unusable) {
        return DEFAULT_PAGES;
    }
    // "Unlimited" is still bounded to avoid resource exhaustion.
    if (value === 0) {
        return PAGE_CAP;
    }
    const whole = Math.floor(value);
    if (whole < 1) {
        return 1;
    }
    return whole > PAGE_CAP ? PAGE_CAP : whole;
}
|
|
196
|
+
/**
 * Guard against oversized selector strings.
 *
 * @param selector selector to check
 * @returns the selector unchanged when it is at most 500 characters long
 * @throws Error when the selector exceeds the limit
 */
export function validateSelector(selector) {
    const limit = 500;
    const length = selector.length;
    if (length > limit) {
        throw new Error(`Selector too long (${length} chars, max ${limit}). Possible injection attempt.`);
    }
    return selector;
}
|
|
207
|
+
/**
 * Sanitize a string value by stripping potential script injection patterns.
 * Removes <script> elements, javascript: URIs, and inline event-handler attributes
 * (onclick=, onerror=, ...) from step values.
 *
 * The three removals are repeated until the string stops changing. A single pass is
 * not enough: deleting an inner match can splice the surrounding text into a new
 * match (for example "javajavascript:script:" collapses to "javascript:").
 * Every pass that changes the string shortens it, so the loop always terminates.
 *
 * This is pattern stripping, not a full HTML sanitizer.
 *
 * @param value raw step value
 * @returns the value with all matched patterns removed
 */
export function sanitizeValue(value) {
    let sanitized = value;
    let previous;
    do {
        previous = sanitized;
        sanitized = sanitized
            // Strip <script> tags (case-insensitive, handles attributes)
            .replace(/<script\b[^>]*>[\s\S]*?<\/script>/gi, '')
            // Strip javascript: URIs
            .replace(/javascript\s*:/gi, '')
            // Strip event handlers in attributes (onclick=, onerror=, etc.)
            .replace(/\bon\w+\s*=/gi, '');
    } while (sanitized !== previous);
    return sanitized;
}
|
|
220
|
+
/**
 * Make a session ID safe to embed in a filename.
 * Letters, digits, dash, and underscore are kept; every other character is
 * replaced with an underscore.
 */
export function sanitizeSessionId(sessionId) {
    const disallowed = /[^a-zA-Z0-9_-]/g;
    return sessionId.replace(disallowed, () => '_');
}
|
|
228
|
+
//# sourceMappingURL=validation.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validation.js","sourceRoot":"","sources":["../../src/core/validation.ts"],"names":[],"mappings":"AAAA,kFAAkF;AAElF,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,GAAG,MAAM,mBAAmB,CAAC;AAEpC,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAC;IAClC,WAAW;IACX,uBAAuB;CACxB,CAAC,CAAC;AAEH,SAAS,gBAAgB;IACvB,OAAO,OAAO,CAAC,GAAG,CAAC,4BAA4B,KAAK,GAAG,CAAC;AAC1D,CAAC;AAED,SAAS,aAAa,CAAC,KAAa;IAClC,OAAO,sBAAsB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AAC5C,CAAC;AAED,SAAS,aAAa,CAAC,KAAa;IAClC,OAAO,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;AAC7B,CAAC;AAED,SAAS,kBAAkB,CAAC,EAAU;IACpC,MAAM,KAAK,GAAG,EAAE,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACtC,IAAI,KAAK,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAChC,OAAO,KAAK,CAAC,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;IACvC,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,qBAAqB,CAAC,EAAU;IAC9C,MAAM,UAAU,GAAG,kBAAkB,CAAC,EAAE,CAAC,CAAC;IAE1C,2CAA2C;IAC3C,IAAI,UAAU,KAAK,IAAI,IAAI,UAAU,KAAK,KAAK,IAAI,UAAU,KAAK,yCAAyC,EAAE,CAAC;QAC5G,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,UAAU,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QACnC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,0BAA0B;IAC1B,IAAI,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QAC/D,OAAO,IAAI,CAAC;IACd,CAAC;IAED,gBAAgB;IAChB,gBAAgB;IAChB,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACpC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACrC,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAEtC,yBAAyB;QACzB,IAAI,KAAK,KAAK,GAAG;YAAE,OAAO,IAAI,CAAC;QAE/B,uBAAuB;QACvB,IAAI,KAAK,KAAK,EAAE;YAAE,OAAO,IAAI,CAAC;QAE9B,0BAA0B;QAC1B,IAAI,KAAK,KAAK,GAAG,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,IAAI,EAAE;YAAE,OAAO,IAAI,CAAC;QAE/D,2BAA2B;QAC3B,IAAI,KAAK,KAAK,GAAG,IAAI,MAAM,KAAK,GAAG;YAAE,OAAO,IAAI,CAAC;QAEjD,8BAA8B;QAC9B,IAAI,KAAK,KAAK,GAAG,IAAI,MAAM,KAAK,GAAG;YAAE,OAAO,IAAI,CAAC;IACnD,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,WAAW,CAAC,GAAW;IACrC,MAAM,OAAO,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;IAC3B,IAAI,MAAW,CAAC;IAChB,IAAI,CAAC;QACH,MAAM,G
AAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,iBAAiB,OAAO,6CAA6C,CAAC,CAAC;IACzF,CAAC;IAED,MAAM,gBAAgB,GAAG,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;IAC7C,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;QAChD,MAAM,IAAI,KAAK,CACb,sBAAsB,MAAM,CAAC,QAAQ,SAAS,OAAO,2CAA2C,CACjG,CAAC;IACJ,CAAC;IAED,4DAA4D;IAC5D,4GAA4G;IAC5G,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;IAEjC,6CAA6C;IAC7C,IAAI,CAAC,gBAAgB,EAAE,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;QAChF,IAAI,qBAAqB,CAAC,QAAQ,CAAC,EAAE,CAAC;YACpC,MAAM,IAAI,KAAK,CACb,yBAAyB,OAAO,2CAA2C,QAAQ,kCAAkC,CACtH,CAAC;QACJ,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAID,MAAM,iBAAiB,GAAiB,KAAK,EAAE,QAAgB,EAAE,EAAE;IACjE,MAAM,OAAO,GAAG,MAAM,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;IAC1E,OAAO,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;AAC/C,CAAC,CAAC;AAEF;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,QAAgB,EAChB,SAAuB,iBAAiB;IAExC,IAAI,gBAAgB,EAAE,EAAE,CAAC;QACvB,OAAO;IACT,CAAC;IAED,MAAM,cAAc,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAErD,IAAI,mBAAmB,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,cAAc,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;QACrF,MAAM,IAAI,KAAK,CAAC,8BAA8B,QAAQ,0BAA0B,CAAC,CAAC;IACpF,CAAC;IAED,IAAI,aAAa,CAAC,cAAc,CAAC,IAAI,aAAa,CAAC,cAAc,CAAC,EAAE,CAAC;QACnE,IAAI,qBAAqB,CAAC,cAAc,CAAC,EAAE,CAAC;YAC1C,MAAM,IAAI,KAAK,CAAC,8BAA8B,QAAQ,yCAAyC,CAAC,CAAC;QACnG,CAAC;QACD,OAAO;IACT,CAAC;IAED,IAAI,iBAA2B,CAAC;IAChC,IAAI,CAAC;QACH,iBAAiB,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAC;IACnD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,8BAA8B,QAAQ,MAAM,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IACxH,CAAC;IAED,IAAI,iBAAiB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACnC,MAAM,IAAI,KAAK,CAAC,8BAA8B,QAAQ,0BAA0B,CAAC,CAAC;IACpF,CAAC;IAED,KAAK,MAAM,OAAO,IAAI,iBAAiB,EAAE,CAAC;QACxC,IAAI,qBAAqB,CAAC,OAAO,CAAC,EAAE,CAAC;YACnC,MAAM,IAAI,KAAK,CAAC,8BAA8B,QAAQ,6BAA6B,OAAO,IAAI,CAAC,CAAC;QAClG,CAA
C;IACH,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,GAAW,EACX,SAAuB,iBAAiB;IAExC,MAAM,YAAY,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IACtC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC,QAAQ,CAAC;IAChD,MAAM,oBAAoB,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAC7C,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,SAAiB,EAAE,KAAa,EAAE,aAAsB;IACnF,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;IAEjC,qDAAqD;IACrD,2DAA2D;IAC3D,IAAI,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,KAAK,CACb,8BAA8B,KAAK,MAAM,OAAO,2CAA2C,CAC5F,CAAC;IACJ,CAAC;IAED,2BAA2B;IAC3B,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IAEvC,mDAAmD;IACnD,IAAI,aAAa,EAAE,CAAC;QAClB,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;QACjD,8FAA8F;QAC9F,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;QAC3D,IAAI,YAAY,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;YACnE,MAAM,IAAI,KAAK,CAAC,GAAG,KAAK,4BAA4B,YAAY,YAAY,QAAQ,GAAG,CAAC,CAAC;QAC3F,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,qBAAqB,CAAC,aAAqB,EAAE,OAAe;IAC1E,MAAM,SAAS,GAAG,WAAW,CAAC,aAAa,CAAC,CAAC;IAC7C,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC;IACrC,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;IAEpC,oCAAoC;IACpC,IAAI,SAAS,CAAC,QAAQ,KAAK,UAAU,CAAC,QAAQ;QAC1C,CAAC,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,GAAG,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC5D,MAAM,IAAI,KAAK,CACb,kBAAkB,SAAS,CAAC,QAAQ,0DAA0D,UAAU,CAAC,QAAQ,KAAK,CACvH,CAAC;IACJ,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,gBAAgB,CAAC,KAAyB;IACxD,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,CAAC,KAAK,CAAC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;QACvE,OAAO,EAAE,CAAC,CAAC,eAAe;IAC5B,CAAC;IACD,uEAAuE;IACvE,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;QAChB,OAAO,GAAG,CAAC;IACb,CAAC;IACD,yDAAyD;IACzD,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;AACvD,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,QAAgB;IAC/C,MAAM,mBAAmB,GAAG,GAAG,CAAC;IAChC,IAAI,QAAQ,CAAC,MAAM,GAAG,mBAAmB,EAAE,CAAC;
QAC1C,MAAM,IAAI,KAAK,CACb,sBAAsB,QAAQ,CAAC,MAAM,eAAe,mBAAmB,gCAAgC,CACxG,CAAC;IACJ,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,KAAa;IACzC,6DAA6D;IAC7D,IAAI,SAAS,GAAG,KAAK,CAAC,OAAO,CAAC,qCAAqC,EAAE,EAAE,CAAC,CAAC;IACzE,yBAAyB;IACzB,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC;IACtD,gEAAgE;IAChE,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC;IACnD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,SAAiB;IACjD,gDAAgD;IAChD,OAAO,SAAS,CAAC,OAAO,CAAC,iBAAiB,EAAE,GAAG,CAAC,CAAC;AACnD,CAAC"}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import type { Page } from 'playwright';
|
|
2
|
+
import type { BrowserManager } from '../browser/browser-manager.js';
|
|
3
|
+
import type { CrawlResult, PageData } from '../types/discovery.js';
|
|
4
|
+
/**
 * Configuration accepted by the SiteCrawler constructor. Every field is optional.
 */
export interface CrawlerOptions {
    /** Concurrency setting stored by the crawler; the constructor falls back to 3. */
    maxConcurrency?: number;
    /** Page limit stored by the crawler; the constructor falls back to 0. */
    maxPages?: number;
    /** Patterns for URLs the crawler should skip; the constructor falls back to an empty list. */
    excludePatterns?: string[];
    /** Callback receiving a URL and a count (presumably invoked after each crawled page - confirm in crawl). */
    onPageCrawled?: (url: string, count: number) => void;
    /** Optional hook that receives the Playwright page and its URL and returns extra PageData fields. */
    pageProcessor?: (page: Page, url: string) => Promise<Partial<PageData>>;
    /** Additional URLs; NOTE(review): crawl() also takes its own additionalUrls argument - confirm which one callers use. */
    additionalUrls?: string[];
}
|
|
15
|
+
/**
 * Recursive web crawler that visits all same-hostname pages.
 * Uses BrowserManager for stealth and cookie dismissal.
 */
export declare class SiteCrawler {
    private browserManager;
    private visited;
    private queue;
    private pages;
    private hostname;
    private maxConcurrency;
    private maxPages;
    private excludePatterns;
    private onPageCrawled?;
    private pageProcessor?;

    constructor(browserManager: BrowserManager, options?: CrawlerOptions);

    /**
     * Crawls all same-hostname pages starting from seedUrl.
     * @param seedUrl - Starting URL to crawl from; its hostname defines which pages are in scope
     * @param additionalUrls - Extra URLs to enqueue alongside the seed
     * @returns CrawlResult with all discovered pages
     */
    crawl(seedUrl: string, additionalUrls?: string[]): Promise<CrawlResult>;

    /** Crawls a single page: navigate, extract data, discover links. */
    private crawlPage;

    /** Extracts all links from a page. */
    private extractLinks;

    /**
     * Normalizes a URL for deduplication:
     * - removes the fragment (#...)
     * - removes a trailing slash (except root /)
     * - lowercases the hostname
     * - sorts query parameters
     * - removes default ports
     */
    private normalizeUrl;

    /** Checks whether a URL matches any exclude pattern. */
    private shouldExclude;
}
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
// Recursive web crawler with URL queue, deduplication, and pageProcessor callback
|
|
2
|
+
/**
|
|
3
|
+
* Recursive web crawler that visits all same-hostname pages
|
|
4
|
+
* Uses BrowserManager for stealth and cookie dismissal
|
|
5
|
+
*/
|
|
6
|
+
export class SiteCrawler {
|
|
7
|
+
browserManager;
|
|
8
|
+
visited; // normalized URLs already visited
|
|
9
|
+
queue; // URLs to visit
|
|
10
|
+
pages; // collected page data
|
|
11
|
+
hostname = ''; // seed URL hostname for filtering
|
|
12
|
+
maxConcurrency;
|
|
13
|
+
maxPages;
|
|
14
|
+
excludePatterns;
|
|
15
|
+
onPageCrawled;
|
|
16
|
+
pageProcessor;
|
|
17
|
+
constructor(browserManager, options) {
|
|
18
|
+
this.browserManager = browserManager;
|
|
19
|
+
this.visited = new Set();
|
|
20
|
+
this.queue = [];
|
|
21
|
+
this.pages = [];
|
|
22
|
+
this.maxConcurrency = options?.maxConcurrency ?? 3;
|
|
23
|
+
this.maxPages = options?.maxPages ?? 0;
|
|
24
|
+
this.excludePatterns = options?.excludePatterns ?? [];
|
|
25
|
+
this.onPageCrawled = options?.onPageCrawled;
|
|
26
|
+
this.pageProcessor = options?.pageProcessor;
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* Crawls all same-hostname pages starting from seedUrl
|
|
30
|
+
* @param seedUrl - Starting URL to crawl from
|
|
31
|
+
* @returns CrawlResult with all discovered pages
|
|
32
|
+
*/
|
|
33
|
+
async crawl(seedUrl, additionalUrls) {
|
|
34
|
+
const startTime = Date.now();
|
|
35
|
+
// Extract hostname from seed URL
|
|
36
|
+
const seedUrlObj = new URL(seedUrl);
|
|
37
|
+
this.hostname = seedUrlObj.hostname;
|
|
38
|
+
// Add seed URL to queue
|
|
39
|
+
const normalizedSeed = this.normalizeUrl(seedUrl);
|
|
40
|
+
this.queue.push(normalizedSeed);
|
|
41
|
+
// Add additional URLs (e.g., from SPA route detection)
|
|
42
|
+
if (additionalUrls && additionalUrls.length > 0) {
|
|
43
|
+
for (const url of additionalUrls) {
|
|
44
|
+
try {
|
|
45
|
+
const urlObj = new URL(url, seedUrl); // Resolve relative URLs
|
|
46
|
+
if (urlObj.hostname === this.hostname) {
|
|
47
|
+
const normalized = this.normalizeUrl(urlObj.href);
|
|
48
|
+
if (!this.visited.has(normalized) && !this.queue.includes(normalized)) {
|
|
49
|
+
this.queue.push(normalized);
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
catch {
|
|
54
|
+
// Skip invalid URLs
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
// Process queue in batches
|
|
59
|
+
while (this.queue.length > 0) {
|
|
60
|
+
// Check maxPages limit
|
|
61
|
+
if (this.maxPages > 0 && this.pages.length >= this.maxPages) {
|
|
62
|
+
break;
|
|
63
|
+
}
|
|
64
|
+
// Warn after 50 pages
|
|
65
|
+
if (this.pages.length === 50) {
|
|
66
|
+
console.warn('⚠️ Discovered 50+ pages. Crawl continuing...');
|
|
67
|
+
}
|
|
68
|
+
// Take batch of URLs
|
|
69
|
+
const batch = this.queue.splice(0, this.maxConcurrency);
|
|
70
|
+
// Process batch concurrently
|
|
71
|
+
const results = await Promise.allSettled(batch.map(url => this.crawlPage(url)));
|
|
72
|
+
// Log errors without failing entire crawl
|
|
73
|
+
results.forEach((result, index) => {
|
|
74
|
+
if (result.status === 'rejected') {
|
|
75
|
+
console.error(`Error crawling ${batch[index]}: ${result.reason}`);
|
|
76
|
+
}
|
|
77
|
+
});
|
|
78
|
+
}
|
|
79
|
+
const duration = Date.now() - startTime;
|
|
80
|
+
return {
|
|
81
|
+
pages: this.pages,
|
|
82
|
+
brokenLinks: [], // Populated by link validator in later plan
|
|
83
|
+
totalPagesDiscovered: this.pages.length,
|
|
84
|
+
totalLinksChecked: 0, // Populated by link validator in later plan
|
|
85
|
+
crawlDuration: duration,
|
|
86
|
+
spaDetected: { framework: 'none' } // Populated by SPA detector in later plan
|
|
87
|
+
};
|
|
88
|
+
}
|
|
89
|
+
/**
|
|
90
|
+
* Crawls a single page: navigate, extract data, discover links
|
|
91
|
+
*/
|
|
92
|
+
async crawlPage(url) {
|
|
93
|
+
// Skip if already visited or matches exclude pattern
|
|
94
|
+
if (this.visited.has(url)) {
|
|
95
|
+
return;
|
|
96
|
+
}
|
|
97
|
+
if (this.shouldExclude(url)) {
|
|
98
|
+
return;
|
|
99
|
+
}
|
|
100
|
+
this.visited.add(url);
|
|
101
|
+
let page = null;
|
|
102
|
+
try {
|
|
103
|
+
// Open page via BrowserManager (handles stealth + cookie dismissal)
|
|
104
|
+
page = await this.browserManager.newPage(url);
|
|
105
|
+
// Extract title
|
|
106
|
+
const title = await page.title();
|
|
107
|
+
// Extract links
|
|
108
|
+
const links = await this.extractLinks(page, url);
|
|
109
|
+
// Create minimal PageData
|
|
110
|
+
const pageData = {
|
|
111
|
+
url,
|
|
112
|
+
title,
|
|
113
|
+
forms: [],
|
|
114
|
+
buttons: [],
|
|
115
|
+
links,
|
|
116
|
+
menus: [],
|
|
117
|
+
otherInteractive: [],
|
|
118
|
+
crawledAt: new Date().toISOString()
|
|
119
|
+
};
|
|
120
|
+
// If pageProcessor provided, call it and merge results
|
|
121
|
+
if (this.pageProcessor) {
|
|
122
|
+
const extraData = await this.pageProcessor(page, url);
|
|
123
|
+
Object.assign(pageData, extraData);
|
|
124
|
+
}
|
|
125
|
+
// Store page data
|
|
126
|
+
this.pages.push(pageData);
|
|
127
|
+
// Notify progress callback
|
|
128
|
+
if (this.onPageCrawled) {
|
|
129
|
+
this.onPageCrawled(url, this.pages.length);
|
|
130
|
+
}
|
|
131
|
+
// Add discovered links to queue
|
|
132
|
+
for (const link of links) {
|
|
133
|
+
if (link.isInternal && !this.visited.has(link.href)) {
|
|
134
|
+
const normalized = this.normalizeUrl(link.href);
|
|
135
|
+
if (!this.queue.includes(normalized) && !this.visited.has(normalized)) {
|
|
136
|
+
this.queue.push(normalized);
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
finally {
|
|
142
|
+
// Always close page
|
|
143
|
+
if (page) {
|
|
144
|
+
await page.close();
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
/**
|
|
149
|
+
* Extracts all links from a page
|
|
150
|
+
*/
|
|
151
|
+
async extractLinks(page, baseUrl) {
|
|
152
|
+
const links = await page.$$eval('a[href]', (anchors) => {
|
|
153
|
+
return anchors.map((a) => {
|
|
154
|
+
const anchor = a;
|
|
155
|
+
return {
|
|
156
|
+
href: anchor.href, // Already resolved by browser
|
|
157
|
+
text: anchor.textContent?.trim() || ''
|
|
158
|
+
};
|
|
159
|
+
});
|
|
160
|
+
});
|
|
161
|
+
// Classify links as internal/external
|
|
162
|
+
return links.map(link => {
|
|
163
|
+
try {
|
|
164
|
+
const linkUrl = new URL(link.href);
|
|
165
|
+
return {
|
|
166
|
+
href: link.href,
|
|
167
|
+
text: link.text,
|
|
168
|
+
isInternal: linkUrl.hostname === this.hostname
|
|
169
|
+
};
|
|
170
|
+
}
|
|
171
|
+
catch {
|
|
172
|
+
// Invalid URL
|
|
173
|
+
return {
|
|
174
|
+
href: link.href,
|
|
175
|
+
text: link.text,
|
|
176
|
+
isInternal: false
|
|
177
|
+
};
|
|
178
|
+
}
|
|
179
|
+
});
|
|
180
|
+
}
|
|
181
|
+
/**
|
|
182
|
+
* Normalizes a URL for deduplication
|
|
183
|
+
* - Removes fragment (#...)
|
|
184
|
+
* - Removes trailing slash (except root /)
|
|
185
|
+
* - Lowercases hostname
|
|
186
|
+
* - Sorts query parameters
|
|
187
|
+
* - Removes default ports
|
|
188
|
+
*/
|
|
189
|
+
normalizeUrl(url) {
|
|
190
|
+
try {
|
|
191
|
+
const urlObj = new URL(url);
|
|
192
|
+
// Remove fragment
|
|
193
|
+
urlObj.hash = '';
|
|
194
|
+
// Lowercase hostname
|
|
195
|
+
urlObj.hostname = urlObj.hostname.toLowerCase();
|
|
196
|
+
// Remove default ports
|
|
197
|
+
if ((urlObj.protocol === 'http:' && urlObj.port === '80') ||
|
|
198
|
+
(urlObj.protocol === 'https:' && urlObj.port === '443')) {
|
|
199
|
+
urlObj.port = '';
|
|
200
|
+
}
|
|
201
|
+
// Sort query parameters
|
|
202
|
+
const params = Array.from(urlObj.searchParams.entries()).sort((a, b) => a[0].localeCompare(b[0]));
|
|
203
|
+
urlObj.search = '';
|
|
204
|
+
params.forEach(([key, value]) => urlObj.searchParams.append(key, value));
|
|
205
|
+
// Remove trailing slash (except root)
|
|
206
|
+
let normalized = urlObj.href;
|
|
207
|
+
if (urlObj.pathname !== '/' && normalized.endsWith('/')) {
|
|
208
|
+
normalized = normalized.slice(0, -1);
|
|
209
|
+
}
|
|
210
|
+
return normalized;
|
|
211
|
+
}
|
|
212
|
+
catch {
|
|
213
|
+
return url; // Return as-is if parsing fails
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
/**
|
|
217
|
+
* Checks if URL matches any exclude pattern
|
|
218
|
+
*/
|
|
219
|
+
shouldExclude(url) {
|
|
220
|
+
return this.excludePatterns.some(pattern => {
|
|
221
|
+
if (pattern.startsWith('*') && pattern.endsWith('*')) {
|
|
222
|
+
// *pattern* - contains
|
|
223
|
+
return url.includes(pattern.slice(1, -1));
|
|
224
|
+
}
|
|
225
|
+
else if (pattern.startsWith('*')) {
|
|
226
|
+
// *.ext - ends with
|
|
227
|
+
return url.endsWith(pattern.slice(1));
|
|
228
|
+
}
|
|
229
|
+
else if (pattern.endsWith('*')) {
|
|
230
|
+
// prefix* - starts with
|
|
231
|
+
return url.startsWith(pattern.slice(0, -1));
|
|
232
|
+
}
|
|
233
|
+
else {
|
|
234
|
+
// exact match
|
|
235
|
+
return url.includes(pattern);
|
|
236
|
+
}
|
|
237
|
+
});
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
//# sourceMappingURL=crawler.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"crawler.js","sourceRoot":"","sources":["../../src/discovery/crawler.ts"],"names":[],"mappings":"AAAA,kFAAkF;AAkBlF;;;GAGG;AACH,MAAM,OAAO,WAAW;IACd,cAAc,CAAiB;IAC/B,OAAO,CAAc,CAAS,kCAAkC;IAChE,KAAK,CAAW,CAAc,gBAAgB;IAC9C,KAAK,CAAa,CAAY,sBAAsB;IACpD,QAAQ,GAAW,EAAE,CAAC,CAAQ,kCAAkC;IAChE,cAAc,CAAS;IACvB,QAAQ,CAAS;IACjB,eAAe,CAAW;IAC1B,aAAa,CAAwC;IACrD,aAAa,CAA2D;IAEhF,YAAY,cAA8B,EAAE,OAAwB;QAClE,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;QACrC,IAAI,CAAC,OAAO,GAAG,IAAI,GAAG,EAAE,CAAC;QACzB,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC;QAChB,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC;QAChB,IAAI,CAAC,cAAc,GAAG,OAAO,EAAE,cAAc,IAAI,CAAC,CAAC;QACnD,IAAI,CAAC,QAAQ,GAAG,OAAO,EAAE,QAAQ,IAAI,CAAC,CAAC;QACvC,IAAI,CAAC,eAAe,GAAG,OAAO,EAAE,eAAe,IAAI,EAAE,CAAC;QACtD,IAAI,CAAC,aAAa,GAAG,OAAO,EAAE,aAAa,CAAC;QAC5C,IAAI,CAAC,aAAa,GAAG,OAAO,EAAE,aAAa,CAAC;IAC9C,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,KAAK,CAAC,OAAe,EAAE,cAAyB;QACpD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,iCAAiC;QACjC,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;QACpC,IAAI,CAAC,QAAQ,GAAG,UAAU,CAAC,QAAQ,CAAC;QAEpC,wBAAwB;QACxB,MAAM,cAAc,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;QAClD,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAEhC,uDAAuD;QACvD,IAAI,cAAc,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAChD,KAAK,MAAM,GAAG,IAAI,cAAc,EAAE,CAAC;gBACjC,IAAI,CAAC;oBACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,CAAC,wBAAwB;oBAC9D,IAAI,MAAM,CAAC,QAAQ,KAAK,IAAI,CAAC,QAAQ,EAAE,CAAC;wBACtC,MAAM,UAAU,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;wBAClD,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;4BACtE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;wBAC9B,CAAC;oBACH,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,oBAAoB;gBACtB,CAAC;YACH,CAAC;QACH,CAAC;QAED,2BAA2B;QAC3B,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,uBAAuB;YACvB,IAAI,IAAI,CAAC,QAAQ,GAAG,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAC5D,MAAM;YACR,CAAC;YAED,sBAAsB;YACtB,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM
,KAAK,EAAE,EAAE,CAAC;gBAC7B,OAAO,CAAC,IAAI,CAAC,+CAA+C,CAAC,CAAC;YAChE,CAAC;YAED,qBAAqB;YACrB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;YAExD,6BAA6B;YAC7B,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CACtC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CACtC,CAAC;YAEF,0CAA0C;YAC1C,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE;gBAChC,IAAI,MAAM,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;oBACjC,OAAO,CAAC,KAAK,CAAC,kBAAkB,KAAK,CAAC,KAAK,CAAC,KAAK,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;gBACpE,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;QAED,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QAExC,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,WAAW,EAAE,EAAE,EAAG,4CAA4C;YAC9D,oBAAoB,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM;YACvC,iBAAiB,EAAE,CAAC,EAAG,4CAA4C;YACnE,aAAa,EAAE,QAAQ;YACvB,WAAW,EAAE,EAAE,SAAS,EAAE,MAAM,EAAE,CAAE,0CAA0C;SAC/E,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,SAAS,CAAC,GAAW;QACjC,qDAAqD;QACrD,IAAI,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YAC1B,OAAO;QACT,CAAC;QAED,IAAI,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,EAAE,CAAC;YAC5B,OAAO;QACT,CAAC;QAED,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAEtB,IAAI,IAAI,GAAgB,IAAI,CAAC;QAE7B,IAAI,CAAC;YACH,oEAAoE;YACpE,IAAI,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YAE9C,gBAAgB;YAChB,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;YAEjC,gBAAgB;YAChB,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;YAEjD,0BAA0B;YAC1B,MAAM,QAAQ,GAAa;gBACzB,GAAG;gBACH,KAAK;gBACL,KAAK,EAAE,EAAE;gBACT,OAAO,EAAE,EAAE;gBACX,KAAK;gBACL,KAAK,EAAE,EAAE;gBACT,gBAAgB,EAAE,EAAE;gBACpB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACpC,CAAC;YAEF,uDAAuD;YACvD,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACvB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;gBACtD,MAAM,CAAC,MAAM,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;YACrC,CAAC;YAED,kBAAkB;YAClB,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAE1B,2BAA2B;YAC3B,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACvB,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YAC7C,CAAC;YAED,gCAAgC
;YAChC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,IAAI,IAAI,CAAC,UAAU,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;oBACpD,MAAM,UAAU,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBAChD,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;wBACtE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;oBAC9B,CAAC;gBACH,CAAC;YACH,CAAC;QAEH,CAAC;gBAAS,CAAC;YACT,oBAAoB;YACpB,IAAI,IAAI,EAAE,CAAC;gBACT,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;YACrB,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,YAAY,CAAC,IAAU,EAAE,OAAe;QACpD,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,OAAO,EAAE,EAAE;YACrD,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;gBACvB,MAAM,MAAM,GAAG,CAAsB,CAAC;gBACtC,OAAO;oBACL,IAAI,EAAE,MAAM,CAAC,IAAI,EAAG,8BAA8B;oBAClD,IAAI,EAAE,MAAM,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE;iBACvC,CAAC;YACJ,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,sCAAsC;QACtC,OAAO,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;YACtB,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACnC,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,UAAU,EAAE,OAAO,CAAC,QAAQ,KAAK,IAAI,CAAC,QAAQ;iBAC/C,CAAC;YACJ,CAAC;YAAC,MAAM,CAAC;gBACP,cAAc;gBACd,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,UAAU,EAAE,KAAK;iBAClB,CAAC;YACJ,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;OAOG;IACK,YAAY,CAAC,GAAW;QAC9B,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;YAE5B,kBAAkB;YAClB,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;YAEjB,qBAAqB;YACrB,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;YAEhD,uBAAuB;YACvB,IAAI,CAAC,MAAM,CAAC,QAAQ,KAAK,OAAO,IAAI,MAAM,CAAC,IAAI,KAAK,IAAI,CAAC;gBACrD,CAAC,MAAM,CAAC,QAAQ,KAAK,QAAQ,IAAI,MAAM,CAAC,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;gBAC5D,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;YACnB,CAAC;YAED,wBAAwB;YACxB,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACrE,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC
,CAAC,CACzB,CAAC;YACF,MAAM,CAAC,MAAM,GAAG,EAAE,CAAC;YACnB,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC;YAEzE,sCAAsC;YACtC,IAAI,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC;YAC7B,IAAI,MAAM,CAAC,QAAQ,KAAK,GAAG,IAAI,UAAU,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBACxD,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YACvC,CAAC;YAED,OAAO,UAAU,CAAC;QACpB,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,GAAG,CAAC,CAAE,gCAAgC;QAC/C,CAAC;IACH,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,GAAW;QAC/B,OAAO,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE;YACzC,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBACrD,uBAAuB;gBACvB,OAAO,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;YAC5C,CAAC;iBAAM,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;gBACnC,oBAAoB;gBACpB,OAAO,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACxC,CAAC;iBAAM,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBACjC,wBAAwB;gBACxB,OAAO,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9C,CAAC;iBAAM,CAAC;gBACN,cAAc;gBACd,OAAO,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YAC/B,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC;CACF"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { DiscoveryArtifact, SitemapNode, WorkflowPlan } from '../types/discovery.js';
|
|
2
|
+
/**
 * Options accepted by runDiscovery.
 * NOTE(review): only the declaration is visible here; the per-field notes below are
 * inferred from names and types and should be confirmed against the implementation.
 */
export interface DiscoveryOptions {
    // presumably the URL discovery starts from — TODO confirm
    targetUrl: string;
    // presumably identifies the run the artifact belongs to — TODO confirm
    sessionId: string;
    // optional free-form hints; how they are used is not visible here
    userHints?: string[];
    // optional page limit; presumably forwarded to the crawler — TODO confirm
    maxPages?: number;
    // optional; presumably controls headless browser mode — TODO confirm
    headless?: boolean;
    // optional callback receiving progress messages as strings
    onProgress?: (message: string) => void;
}
|
|
10
|
+
/**
 * Result type of resolveWorkflowPlans: the workflow plans plus a flag.
 */
export interface WorkflowPlanResolution {
    workflowPlans: WorkflowPlan[];
    // NOTE(review): presumably true when the plans came from a heuristic fallback
    // rather than from the supplied plan promise — confirm against the implementation
    usedHeuristicFallback: boolean;
}
|
|
14
|
+
/**
 * Resolves workflow plans from an optional pending plan promise and a sitemap.
 * NOTE(review): the implementation is not visible in this declaration file; presumably a
 * null or failing planPromise leads to a heuristic fallback built from the sitemap — confirm.
 *
 * @param planPromise Pending workflow plans, or null when none were requested
 * @param sitemap Root sitemap node
 * @returns The plans together with the usedHeuristicFallback flag
 */
export declare function resolveWorkflowPlans(planPromise: Promise<WorkflowPlan[]> | null, sitemap: SitemapNode): Promise<WorkflowPlanResolution>;
|
|
15
|
+
/**
 * Runs complete Phase 2 discovery pipeline: crawl, discover elements, detect SPA,
 * validate links, build sitemap, generate workflow plans.
 *
 * @param options Discovery configuration (see DiscoveryOptions)
 * @returns Promise resolving to the complete discovery artifact with sitemap and workflow plans
 */
export declare function runDiscovery(options: DiscoveryOptions): Promise<DiscoveryArtifact>;
|