playwright-archaeologist 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4469 @@
1
+ import {
2
+ normalizeUrl,
3
+ shouldCrawl
4
+ } from "./chunk-RWPEKZOW.js";
5
+
6
+ // src/types/config.ts
7
+ import { z } from "zod";
8
/**
 * Zod schema for a browser viewport.
 * Width must be an integer in [320, 7680]; height an integer in [240, 4320].
 * Each bound carries its own validation message.
 */
var ViewportSchema = z.object({
  width: z.number().int().min(320, "Viewport width must be >= 320").max(7680, "Viewport width must be <= 7680"),
  height: z.number().int().min(240, "Viewport height must be >= 240").max(4320, "Viewport height must be <= 4320")
});
12
/**
 * Parse a "<width>x<height>" string (decimal digits, lowercase "x") into a
 * viewport object and validate it against ViewportSchema.
 *
 * @param {string} input - Viewport specification such as "1280x720".
 * @returns {{width: number, height: number} | null} The validated viewport,
 *   or null when the string is malformed or out of the schema's bounds.
 */
function parseViewport(input) {
  const parts = input.match(/^(\d+)x(\d+)$/);
  if (!parts) {
    return null;
  }
  const [, widthText, heightText] = parts;
  const candidate = {
    width: parseInt(widthText, 10),
    height: parseInt(heightText, 10)
  };
  const outcome = ViewportSchema.safeParse(candidate);
  if (!outcome.success) {
    return null;
  }
  return outcome.data;
}
20
/** Zod enum of the accepted output format names. */
var OutputFormatSchema = z.enum(["html", "json", "openapi", "both"]);
21
/**
 * Normalize a user-supplied entry URL.
 *
 * - Surrounding whitespace is trimmed.
 * - Input without an http(s) scheme gets "https://" prepended and a warning is
 *   recorded. Scheme-less "host:port" input (e.g. "localhost:3000") is treated
 *   as a host, not as an unknown protocol.
 * - Any other explicit scheme (ftp:, file:, javascript:, ...) is rejected.
 *
 * @param {string} raw - URL text as typed by the user.
 * @returns {{url: string, warnings: string[]}} The (possibly prefixed) URL
 *   string and any warnings produced while normalizing.
 * @throws {Error} When the scheme is unsupported, the URL cannot be parsed,
 *   or it has no hostname.
 */
function normalizeEntryUrl(raw) {
  const warnings = [];
  const trimmed = raw.trim();
  let url = trimmed;
  if (!/^https?:\/\//i.test(url)) {
    const looksLikeScheme = /^[a-zA-Z][a-zA-Z0-9+.-]*:/.test(url);
    // "localhost:3000" also matches the scheme pattern; tell the two apart by
    // requiring a 1-5 digit port right after the first colon, followed by the
    // end of the string or the start of a path/query/fragment.
    const looksLikeHostPort = /^[a-zA-Z0-9.-]+:\d{1,5}(?:[/?#]|$)/.test(url);
    if (looksLikeScheme && !looksLikeHostPort) {
      throw new Error(`Unsupported protocol in URL: ${url}. Only http and https are allowed.`);
    }
    url = `https://${url}`;
    warnings.push(`No protocol specified. Using https://${trimmed}`);
  }
  let parsed;
  try {
    parsed = new URL(url);
  } catch (err) {
    if (err instanceof TypeError) {
      // Keep the parser's error reachable for debugging.
      throw new Error(`Invalid URL: ${url}`, { cause: err });
    }
    throw err;
  }
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    throw new Error(`Unsupported protocol "${parsed.protocol}". Only http: and https: are allowed.`);
  }
  if (!parsed.hostname || parsed.hostname.length === 0) {
    throw new Error("URL has no hostname.");
  }
  return { url, warnings };
}
47
/**
 * Zod schema for the crawl configuration. Every field except `targetUrl`,
 * `authScript` and `cookiesFile` has a default, so parsing `{ targetUrl }`
 * yields a fully populated config.
 *
 * Upper bounds are enforced by rejection (zod `.max()`), not by clamping, so
 * the messages say "must be <=" like the other bounds in this schema.
 */
var CrawlConfigSchema = z.object({
  // ---- Target ----
  /**
   * Entry URL to start crawling. This schema only checks URL syntax; it does
   * not itself restrict the scheme to http/https.
   */
  targetUrl: z.string().url("Target URL must be a valid URL"),
  // ---- Discovery ----
  /** Maximum crawl depth from the entry URL. 0 = entry URL only. */
  depth: z.number().int().min(0, "Depth must be >= 0").max(100, "Depth must be <= 100").default(5),
  /** Maximum total pages to visit. */
  maxPages: z.number().int().min(1, "Max pages must be >= 1").max(1e5, "Max pages must be <= 100000").default(1e3),
  /** URL glob patterns to include. Only URLs matching at least one pattern are crawled. */
  include: z.array(z.string()).default([]),
  /** URL glob patterns to exclude. URLs matching any pattern are skipped. */
  exclude: z.array(z.string()).default([]),
  /** Whether to follow links to external origins. Default: false (same-origin only). */
  followExternal: z.boolean().default(false),
  /** Enable Tier 3 clicking: click non-link interactive elements to discover SPA states. */
  deepClick: z.boolean().default(false),
  /** Crawl cross-origin iframe content. Default: false (record URL only). */
  includeIframes: z.boolean().default(false),
  // ---- Performance ----
  /** Number of parallel browser contexts. */
  concurrency: z.number().int().min(1, "Concurrency must be >= 1").max(20, "Concurrency must be <= 20").default(3),
  /** Delay in milliseconds between page visits per context. */
  delay: z.number().int().min(0, "Delay must be >= 0").max(6e4, "Delay must be <= 60000ms").default(0),
  /** Per-page navigation timeout in milliseconds. */
  timeout: z.number().int().min(1e3, "Timeout must be >= 1000ms").max(3e5, "Timeout must be <= 300000ms").default(3e4),
  /** Global crawl timeout in seconds. 0 = no limit. */
  maxTime: z.number().int().min(0, "Max time must be >= 0").max(86400, "Max time must be <= 86400s (24 hours)").default(3600),
  // ---- Auth ----
  /** Path to auth script (.ts or .js) exporting a default async function. */
  authScript: z.string().optional(),
  /** Path to cookies JSON file for session injection. */
  cookiesFile: z.string().optional(),
  /** Include cookies/auth headers in output (default: scrubbed). */
  includeCookies: z.boolean().default(false),
  // ---- Output ----
  /** Output directory path. Created if it does not exist. Parent must exist. */
  outputDir: z.string().default(".archaeologist"),
  /** Output format(s). */
  format: OutputFormatSchema.default("both"),
  /** Skip screenshot capture entirely. */
  noScreenshots: z.boolean().default(false),
  /** Skip HAR recording entirely. */
  noHar: z.boolean().default(false),
  // ---- Display ----
  /** Primary viewport dimensions. */
  viewport: ViewportSchema.default({ width: 1280, height: 720 }),
  /** Additional viewports for responsive screenshots (empty = primary only). */
  additionalViewports: z.array(ViewportSchema).default([]),
  // ---- Resume ----
  /** Resume from last checkpoint in the output directory. */
  resume: z.boolean().default(false),
  /** Skip confirmation prompts (auth script execution, etc.). */
  yes: z.boolean().default(false),
  // ---- Security ----
  /** Allow crawling private/internal IP ranges (SSRF protection bypass). */
  allowPrivate: z.boolean().default(false),
  // ---- Diff (for `pa diff` subcommand) ----
  /** Pixel diff threshold (0-1 scale, lower = more sensitive). */
  diffThreshold: z.number().min(0, "Diff threshold must be >= 0").max(1, "Diff threshold must be <= 1").default(0.1),
  /** Maximum diff ratio (0-100%) before a screenshot is considered "changed". */
  diffMaxRatio: z.number().min(0, "Diff max ratio must be >= 0").max(100, "Diff max ratio must be <= 100").default(0.5),
  /** Fields to ignore during API diff (comma-separated or array). */
  diffIgnoreFields: z.array(z.string()).default([
    "timestamp",
    "createdAt",
    "updatedAt",
    "date",
    "requestId",
    "traceId",
    "correlationId",
    "token",
    "nonce",
    "csrf",
    "etag",
    "lastModified"
  ])
});
125
/**
 * Zod schema for the diff configuration. The three diff tuning fields reuse
 * the exact field schemas (bounds and defaults) declared on CrawlConfigSchema.
 */
var DiffConfigSchema = z.object({
  /** Path to the "old" .archaeologist bundle. */
  oldBundle: z.string(),
  /** Path to the "new" .archaeologist bundle. */
  newBundle: z.string(),
  /** Output directory for diff report and artifacts. */
  outputDir: z.string().default(".archaeologist"),
  /** Pixel diff threshold. */
  diffThreshold: CrawlConfigSchema.shape.diffThreshold,
  /** Maximum diff ratio. */
  diffMaxRatio: CrawlConfigSchema.shape.diffMaxRatio,
  /** Fields to ignore during API response diff. */
  diffIgnoreFields: CrawlConfigSchema.shape.diffIgnoreFields,
  /** Normalize dynamic values (UUIDs, timestamps, JWTs) before diffing. */
  normalizeDynamicValues: z.boolean().default(false),
  /** Show detailed value-level diffs (default: schema-level only). */
  detailed: z.boolean().default(false),
  /** Output formats for the diff report; each key is an optional string (presumably an output path — confirm with the CLI). */
  outputFormats: z.object({
    html: z.string().optional(),
    json: z.string().optional(),
    junit: z.string().optional(),
    markdown: z.string().optional()
  }).default({})
});
150
+
151
+ // src/types/errors.ts
152
/**
 * Base class for all errors raised by this tool.
 * `options` is forwarded to `Error`, so `{ cause }` is supported.
 */
var ArchaeologistError = class extends Error {
  /** Machine-readable error code for programmatic handling. */
  code;
  /**
   * @param {string} message - Human-readable description.
   * @param {string} code - Machine-readable error code (e.g. "ERR_CONFIG").
   * @param {ErrorOptions} [options] - Standard Error options such as `cause`.
   */
  constructor(message, code, options) {
    super(message, options);
    this.name = "ArchaeologistError";
    this.code = code;
  }
};

/** Configuration validation failure (code "ERR_CONFIG"). */
var ConfigError = class extends ArchaeologistError {
  /** The config field that failed validation (e.g. "depth", "viewport"). */
  field;
  /** The invalid value the user supplied. */
  value;
  /**
   * @param {string} field - Name of the offending config field.
   * @param {string} message - Human-readable description.
   * @param {unknown} value - The rejected value.
   * @param {ErrorOptions} [options] - Standard Error options such as `cause`;
   *   optional, so existing three-argument callers are unaffected.
   */
  constructor(field, message, value, options) {
    super(message, "ERR_CONFIG", options);
    this.name = "ConfigError";
    this.field = field;
    this.value = value;
  }
};

/** Authentication failure (code "ERR_AUTH"). */
var AuthError = class extends ArchaeologistError {
  /** Caller-supplied reason discriminator. */
  reason;
  /** Path of the auth script involved, if any. */
  scriptPath;
  /**
   * @param {string} reason - Reason discriminator.
   * @param {string} message - Human-readable description.
   * @param {string} [scriptPath] - Auth script path.
   * @param {ErrorOptions} [options] - Standard Error options such as `cause`.
   */
  constructor(reason, message, scriptPath, options) {
    super(message, "ERR_AUTH", options);
    this.name = "AuthError";
    this.reason = reason;
    this.scriptPath = scriptPath;
  }
};

/** Base class for errors tied to a specific URL during a crawl. */
var CrawlError = class extends ArchaeologistError {
  /** The URL being processed when the error occurred. */
  url;
  /**
   * @param {string} message - Human-readable description.
   * @param {string} code - Machine-readable error code.
   * @param {string} url - URL being processed.
   * @param {ErrorOptions} [options] - Standard Error options such as `cause`.
   */
  constructor(message, code, url, options) {
    super(message, code, options);
    this.name = "CrawlError";
    this.url = url;
  }
};

/** Page navigation failure (code "ERR_NAVIGATION"). */
var NavigationError = class extends CrawlError {
  /** Caller-supplied navigation status discriminator. */
  status;
  /** HTTP status code, if applicable (e.g. 404, 500). */
  httpStatus;
  /**
   * @param {string} url - URL that failed to load.
   * @param {string} status - Navigation status discriminator.
   * @param {string} message - Human-readable description.
   * @param {number} [httpStatus] - HTTP status code, when one was received.
   * @param {ErrorOptions} [options] - Standard Error options such as `cause`.
   */
  constructor(url, status, message, httpStatus, options) {
    super(message, "ERR_NAVIGATION", url, options);
    this.name = "NavigationError";
    this.status = status;
    this.httpStatus = httpStatus;
  }
};

/** Failure inside a data collector (code "ERR_COLLECTOR"). */
var CollectorError = class extends CrawlError {
  /** Identifier of the collector that failed. */
  collector;
  /**
   * @param {string} collector - Collector identifier.
   * @param {string} url - URL being processed.
   * @param {string} message - Human-readable description.
   * @param {ErrorOptions} [options] - Standard Error options such as `cause`.
   */
  constructor(collector, url, message, options) {
    super(message, "ERR_COLLECTOR", url, options);
    this.name = "CollectorError";
    this.collector = collector;
  }
};

/** Failure while diffing (code "ERR_DIFF"). */
var DiffError = class extends ArchaeologistError {
  /** Caller-supplied reason discriminator. */
  reason;
  /**
   * @param {string} reason - Reason discriminator.
   * @param {string} message - Human-readable description.
   * @param {ErrorOptions} [options] - Standard Error options such as `cause`.
   */
  constructor(reason, message, options) {
    super(message, "ERR_DIFF", options);
    this.name = "DiffError";
    this.reason = reason;
  }
};

/** Failure related to a bundle (code "ERR_BUNDLE"). */
var BundleError = class extends ArchaeologistError {
  /** Caller-supplied reason discriminator. */
  reason;
  /** Path of the bundle involved. */
  bundlePath;
  /**
   * @param {string} reason - Reason discriminator.
   * @param {string} message - Human-readable description.
   * @param {string} [bundlePath] - Bundle path.
   * @param {ErrorOptions} [options] - Standard Error options such as `cause`.
   */
  constructor(reason, message, bundlePath, options) {
    super(message, "ERR_BUNDLE", options);
    this.name = "BundleError";
    this.reason = reason;
    this.bundlePath = bundlePath;
  }
};
229
+
230
+ // src/utils/logger.ts
231
// ANSI SGR escape sequences used to style log output on the terminal.
var RESET = "\x1B[0m"; // reset all attributes
var DIM = "\x1B[2m";
var RED = "\x1B[31m";
var YELLOW = "\x1B[33m";
var GREEN = "\x1B[32m";
var CYAN = "\x1B[36m";
var BOLD = "\x1B[1m";
// Numeric severity per level name; a message is emitted when its level's
// number is >= the logger's configured level's number.
var LEVEL_PRIORITY = {
  debug: 0,
  info: 1,
  warn: 2,
  error: 3
};
244
/**
 * Format the current local time as "HH:MM:SS.mmm" (zero-padded).
 *
 * @returns {string} Local wall-clock time with millisecond precision.
 */
function timestamp() {
  const now = new Date();
  const pad = (value, width) => String(value).padStart(width, "0");
  const clock = [now.getHours(), now.getMinutes(), now.getSeconds()]
    .map((part) => pad(part, 2))
    .join(":");
  return `${clock}.${pad(now.getMilliseconds(), 3)}`;
}
252
/**
 * Write one formatted log line via console.error if `gate` is enabled on
 * `log`. Line layout: dimmed timestamp, coloured tag, then the message;
 * extra args are passed through to console.error unchanged.
 */
function emitLogLine(log, gate, color, tag, message, args) {
  if (!log.isLevelEnabled(gate)) {
    return;
  }
  const stamp = `${DIM}${timestamp()}${RESET}`;
  const label = `${color}${tag}${RESET}`;
  console.error(`${stamp} ${label} ${message}`, ...args);
}

/**
 * Minimal levelled logger. Every level writes through console.error; a
 * message is emitted only when its level is at or above the configured
 * minimum according to LEVEL_PRIORITY.
 */
var Logger = class {
  level;
  /** @param {string} [level="info"] - Minimum level to emit. */
  constructor(level = "info") {
    this.level = level;
  }
  /** Update the minimum log level at runtime. */
  setLevel(level) {
    this.level = level;
  }
  /** Get the current minimum log level. */
  getLevel() {
    return this.level;
  }
  /** Check whether a given level would be emitted. */
  isLevelEnabled(level) {
    return LEVEL_PRIORITY[level] >= LEVEL_PRIORITY[this.level];
  }
  debug(message, ...args) {
    emitLogLine(this, "debug", CYAN, "DEBUG", message, args);
  }
  info(message, ...args) {
    emitLogLine(this, "info", BOLD, "INFO", message, args);
  }
  warn(message, ...args) {
    emitLogLine(this, "warn", YELLOW, "WARN", message, args);
  }
  error(message, ...args) {
    emitLogLine(this, "error", RED, "ERROR", message, args);
  }
  /** Success message — gated on the "info" level. */
  success(message, ...args) {
    emitLogLine(this, "info", GREEN, "OK", message, args);
  }
};
301
/** Shared module-level Logger instance; minimum level starts at "info". */
var logger = new Logger("info");
302
+
303
+ // src/crawl/frontier.ts
304
/**
 * FIFO crawl frontier with URL de-duplication and an optional depth limit.
 * The queue is an array plus a moving head index; the consumed prefix is
 * dropped once more than half of the array has been dequeued.
 */
var Frontier = class {
  queue = [];
  head = 0;
  seen = new Set();
  maxDepth;
  /** @param {{maxDepth?: number}} [options] - Depth limit; unlimited when omitted. */
  constructor(options = {}) {
    const { maxDepth } = options;
    this.maxDepth = maxDepth ?? Infinity;
  }
  /**
   * Add a URL to the queue.
   * @returns {boolean} false when the entry is too deep or its normalized URL
   *   was already seen; true when it was queued.
   */
  enqueue(entry) {
    if (entry.depth > this.maxDepth) {
      return false;
    }
    const key = normalizeUrl(entry.url);
    const isNew = !this.seen.has(key);
    if (isNew) {
      this.seen.add(key);
      this.queue.push(entry);
    }
    return isNew;
  }
  /**
   * Remove and return the oldest queued entry, or undefined when empty.
   */
  dequeue() {
    const { queue, head } = this;
    if (head >= queue.length) {
      return undefined;
    }
    const next = queue[head];
    this.head = head + 1;
    // Compact once the consumed prefix exceeds half of the backing array.
    if (this.head > queue.length / 2) {
      this.queue = queue.slice(this.head);
      this.head = 0;
    }
    return next;
  }
  /**
   * Whether a URL was ever enqueued (still queued or already dequeued).
   */
  hasSeen(url) {
    return this.seen.has(normalizeUrl(url));
  }
  /** Number of entries still waiting in the queue. */
  get size() {
    return this.queue.length - this.head;
  }
  /** Total number of distinct normalized URLs ever enqueued. */
  get totalSeen() {
    return this.seen.size;
  }
  /** True when nothing is left to dequeue. */
  get isEmpty() {
    return this.head >= this.queue.length;
  }
};
368
+
369
+ // src/security/output-sanitizer.ts
370
+ import { resolve, extname, isAbsolute, normalize } from "path";
371
+ import { accessSync, constants } from "fs";
372
+ import { platform } from "os";
373
/**
 * Upper-case device names that sanitizeFilename treats as reserved:
 * CON, PRN, AUX, NUL, COM1-COM9 and LPT1-LPT9.
 */
var WINDOWS_RESERVED = new Set([
  "CON",
  "PRN",
  "AUX",
  "NUL",
  ...["COM", "LPT"].flatMap((prefix) => Array.from({ length: 9 }, (_, index) => `${prefix}${index + 1}`))
]);
397
/**
 * Turn arbitrary text into a filename restricted to [a-zA-Z0-9._-].
 *
 * Steps: percent-decode (best effort), drop NULs and ".." sequences, replace
 * every other character with "_", collapse and trim underscores, prefix
 * reserved Windows device names with "_", and cap the length at 255 characters
 * while keeping a short extension.
 *
 * @param {string} input - Raw name (for example host + path of a URL).
 * @returns {string} A sanitized filename; "_unnamed" when nothing usable remains.
 */
function sanitizeFilename(input) {
  if (!input) return "_unnamed";
  let result = input;
  try {
    result = decodeURIComponent(result);
  } catch {
    // Malformed percent-encoding: keep the raw text; the replacements below
    // neutralise the "%" characters anyway.
  }
  result = result
    .replace(/\0/g, "")
    .replace(/\.\.\//g, "")
    .replace(/\.\.\\/g, "")
    .replace(/\.\./g, "")
    .replace(/[/\\:*?"<>|\x00-\x1F\x7F]/g, "_")
    .replace(/[^a-zA-Z0-9._-]/g, "_")
    .replace(/_{2,}/g, "_")
    .replace(/^[_]+/, "")
    .replace(/[_]+$/, "");
  if (!result || result === "." || result === "..") {
    return "_unnamed";
  }
  // Windows resolves device names from the part before the FIRST dot, so
  // "NUL.tar.gz" is as reserved as "NUL.txt"; stripping only the last
  // extension would miss it.
  const firstDot = result.indexOf(".");
  const deviceStem = firstDot === -1 ? result : result.slice(0, firstDot);
  if (WINDOWS_RESERVED.has(deviceStem.toUpperCase())) {
    result = `_${result}`;
  }
  const MAX_LENGTH = 255;
  if (result.length > MAX_LENGTH) {
    const extension = extname(result);
    if (extension && extension.length < 20) {
      // Keep the extension and truncate the front part to make room for it.
      const maxBase = MAX_LENGTH - extension.length;
      result = result.slice(0, maxBase) + extension;
    } else {
      result = result.slice(0, MAX_LENGTH);
    }
  }
  return result;
}
433
/**
 * Derive a sanitized filename from a URL: hostname + pathname + query string.
 * Text that does not parse as a URL is sanitized as-is.
 *
 * @param {string} url - Absolute URL, or arbitrary text.
 * @returns {string} The result of sanitizeFilename on the derived name.
 */
function urlToFilename(url) {
  let rawName = url;
  try {
    const { hostname, pathname, search } = new URL(url);
    rawName = `${hostname}${pathname}${search}`;
  } catch {
    // Not a parseable URL: fall back to the original text.
  }
  return sanitizeFilename(rawName);
}
446
+
447
+ // src/collectors/screenshot-capturer.ts
448
+ import { createHash } from "crypto";
449
+ import { mkdir, stat } from "fs/promises";
450
+ import { join } from "path";
451
// Viewport used by captureScreenshots when the caller passes none.
var DEFAULT_VIEWPORT = { width: 1280, height: 720 };
// triggerLazyContent scrolls at most this many viewport heights down the page.
var MAX_SCROLL_INCREMENTS = 3;
// Milliseconds triggerLazyContent waits after each scroll step.
var SCROLL_SETTLE_MS = 200;
454
/**
 * Capture a full-page and a viewport screenshot of the current page into
 * `outputDir`, named after the page URL, and return their metadata.
 *
 * This is best-effort: on any failure a placeholder record is returned (empty
 * paths, all-zero hashes, 0x0 dimensions) and the failure is logged as a
 * warning instead of being dropped silently.
 *
 * @param {object} page - Page object exposing `url()` and `screenshot()`,
 *   plus whatever triggerLazyContent and detectModals need.
 * @param {string} baseUrl - Unused by this function; kept so existing callers
 *   keep working.
 * @param {string} outputDir - Directory for the PNG files (created recursively).
 * @param {{width: number, height: number}} [viewport] - Viewport recorded in
 *   the result and used for lazy-load scrolling; defaults to DEFAULT_VIEWPORT.
 * @returns {Promise<object>} Screenshot metadata record.
 */
async function captureScreenshots(page, baseUrl, outputDir, viewport) {
  const effectiveViewport = viewport ?? DEFAULT_VIEWPORT;
  const pageUrl = page.url();
  const baseName = urlToFilename(pageUrl);
  const fullPagePath = join(outputDir, `${baseName}_full.png`);
  const viewportPath = join(outputDir, `${baseName}_viewport.png`);
  try {
    await mkdir(outputDir, { recursive: true });
    // Scroll first so lazily loaded content is present in the full-page shot.
    await triggerLazyContent(page, effectiveViewport);
    const modalDetected = await detectModals(page);
    const fullPageBuffer = await page.screenshot({ fullPage: true, path: fullPagePath });
    const viewportBuffer = await page.screenshot({ path: viewportPath });
    return {
      pageUrl,
      fullPagePath,
      viewportPath,
      viewport: effectiveViewport,
      fullPageHash: computeSha256(fullPageBuffer),
      viewportHash: computeSha256(viewportBuffer),
      dimensions: extractPngDimensions(fullPageBuffer),
      fileSizeBytes: await getFileSize(fullPagePath, fullPageBuffer),
      modalDetected
    };
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    logger.warn(`Screenshot capture failed for ${pageUrl}: ${reason}`);
    return {
      pageUrl,
      fullPagePath: "",
      viewportPath: "",
      viewport: effectiveViewport,
      fullPageHash: "0".repeat(64),
      viewportHash: "0".repeat(64),
      dimensions: { width: 0, height: 0 },
      fileSizeBytes: 0,
      modalDetected: false
    };
  }
}
497
/**
 * Scroll down the page one viewport height at a time, pausing
 * SCROLL_SETTLE_MS after each step, then scroll back to the top. Scrolling
 * stops at the document height or at MAX_SCROLL_INCREMENTS viewport heights,
 * whichever is smaller.
 *
 * @param {object} page - Page object exposing `evaluate(fn, arg)`.
 * @param {{height: number}} viewport - Viewport whose height is the scroll step.
 * @returns {Promise<void>}
 */
async function triggerLazyContent(page, viewport) {
  const documentHeight = await page.evaluate(() => document.documentElement.scrollHeight);
  const step = viewport.height;
  const limit = Math.min(documentHeight, step * MAX_SCROLL_INCREMENTS);
  let offset = step;
  while (offset <= limit) {
    await page.evaluate((y) => window.scrollTo(0, y), offset);
    // The wait runs inside the page, matching the scroll that preceded it.
    await page.evaluate(
      (ms) => new Promise((done) => setTimeout(done, ms)),
      SCROLL_SETTLE_MS
    );
    offset += step;
  }
  await page.evaluate(() => window.scrollTo(0, 0));
}
511
/**
 * Report whether any visible modal-like element is on the page. Candidates
 * are open <dialog> elements, [role="dialog"], and ".modal" elements carrying
 * "show" or "active".
 *
 * @param {object} page - Page object exposing `locator(selector)`.
 * @returns {Promise<boolean>} true if at least one candidate is visible;
 *   false if none is, or if querying the page throws.
 */
async function detectModals(page) {
  const candidates = page.locator('dialog[open], [role="dialog"], .modal.show, .modal.active');
  try {
    const total = await candidates.count();
    let index = 0;
    while (index < total) {
      if (await candidates.nth(index).isVisible()) {
        return true;
      }
      index += 1;
    }
    return false;
  } catch {
    return false;
  }
}
526
/**
 * Compute the SHA-256 digest of a buffer.
 *
 * @param {Buffer|string} buffer - Data to hash.
 * @returns {string} Lower-case hexadecimal digest (64 characters).
 */
function computeSha256(buffer) {
  const hasher = createHash("sha256");
  hasher.update(buffer);
  return hasher.digest("hex");
}
529
/**
 * Read the pixel dimensions from a PNG's IHDR chunk.
 *
 * A PNG starts with an 8-byte signature, then the IHDR chunk: 4-byte length,
 * the 4-byte type "IHDR", then big-endian 32-bit width (offset 16) and height
 * (offset 20). Buffers that are too short or do not carry the signature and
 * IHDR marker yield 0x0 instead of arbitrary bytes read as numbers.
 *
 * @param {Buffer} buffer - Encoded image bytes.
 * @returns {{width: number, height: number}} Dimensions, or zeros when the
 *   buffer is not a recognisable PNG.
 */
function extractPngDimensions(buffer) {
  const unknown = { width: 0, height: 0 };
  if (!buffer || buffer.length < 24) {
    return unknown;
  }
  // Signature (offset 0) and chunk type "IHDR" (offset 12).
  const signature = [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a];
  const ihdrTag = [0x49, 0x48, 0x44, 0x52];
  const matchesAt = (expected, offset) => expected.every((byte, i) => buffer[offset + i] === byte);
  if (!matchesAt(signature, 0) || !matchesAt(ihdrTag, 12)) {
    return unknown;
  }
  return {
    width: buffer.readUInt32BE(16),
    height: buffer.readUInt32BE(20)
  };
}
537
/**
 * Return the on-disk size of `filePath`, falling back to the in-memory buffer
 * length when the file cannot be stat'ed.
 *
 * @param {string} filePath - File to inspect.
 * @param {{length: number}} buffer - Fallback source for the size.
 * @returns {Promise<number>} Size in bytes.
 */
async function getFileSize(filePath, buffer) {
  let size;
  try {
    ({ size } = await stat(filePath));
  } catch {
    size = buffer.length;
  }
  return size;
}
545
+
546
+ // src/collectors/form-prober.ts
547
/**
 * Collect metadata for every <form> on the page, plus one synthetic
 * "implicit" form grouping the visible fields that live outside any <form>.
 *
 * Each form is inspected inside the page through `evaluate`, so the callbacks
 * passed to `evaluate` must be self-contained: they cannot close over anything
 * from this module, which is why the label helper is defined in both.
 * A failure on one form skips only that form; a failure in the implicit-form
 * pass is ignored. Both are logged at debug level.
 *
 * @param {object} page - Page object exposing `locator()` and `evaluate()`.
 * @param {string} pageUrl - URL of the page; base for resolving `action`.
 * @returns {Promise<{pageUrl: string, forms: object[]}>} Converted form records.
 */
async function probeForms(page, pageUrl) {
  const forms = [];
  const describeError = (err) => (err instanceof Error ? err.message : String(err));
  const formLocators = await page.locator("form").all();
  for (const formLocator of formLocators) {
    try {
      const rawForm = await formLocator.evaluate(
        (formEl, currentPageUrl) => {
          // Label lookup order: aria-label, aria-labelledby, <label for>, wrapping <label>.
          const deriveLabel = (element) => {
            const ariaLbl = element.getAttribute("aria-label");
            if (ariaLbl) return ariaLbl;
            const labelledBy = element.getAttribute("aria-labelledby");
            if (labelledBy) {
              // aria-labelledby is a whitespace-separated list of ids.
              const text = labelledBy
                .split(/\s+/)
                .filter(Boolean)
                .map((refId) => document.getElementById(refId)?.textContent?.trim() ?? "")
                .filter(Boolean)
                .join(" ");
              if (text) return text;
            }
            const elId = element.getAttribute("id");
            if (elId) {
              const labelEl = document.querySelector(`label[for="${CSS.escape(elId)}"]`);
              const text = labelEl?.textContent?.trim();
              if (text) return text;
            }
            const parentText = element.closest("label")?.textContent?.trim();
            if (parentText) return parentText;
            return null;
          };
          const rawAction = formEl.getAttribute("action");
          let action = "";
          if (rawAction !== null && rawAction !== "") {
            try {
              action = new URL(rawAction, currentPageUrl).href;
            } catch {
              action = rawAction;
            }
          }
          const method = (formEl.getAttribute("method") ?? "GET").toUpperCase();
          const id = formEl.getAttribute("id");
          const name = formEl.getAttribute("name");
          const enctype = formEl.getAttribute("enctype");
          const ariaLabel = formEl.getAttribute("aria-label");
          const fields = [];
          const processedGroups = new Set();
          formEl.querySelectorAll("input, select, textarea").forEach((el) => {
            const tagName = el.tagName.toLowerCase();
            const fieldName = el.getAttribute("name") ?? "";
            let fieldType;
            if (tagName === "select") {
              fieldType = "select";
            } else if (tagName === "textarea") {
              fieldType = "textarea";
            } else {
              fieldType = el.type ?? "text";
            }
            if ((fieldType === "radio" || fieldType === "checkbox") && fieldName) {
              // Key by type as well as name so a radio group and a checkbox
              // group sharing a name are reported separately.
              const groupKey = `${fieldType}:${fieldName}`;
              if (processedGroups.has(groupKey)) return;
              processedGroups.add(groupKey);
              // Only inputs of the same type belong to the group; a hidden
              // fallback input that shares the name is not one of its options.
              const sameName = Array.from(
                formEl.querySelectorAll(`input[name="${CSS.escape(fieldName)}"]`)
              ).filter((candidate) => candidate.type === fieldType);
              const groupEls = sameName.length > 0 ? sameName : [el];
              const groupOptions = groupEls.map((inp) => ({
                value: inp.value ?? "",
                label: deriveLabel(inp) ?? inp.value ?? "",
                selected: inp.checked
              }));
              const firstEl = groupEls[0];
              fields.push({
                name: fieldName,
                type: fieldType,
                required: firstEl.required || firstEl.getAttribute("aria-required") === "true",
                pattern: null,
                placeholder: null,
                label: deriveLabel(firstEl),
                options: groupOptions,
                min: null,
                max: null,
                step: null,
                maxLength: null,
                multiple: false,
                accept: null,
                defaultValue: null,
                isPassword: false
              });
              return;
            }
            let options = null;
            if (tagName === "select") {
              options = Array.from(el.querySelectorAll("option"), (opt) => ({
                value: opt.value,
                label: opt.textContent?.trim() ?? opt.value,
                selected: opt.selected
              }));
            }
            const maxLengthAttr = el.getAttribute("maxlength");
            const parsedMaxLength = maxLengthAttr !== null ? parseInt(maxLengthAttr, 10) : NaN;
            let defaultValue = null;
            if (tagName === "select") {
              defaultValue = el.selectedOptions[0]?.value ?? null;
            } else {
              defaultValue = el.value || null;
            }
            fields.push({
              name: fieldName,
              type: fieldType,
              required: el.required || el.getAttribute("aria-required") === "true",
              pattern: el.getAttribute("pattern"),
              placeholder: el.getAttribute("placeholder"),
              label: deriveLabel(el),
              options,
              min: el.getAttribute("min"),
              max: el.getAttribute("max"),
              step: el.getAttribute("step"),
              maxLength: Number.isNaN(parsedMaxLength) ? null : parsedMaxLength,
              multiple: el.multiple || false,
              accept: el.getAttribute("accept"),
              defaultValue,
              isPassword: fieldType === "password"
            });
          });
          let submitButtonText = null;
          const submitBtn = formEl.querySelector('button[type="submit"]') ?? formEl.querySelector('input[type="submit"]');
          if (submitBtn) {
            if (submitBtn.tagName.toLowerCase() === "input") {
              submitButtonText = submitBtn.value || null;
            } else {
              submitButtonText = submitBtn.textContent?.trim() || null;
            }
          } else {
            // No explicit submit control: fall back to the first <button>.
            const firstButton = formEl.querySelector("button");
            if (firstButton) {
              submitButtonText = firstButton.textContent?.trim() || null;
            }
          }
          return {
            action,
            method,
            id,
            name,
            enctype,
            ariaLabel,
            fields,
            submitButtonText
          };
        },
        pageUrl
      );
      forms.push(convertRawFormToFormInfo(rawForm, false));
    } catch (err) {
      logger.debug(`Skipping a <form> on ${pageUrl}: ${describeError(err)}`);
    }
  }
  try {
    const implicitForms = await page.evaluate(
      (currentPageUrl) => {
        // Same lookup order as the per-form pass; duplicated because this
        // callback also has to be self-contained.
        const deriveLabel = (element) => {
          const ariaLbl = element.getAttribute("aria-label");
          if (ariaLbl) return ariaLbl;
          const labelledBy = element.getAttribute("aria-labelledby");
          if (labelledBy) {
            const text = labelledBy
              .split(/\s+/)
              .filter(Boolean)
              .map((refId) => document.getElementById(refId)?.textContent?.trim() ?? "")
              .filter(Boolean)
              .join(" ");
            if (text) return text;
          }
          const elId = element.getAttribute("id");
          if (elId) {
            const labelEl = document.querySelector(`label[for="${CSS.escape(elId)}"]`);
            const text = labelEl?.textContent?.trim();
            if (text) return text;
          }
          const parentText = element.closest("label")?.textContent?.trim();
          if (parentText) return parentText;
          return null;
        };
        const orphanedFields = Array.from(
          document.querySelectorAll("input, select, textarea")
        ).filter((field) => {
          if (field.closest("form")) return false;
          const type = field.type;
          const isInput = field.tagName.toLowerCase() === "input";
          // Hidden inputs and button-like inputs are not user-editable fields.
          return !(isInput && (type === "hidden" || type === "submit" || type === "button"));
        });
        if (orphanedFields.length === 0) return [];
        const fields = orphanedFields.map((el) => {
          const tagName = el.tagName.toLowerCase();
          let fieldType;
          if (tagName === "select") {
            fieldType = "select";
          } else if (tagName === "textarea") {
            fieldType = "textarea";
          } else {
            fieldType = el.type ?? "text";
          }
          let options = null;
          if (tagName === "select") {
            options = Array.from(el.querySelectorAll("option"), (opt) => ({
              value: opt.value,
              label: opt.textContent?.trim() ?? opt.value,
              selected: opt.selected
            }));
          }
          let defaultValue = null;
          if (tagName === "select") {
            defaultValue = el.selectedOptions[0]?.value ?? null;
          } else {
            defaultValue = el.value || null;
          }
          const maxLengthAttr = el.getAttribute("maxlength");
          const parsedMaxLength = maxLengthAttr ? parseInt(maxLengthAttr, 10) : NaN;
          return {
            name: el.getAttribute("name") ?? "",
            type: fieldType,
            required: el.required || el.getAttribute("aria-required") === "true",
            pattern: el.getAttribute("pattern"),
            placeholder: el.getAttribute("placeholder"),
            label: deriveLabel(el),
            options,
            min: el.getAttribute("min"),
            max: el.getAttribute("max"),
            step: el.getAttribute("step"),
            // A non-numeric maxlength is reported as absent, not as NaN.
            maxLength: Number.isNaN(parsedMaxLength) ? null : parsedMaxLength,
            multiple: el.multiple || false,
            accept: el.getAttribute("accept"),
            defaultValue,
            isPassword: fieldType === "password"
          };
        });
        let submitButtonText = null;
        const submitBtns = document.querySelectorAll(
          'button[type="submit"], input[type="submit"]'
        );
        submitBtns.forEach((btn) => {
          if (!btn.closest("form") && !submitButtonText) {
            if (btn.tagName.toLowerCase() === "input") {
              submitButtonText = btn.value || null;
            } else {
              submitButtonText = btn.textContent?.trim() || null;
            }
          }
        });
        return [
          {
            action: "",
            method: "GET",
            id: null,
            name: null,
            enctype: null,
            ariaLabel: null,
            fields,
            submitButtonText
          }
        ];
      },
      pageUrl
    );
    for (const rawForm of implicitForms) {
      forms.push(convertRawFormToFormInfo(rawForm, true));
    }
  } catch (err) {
    logger.debug(`Implicit form detection failed on ${pageUrl}: ${describeError(err)}`);
  }
  return {
    pageUrl,
    forms
  };
}
862
/**
 * Convert a raw in-page form record into the compact form record: null or
 * undefined optional properties are omitted, `multiple` is kept only when
 * truthy, and a password field's default value is replaced by "[REDACTED]".
 *
 * @param {object} raw - Raw form record with `action`, `method`, `fields` and
 *   optional `id`, `name`, `submitButtonText`, `enctype`, `ariaLabel`.
 * @param {boolean} isImplicit - Whether the form was synthesized from fields
 *   outside any <form>.
 * @returns {object} Form record with `validationMessages: []` and
 *   `wasHidden: false` preset.
 */
function convertRawFormToFormInfo(raw, isImplicit) {
  // Copy each listed key, in order, when the source value is not null/undefined.
  const assignPresent = (target, source, keys) => {
    for (const key of keys) {
      const value = source[key];
      if (value !== null && value !== undefined) {
        target[key] = value;
      }
    }
  };
  const fields = raw.fields.map((rawField) => {
    const converted = {
      name: rawField.name,
      type: rawField.type,
      required: rawField.required
    };
    assignPresent(converted, rawField, [
      "pattern",
      "placeholder",
      "label",
      "options",
      "min",
      "max",
      "step",
      "maxLength"
    ]);
    if (rawField.multiple) {
      converted.multiple = rawField.multiple;
    }
    assignPresent(converted, rawField, ["accept"]);
    if (rawField.defaultValue != null) {
      converted.defaultValue = rawField.isPassword ? "[REDACTED]" : rawField.defaultValue;
    }
    return converted;
  });
  const formInfo = {
    action: raw.action,
    method: raw.method,
    isImplicit,
    fields,
    validationMessages: [],
    wasHidden: false
  };
  assignPresent(formInfo, raw, ["id", "name", "submitButtonText", "enctype", "ariaLabel"]);
  return formInfo;
}
901
+
902
+ // src/security/credential-scrubber.ts
903
// Lower-case header names whose values scrubHeaders always replaces with
// "[REDACTED]", regardless of options.
var ALWAYS_SCRUB_HEADERS = /* @__PURE__ */ new Set([
  "authorization",
  "proxy-authorization",
  "x-api-key",
  "x-auth-token",
  "x-session-id",
  "x-csrf-token",
  "x-xsrf-token"
]);
// Lower-case cookie header names; scrubHeaders redacts these unless the
// caller passes includeCookies: true.
var COOKIE_HEADERS = /* @__PURE__ */ new Set([
  "cookie",
  "set-cookie"
]);
916
/**
 * Return a copy of `headers` with credential-bearing values replaced by
 * "[REDACTED]". Names are matched case-insensitively; original key casing is
 * preserved in the result.
 *
 * @param {Record<string, unknown>} headers - Header name/value pairs.
 * @param {{includeCookies?: boolean}} [options] - When `includeCookies` is
 *   true, cookie headers are passed through; otherwise they are redacted.
 * @returns {Record<string, unknown>} New object with scrubbed values.
 */
function scrubHeaders(headers, options) {
  const keepCookies = options?.includeCookies ?? false;
  return Object.entries(headers).reduce((scrubbed, [headerName, headerValue]) => {
    const lowered = headerName.toLowerCase();
    const mustHide = ALWAYS_SCRUB_HEADERS.has(lowered) || (COOKIE_HEADERS.has(lowered) && !keepCookies);
    scrubbed[headerName] = mustHide ? "[REDACTED]" : headerValue;
    return scrubbed;
  }, {});
}
937
+
938
+ // src/collectors/network-logger.ts
939
// Text response bodies longer than this many characters are truncated before storage.
var MAX_BODY_SIZE = 100 * 1024;
// Sample frames kept per WebSocket connection, counted separately for each direction.
var MAX_WS_MESSAGES_PER_DIRECTION = 5;
// Fragments looked for anywhere inside a request hostname to flag analytics traffic.
var ANALYTICS_DOMAINS = [
  "google-analytics.com",
  "googletagmanager.com",
  "analytics.google.com",
  "segment.io",
  "segment.com",
  "cdn.segment.com",
  "api.segment.io",
  "mixpanel.com",
  "hotjar.com",
  "fullstory.com",
  "heap.io",
  "heapanalytics.com",
  "amplitude.com",
  "plausible.io",
  "clarity.ms",
  "newrelic.com",
  "nr-data.net",
  "sentry.io",
  "datadog",
  "pendo.io"
];
// Lower-cased path extensions treated as static assets.
var STATIC_EXTENSIONS = /* @__PURE__ */ new Set([
  ".css",
  ".js",
  ".png",
  ".jpg",
  ".jpeg",
  ".gif",
  ".svg",
  ".woff",
  ".woff2",
  ".ttf",
  ".eot",
  ".otf",
  ".ico",
  ".webp",
  ".avif",
  ".mp4",
  ".webm",
  ".mp3",
  ".ogg",
  ".wav",
  ".map"
]);
// Resource types (as reported by `request.resourceType()`) treated as static assets.
var STATIC_RESOURCE_TYPES = /* @__PURE__ */ new Set([
  "stylesheet",
  "image",
  "font",
  "media"
]);

/**
 * Creates a recorder for the network activity of one page.
 *
 * `start()` subscribes to the page's "request", "response", "requestfailed"
 * and "websocket" events; `stop()` unsubscribes and returns everything that
 * was captured so far.
 *
 * @param {object} page - Object exposing `on(event, handler)` / `off(event, handler)`.
 * @param {string} pageUrl - URL of the page being observed; stored on the
 *   result and used to tell same-origin from third-party requests.
 * @param {{ includeCookies?: boolean }} [options] - Forwarded to `scrubHeaders`.
 * @returns {{ start(): void, stop(): object }} Logger handle. `stop()` returns
 *   `{ pageUrl, requests, responses, failedRequests, graphqlOperations,
 *   webSocketConnections, cookieMutations }` (`cookieMutations` is always empty here).
 */
function createNetworkLogger(page, pageUrl, options) {
  const includeCookies = options?.includeCookies ?? false;
  let counter = 0;
  const requestIdMap = new Map();
  const requestTimings = new Map();
  // Request id -> detected GraphQL operation, so each response is attached to
  // the operation that actually produced it (several operations normally share
  // one endpoint URL, so the URL alone cannot identify them).
  const graphqlOpsByRequestId = new Map();
  const requests = [];
  const responses = [];
  const failedRequests = [];
  const graphqlOperations = [];
  const webSocketConnections = [];

  let pageOrigin = "";
  try {
    pageOrigin = new URL(pageUrl).origin;
  } catch {
    // Unparseable page URL: third-party detection is simply disabled.
  }

  // Buckets a request as websocket / analytics / api / static / third-party / other.
  // The checks run in that order; the first match wins.
  function classifyRequest(url, resourceType, contentType) {
    if (resourceType === "websocket") {
      return "websocket";
    }
    let parsed = null;
    try {
      parsed = new URL(url);
    } catch {
      // Unparseable URL: only the string-based checks below apply.
    }
    if (parsed && ANALYTICS_DOMAINS.some((domain) => parsed.hostname.includes(domain))) {
      return "analytics";
    }
    if (url.includes("/api/") || url.includes("/graphql") || contentType && contentType.includes("application/json")) {
      return "api";
    }
    if (STATIC_RESOURCE_TYPES.has(resourceType)) {
      return "static";
    }
    if (parsed) {
      const dotIndex = parsed.pathname.lastIndexOf(".");
      if (dotIndex !== -1 && STATIC_EXTENSIONS.has(parsed.pathname.slice(dotIndex).toLowerCase())) {
        return "static";
      }
      if (pageOrigin && parsed.origin !== pageOrigin) {
        return "third-party";
      }
    }
    return "other";
  }

  // Maps a resource type to the coarse initiator label stored on the request.
  function mapInitiator(resourceType) {
    switch (resourceType) {
      case "document":
        return "navigation";
      case "script":
      case "stylesheet":
        return "script";
      case "fetch":
        return "fetch";
      case "xhr":
        return "xhr";
      default:
        return "other";
    }
  }

  // Records a GraphQL operation when a POST to a URL containing "graphql"
  // carries a JSON body. Bodies that fail to parse are ignored.
  function tryDetectGraphQL(request, requestId, url, body) {
    if (request.method() !== "POST") return;
    if (!url.includes("graphql")) return;
    if (!body) return;
    try {
      const parsed = JSON.parse(body);
      const operationName = parsed.operationName ?? "unknown";
      const query = parsed.query ?? "";
      const variables = parsed.variables;
      let operationType = "query";
      const trimmedQuery = query.trimStart();
      if (trimmedQuery.startsWith("mutation")) {
        operationType = "mutation";
      } else if (trimmedQuery.startsWith("subscription")) {
        operationType = "subscription";
      }
      const op = {
        pageUrl,
        endpointUrl: url,
        operationName,
        operationType,
        query
      };
      if (variables !== void 0 && variables !== null) {
        op.variables = variables;
      }
      graphqlOperations.push(op);
      graphqlOpsByRequestId.set(requestId, op);
    } catch {
      // Not a JSON GraphQL payload; nothing to record.
    }
  }

  function onRequest(request) {
    const id = `req_${counter++}`;
    requestIdMap.set(request, id);
    requestTimings.set(id, Date.now());
    const url = request.url();
    const resourceType = request.resourceType();
    const method = request.method();
    const rawHeaders = request.headers();
    const postData = request.postData() ?? void 0;
    const contentType = rawHeaders["content-type"] ?? void 0;
    const captured = {
      requestId: id,
      url,
      method,
      headers: scrubHeaders(rawHeaders, { includeCookies }),
      resourceType,
      classification: classifyRequest(url, resourceType, contentType),
      initiator: mapInitiator(resourceType)
    };
    if (postData !== void 0) {
      captured.body = postData;
    }
    if (contentType !== void 0) {
      captured.contentType = contentType;
    }
    requests.push(captured);
    tryDetectGraphQL(request, id, url, postData);
  }

  async function onResponse(response) {
    const id = requestIdMap.get(response.request());
    // Responses for requests that were never seen by onRequest are ignored.
    if (!id) return;
    const startTime = requestTimings.get(id) ?? Date.now();
    const timing = Date.now() - startTime;
    const rawHeaders = response.headers();
    const statusCode = response.status();
    const contentType = rawHeaders["content-type"] ?? void 0;
    let body;
    let bodySize = 0;
    try {
      const buffer = await response.body();
      bodySize = buffer.length;
      if (contentType && isTextContentType(contentType)) {
        const text = buffer.toString("utf-8");
        body = text.length > MAX_BODY_SIZE ? text.slice(0, MAX_BODY_SIZE) : text;
      }
    } catch {
      // Body could not be read; keep the response entry without a body.
    }
    const captured = {
      requestId: id,
      statusCode,
      headers: scrubHeaders(rawHeaders, { includeCookies }),
      bodySize,
      timing
    };
    if (body !== void 0) {
      captured.body = body;
    }
    if (contentType !== void 0) {
      captured.contentType = contentType;
    }
    responses.push(captured);
    if (body) {
      const gqlOp = graphqlOpsByRequestId.get(id);
      if (gqlOp && gqlOp.responseData === void 0) {
        try {
          gqlOp.responseData = JSON.parse(body);
        } catch {
          // Non-JSON (or truncated) body: leave responseData unset.
        }
      }
    }
  }

  function onRequestFailed(request) {
    const id = requestIdMap.get(request) ?? `req_${counter++}`;
    const url = request.url();
    failedRequests.push({
      requestId: id,
      url,
      method: request.method(),
      errorText: request.failure()?.errorText ?? "Unknown error",
      classification: classifyRequest(url, request.resourceType())
    });
  }

  function onWebSocket(ws) {
    const connection = {
      url: ws.url(),
      pageUrl,
      connected: true,
      messageCount: 0,
      sampleMessages: []
    };
    webSocketConnections.push(connection);
    const sampled = { received: 0, sent: 0 };
    // Every frame is counted; only the first few per direction are sampled.
    const recordFrame = (direction) => (data) => {
      connection.messageCount++;
      if (sampled[direction] >= MAX_WS_MESSAGES_PER_DIRECTION) return;
      sampled[direction]++;
      connection.sampleMessages.push({
        direction,
        data: typeof data.payload === "string" ? data.payload : data.payload.toString("utf-8"),
        timestamp: new Date().toISOString()
      });
    };
    ws.on("framereceived", recordFrame("received"));
    ws.on("framesent", recordFrame("sent"));
    ws.on("close", () => {
      connection.connected = false;
    });
  }

  return {
    start() {
      page.on("request", onRequest);
      page.on("response", onResponse);
      page.on("requestfailed", onRequestFailed);
      page.on("websocket", onWebSocket);
    },
    stop() {
      page.off("request", onRequest);
      page.off("response", onResponse);
      page.off("requestfailed", onRequestFailed);
      page.off("websocket", onWebSocket);
      return {
        pageUrl,
        requests,
        responses,
        failedRequests,
        graphqlOperations,
        webSocketConnections,
        cookieMutations: []
      };
    }
  };
}
1248
/**
 * Tells whether a Content-Type value denotes a textual payload.
 *
 * The check is a case-insensitive substring search for a fixed list of
 * markers (text/*, JSON, XML, JavaScript, XHTML, GraphQL and the "+json" /
 * "+xml" structured-syntax suffixes).
 *
 * @param {string} contentType - Raw Content-Type header value.
 * @returns {boolean} True when any marker occurs in the value.
 */
function isTextContentType(contentType) {
  const normalized = contentType.toLowerCase();
  const textMarkers = [
    "text/",
    "application/json",
    "application/xml",
    "application/javascript",
    "application/xhtml",
    "application/graphql",
    "+json",
    "+xml"
  ];
  return textMarkers.some((marker) => normalized.includes(marker));
}
1252
+
1253
+ // src/assembler/api-grouper.ts
1254
// 8-4-4-4-12 hexadecimal form, any letter case.
var UUID_WITH_DASHES = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
// 32 consecutive hexadecimal characters, any letter case.
var UUID_WITHOUT_DASHES = /^[0-9a-f]{32}$/i;

/**
 * @param {string} segment - One path segment.
 * @returns {boolean} True when the segment consists solely of decimal digits.
 */
function isNumericId(segment) {
  const digitsOnly = /^\d+$/;
  return digitsOnly.test(segment);
}

/**
 * @param {string} segment - One path segment.
 * @returns {boolean} True when the segment is a dashed or undashed UUID.
 */
function isUuid(segment) {
  return [UUID_WITH_DASHES, UUID_WITHOUT_DASHES].some((shape) => shape.test(segment));
}

/**
 * @param {string} segment - One path segment.
 * @returns {boolean} True for lower-case alphanumeric words joined by single
 *   hyphens (at least one hyphen), excluding anything that is a UUID.
 */
function isSlug(segment) {
  const hasHyphen = Boolean(segment) && segment.includes("-");
  return hasHyphen && /^[a-z0-9]+(-[a-z0-9]+)+$/.test(segment) && !isUuid(segment);
}
1268
/**
 * Replaces identifier-like path segments with placeholders.
 *
 * Everything from the first "?" onward is dropped. UUID and all-digit
 * segments become ":id", slug segments become ":slug", and every other
 * segment (including empty ones) is kept as-is.
 *
 * @param {string} path2 - Path or URL string.
 * @returns {string} The "/"-joined path with placeholders substituted.
 */
function parameterizePath(path2) {
  const [pathOnly] = path2.split("?");
  const rewritten = [];
  for (const part of pathOnly.split("/")) {
    if (part && (isUuid(part) || isNumericId(part))) {
      rewritten.push(":id");
    } else if (part && isSlug(part)) {
      rewritten.push(":slug");
    } else {
      rewritten.push(part);
    }
  }
  return rewritten.join("/");
}
1280
/**
 * Lists the kinds of identifier-like segments found in a path.
 *
 * Keys are ":id_<n>" for UUID / numeric segments and ":slug_<n>" for slugs,
 * with `n` counting from 0 independently for each key family. Values are
 * "uuid", "id" or "slug". The query string (from the first "?") is ignored.
 *
 * @param {string} path2 - Path or URL string.
 * @returns {Record<string, string>} Map of placeholder key to detected kind.
 */
function extractParameterTypes(path2) {
  const types = {};
  const nextIndex = { id: 0, slug: 0 };
  const record = (family, kind) => {
    types[`:${family}_${nextIndex[family]++}`] = kind;
  };
  const segments = path2.split("?")[0].split("/");
  segments.forEach((segment) => {
    if (!segment) return;
    if (isUuid(segment)) {
      record("id", "uuid");
    } else if (isNumericId(segment)) {
      record("id", "id");
    } else if (isSlug(segment)) {
      record("slug", "slug");
    }
  });
  return types;
}
1298
/**
 * Groups endpoints that share an HTTP method and parameterized path pattern.
 *
 * Groups appear in the order their first member was encountered. Each
 * group's `parameterTypes` is derived from that first member's URL only.
 *
 * @param {Array<{ url: string, method: string }>} endpoints - Observed endpoints.
 * @returns {Array<{ pattern: string, method: string, examples: object[], parameterTypes: object }>}
 */
function groupEndpoints(endpoints) {
  const groups = new Map();
  for (const endpoint of endpoints) {
    const pattern = parameterizePath(endpoint.url);
    const groupKey = `${endpoint.method} ${pattern}`;
    if (!groups.has(groupKey)) {
      groups.set(groupKey, {
        pattern,
        method: endpoint.method,
        examples: [],
        parameterTypes: extractParameterTypes(endpoint.url)
      });
    }
    groups.get(groupKey).examples.push(endpoint);
  }
  return [...groups.values()];
}
1318
+
1319
+ // src/assembler/flow-graph.ts
1320
// Longest node label produced by urlToLabel, including the "..." suffix.
var MAX_LABEL_LENGTH = 30;
// Default node count above which the Mermaid output groups nodes into subgraphs.
var DEFAULT_MAX_NODES = 50;

/**
 * Derives a short node label from a URL.
 *
 * The label is the URL's path (without the leading "/") after identifier-like
 * segments have been parameterized. A root or empty path yields "/". Labels
 * longer than MAX_LABEL_LENGTH are cut and suffixed with "...". Strings that
 * are not absolute URLs are treated as paths, with any query string dropped.
 *
 * @param {string} url - Absolute URL or bare path.
 * @returns {string} The label.
 */
function urlToLabel(url) {
  const extractPath = () => {
    try {
      return new URL(url).pathname;
    } catch {
      return url.split("?")[0];
    }
  };
  const rawPath = extractPath();
  if (rawPath === "" || rawPath === "/") {
    return "/";
  }
  const rooted = rawPath.startsWith("/") ? rawPath : "/" + rawPath;
  const label = parameterizePath(rooted).slice(1);
  if (label.length <= MAX_LABEL_LENGTH) {
    return label;
  }
  return label.slice(0, MAX_LABEL_LENGTH - 3) + "...";
}
1340
/**
 * Finds every node that lies on a back-edge cycle discovered by a
 * depth-first traversal of the directed graph described by `edges`.
 *
 * The traversal is iterative (explicit stack) so that very long navigation
 * chains cannot exhaust the JavaScript call stack. Nodes are visited in
 * first-appearance order and neighbours in insertion order.
 *
 * @param {Array<{ from: string, to: string }>} edges - Directed edges.
 * @returns {string[]} Nodes on detected cycles, each listed once, in
 *   discovery order. Empty when `edges` is empty or acyclic.
 */
function detectCycles(edges) {
  if (edges.length === 0) return [];

  const adjacency = new Map();
  const allNodes = new Set();
  for (const { from, to } of edges) {
    allNodes.add(from);
    allNodes.add(to);
    let neighbors = adjacency.get(from);
    if (!neighbors) {
      neighbors = new Set();
      adjacency.set(from, neighbors);
    }
    neighbors.add(to);
  }

  const VISITING = 1;
  const DONE = 2;
  const state = new Map();
  const cycleNodeSet = new Set();
  // Current DFS path, plus each path node's position for O(1) cycle-start lookup.
  const pathStack = [];
  const pathIndex = new Map();
  // One neighbour iterator per node on the path (parallel to pathStack).
  const pending = [];

  const enter = (node) => {
    state.set(node, VISITING);
    pathIndex.set(node, pathStack.length);
    pathStack.push(node);
    pending.push((adjacency.get(node) ?? []).values());
  };

  for (const root of allNodes) {
    if (state.has(root)) continue;
    enter(root);
    while (pending.length > 0) {
      const step = pending[pending.length - 1].next();
      if (step.done) {
        // All neighbours handled: leave the node.
        pending.pop();
        const finished = pathStack.pop();
        pathIndex.delete(finished);
        state.set(finished, DONE);
        continue;
      }
      const neighbor = step.value;
      const neighborState = state.get(neighbor);
      if (neighborState === VISITING) {
        // Back edge: everything from the neighbour to the path tip is on a cycle.
        for (let i = pathIndex.get(neighbor); i < pathStack.length; i++) {
          cycleNodeSet.add(pathStack[i]);
        }
      } else if (neighborState === void 0) {
        enter(neighbor);
      }
    }
  }
  return Array.from(cycleNodeSet);
}
1386
/**
 * Buckets URLs by the first non-empty segment of their path.
 *
 * URLs whose path has no segments go into the "/" bucket. Strings that are
 * not absolute URLs are treated as paths with the query string removed.
 *
 * @param {Iterable<string>} urls - URLs to cluster.
 * @returns {Map<string, string[]>} First path segment -> URLs, in input order.
 */
function clusterByPathPrefix(urls) {
  const pathOf = (url) => {
    try {
      return new URL(url).pathname;
    } catch {
      return url.split("?")[0];
    }
  };
  const clusters = new Map();
  for (const url of urls) {
    const prefix = pathOf(url).split("/").find(Boolean) ?? "/";
    if (!clusters.has(prefix)) {
      clusters.set(prefix, []);
    }
    clusters.get(prefix).push(url);
  }
  return clusters;
}
1406
/**
 * Assembles the navigation flow graph from raw navigation edges.
 *
 * Edges are de-duplicated on (from, to, trigger), keeping the first
 * occurrence. The node set is the entry URL plus every edge endpoint. Each
 * node carries its label, in/out degree (counted over de-duplicated edges),
 * entry/exit flags and path-prefix cluster.
 *
 * @param {Array<{ from: string, to: string, trigger: string }>} edges - Observed navigations.
 * @param {string} entryUrl - URL the crawl started from.
 * @returns {{ nodes: object[], edges: object[], entryUrl: string, hasCycles: boolean,
 *   cycleNodes: string[], clusters: Map<string, string[]> }}
 */
function buildFlowGraph(edges, entryUrl) {
  const seenKeys = new Set();
  const dedupedEdges = edges.filter((edge) => {
    const key = `${edge.from}\0${edge.to}\0${edge.trigger}`;
    if (seenKeys.has(key)) return false;
    seenKeys.add(key);
    return true;
  });

  const urlSet = new Set([entryUrl]);
  dedupedEdges.forEach((edge) => {
    urlSet.add(edge.from);
    urlSet.add(edge.to);
  });
  const allUrls = [...urlSet];

  const inDegree = new Map(allUrls.map((url) => [url, 0]));
  const outDegree = new Map(allUrls.map((url) => [url, 0]));
  dedupedEdges.forEach(({ from, to }) => {
    inDegree.set(to, (inDegree.get(to) ?? 0) + 1);
    outDegree.set(from, (outDegree.get(from) ?? 0) + 1);
  });

  const cycleNodes = detectCycles(dedupedEdges);
  const clusters = clusterByPathPrefix(allUrls);

  // Invert the cluster map so each URL can look up its own cluster name.
  const urlToCluster = new Map();
  clusters.forEach((members, prefix) => {
    members.forEach((member) => urlToCluster.set(member, prefix));
  });

  const nodes = allUrls.map((url) => {
    const outgoing = outDegree.get(url) ?? 0;
    return {
      url,
      label: urlToLabel(url),
      isEntry: url === entryUrl,
      isExit: outgoing === 0,
      inDegree: inDegree.get(url) ?? 0,
      outDegree: outgoing,
      cluster: urlToCluster.get(url)
    };
  });

  return {
    nodes,
    edges: dedupedEdges,
    entryUrl,
    hasCycles: cycleNodes.length > 0,
    cycleNodes,
    clusters
  };
}
1460
/**
 * Escapes characters that have syntactic meaning in Mermaid flowchart labels.
 *
 * Quotes, brackets, parentheses, braces and angle brackets are replaced with
 * Mermaid `#name;` entity codes. The pipe character, which delimits edge
 * labels, becomes `#124;`, and line breaks are collapsed to a single space
 * so one label can never span several diagram lines.
 *
 * @param {string} label - Raw label text.
 * @returns {string} Text safe to place inside a quoted node label or `|...|` edge label.
 */
function sanitizeMermaidLabel(label) {
  const entities = {
    '"': "#quot;",
    "[": "#lsqb;",
    "]": "#rsqb;",
    "(": "#lpar;",
    ")": "#rpar;",
    "{": "#lcub;",
    "}": "#rcub;",
    "<": "#lt;",
    ">": "#gt;",
    "|": "#124;"
  };
  return label.replace(/["\[\](){}<>|]/g, (ch) => entities[ch]).replace(/[\r\n]+/g, " ");
}

/**
 * Builds the Mermaid identifier of a node from its position in the node list.
 *
 * @param {string} url - Node URL (unused; ids depend on the index only).
 * @param {number} index - Position of the node in `graph.nodes`.
 * @returns {string} Identifier of the form `N<index>`.
 */
function nodeId(url, index) {
  return `N${index}`;
}

/**
 * Chooses the Mermaid arrow for a navigation trigger.
 *
 * @param {string} trigger - Navigation trigger name.
 * @returns {string} "-.->" (dotted) for redirects and history/navigation-API
 *   transitions, "-->" (solid) for everything else.
 */
function edgeStyle(trigger) {
  switch (trigger) {
    case "redirect":
    case "meta-refresh":
    case "js-redirect":
    case "pushState":
    case "replaceState":
    case "navigation-api":
      return "-.->";
    default:
      return "-->";
  }
}

/**
 * Renders a flow graph as a Mermaid `flowchart LR` definition.
 *
 * Entry nodes are drawn as stadiums, exit nodes as parallelograms and other
 * nodes as rectangles. When the graph has more than `maxNodes` nodes, the
 * nodes are grouped into one subgraph per cluster. Subgraphs get generated
 * ids (`C0`, `C1`, ...) with the cluster name as a quoted title, so cluster
 * names such as "/" or "end" cannot break the diagram syntax.
 *
 * @param {{ nodes: object[], edges: object[] }} graph - Graph as produced by buildFlowGraph.
 * @param {{ maxNodes?: number }} [options] - `maxNodes` defaults to DEFAULT_MAX_NODES.
 * @returns {string} Mermaid source, terminated by a newline.
 */
function generateMermaidDefinition(graph, options) {
  const maxNodes = options?.maxNodes ?? DEFAULT_MAX_NODES;
  if (graph.nodes.length === 0) {
    return "flowchart LR\n";
  }
  const urlIndex = new Map(graph.nodes.map((node, i) => [node.url, i]));
  const lines = ["flowchart LR"];

  // Renders a single node declaration line.
  const renderNode = (node) => {
    const id = nodeId(node.url, urlIndex.get(node.url));
    const label = sanitizeMermaidLabel(node.label);
    if (node.isEntry) return ` ${id}(["${label}"])`;
    if (node.isExit) return ` ${id}[/"${label}"/]`;
    return ` ${id}["${label}"]`;
  };

  if (graph.nodes.length > maxNodes) {
    const clusterNodes = new Map();
    for (const node of graph.nodes) {
      const cluster = node.cluster ?? "/";
      if (!clusterNodes.has(cluster)) {
        clusterNodes.set(cluster, []);
      }
      clusterNodes.get(cluster).push(node);
    }
    let clusterIndex = 0;
    for (const [cluster, members] of clusterNodes) {
      lines.push(` subgraph C${clusterIndex++}["${sanitizeMermaidLabel(cluster)}"]`);
      for (const node of members) {
        lines.push(renderNode(node));
      }
      lines.push(" end");
    }
  } else {
    for (const node of graph.nodes) {
      lines.push(renderNode(node));
    }
  }

  for (const edge of graph.edges) {
    const fromIdx = urlIndex.get(edge.from);
    const toIdx = urlIndex.get(edge.to);
    // Edges pointing at URLs that are not nodes of this graph are dropped.
    if (fromIdx === void 0 || toIdx === void 0) continue;
    const triggerLabel = edge.triggerText ? ` |${sanitizeMermaidLabel(edge.triggerText)}|` : "";
    lines.push(` ${nodeId(edge.from, fromIdx)} ${edgeStyle(edge.trigger)}${triggerLabel} ${nodeId(edge.to, toIdx)}`);
  }
  return lines.join("\n") + "\n";
}
1547
+
1548
+ // src/report/openapi-output.ts
1549
+ import { writeFile, mkdir as mkdir2 } from "fs/promises";
1550
+ import { dirname } from "path";
1551
// Human-readable descriptions for HTTP status codes that show up in generated
// OpenAPI responses. Codes missing from this table are described by callers
// with a generic fallback, so the table covers the redirect, client-error and
// gateway codes a crawl commonly observes in addition to the basic set.
var STATUS_DESCRIPTIONS = {
  200: "Success",
  201: "Created",
  202: "Accepted",
  204: "No Content",
  301: "Moved Permanently",
  302: "Found",
  304: "Not Modified",
  307: "Temporary Redirect",
  308: "Permanent Redirect",
  400: "Bad Request",
  401: "Unauthorized",
  403: "Forbidden",
  404: "Not Found",
  405: "Method Not Allowed",
  409: "Conflict",
  422: "Unprocessable Content",
  429: "Too Many Requests",
  500: "Internal Server Error",
  502: "Bad Gateway",
  503: "Service Unavailable",
  504: "Gateway Timeout"
};
1561
/**
 * Infers a minimal OpenAPI schema object from an example value.
 *
 * - null / undefined -> nullable string
 * - arrays           -> `items` inferred from the first element only
 * - objects          -> one property schema per own enumerable key
 * - numbers          -> "integer" or "number"; booleans -> "boolean"
 * - anything else    -> "string"
 *
 * Inference stops descending once `depth` reaches 2: arrays then get an
 * empty `items` schema and objects no `properties`.
 *
 * @param {*} value - Example value (typically parsed JSON).
 * @param {number} [depth=0] - Current nesting level; callers normally omit it.
 * @returns {object} Schema fragment.
 */
function inferSchema(value, depth = 0) {
  if (value == null) {
    return { type: "string", nullable: true };
  }
  const atDepthLimit = depth >= 2;
  if (Array.isArray(value)) {
    const items = value.length === 0 || atDepthLimit ? {} : inferSchema(value[0], depth + 1);
    return { type: "array", items };
  }
  switch (typeof value) {
    case "object": {
      if (atDepthLimit) {
        return { type: "object" };
      }
      const properties = {};
      Object.keys(value).forEach((key) => {
        properties[key] = inferSchema(value[key], depth + 1);
      });
      return { type: "object", properties };
    }
    case "number":
      return { type: Number.isInteger(value) ? "integer" : "number" };
    case "boolean":
      return { type: "boolean" };
    default:
      return { type: "string" };
  }
}
1589
/**
 * Rewrites every `:name` placeholder of a path pattern as `{name}` and
 * collects the resulting parameter names.
 *
 * OpenAPI requires path template names to be unique within a path, but
 * patterns can repeat a placeholder (for example "/users/:id/posts/:id").
 * A repeated name therefore receives the first free numeric suffix
 * (`id`, `id2`, `id3`, ...). Patterns without repeats are unaffected.
 *
 * @param {string} pattern - Path pattern using `:name` placeholders.
 * @returns {{ path: string, names: string[] }} Templated path and its
 *   parameter names in order of appearance.
 */
function renamePathParams(pattern) {
  const used = new Set();
  const names = [];
  const path = pattern.replace(/:([a-zA-Z_][a-zA-Z0-9_]*)/g, (_match, rawName) => {
    let name = rawName;
    for (let suffix = 2; used.has(name); suffix++) {
      name = `${rawName}${suffix}`;
    }
    used.add(name);
    names.push(name);
    return `{${name}}`;
  });
  return { path, names };
}

/**
 * Converts a `:name` style path pattern to OpenAPI `{name}` templating.
 *
 * @param {string} pattern - Path pattern such as "/users/:id".
 * @returns {string} OpenAPI path such as "/users/{id}"; repeated placeholder
 *   names are made unique with a numeric suffix.
 */
function toOpenApiPath(pattern) {
  return renamePathParams(pattern).path;
}

/**
 * Lists the path parameter names of a pattern, matching the template names
 * produced by toOpenApiPath for the same pattern.
 *
 * @param {string} pattern - Path pattern such as "/users/:id".
 * @returns {string[]} Unique parameter names in order of appearance.
 */
function extractPathParams(pattern) {
  return renamePathParams(pattern).names;
}
1601
/**
 * Builds an operation id from an HTTP method and a path pattern.
 *
 * The id is the lower-cased method followed by each non-empty path segment
 * with its first character upper-cased; `:param` segments contribute
 * "By" + the capitalized parameter name.
 *
 * @param {string} method - HTTP method.
 * @param {string} pattern - Path pattern such as "/users/:id".
 * @returns {string} For example "getUsersById".
 */
function generateOperationId(method, pattern) {
  const capitalize = (text) => text.charAt(0).toUpperCase() + text.slice(1);
  let operationId = method.toLowerCase();
  for (const segment of pattern.split("/")) {
    if (!segment) continue;
    operationId += segment.startsWith(":") ? "By" + capitalize(segment.slice(1)) : capitalize(segment);
  }
  return operationId;
}
1611
/**
 * Picks a tag for an operation from its path pattern.
 *
 * @param {string} pattern - Path pattern such as "/api/v1/users/:id".
 * @returns {string | undefined} The first segment that is neither a `:param`
 *   placeholder nor one of the generic prefixes (api, v1-v4, rest; compared
 *   case-insensitively), or undefined when no such segment exists.
 */
function extractTag(pattern) {
  const genericPrefixes = new Set(["api", "v1", "v2", "v3", "v4", "rest"]);
  const isMeaningful = (segment) => !segment.startsWith(":") && !genericPrefixes.has(segment.toLowerCase());
  return pattern.split("/").filter(Boolean).find(isMeaningful);
}
1621
// Upper-cased HTTP methods for which a captured request body is documented.
var BODY_METHODS = /* @__PURE__ */ new Set(["POST", "PUT", "PATCH"]);

/**
 * Builds an OpenAPI 3.0.3 document from grouped endpoints.
 *
 * Each endpoint contributes one operation under its templated path and
 * lower-cased method; a later endpoint with the same path and method
 * replaces an earlier one.
 *
 * @param {Array<{ pattern: string, method: string }>} endpoints - Endpoints to describe.
 * @param {{ title: string, targetUrl: string, version?: string, description?: string }} options -
 *   `version` defaults to "1.0.0"; trailing slashes are stripped from `targetUrl`.
 * @returns {{ openapi: string, info: object, servers: object[], paths: object }}
 * @throws {Error} When `options.title` or `options.targetUrl` is missing or empty.
 */
function generateOpenApiSpec(endpoints, options) {
  const { title, targetUrl, version, description } = options;
  if (!title) {
    throw new Error("OpenAPI title is required");
  }
  if (!targetUrl) {
    throw new Error("OpenAPI targetUrl is required");
  }

  const info = { title, version: version ?? "1.0.0" };
  if (description) {
    info.description = description;
  }

  const paths = {};
  endpoints.forEach((endpoint) => {
    const openApiPath = toOpenApiPath(endpoint.pattern);
    const methodKey = endpoint.method.toLowerCase();
    let pathItem = paths[openApiPath];
    if (!pathItem) {
      pathItem = {};
      paths[openApiPath] = pathItem;
    }
    pathItem[methodKey] = buildOperation(endpoint);
  });

  return {
    openapi: "3.0.3",
    info,
    servers: [{ url: targetUrl.replace(/\/+$/, "") }],
    paths
  };
}
1654
/**
 * Builds the OpenAPI operation object for one grouped endpoint.
 *
 * The operation always has `summary`, `operationId` and `responses`.
 * `parameters` (path parameters, typed as string), `tags` and `requestBody`
 * are added only when applicable. A request body is documented only for
 * POST / PUT / PATCH endpoints with a captured body. Every observed status
 * code gets a response entry; the example response body is attached to the
 * entry whose code equals the example's status code. When no status codes
 * were observed, a bare "200" entry is emitted.
 *
 * @param {object} endpoint - Endpoint with `method`, `pattern`,
 *   `exampleRequest`, `exampleResponse` and `observedStatusCodes`.
 * @returns {object} OpenAPI operation object.
 */
function buildOperation(endpoint) {
  // Describes a captured body: parsed as JSON when possible, otherwise kept
  // as the raw string; the schema is inferred from whichever form results.
  const describeBody = (rawBody, declaredType) => {
    const mediaType = declaredType ?? "application/json";
    let example;
    try {
      example = JSON.parse(rawBody);
    } catch {
      example = rawBody;
    }
    return {
      [mediaType]: {
        schema: inferSchema(example),
        example
      }
    };
  };

  const operationId = generateOperationId(endpoint.method, endpoint.pattern);
  const operation = {
    summary: `${endpoint.method} ${endpoint.pattern}`,
    operationId,
    responses: {}
  };

  const pathParams = extractPathParams(endpoint.pattern);
  if (pathParams.length > 0) {
    operation.parameters = pathParams.map((name) => ({
      name,
      in: "path",
      required: true,
      schema: { type: "string" }
    }));
  }

  const tag = extractTag(endpoint.pattern);
  if (tag) {
    operation.tags = [tag];
  }

  const acceptsBody = BODY_METHODS.has(endpoint.method.toUpperCase());
  if (acceptsBody && endpoint.exampleRequest.body) {
    operation.requestBody = {
      content: describeBody(endpoint.exampleRequest.body, endpoint.exampleRequest.contentType)
    };
  }

  for (const statusCode of endpoint.observedStatusCodes) {
    const responseEntry = {
      description: STATUS_DESCRIPTIONS[statusCode] ?? "Response"
    };
    const isExampleStatus = statusCode === endpoint.exampleResponse.statusCode;
    if (isExampleStatus && endpoint.exampleResponse.body) {
      responseEntry.content = describeBody(endpoint.exampleResponse.body, endpoint.exampleResponse.contentType);
    }
    operation.responses[String(statusCode)] = responseEntry;
  }
  if (endpoint.observedStatusCodes.length === 0) {
    operation.responses["200"] = { description: "Success" };
  }
  return operation;
}
1719
/**
 * Writes an OpenAPI document to disk as pretty-printed JSON.
 *
 * The parent directory is created (recursively) if needed. The file content
 * is 2-space indented JSON followed by a single trailing newline, UTF-8 encoded.
 *
 * @param {object} spec - Document to serialize.
 * @param {string} outputPath - Destination file path.
 * @returns {Promise<void>} Resolves once the file has been written.
 * @throws {Error} When `outputPath` is empty or missing.
 */
async function writeOpenApiSpec(spec, outputPath) {
  if (!outputPath) {
    throw new Error("Output path is required");
  }
  const targetDir = dirname(outputPath);
  await mkdir2(targetDir, { recursive: true });
  const serialized = `${JSON.stringify(spec, null, 2)}\n`;
  await writeFile(outputPath, serialized, "utf-8");
}
1727
+
1728
+ // src/report/html/escape.ts
1729
/**
 * Escapes text for inclusion in HTML element content.
 *
 * NUL characters are removed first; then `&`, `<`, `>`, `"` and `'` are
 * replaced by their HTML entities.
 *
 * @param {string} input - Untrusted text.
 * @returns {string} Escaped text.
 */
function escapeHtml(input) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#039;"
  };
  const withoutNul = input.replace(/\0/g, "");
  return withoutNul.replace(/[&<>"']/g, (char) => entities[char]);
}
1748
/**
 * Escapes text for inclusion in an HTML attribute value.
 *
 * Applies escapeHtml and then additionally encodes "/" and the backtick.
 *
 * @param {string} input - Untrusted text.
 * @returns {string} Escaped text.
 */
function escapeAttribute(input) {
  return escapeHtml(input).replace(/[\/`]/g, (char) => (char === "/" ? "&#x2F;" : "&#96;"));
}
1752
/**
 * Makes serialized JSON safe to embed inside an HTML `<script>` element.
 *
 * - `</` becomes `<\/`, so the payload can never contain a closing tag.
 * - `<!--` becomes `\u003c!--`, so it can never open an HTML comment.
 *
 * Both replacements use escapes that are legal inside JSON strings (a
 * backslash followed by "!" is not), so the result still parses with
 * `JSON.parse` to the same value as the input.
 *
 * @param {string} jsonString - Output of `JSON.stringify`.
 * @returns {string} Equivalent JSON text without `</` or `<!--` sequences.
 */
function escapeJsonInHtml(jsonString) {
  return jsonString.replace(/<\//g, "<\\/").replace(/<!--/g, "\\u003c!--");
}
1755
+
1756
+ // src/report/html/template.ts
1757
// Content-Security-Policy <meta> tag: everything is denied by default; only
// inline styles, inline scripts and data:/blob: images are allowed.
var CSP_META = [
  `<meta http-equiv="Content-Security-Policy" content="`,
  [
    "default-src 'none';",
    "style-src 'unsafe-inline';",
    "script-src 'unsafe-inline';",
    "img-src data: blob:;"
  ].join(" "),
  `">`
].join("");
1758
+ function generateCss() {
1759
+ return `
1760
+ :root {
1761
+ --bg: #ffffff;
1762
+ --bg-surface: #f8f9fa;
1763
+ --bg-surface-hover: #f0f1f3;
1764
+ --bg-nav: #1a1a2e;
1765
+ --text: #1a1a2e;
1766
+ --text-muted: #6c757d;
1767
+ --text-nav: #ffffff;
1768
+ --border: #dee2e6;
1769
+ --border-light: #e9ecef;
1770
+ --accent: #4361ee;
1771
+ --accent-hover: #3a56d4;
1772
+ --status-2xx: #198754;
1773
+ --status-3xx: #0d6efd;
1774
+ --status-4xx: #e67700;
1775
+ --status-5xx: #dc3545;
1776
+ --method-get: #198754;
1777
+ --method-post: #0d6efd;
1778
+ --method-put: #e67700;
1779
+ --method-delete: #dc3545;
1780
+ --method-patch: #cc8800;
1781
+ --badge-bg: #e9ecef;
1782
+ --shadow: 0 1px 3px rgba(0,0,0,0.08);
1783
+ --radius: 6px;
1784
+ }
1785
+
1786
+ @media (prefers-color-scheme: dark) {
1787
+ :root {
1788
+ --bg: #0d1117;
1789
+ --bg-surface: #161b22;
1790
+ --bg-surface-hover: #1c2129;
1791
+ --bg-nav: #010409;
1792
+ --text: #e6edf3;
1793
+ --text-muted: #8b949e;
1794
+ --text-nav: #e6edf3;
1795
+ --border: #30363d;
1796
+ --border-light: #21262d;
1797
+ --accent: #58a6ff;
1798
+ --accent-hover: #79b8ff;
1799
+ --status-2xx: #3fb950;
1800
+ --status-3xx: #58a6ff;
1801
+ --status-4xx: #d29922;
1802
+ --status-5xx: #f85149;
1803
+ --method-get: #3fb950;
1804
+ --method-post: #58a6ff;
1805
+ --method-put: #d29922;
1806
+ --method-delete: #f85149;
1807
+ --method-patch: #e3b341;
1808
+ --badge-bg: #21262d;
1809
+ --shadow: 0 1px 3px rgba(0,0,0,0.3);
1810
+ }
1811
+ }
1812
+
1813
+ * { box-sizing: border-box; margin: 0; padding: 0; }
1814
+
1815
+ body {
1816
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
1817
+ line-height: 1.6;
1818
+ color: var(--text);
1819
+ background: var(--bg);
1820
+ }
1821
+
1822
+ /* Navigation */
1823
+ .nav {
1824
+ position: sticky;
1825
+ top: 0;
1826
+ z-index: 100;
1827
+ background: var(--bg-nav);
1828
+ color: var(--text-nav);
1829
+ display: flex;
1830
+ align-items: center;
1831
+ gap: 0.25rem;
1832
+ padding: 0 1.5rem;
1833
+ box-shadow: var(--shadow);
1834
+ flex-wrap: wrap;
1835
+ }
1836
+ .nav-brand {
1837
+ font-weight: 700;
1838
+ font-size: 0.95rem;
1839
+ padding: 0.75rem 0.5rem 0.75rem 0;
1840
+ margin-right: 1rem;
1841
+ white-space: nowrap;
1842
+ }
1843
+ .nav a {
1844
+ color: var(--text-nav);
1845
+ text-decoration: none;
1846
+ padding: 0.75rem 0.75rem;
1847
+ font-size: 0.875rem;
1848
+ opacity: 0.8;
1849
+ transition: opacity 0.15s;
1850
+ }
1851
+ .nav a:hover, .nav a:focus { opacity: 1; }
1852
+
1853
+ /* Main */
1854
+ .container {
1855
+ max-width: 1200px;
1856
+ margin: 0 auto;
1857
+ padding: 1.5rem;
1858
+ }
1859
+
1860
+ /* Stats bar */
1861
+ .stats {
1862
+ display: grid;
1863
+ grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
1864
+ gap: 1rem;
1865
+ margin-bottom: 2rem;
1866
+ }
1867
+ .stat-card {
1868
+ background: var(--bg-surface);
1869
+ border: 1px solid var(--border-light);
1870
+ border-radius: var(--radius);
1871
+ padding: 1rem 1.25rem;
1872
+ }
1873
+ .stat-label {
1874
+ font-size: 0.75rem;
1875
+ text-transform: uppercase;
1876
+ letter-spacing: 0.05em;
1877
+ color: var(--text-muted);
1878
+ margin-bottom: 0.25rem;
1879
+ }
1880
+ .stat-value {
1881
+ font-size: 1.5rem;
1882
+ font-weight: 700;
1883
+ }
1884
+ .stat-value a {
1885
+ color: var(--accent);
1886
+ text-decoration: none;
1887
+ }
1888
+
1889
+ /* Search */
1890
+ .search-bar {
1891
+ margin-bottom: 1.5rem;
1892
+ }
1893
+ .search-bar input {
1894
+ width: 100%;
1895
+ max-width: 400px;
1896
+ padding: 0.5rem 0.75rem;
1897
+ border: 1px solid var(--border);
1898
+ border-radius: var(--radius);
1899
+ font-size: 0.875rem;
1900
+ background: var(--bg);
1901
+ color: var(--text);
1902
+ }
1903
+ .search-bar input:focus {
1904
+ outline: 2px solid var(--accent);
1905
+ outline-offset: -1px;
1906
+ }
1907
+
1908
+ /* Sections */
1909
+ .section {
1910
+ margin-bottom: 2.5rem;
1911
+ }
1912
+ .section-title {
1913
+ font-size: 1.25rem;
1914
+ font-weight: 700;
1915
+ margin-bottom: 1rem;
1916
+ padding-bottom: 0.5rem;
1917
+ border-bottom: 2px solid var(--border-light);
1918
+ }
1919
+
1920
+ /* Details / Summary */
1921
+ details {
1922
+ margin: 0.75rem 0;
1923
+ border: 1px solid var(--border-light);
1924
+ border-radius: var(--radius);
1925
+ overflow: hidden;
1926
+ }
1927
+ summary {
1928
+ cursor: pointer;
1929
+ font-weight: 600;
1930
+ padding: 0.75rem 1rem;
1931
+ background: var(--bg-surface);
1932
+ user-select: none;
1933
+ }
1934
+ summary:hover { background: var(--bg-surface-hover); }
1935
+ details > :not(summary) { padding: 0 1rem; }
1936
+ details[open] > :not(summary) { padding: 0.75rem 1rem; }
1937
+
1938
+ /* Tables */
1939
+ table {
1940
+ width: 100%;
1941
+ border-collapse: collapse;
1942
+ font-size: 0.875rem;
1943
+ }
1944
+ th, td {
1945
+ padding: 0.625rem 0.75rem;
1946
+ text-align: left;
1947
+ border-bottom: 1px solid var(--border-light);
1948
+ }
1949
+ th {
1950
+ background: var(--bg-surface);
1951
+ font-weight: 600;
1952
+ font-size: 0.8rem;
1953
+ text-transform: uppercase;
1954
+ letter-spacing: 0.03em;
1955
+ color: var(--text-muted);
1956
+ position: sticky;
1957
+ top: 0;
1958
+ }
1959
+ tbody tr:hover { background: var(--bg-surface-hover); }
1960
+ tr[data-searchable] { transition: none; }
1961
+
1962
+ /* Badges */
1963
+ .badge {
1964
+ display: inline-block;
1965
+ padding: 0.15rem 0.5rem;
1966
+ border-radius: 3px;
1967
+ font-size: 0.75rem;
1968
+ font-weight: 600;
1969
+ font-family: 'SF Mono', SFMono-Regular, Consolas, 'Liberation Mono', Menlo, monospace;
1970
+ }
1971
+ .badge-get { background: var(--method-get); color: #fff; }
1972
+ .badge-post { background: var(--method-post); color: #fff; }
1973
+ .badge-put { background: var(--method-put); color: #fff; }
1974
+ .badge-delete { background: var(--method-delete); color: #fff; }
1975
+ .badge-patch { background: var(--method-patch); color: #fff; }
1976
+ .badge-default { background: var(--badge-bg); color: var(--text); }
1977
+ .badge-field {
1978
+ background: var(--badge-bg);
1979
+ color: var(--text);
1980
+ margin: 0.1rem;
1981
+ }
1982
+ .badge-required {
1983
+ background: var(--status-5xx);
1984
+ color: #fff;
1985
+ }
1986
+
1987
+ /* Status colors */
1988
+ .status-2xx { color: var(--status-2xx); font-weight: 600; }
1989
+ .status-3xx { color: var(--status-3xx); font-weight: 600; }
1990
+ .status-4xx { color: var(--status-4xx); font-weight: 600; }
1991
+ .status-5xx { color: var(--status-5xx); font-weight: 600; }
1992
+
1993
+ /* Sitemap tree */
1994
+ .sitemap-tree { list-style: none; padding-left: 0; }
1995
+ .sitemap-tree ul { list-style: none; padding-left: 1.25rem; }
1996
+ .sitemap-tree li { padding: 0.2rem 0; }
1997
+ .sitemap-tree .tree-label {
1998
+ font-family: 'SF Mono', SFMono-Regular, Consolas, 'Liberation Mono', Menlo, monospace;
1999
+ font-size: 0.85rem;
2000
+ }
2001
+ .sitemap-tree .tree-title {
2002
+ color: var(--text-muted);
2003
+ font-size: 0.8rem;
2004
+ margin-left: 0.5rem;
2005
+ }
2006
+
2007
+ /* Screenshots grid */
2008
+ .screenshot-grid {
2009
+ display: grid;
2010
+ grid-template-columns: repeat(auto-fill, minmax(280px, 1fr));
2011
+ gap: 1rem;
2012
+ }
2013
+ .screenshot-card {
2014
+ border: 1px solid var(--border-light);
2015
+ border-radius: var(--radius);
2016
+ overflow: hidden;
2017
+ cursor: pointer;
2018
+ transition: box-shadow 0.15s;
2019
+ }
2020
+ .screenshot-card:hover {
2021
+ box-shadow: 0 4px 12px rgba(0,0,0,0.12);
2022
+ }
2023
+ .screenshot-card img {
2024
+ width: 100%;
2025
+ height: auto;
2026
+ display: block;
2027
+ background: var(--bg-surface);
2028
+ }
2029
+ .screenshot-label {
2030
+ padding: 0.5rem 0.75rem;
2031
+ font-size: 0.8rem;
2032
+ color: var(--text-muted);
2033
+ background: var(--bg-surface);
2034
+ overflow: hidden;
2035
+ text-overflow: ellipsis;
2036
+ white-space: nowrap;
2037
+ }
2038
+
2039
+ /* Dialog lightbox */
2040
+ dialog {
2041
+ border: none;
2042
+ border-radius: var(--radius);
2043
+ padding: 0;
2044
+ max-width: 90vw;
2045
+ max-height: 90vh;
2046
+ background: var(--bg);
2047
+ box-shadow: 0 8px 30px rgba(0,0,0,0.3);
2048
+ }
2049
+ dialog::backdrop {
2050
+ background: rgba(0,0,0,0.6);
2051
+ }
2052
+ .dialog-content {
2053
+ display: flex;
2054
+ flex-direction: column;
2055
+ align-items: center;
2056
+ padding: 1rem;
2057
+ }
2058
+ .dialog-content img {
2059
+ max-width: 100%;
2060
+ max-height: 80vh;
2061
+ }
2062
+ .dialog-actions {
2063
+ display: flex;
2064
+ gap: 0.75rem;
2065
+ margin-top: 0.75rem;
2066
+ align-items: center;
2067
+ }
2068
+ .dialog-actions a, .dialog-actions button {
2069
+ font-size: 0.85rem;
2070
+ padding: 0.4rem 0.8rem;
2071
+ border-radius: var(--radius);
2072
+ cursor: pointer;
2073
+ }
2074
+ .dialog-actions button {
2075
+ border: 1px solid var(--border);
2076
+ background: var(--bg-surface);
2077
+ color: var(--text);
2078
+ }
2079
+ .dialog-actions a {
2080
+ background: var(--accent);
2081
+ color: #fff;
2082
+ text-decoration: none;
2083
+ }
2084
+
2085
+ /* Flow diagram */
2086
+ .flow-svg-container { overflow-x: auto; }
2087
+ .flow-svg-container svg { max-width: 100%; height: auto; }
2088
+
2089
+ /* Examples sub-rows */
2090
+ .examples-list {
2091
+ list-style: none;
2092
+ padding: 0;
2093
+ margin: 0;
2094
+ }
2095
+ .examples-list li {
2096
+ padding: 0.2rem 0;
2097
+ font-size: 0.8rem;
2098
+ font-family: 'SF Mono', SFMono-Regular, Consolas, 'Liberation Mono', Menlo, monospace;
2099
+ }
2100
+
2101
+ /* Responsive */
2102
+ @media (max-width: 768px) {
2103
+ .nav { padding: 0 0.75rem; }
2104
+ .nav a { padding: 0.5rem 0.5rem; font-size: 0.8rem; }
2105
+ .container { padding: 1rem; }
2106
+ .stats { grid-template-columns: repeat(2, 1fr); }
2107
+ .screenshot-grid { grid-template-columns: repeat(auto-fill, minmax(200px, 1fr)); }
2108
+ th, td { padding: 0.4rem 0.5rem; font-size: 0.8rem; }
2109
+ }
2110
+
2111
+ /* Print */
2112
+ @media print {
2113
+ .nav { display: none; }
2114
+ .search-bar { display: none; }
2115
+ body { background: #fff; color: #000; }
2116
+ .container { max-width: none; padding: 0; }
2117
+ details { border: none; }
2118
+ details[open] > summary { background: none; border-bottom: 1px solid #ccc; }
2119
+ details { break-inside: avoid; }
2120
+ details > * { display: block !important; }
2121
+ details:not([open]) > :not(summary) { display: block !important; }
2122
+ .screenshot-card { break-inside: avoid; }
2123
+ dialog { display: none; }
2124
+ table { font-size: 0.75rem; }
2125
+ th { background: #f0f0f0 !important; }
2126
+ }
2127
+ `;
2128
+ }
2129
/**
 * Build the inline client-side script embedded in the generated report.
 *
 * The script wires up the search box (`#report-search`) and, when the report
 * contains screenshots, the `<dialog>`-based screenshot lightbox.
 *
 * The search filter targets every element carrying `data-searchable`, so
 * table rows, sitemap items and screenshot cards are all filtered. Previously
 * only `tr` and `li` elements were queried, which left screenshot cards
 * visible regardless of the search term.
 *
 * @param {boolean} hasScreenshots - When truthy, the lightbox wiring is emitted.
 * @returns {string} JavaScript source intended for a `<script>` element.
 */
function generateJs(hasScreenshots) {
  return `
(function() {
// Search filter
var searchInput = document.getElementById('report-search');
if (searchInput) {
searchInput.addEventListener('input', function(e) {
var term = e.target.value.toLowerCase();
var targets = document.querySelectorAll('[data-searchable]');
for (var i = 0; i < targets.length; i++) {
var text = targets[i].textContent.toLowerCase();
targets[i].style.display = term && text.indexOf(term) === -1 ? 'none' : '';
}
});
}
${hasScreenshots ? `
// Screenshot lightbox
var dialog = document.getElementById('screenshot-dialog');
var dialogImg = document.getElementById('dialog-img');
var dialogLink = document.getElementById('dialog-link');
var cards = document.querySelectorAll('.screenshot-card');
for (var k = 0; k < cards.length; k++) {
cards[k].addEventListener('click', function() {
var src = this.getAttribute('data-full');
var thumb = this.querySelector('img');
if (dialogImg && thumb) dialogImg.src = thumb.src;
if (dialogLink) dialogLink.href = src;
if (dialog && dialog.showModal) dialog.showModal();
});
}
var closeBtn = document.getElementById('dialog-close');
if (closeBtn && dialog) {
closeBtn.addEventListener('click', function() { dialog.close(); });
}
if (dialog) {
dialog.addEventListener('click', function(e) {
if (e.target === dialog) dialog.close();
});
}
` : ""}
})();
`;
}
2177
/**
 * Map an HTTP status code to the CSS class used to colour it in the report.
 *
 * @param {number} status - HTTP status code.
 * @returns {string} `status-2xx` … `status-5xx`, or `""` for anything below 200
 *   or not comparable as a number.
 */
function statusClass(status) {
  // Walk the bands from the top down so each check only needs a lower bound.
  if (status >= 500) return "status-5xx";
  if (status >= 400) return "status-4xx";
  if (status >= 300) return "status-3xx";
  if (status >= 200) return "status-2xx";
  return "";
}
2184
/**
 * Return the CSS classes for an HTTP-method badge.
 *
 * @param {string} method - HTTP method in any letter case.
 * @returns {string} `"badge badge-<method>"` for GET/POST/PUT/DELETE/PATCH,
 *   otherwise `"badge badge-default"`.
 */
function methodBadgeClass(method) {
  const verb = method.toUpperCase();
  const styled = ["GET", "POST", "PUT", "DELETE", "PATCH"];
  const suffix = styled.includes(verb) ? verb.toLowerCase() : "default";
  return `badge badge-${suffix}`;
}
2201
/**
 * Format a duration given in seconds for display.
 *
 * Durations that display as less than a minute are shown with one decimal
 * ("12.3s"); everything else is shown as whole minutes and seconds ("2m 5s").
 *
 * The value is rounded to whole seconds BEFORE it is split into minutes and
 * seconds, so a remainder can never round up to 60 (119.6 is "2m 0s", not
 * "1m 60s"). Values just under a minute that would display as "60.0s" are
 * promoted to the minute form for the same reason.
 *
 * @param {number} seconds - Duration in seconds.
 * @returns {string} Human-readable duration.
 */
function formatDuration(seconds) {
  const short = seconds.toFixed(1);
  if (seconds < 60 && short !== "60.0") return `${short}s`;
  const total = Math.round(seconds);
  const mins = Math.floor(total / 60);
  const secs = total % 60;
  return `${mins}m ${secs}s`;
}
2207
/**
 * Arrange crawled pages into a tree keyed by URL path segment.
 *
 * Each node is `{ segment, children: Map<string, node>, page? }`. A page is
 * attached to the node its path ends on; a page whose path has no segments is
 * attached to the root. When `page.url` cannot be parsed as a URL, the raw
 * string is split on "/" instead.
 *
 * @param {Iterable<{url: string}>} pages - Pages to place in the tree.
 * @returns {{segment: string, children: Map<string, object>, page?: object}} Root node.
 */
function buildTree(pages) {
  const root = { segment: "", children: /* @__PURE__ */ new Map() };
  for (const page of pages) {
    let pathname = page.url;
    try {
      pathname = new URL(page.url).pathname;
    } catch {
      // Not an absolute URL: fall back to splitting the raw string.
    }
    // Walk (creating as needed) one node per non-empty path segment.
    const leaf = pathname
      .split("/")
      .filter(Boolean)
      .reduce((parent, seg) => {
        let child = parent.children.get(seg);
        if (!child) {
          child = { segment: seg, children: /* @__PURE__ */ new Map() };
          parent.children.set(seg, child);
        }
        return child;
      }, root);
    // With no segments the reduce yields the root itself.
    leaf.page = page;
  }
  return root;
}
2232
/**
 * Render a sitemap tree node (and, recursively, its children) as HTML list markup.
 *
 * A node with a page becomes a searchable `<li>` showing its label, status and
 * optional title. A page-less node below the root becomes a plain directory
 * `<li>`. Children are emitted inside a `<ul>`, sorted by segment name.
 *
 * @param {{segment: string, children: Map<string, object>, page?: object}} node - Node to render.
 * @param {number} [depth=0] - Depth of `node`; 0 for the root.
 * @returns {string} HTML fragment, one element per line.
 */
function renderTree(node, depth = 0) {
  const out = [];
  const { page } = node;
  if (page) {
    const label = node.segment || "/";
    const statusCls = statusClass(page.status);
    const titlePart = page.title ? ` <span class="tree-title">${escapeHtml(page.title)}</span>` : "";
    out.push(
      `<li data-searchable><span class="tree-label">${escapeHtml(label)}</span> <span class="${statusCls}">${page.status}</span>${titlePart}</li>`
    );
  } else if (depth > 0) {
    out.push(`<li><span class="tree-label">${escapeHtml(node.segment)}/</span></li>`);
  }
  const ordered = [...node.children.entries()].sort(([left], [right]) => left.localeCompare(right));
  if (ordered.length > 0) {
    out.push("<ul>", ...ordered.map(([, child]) => renderTree(child, depth + 1)), "</ul>");
  }
  return out.join("\n");
}
2255
/**
 * Render the report's top navigation bar.
 *
 * A link is emitted only for sections that have content: sitemap, forms,
 * API, screenshots and flow diagram, in that order.
 *
 * @param {object} input - Report input; reads `pages`/`sitemap`, `forms`,
 *   `apiEndpoints`, `screenshots` and `flowDiagramSvg`.
 * @returns {string} `<nav>` markup.
 */
function renderNavigation(input) {
  const pageList = input.pages ?? input.sitemap ?? [];
  // [should the link be shown, anchor id, link text]
  const sections = [
    [pageList.length > 0, "sitemap", "Sitemap"],
    [input.forms.length > 0, "forms", "Forms"],
    [input.apiEndpoints.length > 0, "api", "API"],
    [input.screenshots && input.screenshots.length > 0, "screenshots", "Screenshots"],
    [!!input.flowDiagramSvg, "flow", "Flow"],
  ];
  const links = sections
    .filter(([visible]) => visible)
    .map(([, anchor, text]) => `<a href="#${anchor}">${text}</a>`);
  return `<nav class="nav" role="navigation" aria-label="Report sections">
<span class="nav-brand">Archaeologist</span>
${links.join("\n ")}
</nav>`;
}
2270
/**
 * Render the summary cards shown at the top of the report: target URL,
 * crawl date, duration, pages visited and error count.
 *
 * String values are escaped before interpolation; `pagesVisited` and `errors`
 * are interpolated as-is.
 *
 * @param {object} input - Report input; reads `targetUrl`, `crawlDate`,
 *   `duration`, `pagesVisited` and `errors`.
 * @returns {string} `<div class="stats">` markup.
 */
function renderStatsBar(input) {
  const targetHref = escapeAttribute(input.targetUrl);
  const targetLabel = escapeHtml(truncateUrl(input.targetUrl));
  const crawlDate = escapeHtml(input.crawlDate);
  const duration = escapeHtml(formatDuration(input.duration));
  return `<div class="stats">
<div class="stat-card">
<div class="stat-label">Target</div>
<div class="stat-value"><a href="${targetHref}">${targetLabel}</a></div>
</div>
<div class="stat-card">
<div class="stat-label">Crawl Date</div>
<div class="stat-value">${crawlDate}</div>
</div>
<div class="stat-card">
<div class="stat-label">Duration</div>
<div class="stat-value">${duration}</div>
</div>
<div class="stat-card">
<div class="stat-label">Pages Visited</div>
<div class="stat-value">${input.pagesVisited}</div>
</div>
<div class="stat-card">
<div class="stat-label">Errors</div>
<div class="stat-value">${input.errors}</div>
</div>
</div>`;
}
2294
/**
 * Shorten a URL for display.
 *
 * URLs of 40 characters or fewer are returned unchanged. Longer URLs are
 * reduced to hostname plus path (the path is omitted when it is just "/");
 * scheme, port, query and fragment are dropped. Strings that do not parse as
 * a URL are returned unchanged.
 *
 * @param {string} url - URL to shorten.
 * @returns {string} Display form of the URL.
 */
function truncateUrl(url) {
  const maxLength = 40;
  if (url.length > maxLength) {
    try {
      const { hostname, pathname } = new URL(url);
      return pathname.length > 1 ? hostname + pathname : hostname;
    } catch {
      // Unparseable: fall through and show the raw string.
    }
  }
  return url;
}
2303
/**
 * Render the "Sitemap" section: a heading with the page count and the
 * collapsible page tree.
 *
 * @param {Array<object>} pages - Crawled pages.
 * @returns {string} Section markup, or `""` when there are no pages.
 */
function renderSitemapSection(pages) {
  const count = pages.length;
  if (count === 0) return "";
  const treeHtml = renderTree(buildTree(pages));
  return `<div class="section" id="sitemap">
<h2 class="section-title">Sitemap (${count} pages)</h2>
<details open>
<summary>Page Tree</summary>
<ul class="sitemap-tree">
${treeHtml}
</ul>
</details>
</div>`;
}
2317
/**
 * Render the "Forms" section as a table with one row per discovered form.
 *
 * Each field is shown as a `name: type` badge; required fields use the
 * `badge-required` style.
 *
 * @param {Array<object>} forms - Forms with `url`, `action`, `method` and `fields`.
 * @returns {string} Section markup, or `""` when there are no forms.
 */
function renderFormsSection(forms) {
  const count = forms.length;
  if (count === 0) return "";

  const renderField = (field) => {
    const cls = field.required ? "badge badge-required" : "badge badge-field";
    return `<span class="${cls}">${escapeHtml(field.name)}: ${escapeHtml(field.type)}</span>`;
  };

  const renderRow = (form) => {
    const fieldsHtml = form.fields.map((field) => renderField(field)).join(" ");
    return `<tr data-searchable>
<td>${escapeHtml(form.url)}</td>
<td>${escapeHtml(form.action)}</td>
<td><span class="${methodBadgeClass(form.method)}">${escapeHtml(form.method.toUpperCase())}</span></td>
<td>${fieldsHtml}</td>
</tr>`;
  };

  const rows = forms.map((form) => renderRow(form)).join("\n");
  return `<div class="section" id="forms">
<h2 class="section-title">Forms (${count})</h2>
<table>
<thead><tr><th>Page URL</th><th>Action</th><th>Method</th><th>Fields</th></tr></thead>
<tbody>${rows}</tbody>
</table>
</div>`;
}
2339
/**
 * Render the "API Endpoints" section as a table with one row per endpoint.
 *
 * The details column shows a collapsible list of example requests when the
 * endpoint has any, otherwise the endpoint's own status (when not null or
 * undefined), otherwise nothing.
 *
 * @param {Array<object>} endpoints - Endpoints with `method`, `pattern` or
 *   `url`, and optionally `examples` / `status`.
 * @returns {string} Section markup, or `""` when there are no endpoints.
 */
function renderApiSection(endpoints) {
  const count = endpoints.length;
  if (count === 0) return "";

  const statusSpan = (status) => `<span class="${statusClass(status)}">${status}</span>`;

  const renderDetails = (ep) => {
    if (ep.examples && ep.examples.length > 0) {
      const items = ep.examples
        .map((ex) => `<li>${escapeHtml(ex.url)} ${statusSpan(ex.status)}</li>`)
        .join("");
      return `<details><summary>${ep.examples.length} example(s)</summary><ul class="examples-list">${items}</ul></details>`;
    }
    if (ep.status != null) return statusSpan(ep.status);
    return "";
  };

  const renderRow = (ep) => {
    const pattern = ep.pattern ?? ep.url ?? "";
    const examplesHtml = renderDetails(ep);
    return `<tr data-searchable>
<td><span class="${methodBadgeClass(ep.method)}">${escapeHtml(ep.method.toUpperCase())}</span></td>
<td>${escapeHtml(pattern)}</td>
<td>${examplesHtml}</td>
</tr>`;
  };

  const rows = endpoints.map((ep) => renderRow(ep)).join("\n");
  return `<div class="section" id="api">
<h2 class="section-title">API Endpoints (${count})</h2>
<table>
<thead><tr><th>Method</th><th>Pattern</th><th>Details</th></tr></thead>
<tbody>${rows}</tbody>
</table>
</div>`;
}
2360
/**
 * Render the "Screenshots" section: a grid of thumbnail cards followed by the
 * `<dialog>` element used as the lightbox.
 *
 * Each card is labelled with the path of the screenshot's URL (or the raw
 * URL string when it cannot be parsed) and carries the full image path in
 * its `data-full` attribute.
 *
 * @param {Array<object>} screenshots - Entries with `url`, `fullPath` and `thumbnailBase64`.
 * @returns {string} Section markup, or `""` when there are no screenshots.
 */
function renderScreenshotsSection(screenshots) {
  const count = screenshots.length;
  if (count === 0) return "";

  const labelFor = (rawUrl) => {
    try {
      return new URL(rawUrl).pathname || "/";
    } catch {
      return rawUrl;
    }
  };

  const renderCard = (ss) => {
    const label = labelFor(ss.url);
    return `<div class="screenshot-card" data-full="${escapeAttribute(ss.fullPath)}" data-searchable>
<img src="data:image/jpeg;base64,${escapeAttribute(ss.thumbnailBase64)}" alt="Screenshot of ${escapeAttribute(label)}" loading="lazy">
<div class="screenshot-label">${escapeHtml(label)}</div>
</div>`;
  };

  const cards = screenshots.map((ss) => renderCard(ss)).join("\n");
  return `<div class="section" id="screenshots">
<h2 class="section-title">Screenshots (${count})</h2>
<div class="screenshot-grid">
${cards}
</div>
</div>

<dialog id="screenshot-dialog">
<div class="dialog-content">
<img id="dialog-img" src="" alt="Full screenshot">
<div class="dialog-actions">
<a id="dialog-link" href="" target="_blank" rel="noopener">Open Full Image</a>
<button id="dialog-close" type="button">Close</button>
</div>
</div>
</dialog>`;
}
2392
/**
 * Render the "Flow Diagram" section, wrapping the supplied SVG in a
 * collapsible, horizontally scrollable container.
 *
 * NOTE(review): `svgContent` is interpolated verbatim, without escaping —
 * presumably it is SVG generated by this tool; confirm it can never contain
 * markup taken from a crawled page.
 *
 * @param {string} svgContent - SVG markup to embed.
 * @returns {string} Section markup.
 */
function renderFlowSection(svgContent) {
  return `<div class="section" id="flow">
<h2 class="section-title">Flow Diagram</h2>
<details open>
<summary>Navigation Flow</summary>
<div class="flow-svg-container">
${svgContent}
</div>
</details>
</div>`;
}
2403
/**
 * Assemble the complete HTML report document.
 *
 * The document contains the inline stylesheet, navigation, stats bar, search
 * box, the sitemap / forms / API sections, the screenshots and flow sections
 * when present, a JSON summary in `<script id="report-data">`, and the inline
 * client script.
 *
 * @param {object} input - Report input. Reads `pages` (or `sitemap`),
 *   `screenshots`, `flowDiagramSvg`, `title`, `targetUrl`, `crawlDate`,
 *   `duration`, `pagesVisited`, `errors`, `forms` and `apiEndpoints`.
 *   `forms` and `apiEndpoints` are dereferenced without a fallback.
 * @returns {string} Full HTML document.
 */
function generateReportHtml(input) {
  // `sitemap` is accepted as an alternative name for `pages`.
  const pages = input.pages ?? input.sitemap ?? [];
  const screenshots = input.screenshots ?? [];
  const hasScreenshots = screenshots.length > 0;
  const hasFlow = !!input.flowDiagramSvg;
  // Escaped once here; used in both <title> and <h1>.
  const reportTitle = input.title ? escapeHtml(input.title) : `Archaeologist Report: ${escapeHtml(input.targetUrl)}`;
  // Machine-readable summary embedded in the page as JSON (counts only, no page bodies).
  const reportData = {
    targetUrl: input.targetUrl,
    crawlDate: input.crawlDate,
    duration: input.duration,
    pagesVisited: input.pagesVisited,
    errors: input.errors,
    pageCount: pages.length,
    formCount: input.forms.length,
    apiEndpointCount: input.apiEndpoints.length,
    screenshotCount: screenshots.length
  };
  return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
${CSP_META}
<title>${reportTitle}</title>
<style>${generateCss()}</style>
</head>
<body>
${renderNavigation(input)}

<div class="container">
<h1>${reportTitle}</h1>

${renderStatsBar(input)}

<div class="search-bar">
<input type="text" id="report-search" placeholder="Filter pages, forms, endpoints..." aria-label="Search report">
</div>

${renderSitemapSection(pages)}
${renderFormsSection(input.forms)}
${renderApiSection(input.apiEndpoints)}
${hasScreenshots ? renderScreenshotsSection(screenshots) : ""}
${hasFlow ? renderFlowSection(input.flowDiagramSvg) : ""}
</div>

<script type="application/json" id="report-data">${escapeJsonInHtml(JSON.stringify(reportData))}</script>
<script>${generateJs(hasScreenshots)}</script>
</body>
</html>`;
}
2453
+
2454
+ // src/crawl/orchestrator.ts
2455
+ import * as fs from "fs/promises";
2456
+ import * as path from "path";
2457
+ import { chromium } from "playwright";
2458
+
2459
+ // src/security/network-guard.ts
2460
+ import { lookup } from "dns/promises";
2461
// Hostnames that are always treated as metadata endpoints by
// isMetadataHostname(). Entries are lower-case because lookups lower-case
// the candidate before testing membership.
var METADATA_HOSTNAMES = /* @__PURE__ */ new Set([
  "metadata.google.internal",
  "169.254.169.254",
  "kubernetes.default.svc"
]);
// URL schemes accepted by isAllowedProtocol(); values include the trailing
// colon to match the format of URL.protocol.
var ALLOWED_PROTOCOLS = /* @__PURE__ */ new Set(["http:", "https:"]);
2467
/**
 * Parse one dotted-quad component written in decimal, octal (leading `0`)
 * or hexadecimal (leading `0x`/`0X`) notation.
 *
 * Parsing is strict: the whole string must be a well-formed number in the
 * detected base. Inputs such as "1e2", "0b11", " 5", "0x1g" or "089" yield
 * NaN instead of being partially or loosely interpreted.
 *
 * @param {string} s - Component text.
 * @returns {number} The parsed non-negative integer, or NaN when malformed.
 *   Range checking (0-255) is left to the caller.
 */
function parseOctet(s) {
  if (/^0x[0-9a-f]+$/i.test(s)) return parseInt(s, 16);
  // A leading zero followed by more digits means octal; 8 and 9 are invalid there.
  if (/^0[0-7]+$/.test(s)) return parseInt(s, 8);
  if (/^(?:0|[1-9]\d*)$/.test(s)) return Number(s);
  return NaN;
}
2478
/**
 * Convert an IPv4 address string to its unsigned 32-bit numeric value.
 *
 * Two spellings are accepted:
 *  - a single number covering all 32 bits, in decimal, octal (leading `0`)
 *    or hexadecimal (leading `0x`/`0X`), e.g. "2130706433", "017700000001",
 *    "0x7f000001";
 *  - four dot-separated components, each parsed by `parseOctet`.
 *
 * The single-number form honours the octal prefix so that, for example,
 * "01200000001" is read as 10.0.0.1 rather than as the decimal number
 * 1200000001, and the hexadecimal prefix is matched case-insensitively.
 *
 * @param {string} ip - Candidate IPv4 address.
 * @returns {number|null} Value in [0, 4294967295], or null when `ip` is not a
 *   recognised IPv4 spelling.
 */
function parseIpv4ToNumber(ip) {
  if (/^(?:0x[\da-f]+|\d+)$/i.test(ip)) {
    let n;
    if (/^0x/i.test(ip)) {
      n = parseInt(ip, 16);
    } else if (ip.length > 1 && ip.startsWith("0")) {
      // Leading zero means octal; reject digits 8 and 9 instead of guessing.
      n = /^0[0-7]+$/.test(ip) ? parseInt(ip, 8) : NaN;
    } else {
      n = Number(ip);
    }
    return Number.isFinite(n) && n >= 0 && n <= 4294967295 ? n : null;
  }
  const parts = ip.split(".");
  if (parts.length !== 4) return null;
  let result = 0;
  for (let i = 0; i < 4; i++) {
    const octet = parseOctet(parts[i]);
    if (isNaN(octet) || octet < 0 || octet > 255) return null;
    result = result << 8 | octet;
  }
  // `<<` works on signed 32-bit values; convert back to unsigned.
  return result >>> 0;
}
2496
/**
 * Report whether a numeric IPv4 address lies in a range that must not be
 * reached by the crawler.
 *
 * Covered ranges:
 *   0.0.0.0/8       "this network"
 *   10.0.0.0/8      private
 *   100.64.0.0/10   shared address space (carrier-grade NAT); also hosts
 *                   some cloud metadata endpoints
 *   127.0.0.0/8     loopback
 *   169.254.0.0/16  link-local
 *   172.16.0.0/12   private
 *   192.168.0.0/16  private
 *
 * @param {number} num - IPv4 address as an unsigned 32-bit integer.
 * @returns {boolean} True when the address is in one of the ranges above.
 */
function isPrivateIpv4Number(num) {
  const first = num >>> 24;
  const second = num >>> 16 & 255;
  if (first === 0 || first === 10 || first === 127) return true;
  if (first === 100 && second >= 64 && second <= 127) return true;
  if (first === 172 && second >= 16 && second <= 31) return true;
  if (first === 192 && second === 168) return true;
  if (first === 169 && second === 254) return true;
  return false;
}
2508
/**
 * Report whether an IP address string (IPv4 or IPv6) is private.
 *
 * Anything containing ":" is handled as IPv6; everything else is parsed as
 * IPv4. Non-strings, empty strings and unparseable IPv4 text yield false.
 *
 * @param {string} ip - Address text; surrounding whitespace is ignored.
 * @returns {boolean} True when the address is in a private range.
 */
function isPrivateIp(ip) {
  if (typeof ip !== "string" || ip === "") return false;
  const candidate = ip.trim();
  if (candidate.includes(":")) return isPrivateIpv6(candidate);
  const asNumber = parseIpv4ToNumber(candidate);
  return asNumber !== null && isPrivateIpv4Number(asNumber);
}
2518
/**
 * Report whether an IPv6 address string is private.
 *
 * Treated as private:
 *  - IPv4-mapped addresses (::ffff:a.b.c.d, or the same address written with
 *    hexadecimal groups such as ::ffff:7f00:1) whose embedded IPv4 address is
 *    private;
 *  - the loopback address ::1 and the unspecified address ::;
 *  - unique local addresses, fc00::/7;
 *  - link-local addresses, fe80::/10.
 *
 * The hexadecimal spelling of IPv4-mapped addresses is checked explicitly
 * because that is the form URL parsers serialize, so it would otherwise slip
 * past the dotted-form match.
 *
 * @param {string} ip - IPv6 address text; a zone suffix ("%eth0") is ignored.
 * @returns {boolean} True when the address is private; false when it is
 *   public or cannot be expanded.
 */
function isPrivateIpv6(ip) {
  const cleaned = ip.replace(/%.*$/, "");
  const v4MappedMatch = cleaned.match(
    /^(?:::ffff:|0{1,4}:0{1,4}:0{1,4}:0{1,4}:0{1,4}:ffff:)(\d+\.\d+\.\d+\.\d+)$/i
  );
  if (v4MappedMatch) {
    return isPrivateIp(v4MappedMatch[1]);
  }
  const expanded = expandIpv6(cleaned);
  if (!expanded) return false;
  if (expanded === "0000:0000:0000:0000:0000:0000:0000:0001") return true;
  if (expanded === "0000:0000:0000:0000:0000:0000:0000:0000") return true;
  // IPv4-mapped address written with hex groups: 0:0:0:0:0:ffff:HHHH:LLLL.
  const groups = expanded.toLowerCase().split(":");
  const isV4Mapped = groups.length === 8 && groups.slice(0, 5).every((g) => g === "0000") && groups[5] === "ffff";
  if (isV4Mapped) {
    const hi = parseInt(groups[6], 16);
    const lo = parseInt(groups[7], 16);
    const valid = Number.isInteger(hi) && Number.isInteger(lo) && hi <= 65535 && lo <= 65535;
    if (valid && isPrivateIpv4Number((hi << 16 | lo) >>> 0)) return true;
  }
  const firstGroup = parseInt(expanded.substring(0, 4), 16);
  if ((firstGroup & 65024) === 64512) return true; // fc00::/7
  if ((firstGroup & 65472) === 65152) return true; // fe80::/10
  return false;
}
2535
/**
 * Expand an IPv6 address to eight zero-padded, colon-separated groups.
 *
 * Surrounding square brackets are removed, a trailing dotted IPv4 part is
 * converted to two hexadecimal groups, and a single "::" is replaced by as
 * many "0000" groups as needed. Group contents are not validated and letter
 * case is preserved.
 *
 * @param {string} ip - IPv6 address text.
 * @returns {string|null} Expanded address, or null when the trailing IPv4
 *   part is invalid, "::" occurs more than once, or the group count is wrong.
 */
function expandIpv6(ip) {
  const pad = (group) => group.padStart(4, "0");
  const toHexGroup = (value) => value.toString(16).padStart(4, "0");

  let text = ip.replace(/^\[|\]$/g, "");

  // Rewrite an embedded dotted IPv4 tail (x:x:...:a.b.c.d) as two hex groups.
  const tailStart = text.lastIndexOf(":") + 1;
  const tail = text.substring(tailStart);
  if (tail.includes(".")) {
    const v4 = parseIpv4ToNumber(tail);
    if (v4 === null) return null;
    text = text.substring(0, tailStart) + toHexGroup(v4 >>> 16 & 65535) + ":" + toHexGroup(v4 & 65535);
  }

  const halves = text.split("::");
  let groups;
  switch (halves.length) {
    case 1:
      groups = text.split(":");
      if (groups.length !== 8) return null;
      break;
    case 2: {
      const [head, rest] = halves.map((half) => (half ? half.split(":") : []));
      const gap = 8 - head.length - rest.length;
      if (gap < 0) return null;
      groups = [...head, ...Array(gap).fill("0000"), ...rest];
      break;
    }
    default:
      return null;
  }
  return groups.map(pad).join(":");
}
2561
/**
 * Report whether a hostname is one of the known metadata hostnames.
 *
 * The comparison ignores letter case, surrounding whitespace and trailing
 * dots, so the fully-qualified spelling "metadata.google.internal." is
 * recognised as well.
 *
 * @param {string} hostname - Hostname to test.
 * @returns {boolean} True when the hostname is in METADATA_HOSTNAMES.
 */
function isMetadataHostname(hostname) {
  if (!hostname) return false;
  // A trailing dot names the same host, so strip it before the lookup.
  const normalized = hostname.toLowerCase().trim().replace(/\.+$/, "");
  return METADATA_HOSTNAMES.has(normalized);
}
2566
/**
 * Report whether a URL uses an allowed scheme (http or https).
 *
 * When the string cannot be parsed as a URL, the decision falls back to a
 * case-insensitive prefix check on the trimmed text.
 *
 * @param {string} url - URL text.
 * @returns {boolean} True for http: and https: URLs.
 */
function isAllowedProtocol(url) {
  try {
    const { protocol } = new URL(url);
    return ALLOWED_PROTOCOLS.has(protocol);
  } catch {
    const normalized = url.toLowerCase().trim();
    return ["http:", "https:"].some((scheme) => normalized.startsWith(scheme));
  }
}
2576
/**
 * Extract the hostname from a URL.
 *
 * @param {string} urlStr - URL text.
 * @returns {string} The parsed hostname, or `""` when `urlStr` is not a valid URL.
 */
function extractHostname(urlStr) {
  let host = "";
  try {
    ({ hostname: host } = new URL(urlStr));
  } catch {
    host = "";
  }
  return host;
}
2584
/**
 * Decide whether the crawler must refuse to request a URL.
 *
 * Checks run in this order, and the first one that applies decides:
 *  1. scheme other than http/https: blocked;
 *  2. missing or undecodable hostname: blocked (fail-closed);
 *  3. known metadata hostname: blocked unless `allowPrivate`;
 *  4. private IP literal: blocked unless `allowPrivate`;
 *  5. public IPv4 literal: allowed without a DNS lookup;
 *  6. otherwise the host is resolved, and any private result blocks the URL
 *     unless `allowPrivate`; a failed resolution blocks (fail-closed).
 *
 * The DNS lookup receives the hostname WITHOUT the square brackets that
 * `URL.hostname` puts around IPv6 literals. A bracketed name is not expected
 * to resolve, which would block every public IPv6 literal.
 *
 * @param {string} url - URL to check.
 * @param {{allowPrivate?: boolean}} [options] - `allowPrivate` (default
 *   false) permits private and metadata targets.
 * @returns {Promise<{blocked: boolean, reason?: string}>} Verdict; `reason`
 *   is present when `blocked` is true.
 */
async function isBlockedUrl(url, options) {
  const allowPrivate = options?.allowPrivate ?? false;
  // A private or metadata hit only blocks when private targets are not allowed.
  const privateHit = (reason) => allowPrivate ? { blocked: false } : { blocked: true, reason };

  if (!isAllowedProtocol(url)) {
    return { blocked: true, reason: "Blocked protocol \u2014 only http: and https: are allowed" };
  }
  let hostname;
  try {
    hostname = extractHostname(url);
  } catch {
    return { blocked: true, reason: "Failed to parse URL (fail-closed)" };
  }
  if (!hostname) {
    return { blocked: true, reason: "Empty hostname (fail-closed)" };
  }
  let decodedHostname;
  try {
    decodedHostname = decodeURIComponent(hostname);
  } catch {
    return { blocked: true, reason: "Invalid hostname encoding (fail-closed)" };
  }
  if (isMetadataHostname(decodedHostname)) {
    return privateHit("Blocked metadata hostname");
  }
  const cleanedHost = decodedHostname.replace(/^\[|\]$/g, "");
  if (isPrivateIp(cleanedHost)) {
    return privateHit(`Blocked private IP: ${cleanedHost}`);
  }
  // isPrivateIp() already rejected private IPv4 literals, so any literal that
  // still parses here is public and needs no DNS lookup.
  if (parseIpv4ToNumber(cleanedHost) !== null) {
    return { blocked: false };
  }
  try {
    const result = await lookup(cleanedHost, { all: true });
    const addresses = Array.isArray(result) ? result : [result];
    for (const entry of addresses) {
      const addr = typeof entry === "string" ? entry : entry.address;
      if (isPrivateIp(addr)) {
        return privateHit(`DNS resolved to private IP: ${addr}`);
      }
    }
    return { blocked: false };
  } catch {
    return { blocked: true, reason: "DNS resolution failed (fail-closed)" };
  }
}
2636
+
2637
+ // src/security/browser-hardening.ts
2638
// Version string of the tool.
// NOTE(review): the package header at the top of this file reports 0.1.1 —
// confirm whether this constant is meant to track the package version.
var TOOL_VERSION = "0.1.0";
// Base browser launch options handed out (as a copy) by getSecureLaunchOptions().
var LAUNCH_OPTIONS = {
  // Signal handling by the browser launcher is switched off for SIGINT,
  // SIGTERM and SIGHUP — presumably so the CLI can run its own shutdown
  // logic; confirm against the signal handlers elsewhere in the project.
  handleSIGINT: false,
  handleSIGTERM: false,
  handleSIGHUP: false,
  // Command-line switches passed to the browser, all aimed at WebRTC.
  // NOTE(review): verify each switch is recognised by the bundled Chromium.
  args: [
    "--disable-webrtc",
    "--enforce-webrtc-ip-permission-check",
    "--force-webrtc-ip-handling-policy=disable_non_proxied_udp"
  ]
};
2649
/**
 * Build the options object for a hardened browser context.
 *
 * Fixed settings: CSP bypass on, downloads off, JavaScript on, HTTPS errors
 * not ignored, service workers blocked, no permissions granted. The viewport
 * defaults to 1280x720. `userAgent`, `storageState` and `recordHar` are
 * copied through only when supplied with a truthy value.
 *
 * @param {{viewport?: object, userAgent?: string, storageState?: *, recordHar?: *}} [opts]
 *   Optional overrides.
 * @returns {object} Context options.
 */
function getSecureContextOptions(opts) {
  const contextOptions = {
    bypassCSP: true,
    acceptDownloads: false,
    javaScriptEnabled: true,
    ignoreHTTPSErrors: false,
    serviceWorkers: "block",
    permissions: [],
    viewport: opts?.viewport ?? { width: 1280, height: 720 },
  };
  // Optional pass-through settings, added in a fixed order and only when truthy.
  for (const key of ["userAgent", "storageState", "recordHar"]) {
    const value = opts?.[key];
    if (value) contextOptions[key] = value;
  }
  return contextOptions;
}
2663
/**
 * Return a fresh copy of the hardened browser launch options.
 *
 * The `args` array is copied as well, so a caller that appends its own
 * switches cannot alter the shared LAUNCH_OPTIONS constant for later callers.
 *
 * @returns {object} Launch options that are safe for the caller to mutate.
 */
function getSecureLaunchOptions() {
  return { ...LAUNCH_OPTIONS, args: [...LAUNCH_OPTIONS.args] };
}
2666
+
2667
+ // src/crawl/orchestrator.ts
2668
// Script source meant to run inside each crawled page. It defines
// window.__pa_isDomStable(), which returns true once 500ms have passed with
// no DOM mutation (child list, subtree or attribute changes under
// document.documentElement). visitPage() polls this function after load.
// The text is a runtime string: editing it changes what runs in the page.
var INIT_SCRIPT = `
(() => {
// DOM stability detection \u2014 sets window.__pa_isDomStable when no mutations
// have occurred for 500ms.
let _paStableTimer = null;
let _paIsStable = false;

window.__pa_isDomStable = () => _paIsStable;

const observer = new MutationObserver(() => {
_paIsStable = false;
if (_paStableTimer) clearTimeout(_paStableTimer);
_paStableTimer = setTimeout(() => { _paIsStable = true; }, 500);
});

// Start observing once the DOM is ready.
if (document.documentElement) {
observer.observe(document.documentElement, {
childList: true,
subtree: true,
attributes: true,
});
} else {
document.addEventListener('DOMContentLoaded', () => {
observer.observe(document.documentElement, {
childList: true,
subtree: true,
attributes: true,
});
});
}

// Mark stable initially after a short delay (for static pages).
_paStableTimer = setTimeout(() => { _paIsStable = true; }, 500);
})();
`;
2704
/**
 * Wait for a number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that resolves after the delay.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
2707
/**
 * Navigate a page to a queue entry's URL and classify the outcome.
 *
 * Possible `status` values of the result:
 *  - "no_response":   navigation returned no response object;
 *  - "http_error":    response status is 400 or higher;
 *  - "ok":            page loaded; the function also waited up to 5s for the
 *                     injected DOM-stability flag (a wait timeout is logged
 *                     and otherwise ignored);
 *  - "timeout":       navigation threw and the message mentions a timeout
 *                     but not "net::ERR_";
 *  - "network_error": navigation threw for any other reason.
 *
 * @param {object} page - Browser page exposing `goto` and `waitForFunction`.
 * @param {{url: string}} entry - Queue entry to visit.
 * @param {{timeout: number}} config - Crawl configuration; `timeout` is the
 *   navigation timeout.
 * @returns {Promise<object>} Result with `status`, `url`, `finalUrl` and,
 *   depending on the outcome, `httpStatus`, `response` or `error`.
 */
async function visitPage(page, entry, config) {
  // Failures never reached a final URL, so the requested URL is reported for both.
  const failed = (status, error) => ({ status, url: entry.url, finalUrl: entry.url, error });
  try {
    const response = await page.goto(entry.url, {
      waitUntil: "load",
      timeout: config.timeout
    });
    if (!response) {
      return { status: "no_response", url: entry.url, finalUrl: entry.url };
    }
    const details = {
      url: entry.url,
      finalUrl: response.url(),
      httpStatus: response.status(),
      response
    };
    if (details.httpStatus >= 400) {
      return { status: "http_error", ...details };
    }
    const onStabilityTimeout = () => {
      logger.debug(`DOM stability timeout on ${entry.url} \u2014 proceeding anyway`);
    };
    await page
      .waitForFunction("window.__pa_isDomStable && window.__pa_isDomStable()", { timeout: 5e3 })
      .catch(onStabilityTimeout);
    return { status: "ok", ...details };
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    // Browser network errors win over the timeout wording when both appear.
    const timedOut = !message.includes("net::ERR_") && /[Tt]imeout/.test(message);
    return failed(timedOut ? "timeout" : "network_error", message);
  }
}
2765
/**
 * Scan the current page, preferring the dedicated page-scanner module and
 * falling back to the built-in scan.
 *
 * The fallback is used when the scanner module cannot be loaded, does not
 * export `scanPage`, or its scan throws. The failure is now logged at debug
 * level before falling back, so a broken scanner no longer degrades results
 * silently.
 *
 * @param {object} page - Browser page to scan.
 * @param {string} targetUrl - Passed through to the scanner module.
 * @returns {Promise<object>} Scan result.
 */
async function scanPage(page, targetUrl) {
  try {
    const mod = await import("./page-scanner-Q76HROEW.js");
    if (typeof mod.scanPage === "function") {
      return await mod.scanPage(page, targetUrl);
    }
  } catch (err) {
    // Best-effort by design: record why the scanner was skipped, then fall back.
    const message = err instanceof Error ? err.message : String(err);
    logger.debug(`Page scanner unavailable or failed (${message}) \u2014 using inline scan`);
  }
  return inlineScanPage(page);
}
2775
/**
 * Built-in page scan used when the page-scanner module is not available.
 *
 * Collects, inside the page, the title, links, headings, meta tags, ARIA
 * landmarks, a content hash, navigation timing and a hash-routing flag.
 *
 * NOTE: `statusCode` in the result is always 200 — this function has no
 * access to the navigation response. `interactiveElements` is always empty.
 *
 * @param {object} page - Browser page exposing `url()` and `evaluate()`.
 * @returns {Promise<object>} Scan result for the page's current URL.
 */
async function inlineScanPage(page) {
  const url = page.url();
  // The callback is handed to page.evaluate(), so it can only use what it
  // defines itself plus the page's own globals.
  const data = await page.evaluate(() => {
    const title = document.title || "";
    const anchors = Array.from(document.querySelectorAll("a[href]"));
    // Links: javascript: and mailto: targets are dropped; link text is capped at 200 characters.
    const links = anchors.map((a) => {
      const el = a;
      const href = el.href;
      if (!href || href.startsWith("javascript:") || href.startsWith("mailto:")) {
        return null;
      }
      return {
        href,
        text: (el.textContent || "").trim().slice(0, 200),
        isExternal: el.origin !== window.location.origin,
        rel: el.rel || void 0
      };
    }).filter(Boolean);
    // Headings: level comes from the digit in the tag name (H1 -> 1); text is capped at 500 characters.
    const headings = Array.from(document.querySelectorAll("h1, h2, h3, h4, h5, h6")).map((h) => ({
      level: parseInt(h.tagName[1], 10),
      text: (h.textContent || "").trim().slice(0, 500)
    }));
    const metaTags = Array.from(document.querySelectorAll("meta[name], meta[property]")).map((m) => {
      const el = m;
      return {
        name: el.name || void 0,
        property: el.getAttribute("property") || void 0,
        content: el.content || ""
      };
    });
    // Landmarks: only elements with an explicit role attribute from this list are reported.
    const landmarkRoles = ["banner", "navigation", "main", "complementary", "contentinfo", "search", "form", "region"];
    const landmarks = Array.from(document.querySelectorAll("[role]")).filter((el) => landmarkRoles.includes((el.getAttribute("role") || "").toLowerCase())).map((el) => ({
      role: el.getAttribute("role"),
      tagName: el.tagName.toLowerCase(),
      label: el.getAttribute("aria-label") || void 0
    }));
    // Content hash: 32-bit hash (hash * 31 + charCode, truncated by `| 0`) over
    // the first 10,000 characters of the body's innerText, rendered as hex.
    const textContent = (document.body?.innerText || "").slice(0, 1e4);
    let hash = 0;
    for (let i = 0; i < textContent.length; i++) {
      const char = textContent.charCodeAt(i);
      hash = (hash << 5) - hash + char | 0;
    }
    const contentHash = Math.abs(hash).toString(16).padStart(8, "0");
    // Timing: taken from the navigation performance entry; zero when the entry is missing.
    const perfEntry = performance.getEntriesByType("navigation")[0];
    const timing = {
      loadTime: perfEntry ? Math.round(perfEntry.loadEventEnd - perfEntry.startTime) : 0,
      domContentLoaded: perfEntry ? Math.round(perfEntry.domContentLoadedEventEnd - perfEntry.startTime) : 0,
      firstContentfulPaint: void 0
    };
    const fcp = performance.getEntriesByName("first-contentful-paint")[0];
    if (fcp) {
      timing.firstContentfulPaint = Math.round(fcp.startTime);
    }
    // Hash routing is inferred from a location hash starting with "#/" or "#!/".
    const hashRoutingDetected = window.location.hash.startsWith("#/") || window.location.hash.startsWith("#!/");
    return {
      title,
      links,
      headings,
      metaTags,
      landmarks,
      contentHash,
      timing,
      hashRoutingDetected
    };
  });
  return {
    url,
    // Hard-coded: the real HTTP status is not known here.
    statusCode: 200,
    title: data.title,
    metaTags: data.metaTags,
    headings: data.headings,
    landmarks: data.landmarks,
    links: data.links,
    interactiveElements: [],
    timing: data.timing,
    contentHash: data.contentHash,
    hashRoutingDetected: data.hashRoutingDetected
  };
}
2854
/**
 * Build the route tree for a crawl: a root node for the target URL with one
 * direct child per other page. The tree is flat — children are never nested.
 *
 * A page whose URL equals `targetUrl` and that has a `pageScan` fills in the
 * root node instead of becoming a child. A child's `segment` is the last
 * non-empty path segment of its URL (the whole path when there is none, or
 * the raw URL when it cannot be parsed).
 *
 * @param {Iterable<object>} pages - Crawled pages (`url`, `depth`, optional
 *   `pageScan`, `httpStatus`, `screenshot`).
 * @param {string} targetUrl - Crawl entry URL.
 * @returns {object} Root route node.
 */
function buildRouteTree(pages, targetUrl) {
  const lastSegment = (rawUrl) => {
    try {
      const { pathname } = new URL(rawUrl);
      return pathname.split("/").filter(Boolean).pop() || pathname;
    } catch {
      return rawUrl;
    }
  };

  const root = {
    segment: "/",
    url: targetUrl,
    title: "",
    statusCode: 0,
    contentHash: "",
    headings: [],
    landmarks: [],
    depth: 0,
    children: [],
    formCount: 0,
    apiCallCount: 0,
    hasScreenshot: false
  };

  for (const page of pages) {
    const scan = page.pageScan;
    if (page.url === targetUrl && scan) {
      // The entry page describes the root itself.
      Object.assign(root, {
        title: scan.title,
        statusCode: scan.statusCode,
        contentHash: scan.contentHash,
        headings: scan.headings,
        landmarks: scan.landmarks,
        hasScreenshot: !!page.screenshot
      });
      continue;
    }
    root.children.push({
      segment: lastSegment(page.url),
      url: page.url,
      title: scan?.title || "",
      statusCode: scan?.statusCode || page.httpStatus || 0,
      contentHash: scan?.contentHash || "",
      headings: scan?.headings || [],
      landmarks: scan?.landmarks || [],
      depth: page.depth,
      children: [],
      formCount: 0,
      apiCallCount: 0,
      hasScreenshot: !!page.screenshot
    });
  }
  return root;
}
2904
/**
 * Converts page visit results into flat route records, keeping only pages
 * whose `status` is "ok".
 *
 * `formCount` and `apiCallCount` are emitted as 0 here.
 *
 * @param {Array<object>} pages - Page visit results.
 * @returns {Array<object>} One route record per successfully visited page.
 */
function pagesToRouteInfos(pages) {
  return pages.flatMap((visit) => {
    if (visit.status !== "ok") {
      return [];
    }
    const scan = visit.pageScan;
    const info = {
      url: visit.url,
      title: scan?.title || "",
      statusCode: scan?.statusCode || visit.httpStatus || 0,
      contentHash: scan?.contentHash || "",
      depth: visit.depth,
      formCount: 0,
      apiCallCount: 0,
      hasScreenshot: !!visit.screenshot,
      discoveryMethod: visit.discoveryMethod
    };
    return [info];
  });
}
2917
/**
 * Groups every request classified as "api" across all pages by
 * `METHOD parameterizedPath` and summarises each group.
 *
 * For each group it collects the observed URLs, the calling pages, the status
 * codes and content types of matching responses, the first request seen, and
 * the first response seen. GraphQL operations recorded in a page's network
 * log are attached to the `POST` group of their endpoint path when that
 * group exists.
 *
 * @param {Array<object>} pages - Page visit results; pages without a
 *   `networkLog` are skipped.
 * @returns {Array<object>} One endpoint group per distinct method + pattern,
 *   in first-seen order.
 */
function buildApiEndpointGroupsFromPages(pages) {
  // Pathname of an absolute URL. Strings that do not parse as a URL are used
  // as-is, so request URLs and GraphQL endpoint URLs are keyed the same way
  // and a malformed endpoint URL cannot abort the whole aggregation.
  const pathnameOf = (rawUrl) => {
    try {
      return new URL(rawUrl).pathname;
    } catch {
      return rawUrl;
    }
  };
  const groupMap = new Map();
  for (const page of pages) {
    if (!page.networkLog) continue;
    const log = page.networkLog;
    // Index responses once per page. Only the first response per requestId is
    // kept, which matches what a linear first-match search would return.
    const responseByRequestId = new Map();
    for (const response of log.responses ?? []) {
      if (!responseByRequestId.has(response.requestId)) {
        responseByRequestId.set(response.requestId, response);
      }
    }
    for (const req of log.requests) {
      if (req.classification !== "api") continue;
      const pattern = parameterizePath(pathnameOf(req.url));
      const key = `${req.method} ${pattern}`;
      let group = groupMap.get(key);
      if (!group) {
        group = {
          pattern,
          method: req.method,
          urls: [],
          callingPages: new Set(),
          statusCodes: new Set(),
          contentTypes: new Set(),
          // Initial guess from the first URL; may be set to true below when a
          // recorded GraphQL operation targets this group.
          isGraphQL: req.url.includes("graphql"),
          graphqlOps: new Set()
        };
        groupMap.set(key, group);
      }
      group.urls.push(req.url);
      group.callingPages.add(page.url);
      const resp = responseByRequestId.get(req.requestId);
      if (resp) {
        group.statusCodes.add(resp.statusCode);
        if (resp.contentType) group.contentTypes.add(resp.contentType);
      }
      if (!group.firstRequest) {
        group.firstRequest = {
          url: req.url,
          headers: req.headers,
          body: req.body,
          contentType: req.contentType
        };
      }
      if (!group.firstResponse && resp) {
        group.firstResponse = {
          statusCode: resp.statusCode,
          headers: resp.headers,
          body: resp.body,
          contentType: resp.contentType,
          bodySize: resp.bodySize
        };
      }
    }
    for (const gqlOp of log.graphqlOperations ?? []) {
      const key = `POST ${parameterizePath(pathnameOf(gqlOp.endpointUrl))}`;
      const group = groupMap.get(key);
      if (group) {
        group.isGraphQL = true;
        group.graphqlOps.add(gqlOp.operationName);
      }
    }
  }
  const result = [];
  for (const g of groupMap.values()) {
    result.push({
      pattern: g.pattern,
      method: g.method,
      classification: "api",
      observedUrls: g.urls,
      callCount: g.urls.length,
      callingPages: Array.from(g.callingPages),
      exampleRequest: g.firstRequest ?? {
        url: g.urls[0] ?? "",
        headers: {}
      },
      // Placeholder used when no response was matched to any request of the group.
      exampleResponse: g.firstResponse ?? {
        statusCode: 0,
        headers: {},
        bodySize: 0
      },
      observedStatusCodes: Array.from(g.statusCodes),
      observedContentTypes: Array.from(g.contentTypes),
      isGraphQL: g.isGraphQL,
      graphqlOperations: g.graphqlOps.size > 0 ? Array.from(g.graphqlOps) : void 0
    });
  }
  return result;
}
3006
/**
 * Builds the screenshot manifest: one entry per page that has a screenshot
 * with a non-empty `fullPagePath`.
 *
 * Both image paths are rewritten relative to `outputDir`.
 *
 * @param {Array<object>} pages - Page visit results.
 * @param {string} outputDir - Directory the manifest paths are made relative to.
 * @returns {Array<object>} Manifest entries in page order.
 */
function buildScreenshotManifest(pages, outputDir) {
  const relativeToOutput = (target) => path.relative(outputDir, target);
  return pages.flatMap((visit) => {
    const shot = visit.screenshot;
    if (!shot || !shot.fullPagePath) {
      return [];
    }
    const entry = {
      url: shot.pageUrl,
      title: visit.pageScan?.title || "",
      fullPagePath: relativeToOutput(shot.fullPagePath),
      viewportPath: relativeToOutput(shot.viewportPath),
      viewport: shot.viewport,
      dimensions: shot.dimensions,
      fullPageHash: shot.fullPageHash,
      // Thumbnail generation is a separate step (not in M1).
      thumbnailBase64: ""
    };
    return [entry];
  });
}
3024
/**
 * Assembles every derived artifact of a crawl (route tree, routes, forms, API
 * endpoint groups, GraphQL operations, WebSocket connections, flow graph,
 * Mermaid definition, screenshot manifest) into one report object.
 *
 * @param {object} crawlResult - Crawl summary; reads `config`, `startedAt`,
 *   `durationMs`, `pages`, `unvisitedUrls`, `errors`, `completionStatus`.
 * @param {Array<object>} pages - Page visit results to derive artifacts from.
 * @param {string} [outputDir] - Directory screenshot paths are made relative
 *   to. Defaults to `crawlResult.config.outputDir`. When neither is supplied
 *   the previous behaviour (relativizing against the target URL) is kept so
 *   existing two-argument callers see no change; pass a real directory to
 *   get meaningful relative screenshot paths.
 * @returns {object} The assembled artifacts (`meta`, `routeTree`, `routes`,
 *   `forms`, `apiEndpoints`, `graphqlOperations`, `webSocketConnections`,
 *   `flowGraph`, `flowDiagrams`, `screenshots`, `errors`).
 */
function buildAssembledArtifacts(crawlResult, pages, outputDir = crawlResult.config.outputDir ?? crawlResult.config.targetUrl) {
  const { targetUrl } = crawlResult.config;
  const routeTree = buildRouteTree(pages, targetUrl);
  const routes = pagesToRouteInfos(pages);
  const allForms = pages.filter((p) => p.formProbe).flatMap((p) => p.formProbe.forms);
  const apiEndpoints = buildApiEndpointGroupsFromPages(pages);
  const allGraphqlOps = pages.filter((p) => p.networkLog).flatMap((p) => p.networkLog.graphqlOperations);
  const allWebSockets = pages.filter((p) => p.networkLog).flatMap((p) => p.networkLog.webSocketConnections);
  const allEdges = pages.flatMap((p) => p.navigationEdges ?? []);
  const flowGraph = allEdges.length > 0 ? buildFlowGraph(allEdges, targetUrl) : void 0;
  const mermaidDef = flowGraph ? generateMermaidDefinition(flowGraph) : "";
  // Screenshot paths must be relativized against a directory, not the crawl's target URL.
  const screenshots = buildScreenshotManifest(pages, outputDir);
  // First page per URL, so the lookups below are O(1) instead of a scan per
  // node/route. First-wins matches what a linear first-match search returns.
  const pageByUrl = new Map();
  for (const p of pages) {
    if (!pageByUrl.has(p.url)) pageByUrl.set(p.url, p);
  }
  const countForms = (page) => page?.formProbe?.forms.length ?? 0;
  const countApiCalls = (page) => page?.networkLog?.requests.filter((r) => r.classification === "api").length ?? 0;
  // Fill per-node counters on the root and its direct children.
  for (const node of [routeTree, ...routeTree.children]) {
    const pageForNode = pageByUrl.get(node.url);
    if (pageForNode) {
      node.formCount = countForms(pageForNode);
      node.apiCallCount = countApiCalls(pageForNode);
      node.hasScreenshot = !!pageForNode.screenshot;
    }
  }
  const enrichedRoutes = routes.map((r) => {
    const pageForRoute = pageByUrl.get(r.url);
    return {
      ...r,
      formCount: countForms(pageForRoute),
      apiCallCount: countApiCalls(pageForRoute),
      hasScreenshot: !!pageForRoute?.screenshot
    };
  });
  return {
    meta: {
      toolVersion: TOOL_VERSION,
      targetUrl,
      crawlDate: crawlResult.startedAt,
      duration: crawlResult.durationMs,
      pagesVisited: pages.length,
      pagesDiscovered: crawlResult.pages.length + crawlResult.unvisitedUrls.length,
      formsFound: allForms.length,
      apiEndpointsFound: apiEndpoints.length,
      screenshotsTaken: screenshots.length,
      errorCount: crawlResult.errors.length,
      completionStatus: crawlResult.completionStatus,
      viewport: crawlResult.config.viewport
    },
    routeTree,
    routes: enrichedRoutes,
    forms: allForms,
    apiEndpoints,
    graphqlOperations: allGraphqlOps,
    webSocketConnections: allWebSockets,
    flowGraph: flowGraph ? {
      nodes: flowGraph.nodes.map((n) => ({
        url: n.url,
        title: pageByUrl.get(n.url)?.pageScan?.title || "",
        cluster: n.cluster ?? "/"
      })),
      edges: flowGraph.edges,
      entryPoint: flowGraph.entryUrl,
      deadEnds: flowGraph.nodes.filter((n) => n.isExit).map((n) => n.url),
      cycles: flowGraph.cycleNodes.length > 0 ? [flowGraph.cycleNodes] : []
    } : {
      // No navigation edges were recorded: list the successfully visited pages as unconnected nodes.
      nodes: pages.filter((p) => p.status === "ok").map((p) => ({
        url: p.url,
        title: p.pageScan?.title || "",
        cluster: "/"
      })),
      edges: [],
      entryPoint: targetUrl,
      deadEnds: [],
      cycles: []
    },
    flowDiagrams: {
      overview: { definition: mermaidDef },
      sections: []
    },
    screenshots,
    errors: crawlResult.errors
  };
}
3102
/**
 * Writes the two base output files of a crawl into `outputDir`:
 * `sitemap.json` (one record per page with status "ok") and
 * `crawl-summary.json` (totals and timing taken from `crawlResult`).
 *
 * The files are written one after the other; a failure on the first write
 * propagates before the second is attempted.
 *
 * @param {string} outputDir - Existing directory to write into.
 * @param {object} crawlResult - Crawl summary used for `crawl-summary.json`.
 * @param {Array<object>} pages - Page visit results used for `sitemap.json`.
 * @returns {Promise<{sitemapJson: string}>} Path of the sitemap file written.
 */
async function writeOutputFiles(outputDir, crawlResult, pages) {
  const toPrettyJson = (value) => JSON.stringify(value, null, 2);
  const sitemapEntries = pages.reduce((entries, visit) => {
    if (visit.status === "ok") {
      entries.push({
        url: visit.url,
        title: visit.pageScan?.title || "",
        depth: visit.depth,
        statusCode: visit.httpStatus,
        links: visit.pageScan?.links.length ?? 0
      });
    }
    return entries;
  }, []);
  const sitemapFile = path.join(outputDir, "sitemap.json");
  await fs.writeFile(sitemapFile, toPrettyJson(sitemapEntries), "utf-8");
  const visitedTotal = crawlResult.pages.length;
  const crawlSummary = {
    targetUrl: crawlResult.config.targetUrl,
    startedAt: crawlResult.startedAt,
    finishedAt: crawlResult.finishedAt,
    durationMs: crawlResult.durationMs,
    pagesVisited: visitedTotal,
    pagesDiscovered: visitedTotal + crawlResult.unvisitedUrls.length,
    completionStatus: crawlResult.completionStatus,
    errors: crawlResult.errors.length
  };
  const summaryFile = path.join(outputDir, "crawl-summary.json");
  await fs.writeFile(summaryFile, toPrettyJson(crawlSummary), "utf-8");
  // Only the sitemap path is reported back to the caller.
  return { sitemapJson: sitemapFile };
}
3128
/**
 * Creates `outputDir`, including any missing parent directories.
 *
 * @param {string} outputDir - Directory to create.
 * @returns {Promise<void>}
 */
async function ensureOutputDir(outputDir) {
  const mkdirOptions = { recursive: true };
  await fs.mkdir(outputDir, mkdirOptions);
}
3131
/**
 * Verifies that Chromium can be launched by starting a throwaway browser and
 * closing it again.
 *
 * @returns {Promise<void>} Resolves when the launch succeeded.
 * @throws {Error} When the launch fails. The message tells the user to run
 *   `pa install` and includes the launch error text; the original error is
 *   attached as `cause`.
 */
async function checkBrowserInstallation() {
  let browser;
  try {
    // Signal handling is turned off for this probe browser.
    browser = await chromium.launch({ handleSIGINT: false, handleSIGTERM: false, handleSIGHUP: false });
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    throw new Error(
      `Chromium browser not found. Run \`pa install\` to install it.\n\nDetails: ${message}`,
      { cause: err }
    );
  }
  try {
    await browser.close();
  } catch (closeErr) {
    // The launch itself succeeded, which is all this check is about; a
    // failure to close the probe browser must not fail the check.
    const closeMessage = closeErr instanceof Error ? closeErr.message : String(closeErr);
    logger.debug(`Browser close error after installation check: ${closeMessage}`);
  }
}
3148
/**
 * Crawls `config.targetUrl` with Playwright and writes the crawl artifacts to
 * `config.outputDir`.
 *
 * Steps, as implemented below:
 *  1. Create the output directory, verify Chromium launches, then launch the
 *     browser and a context (HAR recording unless `config.noHar`).
 *  2. Pull entries from the Frontier until it is empty, `config.maxPages`
 *     pages were visited, `config.maxTime` seconds elapsed, or SIGINT arrived.
 *     Each entry is SSRF-checked (unless `config.allowPrivate`), visited,
 *     scanned for links, optionally screenshotted, probed for forms, and its
 *     network log collected. Collector failures are recorded per page and do
 *     not stop the crawl.
 *  3. Close context and browser, then write sitemap/summary, forms, API map,
 *     flow graph, OpenAPI spec and the HTML report.
 *
 * An error that escapes the per-page handling is recorded as `ERR_FATAL`; the
 * function then still tries to write the base output files and returns with
 * `completionStatus: "error"`.
 *
 * @param {object} config - Resolved crawl configuration (`targetUrl`,
 *   `outputDir`, `depth`, `maxPages`, `maxTime`, `delay`, `viewport`, ...).
 * @returns {Promise<{artifacts: object, outputPaths: object, errors: Array<object>, completionStatus: string}>}
 */
async function dig(config) {
  const startedAt = new Date();
  const errors = [];
  const pages = [];
  let completionStatus = "complete";
  let interrupted = false;
  let browser;
  let context;
  // SIGINT only raises a flag; the crawl loop checks it between pages so the
  // results collected so far are still written out.
  const sigintHandler = () => {
    logger.warn("SIGINT received \u2014 shutting down gracefully...");
    interrupted = true;
    completionStatus = "interrupted";
  };
  const describeError = (err) => err instanceof Error ? err.message : String(err);
  // Single place that builds the crawl-result snapshot handed to the output
  // writers (used by both the normal and the fatal-error path).
  const buildCrawlResult = (finishedAt, unvisitedUrls, status) => ({
    config: {
      targetUrl: config.targetUrl,
      depth: config.depth,
      maxPages: config.maxPages,
      concurrency: config.concurrency,
      viewport: config.viewport,
      followExternal: config.followExternal,
      deepClick: config.deepClick
    },
    startedAt: startedAt.toISOString(),
    finishedAt: finishedAt.toISOString(),
    durationMs: finishedAt.getTime() - startedAt.getTime(),
    pages,
    unvisitedUrls,
    errors,
    completionStatus: status
  });
  // The crawl-result config snapshot above carries the target URL but no
  // output directory, so the screenshot manifest is rebuilt here against
  // config.outputDir to get paths relative to the output directory.
  const assembleArtifacts = (crawlResult) => {
    const assembled = buildAssembledArtifacts(crawlResult, pages);
    assembled.screenshots = buildScreenshotManifest(pages, config.outputDir);
    return assembled;
  };
  process.on("SIGINT", sigintHandler);
  try {
    logger.info(`Crawling ${config.targetUrl}...`);
    logger.info(` depth=${config.depth}, maxPages=${config.maxPages}, concurrency=${config.concurrency}`);
    logger.info(` viewport=${config.viewport.width}x${config.viewport.height}`);
    logger.info(` output=${config.outputDir}, format=${config.format}`);
    await ensureOutputDir(config.outputDir);
    await checkBrowserInstallation();
    browser = await chromium.launch(getSecureLaunchOptions());
    logger.debug("Browser launched");
    const contextOptions = getSecureContextOptions({
      viewport: config.viewport,
      recordHar: config.noHar ? void 0 : {
        path: path.join(config.outputDir, "crawl.har"),
        content: "embed",
        mode: "minimal"
      }
    });
    context = await browser.newContext(contextOptions);
    await context.addInitScript(INIT_SCRIPT);
    const frontier = new Frontier({ maxDepth: config.depth });
    logger.debug("Browser context created with secure defaults");
    frontier.enqueue({ url: config.targetUrl, depth: 0 });
    logger.debug(`Frontier initialized with entry URL: ${config.targetUrl}`);
    let visitedCount = 0;
    const crawlStartTime = Date.now();
    while (!frontier.isEmpty && visitedCount < config.maxPages && !interrupted) {
      // maxTime is in seconds; 0 (or less) disables the time limit.
      if (config.maxTime > 0) {
        const elapsed = (Date.now() - crawlStartTime) / 1e3;
        if (elapsed >= config.maxTime) {
          logger.warn(`Max crawl time reached (${config.maxTime}s) \u2014 stopping`);
          completionStatus = "max_time_reached";
          break;
        }
      }
      const entry = frontier.dequeue();
      if (!entry) break;
      if (!config.allowPrivate) {
        const blockResult = await isBlockedUrl(entry.url);
        if (blockResult.blocked) {
          logger.warn(`SSRF blocked: ${entry.url} \u2014 ${blockResult.reason}`);
          errors.push({
            timestamp: new Date().toISOString(),
            url: entry.url,
            code: "ERR_SECURITY_BLOCK",
            message: blockResult.reason || "Blocked by SSRF protection",
            securityReason: "private_ip"
          });
          // Blocked entries are not counted as visited.
          continue;
        }
      }
      const page = await context.newPage();
      const pageStartTime = Date.now();
      // Popups are closed immediately; their URL is queued when it is in scope.
      page.on("popup", async (popup) => {
        try {
          const popupUrl = popup.url();
          if (popupUrl && popupUrl !== "about:blank") {
            logger.debug(`Popup detected: ${popupUrl}`);
            if (shouldCrawl(popupUrl, config.targetUrl, {
              include: config.include,
              exclude: config.exclude,
              followExternal: config.followExternal
            })) {
              frontier.enqueue({ url: popupUrl, depth: entry.depth + 1 });
            }
          }
          await popup.close();
        } catch (err) {
          logger.debug(`Popup handling failed on ${entry.url}: ${describeError(err)}`);
        }
      });
      const jsErrors = [];
      page.on("pageerror", (err) => {
        jsErrors.push(err.message);
        logger.debug(`JS error on ${entry.url}: ${err.message}`);
      });
      page.on("dialog", async (dialog) => {
        logger.debug(`Dialog on ${entry.url}: ${dialog.type()} "${dialog.message()}"`);
        // This listener is async, so a rejected dismiss() would otherwise
        // surface as an unhandled promise rejection.
        try {
          await dialog.dismiss();
        } catch (err) {
          logger.debug(`Dialog dismiss failed on ${entry.url}: ${describeError(err)}`);
        }
      });
      const networkLogger = createNetworkLogger(page, entry.url, {
        includeCookies: config.includeCookies
      });
      networkLogger.start();
      const visitResult = await visitPage(page, entry, config);
      visitedCount++;
      const pageVisitResult = {
        url: entry.url,
        finalUrl: visitResult.finalUrl,
        status: visitResult.status,
        httpStatus: visitResult.httpStatus,
        depth: entry.depth,
        referrer: entry.referrer,
        discoveryMethod: entry.referrer ? "link" : "entry",
        visitedAt: new Date(pageStartTime).toISOString(),
        durationMs: 0,
        collectorErrors: [],
        navigationEdges: []
      };
      if (visitResult.status === "ok") {
        try {
          // NOTE(review): the scanner and the screenshot capturer below are
          // given config.targetUrl, while the form prober and network logger
          // get entry.url. Confirm against their signatures that this is intended.
          const scanResult = await scanPage(page, config.targetUrl);
          pageVisitResult.pageScan = scanResult;
          pageVisitResult.httpStatus = scanResult.statusCode;
          for (const link of scanResult.links) {
            if (shouldCrawl(link.href, config.targetUrl, {
              include: config.include,
              exclude: config.exclude,
              followExternal: config.followExternal
            })) {
              const enqueued = frontier.enqueue({
                url: link.href,
                depth: entry.depth + 1,
                referrer: entry.url
              });
              // An edge is recorded only when the frontier accepted the link.
              if (enqueued) {
                pageVisitResult.navigationEdges.push({
                  from: entry.url,
                  to: link.href,
                  trigger: "link",
                  triggerText: link.text || void 0
                });
              }
            }
          }
        } catch (err) {
          const message = describeError(err);
          logger.warn(`Scanner error on ${entry.url}: ${message}`);
          pageVisitResult.collectorErrors.push({
            collector: "page-scanner",
            message
          });
        }
        if (!config.noScreenshots) {
          try {
            const screenshotsDir = path.join(config.outputDir, "screenshots");
            const ssResult = await captureScreenshots(page, config.targetUrl, screenshotsDir, config.viewport);
            pageVisitResult.screenshot = ssResult;
          } catch (err) {
            const message = describeError(err);
            logger.warn(`Screenshot error on ${entry.url}: ${message}`);
            pageVisitResult.collectorErrors.push({ collector: "screenshot-capturer", message });
          }
        }
        try {
          const formResult = await probeForms(page, entry.url);
          pageVisitResult.formProbe = formResult;
        } catch (err) {
          const message = describeError(err);
          logger.warn(`Form probe error on ${entry.url}: ${message}`);
          pageVisitResult.collectorErrors.push({ collector: "form-prober", message });
        }
      } else {
        const errorMessage = visitResult.error || `Navigation failed: ${visitResult.status}`;
        errors.push({
          timestamp: new Date().toISOString(),
          url: entry.url,
          code: "ERR_NAVIGATION",
          message: errorMessage,
          status: visitResult.status === "timeout" ? "timeout" : "network_error",
          httpStatus: visitResult.httpStatus
        });
      }
      try {
        const networkResult = networkLogger.stop();
        pageVisitResult.networkLog = networkResult;
      } catch (err) {
        const message = describeError(err);
        logger.warn(`Network log error on ${entry.url}: ${message}`);
        pageVisitResult.collectorErrors.push({ collector: "network-logger", message });
      }
      pageVisitResult.durationMs = Date.now() - pageStartTime;
      pages.push(pageVisitResult);
      try {
        await page.close();
      } catch (err) {
        logger.debug(`Page close error on ${entry.url}: ${describeError(err)}`);
      }
      if (config.delay > 0 && !interrupted) {
        await sleep(config.delay);
      }
      const statusIcon = visitResult.status === "ok" ? "OK" : visitResult.status.toUpperCase();
      logger.info(
        `[${visitedCount}/${frontier.totalSeen}] ${statusIcon} ${entry.url}${entry.depth > 0 ? ` (depth ${entry.depth})` : ""}`
      );
      if (visitResult.status !== "ok" && visitResult.error) {
        logger.warn(` Error: ${visitResult.error}`);
      }
    }
    if (!interrupted && completionStatus === "complete") {
      if (visitedCount >= config.maxPages && !frontier.isEmpty) {
        completionStatus = "max_pages_reached";
      }
    }
    logger.debug("Closing browser context...");
    if (context) {
      await context.close().catch((err) => {
        logger.warn(`Context close error (HAR may not be written): ${describeError(err)}`);
      });
    }
    logger.debug("Closing browser...");
    if (browser) {
      await browser.close().catch((err) => {
        logger.warn(`Browser close error: ${describeError(err)}`);
      });
    }
    // Cleared so the finally block does not try to close them a second time.
    context = void 0;
    browser = void 0;
    const finishedAt = new Date();
    // Whatever is still queued was discovered but never visited.
    const unvisitedUrls = [];
    while (!frontier.isEmpty) {
      const remaining = frontier.dequeue();
      if (remaining) unvisitedUrls.push(remaining.url);
    }
    const crawlResult = buildCrawlResult(finishedAt, unvisitedUrls, completionStatus);
    let outputPaths = {};
    try {
      outputPaths = await writeOutputFiles(config.outputDir, crawlResult, pages);
    } catch (err) {
      logger.error(`Failed to write output files: ${describeError(err)}`);
    }
    const artifacts = assembleArtifacts(crawlResult);
    try {
      if (artifacts.forms.length > 0) {
        await fs.writeFile(
          path.join(config.outputDir, "forms.json"),
          JSON.stringify(artifacts.forms, null, 2),
          "utf-8"
        );
        logger.info(` Forms: ${artifacts.forms.length} found`);
      }
      if (artifacts.apiEndpoints.length > 0) {
        await fs.writeFile(
          path.join(config.outputDir, "api-map.json"),
          JSON.stringify(artifacts.apiEndpoints, null, 2),
          "utf-8"
        );
        logger.info(` API endpoints: ${artifacts.apiEndpoints.length} groups`);
      }
      const allEdges = pages.flatMap((p) => p.navigationEdges);
      if (allEdges.length > 0) {
        const flowGraph = buildFlowGraph(allEdges, config.targetUrl);
        const mermaidDef = generateMermaidDefinition(flowGraph);
        await fs.writeFile(
          path.join(config.outputDir, "flow-graph.mmd"),
          mermaidDef,
          "utf-8"
        );
        logger.info(` Flow graph: ${flowGraph.nodes.length} nodes, ${flowGraph.edges.length} edges`);
      }
      if (artifacts.apiEndpoints.length > 0) {
        try {
          const spec = generateOpenApiSpec(artifacts.apiEndpoints, {
            title: `API - ${config.targetUrl}`,
            targetUrl: config.targetUrl
          });
          await writeOpenApiSpec(spec, path.join(config.outputDir, "openapi.json"));
          logger.info(" OpenAPI spec written");
        } catch (err) {
          logger.warn(`OpenAPI generation failed: ${describeError(err)}`);
        }
      }
      try {
        const reportInput = {
          targetUrl: config.targetUrl,
          crawlDate: new Date(crawlStartTime).toISOString(),
          duration: (Date.now() - crawlStartTime) / 1e3,
          pagesVisited: visitedCount,
          errors: errors.length,
          pages: pages.filter((p) => p.status === "ok").map((p) => ({
            url: p.url,
            title: p.pageScan?.title || "",
            status: p.httpStatus || 0,
            depth: p.depth,
            contentHash: p.pageScan?.contentHash
          })),
          forms: artifacts.forms.map((f) => ({
            url: f.action || "",
            action: f.action,
            method: f.method,
            fields: f.fields.map((field) => ({
              name: field.name,
              type: field.type,
              required: field.required
            }))
          })),
          apiEndpoints: artifacts.apiEndpoints.map((g) => ({
            pattern: g.pattern,
            method: g.method,
            examples: g.observedUrls.slice(0, 5).map((url) => ({
              url,
              status: g.exampleResponse.statusCode
            }))
          })),
          // Mermaid-to-SVG rendering is a later feature; omit flow diagram from HTML for now.
          flowDiagramSvg: void 0
        };
        const html = generateReportHtml(reportInput);
        await fs.writeFile(path.join(config.outputDir, "report.html"), html, "utf-8");
        logger.success(`HTML report: ${path.join(config.outputDir, "report.html")}`);
      } catch (err) {
        logger.warn(`Report generation failed: ${describeError(err)}`);
      }
    } catch (err) {
      logger.warn(`Additional output generation error: ${describeError(err)}`);
    }
    logger.success(
      `Crawl complete: ${pages.length} pages visited, ${frontier.totalSeen} URLs discovered, ${errors.length} errors`
    );
    logger.info(` Duration: ${(crawlResult.durationMs / 1e3).toFixed(1)}s`);
    logger.info(` Status: ${completionStatus}`);
    logger.info(` Output: ${config.outputDir}`);
    return {
      artifacts,
      outputPaths: {
        outputDir: config.outputDir,
        sitemapJson: outputPaths.sitemapJson,
        harFile: config.noHar ? void 0 : path.join(config.outputDir, "crawl.har")
      },
      errors,
      completionStatus
    };
  } catch (err) {
    const message = describeError(err);
    logger.error(`Fatal crawl error: ${message}`);
    errors.push({
      timestamp: new Date().toISOString(),
      url: config.targetUrl,
      code: "ERR_FATAL",
      message
    });
    // The frontier is not reachable here, so no unvisited URLs are reported.
    const crawlResult = buildCrawlResult(new Date(), [], "error");
    try {
      // Best effort: keep whatever was collected before the failure.
      await ensureOutputDir(config.outputDir);
      await writeOutputFiles(config.outputDir, crawlResult, pages);
    } catch (writeErr) {
      logger.debug(`Could not write partial output after fatal error: ${describeError(writeErr)}`);
    }
    const artifacts = assembleArtifacts(crawlResult);
    return {
      artifacts,
      outputPaths: { outputDir: config.outputDir },
      errors,
      completionStatus: "error"
    };
  } finally {
    process.removeListener("SIGINT", sigintHandler);
    // Still set only when the normal shutdown above was not reached; close
    // failures are deliberately ignored at this point.
    if (context) {
      await context.close().catch(() => {
      });
    }
    if (browser) {
      await browser.close().catch(() => {
      });
    }
  }
}
3579
+
3580
+ // src/diff/diff-engine.ts
3581
+ import { readFile as readFile2 } from "fs/promises";
3582
+ import { join as join3 } from "path";
3583
/**
 * Reads a UTF-8 file and parses it as JSON.
 *
 * @param {string} filePath - File to read.
 * @returns {Promise<*>} The parsed value, or `null` when the file cannot be
 *   read or does not contain valid JSON.
 */
async function loadJson(filePath) {
  let parsed;
  try {
    const rawText = await readFile2(filePath, "utf-8");
    parsed = JSON.parse(rawText);
  } catch {
    // Read and parse failures are both reported as "no data".
    parsed = null;
  }
  return parsed;
}
3591
/**
 * Loads and validates `manifest.json` from a bundle directory.
 *
 * @param {string} bundleDir - Directory containing `manifest.json`.
 * @returns {Promise<object>} The parsed manifest.
 * @throws {DiffError} With code "manifest_invalid" when the manifest cannot
 *   be loaded or its `version` is not exactly 1.
 */
async function loadManifest(bundleDir) {
  const rejectManifest = (detail) => {
    throw new DiffError("manifest_invalid", detail);
  };
  const loaded = await loadJson(join3(bundleDir, "manifest.json"));
  if (!loaded) {
    rejectManifest(`Could not load manifest.json from bundle: ${bundleDir}`);
  }
  // Only manifest format version 1 is understood.
  if (loaded.version !== 1) {
    rejectManifest(`Unsupported manifest version ${loaded.version} in: ${bundleDir}`);
  }
  return loaded;
}
3608
/**
 * Identity key used to match a form across two crawls: the form id when it
 * has one, otherwise its action and method.
 *
 * @param {object} form - Form record (`id`, `action`, `method`).
 * @returns {string} `id:<id>` or `<action>:<method>`.
 */
function formKey(form) {
  return form.id ? `id:${form.id}` : `${form.action}:${form.method}`;
}
3612
/**
 * Identity key used to match an API endpoint group across two crawls.
 *
 * @param {object} endpoint - Endpoint group (`method`, `pattern`).
 * @returns {string} `<method> <pattern>`.
 */
function apiKey(endpoint) {
  const { method, pattern } = endpoint;
  return `${method} ${pattern}`;
}
3615
/**
 * Compares two route lists by URL.
 *
 * A route present on both sides counts as changed when its `title`,
 * `statusCode` or `contentHash` differs (strict inequality); each differing
 * property is reported as `{ old, new }`.
 *
 * @param {Array<object>} oldRoutes - Routes of the baseline crawl.
 * @param {Array<object>} newRoutes - Routes of the newer crawl.
 * @returns {{added: Array<object>, removed: Array<object>, changed: Array<object>, unchangedCount: number}}
 */
function diffRoutes(oldRoutes, newRoutes) {
  const comparedProps = ["title", "statusCode", "contentHash"];
  const indexByUrl = (routes) => {
    const index = new Map();
    for (const route of routes) {
      index.set(route.url, route);
    }
    return index;
  };
  const baseline = indexByUrl(oldRoutes);
  const current = indexByUrl(newRoutes);
  const diff = { added: [], removed: [], changed: [], unchangedCount: 0 };
  current.forEach((route, url) => {
    const previous = baseline.get(url);
    if (!previous) {
      diff.added.push(route);
      return;
    }
    const changes = {};
    for (const prop of comparedProps) {
      if (previous[prop] !== route[prop]) {
        changes[prop] = { old: previous[prop], new: route[prop] };
      }
    }
    if (Object.keys(changes).length > 0) {
      diff.changed.push({ url, changes });
    } else {
      diff.unchangedCount += 1;
    }
  });
  baseline.forEach((route, url) => {
    if (!current.has(url)) {
      diff.removed.push(route);
    }
  });
  return diff;
}
3655
/**
 * Compares the fields of two versions of a form, matching fields by `name`.
 *
 * For fields present on both sides the following are compared: `type`,
 * `required`, `pattern` (reported as `validationPattern`), `placeholder`,
 * and the set of option values. Missing pattern/placeholder values are
 * reported as empty strings.
 *
 * @param {Array<object>} oldFields - Fields of the baseline form.
 * @param {Array<object>} newFields - Fields of the newer form.
 * @returns {{fieldsAdded: Array<object>, fieldsRemoved: Array<object>, fieldsChanged: Array<object>}}
 */
function diffFields(oldFields, newFields) {
  const indexByName = (fields) => new Map(fields.map((field) => [field.name, field]));
  const optionValues = (field) => new Set((field.options ?? []).map((option) => option.value));
  // Collects the per-property differences between two versions of one field.
  const describeChanges = (before, after) => {
    const delta = {};
    if (before.type !== after.type) {
      delta.type = { old: before.type, new: after.type };
    }
    if (before.required !== after.required) {
      delta.required = { old: before.required, new: after.required };
    }
    if (before.pattern !== after.pattern) {
      delta.validationPattern = { old: before.pattern ?? "", new: after.pattern ?? "" };
    }
    if (before.placeholder !== after.placeholder) {
      delta.placeholder = { old: before.placeholder ?? "", new: after.placeholder ?? "" };
    }
    if (before.options || after.options) {
      const previousValues = optionValues(before);
      const currentValues = optionValues(after);
      const added = [...currentValues].filter((value) => !previousValues.has(value));
      const removed = [...previousValues].filter((value) => !currentValues.has(value));
      if (added.length > 0 || removed.length > 0) {
        delta.options = { added, removed };
      }
    }
    return delta;
  };
  const baseline = indexByName(oldFields);
  const current = indexByName(newFields);
  const fieldsAdded = [];
  const fieldsChanged = [];
  current.forEach((field, name) => {
    const previous = baseline.get(name);
    if (!previous) {
      fieldsAdded.push(field);
      return;
    }
    const changes = describeChanges(previous, field);
    if (Object.keys(changes).length > 0) {
      fieldsChanged.push({ name, changes });
    }
  });
  const fieldsRemoved = [];
  baseline.forEach((field, name) => {
    if (!current.has(name)) {
      fieldsRemoved.push(field);
    }
  });
  return { fieldsAdded, fieldsRemoved, fieldsChanged };
}
3712
/**
 * Compares two form lists, matching forms by `formKey`.
 *
 * A form present on both sides counts as changed when its fields differ
 * (per `diffFields`) or its `action` / `method` differ.
 *
 * @param {Array<object>} oldForms - Forms of the baseline crawl.
 * @param {Array<object>} newForms - Forms of the newer crawl.
 * @returns {{added: Array<object>, removed: Array<object>, changed: Array<object>, unchangedCount: number}}
 */
function diffForms(oldForms, newForms) {
  const indexByKey = (forms) => new Map(forms.map((form) => [formKey(form), form]));
  const baseline = indexByKey(oldForms);
  const current = indexByKey(newForms);
  const added = [];
  const changed = [];
  let unchangedCount = 0;
  current.forEach((form, key) => {
    const previous = baseline.get(key);
    if (!previous) {
      added.push(form);
      return;
    }
    const { fieldsAdded, fieldsRemoved, fieldsChanged } = diffFields(previous.fields, form.fields);
    const changes = {};
    if (fieldsAdded.length > 0) {
      changes.fieldsAdded = fieldsAdded;
    }
    if (fieldsRemoved.length > 0) {
      changes.fieldsRemoved = fieldsRemoved;
    }
    if (fieldsChanged.length > 0) {
      changes.fieldsChanged = fieldsChanged;
    }
    if (previous.action !== form.action) {
      changes.actionChanged = { old: previous.action, new: form.action };
    }
    if (previous.method !== form.method) {
      changes.methodChanged = { old: previous.method, new: form.method };
    }
    if (Object.keys(changes).length === 0) {
      unchangedCount += 1;
      return;
    }
    changed.push({
      formId: key,
      // best available context
      pageUrl: form.action,
      changes
    });
  });
  const removed = [];
  baseline.forEach((form, key) => {
    if (!current.has(key)) {
      removed.push(form);
    }
  });
  return { added, removed, changed, unchangedCount };
}
3766
/**
 * Compares two API endpoint lists and classifies every endpoint as added,
 * removed, changed or unchanged.
 *
 * Endpoints are matched by `apiKey(endpoint)`. For endpoints present on both
 * sides the following are compared:
 *  - observed status codes (set difference in both directions),
 *  - the first observed content type (only when both sides have one),
 *  - the example response body size (only when both sizes are > 0 and the
 *    relative change exceeds 10%).
 *
 * Endpoint records that lack `observedStatusCodes`, `observedContentTypes`
 * or `exampleResponse` are treated as having no data for that aspect instead
 * of aborting the whole diff with a TypeError.
 *
 * @param {Array<object>} oldEndpoints - Endpoints from the older bundle.
 * @param {Array<object>} newEndpoints - Endpoints from the newer bundle.
 * @returns {{added: object[], removed: object[], changed: object[], unchangedCount: number}}
 *   `changed` entries have the shape `{ endpointId, changes }`.
 */
function diffApi(oldEndpoints, newEndpoints) {
  const oldByKey = new Map(oldEndpoints.map((e) => [apiKey(e), e]));
  const newByKey = new Map(newEndpoints.map((e) => [apiKey(e), e]));
  const added = [];
  const removed = [];
  const changed = [];
  let unchangedCount = 0;

  for (const [key, newEp] of newByKey) {
    const oldEp = oldByKey.get(key);
    if (!oldEp) {
      added.push(newEp);
      continue;
    }
    const changes = {};

    const oldStatuses = new Set(oldEp.observedStatusCodes ?? []);
    const newStatuses = new Set(newEp.observedStatusCodes ?? []);
    const statusCodesAdded = [...newStatuses].filter((s) => !oldStatuses.has(s));
    const statusCodesRemoved = [...oldStatuses].filter((s) => !newStatuses.has(s));
    if (statusCodesAdded.length > 0) {
      changes.statusCodesAdded = statusCodesAdded;
    }
    if (statusCodesRemoved.length > 0) {
      changes.statusCodesRemoved = statusCodesRemoved;
    }

    // Only the first observed content type on each side is compared.
    const oldContentTypes = oldEp.observedContentTypes ?? [];
    const newContentTypes = newEp.observedContentTypes ?? [];
    if (oldContentTypes.length > 0 && newContentTypes.length > 0 && oldContentTypes[0] !== newContentTypes[0]) {
      changes.contentTypeChanged = {
        old: oldContentTypes[0],
        new: newContentTypes[0]
      };
    }

    // A missing example response counts as size 0, which skips the comparison.
    const oldSize = oldEp.exampleResponse?.bodySize ?? 0;
    const newSize = newEp.exampleResponse?.bodySize ?? 0;
    if (oldSize > 0 && newSize > 0) {
      const percentChange = Math.abs((newSize - oldSize) / oldSize * 100);
      if (percentChange > 10) {
        changes.bodySizeChange = {
          old: oldSize,
          new: newSize,
          // Rounded to two decimal places.
          percentChange: Math.round(percentChange * 100) / 100
        };
      }
    }

    if (Object.keys(changes).length > 0) {
      changed.push({ endpointId: key, changes });
    } else {
      unchangedCount++;
    }
  }

  for (const [key, oldEp] of oldByKey) {
    if (!newByKey.has(key)) {
      removed.push(oldEp);
    }
  }
  return { added, removed, changed, unchangedCount };
}
3828
/**
 * Compares two screenshot entry lists by URL and SHA-256 hash only.
 *
 * No pixel analysis is done here: an entry whose hash differs is reported
 * with `diffPercentage: 100`, zeroed pixel counts and an empty diff image
 * path.
 *
 * @param {Array<{url: string, sha256: string, path: string}>} oldScreenshots
 * @param {Array<{url: string, sha256: string, path: string}>} newScreenshots
 * @param {string} oldDir - Directory that old entry paths are joined onto.
 * @param {string} newDir - Directory that new entry paths are joined onto.
 * @returns {{added: object[], removed: object[], changed: object[], unchangedCount: number}}
 */
function diffScreenshots(oldScreenshots, newScreenshots, oldDir, newDir) {
  const indexByUrl = (entries) => new Map(entries.map((entry) => [entry.url, entry]));
  const before = indexByUrl(oldScreenshots);
  const after = indexByUrl(newScreenshots);

  const added = [];
  const changed = [];
  let unchangedCount = 0;

  after.forEach((current, url) => {
    const previous = before.get(url);
    if (!previous) {
      added.push({ url, screenshotPath: join3(newDir, current.path) });
      return;
    }
    if (previous.sha256 === current.sha256) {
      unchangedCount += 1;
      return;
    }
    changed.push({
      url,
      diffPercentage: 100,
      // Pixel counts are unknown without pixel analysis.
      diffPixelCount: 0,
      totalPixels: 0,
      // No diff image is generated in hash-only mode.
      diffImagePath: "",
      oldScreenshotPath: join3(oldDir, previous.path),
      newScreenshotPath: join3(newDir, current.path)
    });
  });

  const removed = [...before.keys()]
    .filter((url) => !after.has(url))
    .map((url) => ({ url }));

  return { added, removed, changed, unchangedCount };
}
3865
/**
 * Produces the `{ url, sha256, path }` screenshot entries for one bundle.
 *
 * Uses `data/screenshot-manifest.json` when `loadJson` returns an array for
 * it (mapping `fullPageHash` / `fullPagePath`). Otherwise falls back to the
 * bundle manifest's files of type "screenshot", where the file path doubles
 * as the entry's `url` because no better identifier is available.
 *
 * @param {string} bundleDir - Bundle root directory.
 * @param {{files: Array<{path: string, sha256: string, type: string}>}} manifest
 * @returns {Promise<Array<{url: string, sha256: string, path: string}>>}
 */
async function loadScreenshotEntries(bundleDir, manifest) {
  const detailedPath = join3(bundleDir, "data", "screenshot-manifest.json");
  const detailed = await loadJson(detailedPath);

  if (!Array.isArray(detailed)) {
    const fallback = [];
    for (const file of manifest.files) {
      if (file.type === "screenshot") {
        fallback.push({ url: file.path, sha256: file.sha256, path: file.path });
      }
    }
    return fallback;
  }

  return detailed.map((shot) => {
    return { url: shot.url, sha256: shot.fullPageHash, path: shot.fullPagePath };
  });
}
3881
/**
 * Loads two bundles from disk and computes the full diff between them:
 * routes, forms, API endpoints and screenshots, plus a numeric summary.
 *
 * Data files are read from `<bundle>/data/*.json`; a file for which
 * `loadJson` yields null/undefined is treated as an empty list.
 *
 * @param {string} oldDir - Root directory of the older bundle.
 * @param {string} newDir - Root directory of the newer bundle.
 * @returns {Promise<object>} `{ meta, hasChanges, sitemap, forms, api, screenshots, summary }`.
 */
async function diffBundles(oldDir, newDir) {
  // Runs the same loader against both bundles concurrently.
  const loadBoth = (loader) => Promise.all([loader(oldDir), loader(newDir)]);
  const dataFile = (fileName) => (dir) => loadJson(join3(dir, "data", fileName));

  const [oldManifest, newManifest] = await loadBoth((dir) => loadManifest(dir));
  const [oldSitemap, newSitemap] = await loadBoth(dataFile("sitemap.json"));
  const [oldForms, newForms] = await loadBoth(dataFile("forms.json"));
  const [oldApiMap, newApiMap] = await loadBoth(dataFile("api-map.json"));
  const [oldScreenshotEntries, newScreenshotEntries] = await Promise.all([
    loadScreenshotEntries(oldDir, oldManifest),
    loadScreenshotEntries(newDir, newManifest)
  ]);

  const sitemap = diffRoutes(oldSitemap ?? [], newSitemap ?? []);
  const forms = diffForms(oldForms ?? [], newForms ?? []);
  const api = diffApi(oldApiMap ?? [], newApiMap ?? []);
  const screenshots = diffScreenshots(oldScreenshotEntries, newScreenshotEntries, oldDir, newDir);

  const tally = (section) => ({
    added: section.added.length,
    removed: section.removed.length,
    changed: section.changed.length,
    unchanged: section.unchangedCount
  });
  const summary = {
    routes: tally(sitemap),
    forms: tally(forms),
    api: tally(api),
    screenshots: tally(screenshots)
  };

  // Unchanged counts are deliberately excluded from the change total.
  const changeTotal = [summary.routes, summary.forms, summary.api, summary.screenshots].reduce(
    (sum, counts) => sum + counts.added + counts.removed + counts.changed,
    0
  );
  const hasChanges = changeTotal > 0;

  const describeBundle = (path, manifest) => ({
    path,
    createdAt: manifest.createdAt,
    targetUrl: manifest.config.targetUrl,
    toolVersion: manifest.toolVersion
  });

  return {
    meta: {
      oldBundle: describeBundle(oldDir, oldManifest),
      newBundle: describeBundle(newDir, newManifest),
      comparedAt: (/* @__PURE__ */ new Date()).toISOString()
    },
    hasChanges,
    sitemap,
    forms,
    api,
    screenshots,
    summary
  };
}
3962
+
3963
+ // src/diff/diff-report.ts
3964
// Content-Security-Policy for the generated diff report. The report markup
// contains no <script> elements, so no script source is allowed: with
// `default-src 'none'` any injected inline script is blocked by the browser.
// Inline styles stay enabled for the embedded <style> block.
var CSP_META2 = `<meta http-equiv="Content-Security-Policy" content="default-src 'none'; style-src 'unsafe-inline'; img-src data: blob:;">`;
// Stylesheet embedded verbatim into the diff report's <style> element.
var STYLES = `
* { margin: 0; padding: 0; box-sizing: border-box; }
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; line-height: 1.6; color: #1a1a2e; background: #f8f9fa; padding: 2rem; max-width: 1200px; margin: 0 auto; }
h1 { font-size: 1.8rem; margin-bottom: 0.5rem; }
h2 { font-size: 1.4rem; margin: 2rem 0 1rem; border-bottom: 2px solid #e0e0e0; padding-bottom: 0.5rem; }
h3 { font-size: 1.1rem; margin: 1rem 0 0.5rem; }
.meta { color: #666; font-size: 0.9rem; margin-bottom: 2rem; }
.summary { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
.summary-card { background: #fff; border-radius: 8px; padding: 1rem; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
.summary-card h3 { margin: 0 0 0.5rem; font-size: 1rem; }
.summary-card .count { font-size: 2rem; font-weight: bold; }
.no-changes { background: #d4edda; color: #155724; padding: 1rem; border-radius: 8px; text-align: center; font-size: 1.2rem; }
.badge { display: inline-block; padding: 2px 8px; border-radius: 4px; font-size: 0.75rem; font-weight: 600; text-transform: uppercase; }
.badge-added { background: #d4edda; color: #155724; }
.badge-removed { background: #f8d7da; color: #721c24; }
.badge-changed { background: #fff3cd; color: #856404; }
.badge-unchanged { background: #e2e3e5; color: #383d41; }
table { width: 100%; border-collapse: collapse; margin: 1rem 0; background: #fff; border-radius: 8px; overflow: hidden; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
th, td { padding: 0.75rem 1rem; text-align: left; border-bottom: 1px solid #e0e0e0; }
th { background: #f1f3f5; font-weight: 600; font-size: 0.85rem; text-transform: uppercase; color: #495057; }
tr:last-child td { border-bottom: none; }
.change-detail { font-size: 0.85rem; color: #495057; }
.old-value { color: #721c24; text-decoration: line-through; }
.new-value { color: #155724; font-weight: 500; }
.section { margin-bottom: 2rem; }
.empty-section { color: #6c757d; font-style: italic; padding: 1rem; }
ul.change-list { list-style: none; padding: 0; }
ul.change-list li { padding: 0.5rem 0; border-bottom: 1px solid #f0f0f0; }
ul.change-list li:last-child { border-bottom: none; }
`;
3995
/**
 * Builds the HTML for a small status badge.
 *
 * The value is inserted as-is into both the CSS class name and the label, so
 * callers must pass a trusted identifier (this file uses the literals
 * "added", "removed", "changed" and "unchanged").
 *
 * @param {string} type - Badge kind; used as class suffix and visible text.
 * @returns {string} A `<span>` element as an HTML string.
 */
function badge(type) {
  const cssClass = `badge badge-${type}`;
  return `<span class="${cssClass}">${type}</span>`;
}
3998
/**
 * Renders one summary card: a title, the total number of differences
 * (added + removed + changed; unchanged is not counted) and one badge line
 * per non-zero counter.
 *
 * @param {string} title - Card heading; HTML-escaped before insertion.
 * @param {{added: number, removed: number, changed: number, unchanged: number}} counts
 * @returns {string} HTML string for the card.
 */
function summaryCard(title, counts) {
  const differenceTotal = counts.added + counts.removed + counts.changed;
  // A counter that is not above zero still occupies an (empty) line.
  const breakdownLine = (kind) => (counts[kind] > 0 ? `${badge(kind)} ${counts[kind]}` : "");

  return [
    "",
    `<div class="summary-card">`,
    `<h3>${escapeHtml(title)}</h3>`,
    `<div class="count">${differenceTotal}</div>`,
    `<div class="change-detail">`,
    breakdownLine("added"),
    breakdownLine("removed"),
    breakdownLine("changed"),
    breakdownLine("unchanged"),
    `</div>`,
    `</div>`
  ].join("\n");
}
4012
/**
 * Renders the "Routes" section of the diff report.
 *
 * Emits a placeholder paragraph when there are no added, removed or changed
 * routes; otherwise one table per non-empty category.
 *
 * Every interpolated value is HTML-escaped, including status codes. The
 * route records presumably originate from bundle JSON files, so a status
 * code is not guaranteed to be a number and must not reach the markup raw.
 *
 * @param {{sitemap: {added: object[], removed: object[], changed: object[]}}} diff
 * @returns {string} HTML string for the section.
 */
function renderRoutesSection(diff) {
  const { sitemap } = diff;
  // Coerce first so numbers, null and undefined render exactly as before.
  const escapeValue = (value) => escapeHtml(String(value));

  const hasContent = sitemap.added.length + sitemap.removed.length + sitemap.changed.length > 0;
  if (!hasContent) {
    return [
      "",
      `<div class="section">`,
      `<h2>Routes</h2>`,
      `<p class="empty-section">No route changes detected.</p>`,
      `</div>`
    ].join("\n");
  }

  // Added and removed routes share the same three-column table layout.
  const routeTable = (routes) => {
    let table = `<table><thead><tr><th>URL</th><th>Title</th><th>Status</th></tr></thead><tbody>`;
    for (const route of routes) {
      table += `<tr><td>${escapeHtml(route.url)}</td><td>${escapeHtml(route.title)}</td><td>${escapeValue(route.statusCode)}</td></tr>`;
    }
    return `${table}</tbody></table>`;
  };

  let html = `<div class="section"><h2>Routes</h2>`;
  if (sitemap.added.length > 0) {
    html += `<h3>${badge("added")} Added Routes (${sitemap.added.length})</h3>`;
    html += routeTable(sitemap.added);
  }
  if (sitemap.removed.length > 0) {
    html += `<h3>${badge("removed")} Removed Routes (${sitemap.removed.length})</h3>`;
    html += routeTable(sitemap.removed);
  }
  if (sitemap.changed.length > 0) {
    html += `<h3>${badge("changed")} Modified Routes (${sitemap.changed.length})</h3>`;
    html += `<table><thead><tr><th>URL</th><th>Changes</th></tr></thead><tbody>`;
    for (const change of sitemap.changed) {
      const changeDescs = [];
      if (change.changes.title) {
        changeDescs.push(
          `Title: <span class="old-value">${escapeHtml(change.changes.title.old)}</span> &rarr; <span class="new-value">${escapeHtml(change.changes.title.new)}</span>`
        );
      }
      if (change.changes.statusCode) {
        changeDescs.push(
          `Status: <span class="old-value">${escapeValue(change.changes.statusCode.old)}</span> &rarr; <span class="new-value">${escapeValue(change.changes.statusCode.new)}</span>`
        );
      }
      if (change.changes.contentHash) {
        changeDescs.push(`Content changed (hash differs)`);
      }
      html += `<tr><td>${escapeHtml(change.url)}</td><td class="change-detail">${changeDescs.join("<br>")}</td></tr>`;
    }
    html += `</tbody></table>`;
  }
  html += `</div>`;
  return html;
}
4064
/**
 * Renders the "Forms" section of the diff report.
 *
 * Emits a placeholder paragraph when there are no added, removed or changed
 * forms; otherwise tables for added/removed forms and a list describing each
 * modified form.
 *
 * For modified forms, fields whose definition changed (`fieldsChanged`) are
 * listed by name alongside added and removed fields, so a form whose only
 * difference is a modified field no longer renders with an empty detail line.
 *
 * @param {{forms: {added: object[], removed: object[], changed: object[]}}} diff
 * @returns {string} HTML string for the section.
 */
function renderFormsSection2(diff) {
  const { forms } = diff;
  const hasContent = forms.added.length + forms.removed.length + forms.changed.length > 0;
  if (!hasContent) {
    return [
      "",
      `<div class="section">`,
      `<h2>Forms</h2>`,
      `<p class="empty-section">No form changes detected.</p>`,
      `</div>`
    ].join("\n");
  }

  const fieldNameList = (fields) => fields.map((f) => escapeHtml(f.name)).join(", ");
  // Added and removed forms share the same three-column table layout.
  const formTable = (list) => {
    let table = `<table><thead><tr><th>Action</th><th>Method</th><th>Fields</th></tr></thead><tbody>`;
    for (const form of list) {
      table += `<tr><td>${escapeHtml(form.action)}</td><td>${escapeHtml(form.method)}</td><td>${fieldNameList(form.fields)}</td></tr>`;
    }
    return `${table}</tbody></table>`;
  };

  let html = `<div class="section"><h2>Forms</h2>`;
  if (forms.added.length > 0) {
    html += `<h3>${badge("added")} Added Forms (${forms.added.length})</h3>`;
    html += formTable(forms.added);
  }
  if (forms.removed.length > 0) {
    html += `<h3>${badge("removed")} Removed Forms (${forms.removed.length})</h3>`;
    html += formTable(forms.removed);
  }
  if (forms.changed.length > 0) {
    html += `<h3>${badge("changed")} Modified Forms (${forms.changed.length})</h3>`;
    html += `<ul class="change-list">`;
    for (const change of forms.changed) {
      html += `<li><strong>${escapeHtml(change.formId)}</strong>`;
      const descs = [];
      if (change.changes.fieldsAdded?.length) {
        descs.push(`Fields added: ${fieldNameList(change.changes.fieldsAdded)}`);
      }
      if (change.changes.fieldsRemoved?.length) {
        descs.push(`Fields removed: ${fieldNameList(change.changes.fieldsRemoved)}`);
      }
      // Each fieldsChanged entry carries the field's name; only names are shown.
      if (change.changes.fieldsChanged?.length) {
        descs.push(`Fields changed: ${fieldNameList(change.changes.fieldsChanged)}`);
      }
      if (change.changes.actionChanged) {
        descs.push(`Action: <span class="old-value">${escapeHtml(change.changes.actionChanged.old)}</span> &rarr; <span class="new-value">${escapeHtml(change.changes.actionChanged.new)}</span>`);
      }
      if (change.changes.methodChanged) {
        descs.push(`Method: <span class="old-value">${escapeHtml(change.changes.methodChanged.old)}</span> &rarr; <span class="new-value">${escapeHtml(change.changes.methodChanged.new)}</span>`);
      }
      html += `<div class="change-detail">${descs.join("<br>")}</div></li>`;
    }
    html += `</ul>`;
  }
  html += `</div>`;
  return html;
}
4118
/**
 * Renders the "API Endpoints" section of the diff report.
 *
 * Emits a placeholder paragraph when there are no added, removed or changed
 * endpoints; otherwise tables for added/removed endpoints and a list
 * describing each modified endpoint.
 *
 * Every interpolated value is HTML-escaped, including status-code lists and
 * body sizes. The endpoint records presumably originate from bundle JSON
 * files, so these values are not guaranteed to be numbers.
 *
 * @param {{api: {added: object[], removed: object[], changed: object[]}}} diff
 * @returns {string} HTML string for the section.
 */
function renderApiSection2(diff) {
  const { api } = diff;
  // Coerce first so numeric values render exactly as before.
  const escapeValue = (value) => escapeHtml(String(value));
  const codeList = (codes) => escapeHtml(codes.join(", "));

  const hasContent = api.added.length + api.removed.length + api.changed.length > 0;
  if (!hasContent) {
    return [
      "",
      `<div class="section">`,
      `<h2>API Endpoints</h2>`,
      `<p class="empty-section">No API endpoint changes detected.</p>`,
      `</div>`
    ].join("\n");
  }

  // Added and removed endpoints share the same three-column table layout.
  const endpointTable = (endpoints) => {
    let table = `<table><thead><tr><th>Method</th><th>Pattern</th><th>Status Codes</th></tr></thead><tbody>`;
    for (const ep of endpoints) {
      table += `<tr><td>${escapeHtml(ep.method)}</td><td>${escapeHtml(ep.pattern)}</td><td>${codeList(ep.observedStatusCodes)}</td></tr>`;
    }
    return `${table}</tbody></table>`;
  };

  let html = `<div class="section"><h2>API Endpoints</h2>`;
  if (api.added.length > 0) {
    html += `<h3>${badge("added")} Added Endpoints (${api.added.length})</h3>`;
    html += endpointTable(api.added);
  }
  if (api.removed.length > 0) {
    html += `<h3>${badge("removed")} Removed Endpoints (${api.removed.length})</h3>`;
    html += endpointTable(api.removed);
  }
  if (api.changed.length > 0) {
    html += `<h3>${badge("changed")} Modified Endpoints (${api.changed.length})</h3>`;
    html += `<ul class="change-list">`;
    for (const change of api.changed) {
      html += `<li><strong>${escapeHtml(change.endpointId)}</strong>`;
      const descs = [];
      if (change.changes.statusCodesAdded?.length) {
        descs.push(`New status codes: ${codeList(change.changes.statusCodesAdded)}`);
      }
      if (change.changes.statusCodesRemoved?.length) {
        descs.push(`Removed status codes: ${codeList(change.changes.statusCodesRemoved)}`);
      }
      if (change.changes.contentTypeChanged) {
        descs.push(`Content-Type: <span class="old-value">${escapeHtml(change.changes.contentTypeChanged.old)}</span> &rarr; <span class="new-value">${escapeHtml(change.changes.contentTypeChanged.new)}</span>`);
      }
      if (change.changes.bodySizeChange) {
        const size = change.changes.bodySizeChange;
        descs.push(`Body size: ${escapeValue(size.old)}B &rarr; ${escapeValue(size.new)}B (${escapeValue(size.percentChange)}%)`);
      }
      html += `<div class="change-detail">${descs.join("<br>")}</div></li>`;
    }
    html += `</ul>`;
  }
  html += `</div>`;
  return html;
}
4170
/**
 * Renders the "Screenshots" section of the diff report.
 *
 * Emits a placeholder paragraph when nothing was added, removed or changed;
 * otherwise one table per non-empty category. URLs are HTML-escaped;
 * `diffPercentage` is interpolated as-is.
 *
 * @param {{screenshots: {added: object[], removed: object[], changed: object[]}}} diff
 * @returns {string} HTML string for the section.
 */
function renderScreenshotsSection2(diff) {
  const { added, removed, changed } = diff.screenshots;

  if (!(added.length + removed.length + changed.length > 0)) {
    return [
      "",
      `<div class="section">`,
      `<h2>Screenshots</h2>`,
      `<p class="empty-section">No screenshot changes detected.</p>`,
      `</div>`
    ].join("\n");
  }

  // Single-column table listing only the URL of each entry.
  const urlTable = (entries) => {
    const rows = entries.map((entry) => `<tr><td>${escapeHtml(entry.url)}</td></tr>`);
    return `<table><thead><tr><th>URL</th></tr></thead><tbody>${rows.join("")}</tbody></table>`;
  };

  const parts = [`<div class="section"><h2>Screenshots</h2>`];
  if (added.length > 0) {
    parts.push(`<h3>${badge("added")} New Screenshots (${added.length})</h3>`);
    parts.push(urlTable(added));
  }
  if (removed.length > 0) {
    parts.push(`<h3>${badge("removed")} Removed Screenshots (${removed.length})</h3>`);
    parts.push(urlTable(removed));
  }
  if (changed.length > 0) {
    parts.push(`<h3>${badge("changed")} Changed Screenshots (${changed.length})</h3>`);
    const rows = changed.map(
      (entry) => `<tr><td>${escapeHtml(entry.url)}</td><td>${entry.diffPercentage}% changed</td></tr>`
    );
    parts.push(`<table><thead><tr><th>URL</th><th>Diff</th></tr></thead><tbody>${rows.join("")}</tbody></table>`);
  }
  parts.push(`</div>`);
  return parts.join("");
}
4208
/**
 * Assembles the complete, self-contained HTML document for a bundle diff.
 *
 * The page consists of a header with both bundles' target URL (falling back
 * to the bundle path when the URL is falsy) and creation time, four summary
 * cards, an optional "no changes" notice, and the four detail sections.
 *
 * @param {object} diff - Result object with `meta`, `summary`, `hasChanges`
 *   and the per-category diffs consumed by the section renderers.
 * @returns {string} The full HTML document.
 */
function generateDiffReportHtml(diff) {
  const { meta, summary, hasChanges } = diff;

  const summarySection = [
    "",
    `<div class="summary">`,
    summaryCard("Routes", summary.routes),
    summaryCard("Forms", summary.forms),
    summaryCard("API Endpoints", summary.api),
    summaryCard("Screenshots", summary.screenshots),
    `</div>`
  ].join("\n");

  let noChangesNotice = "";
  if (!hasChanges) {
    noChangesNotice = `<div class="no-changes">No changes detected between bundles.</div>`;
  }

  const bundleLine = (label, bundle) =>
    `<p>${label}: ${escapeHtml(bundle.targetUrl || bundle.path)} (${escapeHtml(bundle.createdAt)})</p>`;

  const documentLines = [
    `<!DOCTYPE html>`,
    `<html lang="en">`,
    `<head>`,
    `<meta charset="UTF-8">`,
    `<meta name="viewport" content="width=device-width, initial-scale=1.0">`,
    CSP_META2,
    `<title>Diff Report \u2014 playwright-archaeologist</title>`,
    `<style>${STYLES}</style>`,
    `</head>`,
    `<body>`,
    `<h1>Regression Diff Report</h1>`,
    `<div class="meta">`,
    bundleLine("Old bundle", meta.oldBundle),
    bundleLine("New bundle", meta.newBundle),
    `<p>Compared at: ${escapeHtml(meta.comparedAt)}</p>`,
    `</div>`,
    "",
    summarySection,
    noChangesNotice,
    "",
    renderRoutesSection(diff),
    renderFormsSection2(diff),
    renderApiSection2(diff),
    renderScreenshotsSection2(diff),
    `</body>`,
    `</html>`
  ];
  return documentLines.join("\n");
}
4245
+
4246
+ // src/bundle/bundle-creator.ts
4247
+ import { createHash as createHash2 } from "crypto";
4248
+ import {
4249
+ readdir,
4250
+ readFile as readFile3,
4251
+ stat as stat2,
4252
+ mkdir as mkdir4,
4253
+ copyFile,
4254
+ writeFile as writeFile3
4255
+ } from "fs/promises";
4256
+ import { join as join4, basename as basename2, extname as extname2 } from "path";
4257
/**
 * Maps a file path to the manifest file type, based on its extension
 * (case-insensitive).
 *
 * @param {string} filePath - Path of the file to classify.
 * @returns {"json" | "screenshot" | "api-snapshot" | null} The file type, or
 *   null for extensions that are not bundled.
 */
function classifyFile(filePath) {
  const extension = extname2(filePath).toLowerCase();
  if (extension === ".har") {
    return "api-snapshot";
  }
  if ([".png", ".jpg", ".jpeg", ".webp"].includes(extension)) {
    return "screenshot";
  }
  // HTML reports are stored alongside the data and share the "json" type.
  if (extension === ".json" || extension === ".html") {
    return "json";
  }
  return null;
}
4276
/**
 * Computes the SHA-256 digest of a file's contents.
 *
 * The whole file is read into memory before hashing.
 *
 * @param {string} filePath - Path of the file to hash.
 * @returns {Promise<string>} Lowercase hex digest.
 */
async function computeSha2562(filePath) {
  const bytes = await readFile3(filePath);
  const hasher = createHash2("sha256");
  hasher.update(bytes);
  return hasher.digest("hex");
}
4280
/**
 * Recursively lists every regular file below a directory.
 *
 * Entries that are neither regular files nor directories are ignored. A
 * directory that cannot be read contributes no files instead of failing the
 * walk.
 *
 * @param {string} dir - Directory to walk.
 * @returns {Promise<string[]>} File paths, each prefixed with `dir`.
 */
async function collectFiles(dir) {
  let dirents;
  try {
    dirents = await readdir(dir, { withFileTypes: true });
  } catch {
    // Best effort: an unreadable or missing directory is treated as empty.
    return [];
  }

  const found = [];
  for (const dirent of dirents) {
    const entryPath = join4(dir, dirent.name);
    if (dirent.isFile()) {
      found.push(entryPath);
      continue;
    }
    if (dirent.isDirectory()) {
      const below = await collectFiles(entryPath);
      found.push(...below);
    }
  }
  return found;
}
4299
/**
 * Chooses the bundle subdirectory a file is copied into.
 *
 * @param {string} filePath - Source file path.
 * @returns {"" | "screenshots" | "data"} Empty string (bundle root) for a
 *   file named `report.html` (case-insensitive), "screenshots" for image
 *   extensions, and "data" for everything else.
 */
function targetSubdir(filePath) {
  if (basename2(filePath).toLowerCase() === "report.html") {
    return "";
  }
  const imageExtensions = new Set([".png", ".jpg", ".jpeg", ".webp"]);
  const extension = extname2(filePath).toLowerCase();
  return imageExtensions.has(extension) ? "screenshots" : "data";
}
4307
/**
 * Derives crawl statistics from the JSON files in an output directory.
 *
 * Each of `sitemap.json`, `forms.json` and `api-map.json` contributes the
 * length of its top-level array. A file that is missing, unreadable, not
 * valid JSON or not an array contributes 0. `screenshotCount` and `duration`
 * are always returned as 0 by this function.
 *
 * @param {string} outputDir - Directory containing the crawl output files.
 * @returns {Promise<{pagesVisited: number, formsFound: number, apiEndpoints: number, screenshotCount: number, duration: number}>}
 */
async function loadStats(outputDir) {
  const countArrayEntries = async (fileName) => {
    try {
      const parsed = JSON.parse(await readFile3(join4(outputDir, fileName), "utf-8"));
      return Array.isArray(parsed) ? parsed.length : 0;
    } catch {
      // Best effort: any read or parse failure leaves the counter at 0.
      return 0;
    }
  };

  const pagesVisited = await countArrayEntries("sitemap.json");
  const formsFound = await countArrayEntries("forms.json");
  const apiEndpoints = await countArrayEntries("api-map.json");

  return {
    pagesVisited,
    formsFound,
    apiEndpoints,
    screenshotCount: 0,
    duration: 0
  };
}
4344
/**
 * Builds the manifest `config` object for an output directory.
 *
 * Starts from fixed defaults and overrides `targetUrl`, `depth` and
 * `viewport` with values from `<outputDir>/meta.json` when they are present
 * and well-typed. A viewport is only accepted when both `width` and `height`
 * are numbers. A missing, unreadable or malformed `meta.json` leaves the
 * defaults untouched.
 *
 * @param {string} outputDir - Directory that may contain `meta.json`.
 * @returns {Promise<{targetUrl: string, depth: number, viewport: {width: number, height: number}, concurrency: number, maxPages: number, followExternal: boolean, deepClick: boolean}>}
 */
async function loadConfig(outputDir) {
  const config = {
    targetUrl: "",
    depth: 0,
    viewport: { width: 1280, height: 720 },
    concurrency: 1,
    maxPages: 100,
    followExternal: false,
    deepClick: false
  };
  try {
    const metaPath = join4(outputDir, "meta.json");
    const meta = JSON.parse(await readFile3(metaPath, "utf-8"));
    if (typeof meta.targetUrl === "string") {
      config.targetUrl = meta.targetUrl;
    }
    if (typeof meta.depth === "number") {
      config.depth = meta.depth;
    }
    const viewport = meta.viewport;
    if (
      viewport &&
      typeof viewport === "object" &&
      typeof viewport.width === "number" &&
      typeof viewport.height === "number"
    ) {
      config.viewport = viewport;
    }
  } catch {
    // Best effort: meta.json is optional, so any failure keeps the defaults.
  }
  return config;
}
4369
/**
 * Creates a bundle directory from a crawl output directory.
 *
 * Every recognised file below `outputDir` is copied into `bundlePath`
 * (into `screenshots/`, `data/` or the bundle root), hashed and recorded in
 * `manifest.json`, which is written last.
 *
 * NOTE(review): files are placed by base name only, so two source files
 * with the same name in different subdirectories map to the same
 * destination path — confirm whether the crawler can produce such output.
 *
 * @param {string} outputDir - Existing directory with the crawl output.
 * @param {string} bundlePath - Directory to create the bundle in.
 * @returns {Promise<object>} The manifest object that was written.
 * @throws {BundleError} With code "create_failed" when `outputDir` cannot be
 *   stat'ed or is not a directory.
 */
async function createBundle(outputDir, bundlePath) {
  let outputStat;
  try {
    outputStat = await stat2(outputDir);
  } catch (err) {
    throw new BundleError(
      "create_failed",
      `Output directory not found: ${outputDir}`,
      bundlePath,
      { cause: err }
    );
  }
  if (!outputStat.isDirectory()) {
    throw new BundleError(
      "create_failed",
      `Output path is not a directory: ${outputDir}`,
      bundlePath
    );
  }

  await mkdir4(join4(bundlePath, "screenshots"), { recursive: true });
  await mkdir4(join4(bundlePath, "data"), { recursive: true });

  const sourcePaths = await collectFiles(outputDir);
  const fileEntries = [];
  for (const sourcePath of sourcePaths) {
    const type = classifyFile(sourcePath);
    if (type === null) {
      // Unrecognised extensions are left out of the bundle.
      continue;
    }
    const name = basename2(sourcePath);
    const subdir = targetSubdir(sourcePath);
    const relativePath = subdir ? join4(subdir, name) : name;

    await copyFile(sourcePath, join4(bundlePath, relativePath));
    // Hash and size are taken from the source file, after the copy.
    const sha256 = await computeSha2562(sourcePath);
    const { size } = await stat2(sourcePath);

    fileEntries.push({
      // Manifest paths always use forward slashes.
      path: relativePath.replace(/\\/g, "/"),
      sha256,
      size,
      type
    });
  }

  const [stats, config] = await Promise.all([
    loadStats(outputDir),
    loadConfig(outputDir)
  ]);
  stats.screenshotCount = fileEntries.filter((entry) => entry.type === "screenshot").length;

  const manifest = {
    version: 1,
    tool: "playwright-archaeologist",
    // TODO: read from package.json at build time
    toolVersion: "0.1.0",
    createdAt: (/* @__PURE__ */ new Date()).toISOString(),
    config,
    stats,
    files: fileEntries
  };
  await writeFile3(join4(bundlePath, "manifest.json"), JSON.stringify(manifest, null, 2), "utf-8");
  return manifest;
}
4433
+
4434
+ export {
4435
+ ViewportSchema,
4436
+ parseViewport,
4437
+ normalizeEntryUrl,
4438
+ CrawlConfigSchema,
4439
+ DiffConfigSchema,
4440
+ ArchaeologistError,
4441
+ ConfigError,
4442
+ AuthError,
4443
+ CrawlError,
4444
+ NavigationError,
4445
+ CollectorError,
4446
+ DiffError,
4447
+ BundleError,
4448
+ Logger,
4449
+ logger,
4450
+ Frontier,
4451
+ captureScreenshots,
4452
+ probeForms,
4453
+ createNetworkLogger,
4454
+ groupEndpoints,
4455
+ detectCycles,
4456
+ buildFlowGraph,
4457
+ generateMermaidDefinition,
4458
+ generateOpenApiSpec,
4459
+ writeOpenApiSpec,
4460
+ escapeHtml,
4461
+ escapeAttribute,
4462
+ escapeJsonInHtml,
4463
+ generateReportHtml,
4464
+ dig,
4465
+ diffBundles,
4466
+ generateDiffReportHtml,
4467
+ createBundle
4468
+ };
4469
+ //# sourceMappingURL=chunk-F5WCXM7I.js.map