playwright-archaeologist 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 playwright-archaeologist contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,392 @@
1
+ # playwright-archaeologist
2
+
3
+ **Generate a complete behavioral specification of any running web app — no source code required.**
4
+
5
+ [![npm version](https://img.shields.io/npm/v/playwright-archaeologist)](https://www.npmjs.com/package/playwright-archaeologist)
6
+ [![license](https://img.shields.io/npm/l/playwright-archaeologist)](./LICENSE)
7
+ [![node](https://img.shields.io/node/v/playwright-archaeologist)](https://nodejs.org/)
8
+
9
+ Point `playwright-archaeologist` at a URL and get back a full behavioral spec: sitemap, form catalog, API map with OpenAPI 3.0 schema, screenshots, navigation flow graph, and a regression baseline you can diff later.
10
+
11
+ ---
12
+
13
+ ## Quick Start
14
+
15
+ ```bash
16
+ # Install globally
17
+ npm install -g playwright-archaeologist
18
+
19
+ # Download Chromium (one-time)
20
+ pa install
21
+
22
+ # Crawl a site
23
+ pa dig https://example.com
24
+
25
+ # View the report
26
+ open .archaeologist/report.html
27
+ ```
28
+
29
+ Or use `npx` without installing:
30
+
31
+ ```bash
32
+ npx playwright-archaeologist install
33
+ npx playwright-archaeologist dig https://example.com
34
+ ```
35
+
36
+ ---
37
+
38
+ ## Features
39
+
40
+ - **Zero source code access** — works on any running web app, staging or production
41
+ - **SPA-aware crawling** — Navigation API + History API patching + MutationObserver for client-side route detection
42
+ - **Authenticated crawling** — run auth scripts or inject cookies before crawling protected sites
43
+ - **Screenshot atlas** — full-page and viewport screenshots with a browsable gallery
44
+ - **API discovery** — auto-generates OpenAPI 3.0 specs from observed network traffic
45
+ - **Form catalog** — extracts every form with field metadata, validation rules, and structure
46
+ - **Flow graph** — Mermaid navigation diagrams showing how pages connect
47
+ - **Regression diff** — compare two crawl snapshots, detect structural and visual changes
48
+ - **Security-first** — SSRF protection, credential scrubbing, browser CSP hardening
49
+ - **Resume support** — checkpoint and resume interrupted crawls
50
+
51
+ ---
52
+
53
+ ## Installation
54
+
55
+ ### Requirements
56
+
57
+ - Node.js >= 20.0.0
58
+ - Chromium is downloaded automatically via `pa install`
59
+
60
+ ### npm
61
+
62
+ ```bash
63
+ npm install -g playwright-archaeologist
64
+ pa install
65
+ ```
66
+
67
+ ### As a dev dependency
68
+
69
+ ```bash
70
+ npm install --save-dev playwright-archaeologist
71
+ npx pa install
72
+ ```
73
+
74
+ ---
75
+
76
+ ## Usage
77
+
78
+ ### Crawl a website
79
+
80
+ ```bash
81
+ # Basic crawl
82
+ pa dig https://myapp.com
83
+
84
+ # Limit depth and pages
85
+ pa dig https://myapp.com --depth 3 --max-pages 100
86
+
87
+ # Custom viewport
88
+ pa dig https://myapp.com --viewport 1440x900
89
+
90
+ # Skip screenshots for a faster crawl
91
+ pa dig https://myapp.com --no-screenshots
92
+
93
+ # Enable deep click exploration for SPAs
94
+ pa dig https://myapp.com --deep-click
95
+
96
+ # Custom output directory
97
+ pa dig https://myapp.com -o ./crawl-output
98
+
99
+ # Resume an interrupted crawl
100
+ pa dig https://myapp.com --resume
101
+ ```
102
+
103
+ ### Compare two snapshots
104
+
105
+ ```bash
106
+ # Compare crawl bundles (exit code 0 = identical, 1 = changes)
107
+ pa diff .archaeologist/bundle-old.zip .archaeologist/bundle-new.zip
108
+
109
+ # Generate an HTML diff report
110
+ pa diff old.zip new.zip --format-html diff-report.html
111
+
112
+ # Generate a JSON diff report
113
+ pa diff old.zip new.zip --format-json diff-report.json
114
+ ```
115
+
116
+ ### Authenticated crawling
117
+
118
+ ```bash
119
+ # Using an auth script
120
+ pa dig https://myapp.com --auth ./login.js
121
+
122
+ # Using cookies
123
+ pa dig https://myapp.com --cookies ./cookies.json
124
+ ```
125
+
126
+ ---
127
+
128
+ ## Configuration Reference
129
+
130
+ ### `pa dig` options
131
+
132
+ | Option | Default | Description |
133
+ |---|---|---|
134
+ | `-d, --depth <n>` | `5` | Maximum crawl depth from the entry URL |
135
+ | `--max-pages <n>` | `1000` | Maximum number of pages to visit |
136
+ | `-c, --concurrency <n>` | `3` | Number of parallel browser contexts |
137
+ | `--auth <script>` | — | Path to an auth script (runs before crawling) |
138
+ | `--cookies <file>` | — | Path to a cookies JSON file |
139
+ | `-o, --output <dir>` | `.archaeologist` | Output directory for all artifacts |
140
+ | `--no-screenshots` | `false` | Skip screenshot capture |
141
+ | `--viewport <WxH>` | `1280x720` | Viewport dimensions |
142
+ | `--viewports <list>` | — | Comma-separated viewport list for multi-viewport screenshots |
143
+ | `--deep-click` | `false` | Click interactive elements to discover SPA routes |
144
+ | `--resume` | `false` | Resume from the last checkpoint |
145
+ | `--include <pattern>` | — | URL patterns to include (repeatable) |
146
+ | `--exclude <pattern>` | — | URL patterns to exclude (repeatable) |
147
+
148
+ ### `pa diff` options
149
+
150
+ | Option | Description |
151
+ |---|---|
152
+ | `--format-html <path>` | Write an HTML diff report |
153
+ | `--format-json <path>` | Write a JSON diff report |
154
+
155
+ ---
156
+
157
+ ## Output Structure
158
+
159
+ After a crawl, the `.archaeologist/` directory contains:
160
+
161
+ ```
162
+ .archaeologist/
163
+ report.html # Browsable HTML report with all findings
164
+ sitemap.json # Discovered pages with metadata
165
+ forms.json # Form catalog with field details
166
+ api.json # Observed API endpoints
167
+ openapi.yaml # Generated OpenAPI 3.0 specification
168
+ flow-graph.svg # Navigation flow diagram (Mermaid)
169
+ screenshots/ # Full-page and viewport screenshots
170
+ index.png
171
+ about.png
172
+ ...
173
+ bundle.zip # Snapshot bundle for regression diffing
174
+ checkpoint.json # Resume checkpoint (deleted on completion)
175
+ ```
176
+
177
+ ---
178
+
179
+ ## Programmatic API
180
+
181
+ Use `playwright-archaeologist` as a library in your own tools:
182
+
183
+ ```typescript
184
+ import { dig } from 'playwright-archaeologist';
185
+
186
+ const result = await dig({
187
+ entryUrl: 'https://myapp.com',
188
+ depth: 3,
189
+ maxPages: 50,
190
+ concurrency: 2,
191
+ output: './my-output',
192
+ screenshots: true,
193
+ viewport: { width: 1280, height: 720 },
194
+ });
195
+
196
+ console.log(`Crawled ${result.pages.length} pages`);
197
+ console.log(`Found ${result.forms.length} forms`);
198
+ console.log(`Discovered ${result.apis.length} API endpoints`);
199
+ ```
200
+
201
+ ### Comparing snapshots programmatically
202
+
203
+ ```typescript
204
+ import { diffBundles, generateDiffReportHtml } from 'playwright-archaeologist';
205
+
206
+ const diff = await diffBundles('./old-bundle.zip', './new-bundle.zip');
207
+
208
+ if (diff.hasChanges) {
209
+ console.log('Changes detected:');
210
+ console.log(` Pages added: ${diff.pages.added.length}`);
211
+ console.log(` Pages removed: ${diff.pages.removed.length}`);
212
+ console.log(` APIs changed: ${diff.apis.modified.length}`);
213
+
214
+ // Generate HTML report
215
+ const html = generateDiffReportHtml(diff);
216
+ await fs.writeFile('diff-report.html', html);
217
+ }
218
+ ```
219
+
220
+ ### Using individual collectors
221
+
222
+ ```typescript
223
+ import { scanPage, probeForms, captureScreenshots } from 'playwright-archaeologist';
224
+ import { chromium } from 'playwright';
225
+
226
+ const browser = await chromium.launch();
227
+ const context = await browser.newContext();
228
+ const page = await context.newPage();
229
+
230
+ await page.goto('https://myapp.com/login');
231
+
232
+ // Scan page structure
233
+ const scan = await scanPage(page);
234
+
235
+ // Probe forms
236
+ const forms = await probeForms(page);
237
+
238
+ // Capture screenshots
239
+ const screenshots = await captureScreenshots(page, {
240
+ viewport: { width: 1280, height: 720 },
241
+ });
242
+
243
+ await browser.close();
244
+ ```
245
+
246
+ ---
247
+
248
+ ## Auth Script Example
249
+
250
+ Auth scripts run in a real browser context before crawling begins. They receive a Playwright `page` object:
251
+
252
+ ```javascript
253
+ // login.js
254
+ export default async function authenticate(page) {
255
+ await page.goto('https://myapp.com/login');
256
+ await page.fill('#email', 'test@example.com');
257
+ await page.fill('#password', process.env.TEST_PASSWORD);
258
+ await page.click('button[type="submit"]');
259
+ await page.waitForURL('**/dashboard');
260
+ }
261
+ ```
262
+
263
+ ```bash
264
+ TEST_PASSWORD=secret pa dig https://myapp.com --auth ./login.js
265
+ ```
266
+
267
+ Auth scripts are statically analyzed before execution and require confirmation for scripts that access the filesystem, network, or run shell commands.
268
+
269
+ ---
270
+
271
+ ## Cookies File Format
272
+
273
+ The cookies file follows the Playwright cookie format:
274
+
275
+ ```json
276
+ [
277
+ {
278
+ "name": "session",
279
+ "value": "abc123",
280
+ "domain": "myapp.com",
281
+ "path": "/",
282
+ "httpOnly": true,
283
+ "secure": true
284
+ }
285
+ ]
286
+ ```
287
+
288
+ ---
289
+
290
+ ## Security Considerations
291
+
292
+ `playwright-archaeologist` is designed to crawl potentially untrusted web applications. Several protections are built in:
293
+
294
+ - **SSRF protection** — Private/internal IP ranges (10.x, 172.16-31.x, 169.254.x, 127.x, ::1) are blocked by default. Only same-origin navigation is permitted unless explicitly expanded.
295
+ - **Credential scrubbing** — Authorization headers, cookies, and bearer tokens are redacted from all output artifacts by default.
296
+ - **Browser hardening** — `bypassCSP: true` for instrumentation, `serviceWorkers: 'block'`, `acceptDownloads: false`, and automatic dialog dismissal to prevent crawler hangs.
297
+ - **Auth script sandboxing** — Auth scripts undergo static analysis before execution. Scripts accessing `fs`, `child_process`, or making network requests outside the target domain trigger a confirmation prompt.
298
+ - **Output sanitization** — All target-sourced data is entity-encoded in HTML reports. Reports include a restrictive CSP meta tag.
299
+
300
+ ---
301
+
302
+ ## CI / Regression Testing
303
+
304
+ Use `playwright-archaeologist` in CI to catch behavioral regressions:
305
+
306
+ ```yaml
307
+ # .github/workflows/behavioral-regression.yml
308
+ name: Behavioral Regression
309
+ on: [pull_request]
310
+
311
+ jobs:
312
+ regression:
313
+ runs-on: ubuntu-latest
314
+ steps:
315
+ - uses: actions/checkout@v4
316
+
317
+ - name: Start app
318
+ run: npm start &
319
+
320
+ - name: Install pa
321
+ run: npx playwright-archaeologist install
322
+
323
+ - name: Crawl
324
+ run: npx pa dig http://localhost:3000 -o ./current
325
+
326
+ - name: Download baseline
327
+ uses: actions/download-artifact@v4
328
+ with:
329
+ name: behavioral-baseline
330
+ path: ./baseline
331
+
332
+ - name: Diff
333
+ run: |
334
+ npx pa diff ./baseline/bundle.zip ./current/bundle.zip \
335
+ --format-html regression-report.html
336
+
337
+ - name: Upload report
338
+ if: failure()
339
+ uses: actions/upload-artifact@v4
340
+ with:
341
+ name: regression-report
342
+ path: regression-report.html
343
+ ```
344
+
345
+ ---
346
+
347
+ ## Contributing
348
+
349
+ Contributions are welcome. Please open an issue first to discuss what you would like to change.
350
+
351
+ ```bash
352
+ # Clone and install
353
+ git clone https://github.com/AshGw/playwright-archaeologist.git
354
+ cd playwright-archaeologist
355
+ npm install
356
+
357
+ # Build
358
+ npm run build
359
+
360
+ # Run tests
361
+ npm test
362
+
363
+ # Run tests in watch mode
364
+ npm run test:watch
365
+
366
+ # Run benchmarks
367
+ npm run bench
368
+ ```
369
+
370
+ ### Project structure
371
+
372
+ ```
373
+ src/
374
+ cli.ts # CLI entry point (Commander.js)
375
+ index.ts # Programmatic API exports
376
+ crawl/ # BFS crawler, frontier, context pool, checkpoints
377
+ collectors/ # Page scanner, form prober, network logger, screenshots
378
+ assembler/ # API grouper, flow graph builder
379
+ auth/ # Auth script handler
380
+ report/ # HTML report generator
381
+ diff/ # Snapshot diff engine and reports
382
+ bundle/ # ZIP bundle creator
383
+ security/ # SSRF guard, credential scrubber, output sanitizer
384
+ types/ # TypeScript interfaces, Zod schemas, error hierarchy
385
+ utils/ # Logger, URL utilities, progress tracker
386
+ ```
387
+
388
+ ---
389
+
390
+ ## License
391
+
392
+ [MIT](./LICENSE)
package/bin/cli.js ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ import '../dist/cli.js';
@@ -0,0 +1,255 @@
1
+ import {
2
+ isSameOrigin,
3
+ resolveUrl
4
+ } from "./chunk-RWPEKZOW.js";
5
+
6
+ // src/collectors/page-scanner.ts
7
/**
 * Build a full structural snapshot of the page the given Playwright Page is
 * currently on: metadata, headings, landmarks, links, interactive elements,
 * navigation timing, a content hash, and a hash-routing heuristic.
 *
 * @param page      Playwright Page, already navigated to the target URL.
 * @param baseUrl   Crawl entry origin used to classify links as external.
 * @param response  Optional navigation Response; status defaults to 200.
 * @returns page-scan record consumed by the crawler/assembler.
 */
async function scanPage(page, baseUrl, response) {
  const url = page.url();
  const statusCode = response?.status() ?? 200;
  // DOM extraction and timing collection are independent round trips; run
  // them concurrently.
  const [domData, timingData] = await Promise.all([
    extractDomData(page),
    extractTiming(page)
  ]);
  return {
    url,
    canonicalUrl: domData.canonicalHref ?? void 0,
    statusCode,
    title: domData.title,
    metaTags: domData.metaTags,
    headings: domData.headings,
    landmarks: domData.landmarks,
    links: processLinks(domData.links, url, baseUrl),
    interactiveElements: processInteractiveElements(domData.interactiveElements),
    timing: timingData,
    contentHash: domData.contentHash,
    // Three or more "#/"-style hrefs strongly suggest hash-based SPA routing.
    hashRoutingDetected: domData.hashIndicators >= 3
  };
}
32
/**
 * Extract structural data from the live DOM in a single page.evaluate round
 * trip: canonical URL, title, meta tags, headings, ARIA landmarks,
 * de-duplicated links, interactive elements with stable selectors, a DJB2
 * hash of the visible body text, and a count of hash-routing link patterns.
 *
 * Best-effort: if evaluation throws (e.g. the page navigated or closed
 * mid-scan) an empty snapshot is returned so the crawl can continue.
 *
 * Fix: the interactive-element text fallback used `el.placeholder ?? el.value`,
 * but DOM string properties such as `placeholder`/`value` are never nullish
 * (they default to ""), so the `value` fallback was dead code. `||` restores
 * the intended placeholder -> value -> "" chain.
 *
 * @param {object} page - Playwright Page
 * @returns {Promise<object>} raw snapshot consumed by scanPage
 */
async function extractDomData(page) {
  try {
    return await page.evaluate(() => {
      const canonicalEl = document.querySelector('link[rel="canonical"]');
      const canonicalHref = canonicalEl?.href ?? null;
      const title = document.title ?? "";

      // Meta tags: keep only entries that have content AND a name/property.
      const metaEls = document.querySelectorAll("meta[name], meta[property], meta[content]");
      const metaTags = [];
      metaEls.forEach((el) => {
        const content = el.getAttribute("content");
        if (!content) return;
        const entry = { content };
        const name = el.getAttribute("name");
        const property = el.getAttribute("property");
        if (name) entry.name = name;
        if (property) entry.property = property;
        if (name || property) {
          metaTags.push(entry);
        }
      });

      // Headings h1-h6 with non-empty text; level parsed from the tag name.
      const headingEls = document.querySelectorAll("h1, h2, h3, h4, h5, h6");
      const headings = [];
      headingEls.forEach((el) => {
        const text = (el.textContent ?? "").trim();
        if (!text) return;
        const level = parseInt(el.tagName[1], 10);
        headings.push({ level, text });
      });

      // Landmarks: sectioning elements plus anything with an explicit role,
      // de-duplicated on role|tag|label so repeated nav bars collapse to one.
      const landmarkSelectors = "nav, main, aside, footer, header, [role]";
      const landmarkEls = document.querySelectorAll(landmarkSelectors);
      const landmarks = [];
      const seenLandmarks = /* @__PURE__ */ new Set();
      landmarkEls.forEach((el) => {
        const tagName = el.tagName.toLowerCase();
        const explicitRole = el.getAttribute("role");
        let role;
        if (explicitRole) {
          role = explicitRole;
        } else {
          // Implicit ARIA roles of the sectioning elements above.
          const implicitRoles = {
            nav: "navigation",
            main: "main",
            aside: "complementary",
            footer: "contentinfo",
            header: "banner"
          };
          role = implicitRoles[tagName] ?? tagName;
        }
        const label = el.getAttribute("aria-label") ?? el.getAttribute("aria-labelledby") ?? void 0;
        const key = `${role}|${tagName}|${label ?? ""}`;
        if (seenLandmarks.has(key)) return;
        seenLandmarks.add(key);
        const entry = { role, tagName };
        if (label) entry.label = label;
        landmarks.push(entry);
      });

      // Links: skip empty/"#"/javascript: hrefs, de-dupe on the
      // browser-resolved absolute URL.
      const linkEls = document.querySelectorAll("a[href]");
      const links = [];
      const seenHrefs = /* @__PURE__ */ new Set();
      linkEls.forEach((el) => {
        const href = el.getAttribute("href");
        if (!href || href === "#" || href.startsWith("javascript:")) return;
        const resolvedHref = el.href;
        if (seenHrefs.has(resolvedHref)) return;
        seenHrefs.add(resolvedHref);
        const text = (el.textContent ?? "").trim();
        const rel = el.getAttribute("rel");
        links.push({ href: resolvedHref, text, rel });
      });

      // Interactive elements: native form controls plus common widget roles.
      const interactiveSelectors = 'button, input, select, textarea, [role="button"], [role="tab"], [role="combobox"], [role="listbox"], [role="slider"], [role="spinbutton"], [role="switch"]';
      const interactiveEls = document.querySelectorAll(interactiveSelectors);
      const interactiveElements = [];
      interactiveEls.forEach((el, index) => {
        const tagName = el.tagName.toLowerCase();
        const type = el.getAttribute("type");
        const role = el.getAttribute("role");
        const ariaLabel = el.getAttribute("aria-label");
        if (tagName === "input" && type === "hidden") return;
        let text = "";
        if (tagName === "input" || tagName === "textarea") {
          // placeholder/value are string properties (never nullish), so `||`
          // is required for the placeholder -> value -> "" fallback;
          // `??` here would make the value fallback unreachable.
          text = el.placeholder || el.value || "";
        } else {
          text = (el.textContent ?? "").trim();
        }
        // Build the most stable selector available:
        // id > name > parent-scoped nth-of-type > positional fallback.
        let selector;
        const id = el.getAttribute("id");
        if (id) {
          selector = `${tagName}#${CSS.escape(id)}`;
        } else {
          const name = el.getAttribute("name");
          if (name) {
            selector = `${tagName}[name="${CSS.escape(name)}"]`;
          } else {
            const parent = el.parentElement;
            if (parent) {
              const siblings = Array.from(parent.querySelectorAll(`:scope > ${tagName}`));
              const nth = siblings.indexOf(el) + 1;
              const parentId = parent.getAttribute("id");
              if (parentId) {
                selector = `#${CSS.escape(parentId)} > ${tagName}:nth-of-type(${nth})`;
              } else {
                selector = `${tagName}:nth-of-type(${nth})`;
              }
            } else {
              // Detached element: synthesize an index-based marker selector.
              selector = `${tagName}[data-arch-index="${index}"]`;
            }
          }
        }
        interactiveElements.push({
          tagName,
          type,
          text: text.slice(0, 200),
          // cap text length
          role,
          ariaLabel,
          selector
        });
      });

      // DJB2 hash of visible body text -> 8-hex-digit content fingerprint.
      const bodyText = (document.body?.innerText ?? "").trim();
      let hash = 5381;
      for (let i = 0; i < bodyText.length; i++) {
        hash = (hash << 5) + hash + bodyText.charCodeAt(i) | 0;
      }
      const contentHash = (hash >>> 0).toString(16).padStart(8, "0");

      // Count "#/" and "#!/" hrefs as hash-routing indicators.
      let hashIndicators = 0;
      linkEls.forEach((el) => {
        const h = el.getAttribute("href");
        if (h && (h.startsWith("#/") || h.startsWith("#!/"))) {
          hashIndicators++;
        }
      });

      return {
        canonicalHref,
        title,
        metaTags,
        headings,
        landmarks,
        links,
        interactiveElements,
        contentHash,
        hashIndicators
      };
    });
  } catch {
    // Page navigated/closed mid-evaluate: return an empty snapshot rather
    // than failing the whole crawl (deliberate best-effort behavior).
    return {
      canonicalHref: null,
      title: "",
      metaTags: [],
      headings: [],
      landmarks: [],
      links: [],
      interactiveElements: [],
      contentHash: "00000000",
      hashIndicators: 0
    };
  }
}
189
/**
 * Collect navigation-timing metrics (load, DOMContentLoaded, optional first
 * contentful paint) from the page's Performance API. Returns zeroed metrics
 * when evaluation fails or no navigation entry exists.
 *
 * @param page Playwright Page
 * @returns { loadTime, domContentLoaded, firstContentfulPaint? } in ms
 */
async function extractTiming(page) {
  try {
    const raw = await page.evaluate(() => {
      const [nav] = performance.getEntriesByType("navigation");
      if (!nav) {
        return { loadTime: 0, domContentLoaded: 0, firstContentfulPaint: null };
      }
      const fcpEntry = performance
        .getEntriesByType("paint")
        .find((entry) => entry.name === "first-contentful-paint");
      return {
        // Unfired events report 0; keep 0 rather than a negative delta.
        loadTime: nav.loadEventEnd > 0 ? Math.round(nav.loadEventEnd - nav.startTime) : 0,
        domContentLoaded: nav.domContentLoadedEventEnd > 0 ? Math.round(nav.domContentLoadedEventEnd - nav.startTime) : 0,
        firstContentfulPaint: fcpEntry ? Math.round(fcpEntry.startTime) : null
      };
    });
    const timing = {
      loadTime: raw.loadTime,
      domContentLoaded: raw.domContentLoaded
    };
    // Only attach FCP when the browser actually reported a paint entry.
    if (raw.firstContentfulPaint != null) {
      timing.firstContentfulPaint = raw.firstContentfulPaint;
    }
    return timing;
  } catch {
    // Evaluation failed (page gone): fall back to zeroed metrics.
    return { loadTime: 0, domContentLoaded: 0 };
  }
}
221
/**
 * Resolve raw link hrefs against the page URL and classify each link as
 * internal or external relative to the crawl's base origin.
 *
 * @param rawLinks Array of { href, text, rel } from the DOM scan.
 * @param pageUrl  URL the hrefs are resolved against.
 * @param baseUrl  Crawl entry origin used for the same-origin check.
 * @returns Array of { href, text, isExternal, rel? }.
 */
function processLinks(rawLinks, pageUrl, baseUrl) {
  return rawLinks.map((raw) => {
    const href = resolveUrl(raw.href, pageUrl);
    const link = {
      href,
      text: raw.text,
      isExternal: !isSameOrigin(href, baseUrl)
    };
    // `rel` is omitted (not null) when the anchor had no rel attribute.
    if (raw.rel) {
      link.rel = raw.rel;
    }
    return link;
  });
}
238
/**
 * Normalize raw interactive-element records: always carry tagName/text/
 * selector, and attach type/role/ariaLabel only when they are truthy so
 * null placeholders from the DOM scan are dropped.
 *
 * @param rawElements Array of { tagName, type, text, role, ariaLabel, selector }.
 * @returns Array of cleaned element records.
 */
function processInteractiveElements(rawElements) {
  const normalized = [];
  for (const raw of rawElements) {
    const entry = {
      tagName: raw.tagName,
      text: raw.text,
      selector: raw.selector
    };
    if (raw.type) entry.type = raw.type;
    if (raw.role) entry.role = raw.role;
    if (raw.ariaLabel) entry.ariaLabel = raw.ariaLabel;
    normalized.push(entry);
  }
  return normalized;
}
251
+
252
+ export {
253
+ scanPage
254
+ };
255
+ //# sourceMappingURL=chunk-7ZQGW5OV.js.map