pagerts 1.5.1 โ†’ 1.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -14,6 +14,8 @@ PagerTS is a secure, modern command-line utility that transforms URLs into struc
14
14
  - โšก **Fast**: Efficient parsing with LinkeDOM and concurrent request handling
15
15
  - ๐Ÿงช **Well-Tested**: Comprehensive test coverage with Jest
16
16
  - ๐Ÿ“ฆ **Easy to Use**: Simple CLI interface with sensible defaults
17
+ - ๐Ÿ—‚๏ธ **Local File Support**: bare `pagerts` parses local file paths and `file:///...` inputs
18
+ - ๐Ÿงญ **Request Header Override**: Optional `--user-agent` flag for sites that behave differently by client
17
19
 
18
20
  ## Installation
19
21
 
@@ -21,13 +23,13 @@ PagerTS is a secure, modern command-line utility that transforms URLs into struc
21
23
 
22
24
  ```bash
23
25
  npm install -g pagerts
24
- pagerts <url>
26
+ pagerts ./page.html
25
27
  ```
26
28
 
27
29
  ### Using npx (No Installation Required)
28
30
 
29
31
  ```bash
30
- npx pagerts <url>
32
+ npx pagerts ./page.html
31
33
  ```
32
34
 
33
35
  ### From Source
@@ -44,22 +46,34 @@ npm link
44
46
 
45
47
  ### Basic Usage
46
48
 
47
- Extract resources from a remote URL:
49
+ Extract resources from a local HTML file path:
48
50
 
49
51
  ```bash
50
- pagerts https://example.com
52
+ pagerts ./page.html
51
53
  ```
52
54
 
53
- Extract from multiple URLs:
55
+ Extract resources from a local file URL:
54
56
 
55
57
  ```bash
56
- pagerts https://example.com https://example.org
58
+ pagerts file:///path/to/file.html
57
59
  ```
58
60
 
59
- Extract from a local HTML file:
61
+ Fetch resources from a remote URL:
60
62
 
61
63
  ```bash
62
- pagerts file:///path/to/file.html
64
+ pagerts fetch https://website.com
65
+ ```
66
+
67
+ Override the HTTP user-agent for remote fetches:
68
+
69
+ ```bash
70
+ pagerts fetch --user-agent "Mozilla/5.0 (X11; Linux x86_64; rv:139.0) Gecko/20100101 Firefox/139.0" https://example.com
71
+ ```
72
+
73
+ Fetch from multiple remote URLs:
74
+
75
+ ```bash
76
+ pagerts fetch https://example.com https://example.org
63
77
  ```
64
78
 
65
79
  ### Output Format
@@ -98,7 +112,8 @@ PagerTS takes security seriously. See [SECURITY.md](./SECURITY.md) for:
98
112
 
99
113
  ### Built-in Security Features
100
114
 
101
- - โœ… URL validation (only allows `http://`, `https://`, `file://`)
115
+ - โœ… URL validation for remote fetches (only allows `http://` and `https://`)
116
+ - โœ… Local filesystem parsing through plain paths and `file://` inputs on the root command
102
117
  - โœ… Input sanitization to prevent XSS attacks
103
118
  - โœ… Rate limiting (50 requests/minute by default)
104
119
  - โœ… Request timeouts to prevent hanging
@@ -226,6 +241,13 @@ This project is licensed under the MIT License - see the [LICENSE](./LICENSE) fi
226
241
 
227
242
  ## Changelog
228
243
 
244
+ ### v1.5.3
245
+
246
+ - Added `--user-agent` support to the `fetch` command so callers can override the HTTP User-Agent header for remote requests.
247
+ - Made the root `pagerts` command parse local file paths and `file:///` inputs directly, while keeping `fetch` remote-only.
248
+ - Improved CLI/runtime compatibility for locally resolved entrypoints and packaged builds.
249
+ - Updated focused tests to cover the new file-protocol validation and user-agent override behavior.
250
+
229
251
  ### v0.3.0 -> v1.4.3 summary
230
252
 
231
253
  Key changes in this range:
package/bin/main.js CHANGED
@@ -5,6 +5,7 @@ import { Command, createArgument, Option } from "commander";
5
5
  import { createRequire } from "node:module";
6
6
  import { resolve } from "node:path";
7
7
  import { fileURLToPath } from "node:url";
8
+ import { realpathSync } from "node:fs";
8
9
 
9
10
  // src/extractors/AbstractExtractor.ts
10
11
  var AbstractExtractor = class {
@@ -81,9 +82,11 @@ import { parseHTML } from "linkedom";
81
82
  var PageFetcher = class {
82
83
  timeout;
83
84
  maxRetries;
84
- constructor(timeout = 1e4, maxRetries = 2) {
85
+ userAgent;
86
+ constructor(timeout = 1e4, maxRetries = 2, userAgent) {
85
87
  this.timeout = timeout;
86
88
  this.maxRetries = maxRetries;
89
+ this.userAgent = userAgent;
87
90
  }
88
91
  buildDOMResult(html, url) {
89
92
  const { document } = parseHTML(html);
@@ -105,13 +108,16 @@ var PageFetcher = class {
105
108
  controller.abort(new Error("Request timeout"));
106
109
  }, this.timeout);
107
110
  }
108
- const content = await fetch(url, { signal: controller.signal }).then(async (response) => {
109
- const buffer = await response.arrayBuffer();
110
- const contentType = response.headers.get("content-type") ?? "";
111
- const charsetMatch = /charset=([^\s;]+)/i.exec(contentType);
112
- const html = this.decodeHtml(buffer, charsetMatch?.[1] ?? "utf-8");
113
- return this.buildDOMResult(html, url);
114
- });
111
+ const headers = this.userAgent ? { "user-agent": this.userAgent } : void 0;
112
+ const content = await fetch(url, { headers, signal: controller.signal }).then(
113
+ async (response) => {
114
+ const buffer = await response.arrayBuffer();
115
+ const contentType = response.headers.get("content-type") ?? "";
116
+ const charsetMatch = /charset=([^\s;]+)/i.exec(contentType);
117
+ const html = this.decodeHtml(buffer, charsetMatch?.[1] ?? "utf-8");
118
+ return this.buildDOMResult(html, url);
119
+ }
120
+ );
115
121
  return { url, content };
116
122
  } catch (error) {
117
123
  const abortTimeout = error instanceof Error && error.name === "AbortError";
@@ -276,21 +282,66 @@ async function buildPageMetadata(responses) {
276
282
  }
277
283
  return pageMetadatas;
278
284
  }
285
+ function normalizeLocalPath(value) {
286
+ if (value.startsWith("file://")) {
287
+ return fileURLToPath(value);
288
+ }
289
+ return value;
290
+ }
291
+ async function runFileCommand(paths, options) {
292
+ if (options.failsafe && paths.length > MAX_FILES_FAILSAFE) {
293
+ console.error(
294
+ `
295
+ \u274C ${paths.length} files specified exceeds the safety limit of ${MAX_FILES_FAILSAFE}.`
296
+ );
297
+ console.error(` Pass --no-failsafe to bypass this check and process all files.`);
298
+ process.exit(1);
299
+ }
300
+ if (!options.failsafe && paths.length > MAX_FILES_FAILSAFE) {
301
+ console.error(
302
+ `
303
+ \u26A0\uFE0F Failsafe bypassed: processing ${paths.length} files (limit is ${MAX_FILES_FAILSAFE}).`
304
+ );
305
+ }
306
+ console.error(`
307
+ \u2705 Processing ${paths.length} file(s)...`);
308
+ const fileFetcher = new FileFetcher();
309
+ const normalizedPaths = paths.map((pathValue) => normalizeLocalPath(pathValue));
310
+ const responses = await fileFetcher.fetchAll(normalizedPaths);
311
+ const pageMetadatas = await buildPageMetadata(
312
+ responses.map(({ path, content, error }) => ({ path, content, error }))
313
+ );
314
+ await printer.print(...pageMetadatas);
315
+ }
279
316
  function isCliEntrypoint() {
280
317
  const invokedPath = process.argv[1];
281
318
  if (!invokedPath) {
282
319
  return false;
283
320
  }
284
- return fileURLToPath(import.meta.url) === resolve(invokedPath);
321
+ try {
322
+ return realpathSync(fileURLToPath(import.meta.url)) === realpathSync(resolve(invokedPath));
323
+ } catch {
324
+ return false;
325
+ }
285
326
  }
286
327
  async function runCli(argv = process.argv) {
287
328
  program.name(name).version(version, "-v, --version").description(description);
288
- program.command("fetch", { isDefault: true }).description("fetch and extract resources from remote URL(s)").addArgument(urlArg).addOption(
329
+ program.addArgument(fileArg).addOption(
330
+ new Option("--no-failsafe", `bypass the ${MAX_FILES_FAILSAFE}-file limit safety check`)
331
+ ).action(async (paths, options) => {
332
+ try {
333
+ await runFileCommand(paths, options);
334
+ } catch (error) {
335
+ console.error("\n\u274C An error occurred:", error instanceof Error ? error.message : error);
336
+ process.exit(1);
337
+ }
338
+ });
339
+ program.command("fetch").description("fetch and extract resources from remote URL(s)").addArgument(urlArg).addOption(
289
340
  new Option(
290
341
  "--watch",
291
342
  "keep running: SIGWINCH re-fetches after resize, Ctrl-D releases in-flight requests, Ctrl-C exits"
292
343
  )
293
- ).action(async (urls, options) => {
344
+ ).addOption(new Option("-A, --user-agent <value>", "override the HTTP User-Agent header")).action(async (urls, options) => {
294
345
  try {
295
346
  const { validUrls, errors } = validateUrls(urls);
296
347
  if (errors.length > 0) {
@@ -305,7 +356,7 @@ async function runCli(argv = process.argv) {
305
356
  }
306
357
  console.error(`
307
358
  \u2705 Processing ${validUrls.length} valid URL(s)...`);
308
- const pageFetcher = new PageFetcher(options.watch ? 0 : 1e4, 2);
359
+ const pageFetcher = new PageFetcher(options.watch ? 0 : 1e4, 2, options.userAgent);
309
360
  const execute = async () => {
310
361
  const responses = await pageFetcher.fetchAll(validUrls);
311
362
  const pageMetadatas = await buildPageMetadata(responses);
@@ -342,28 +393,7 @@ async function runCli(argv = process.argv) {
342
393
  new Option("--no-failsafe", `bypass the ${MAX_FILES_FAILSAFE}-file limit safety check`)
343
394
  ).action(async (paths, options) => {
344
395
  try {
345
- if (options.failsafe && paths.length > MAX_FILES_FAILSAFE) {
346
- console.error(
347
- `
348
- \u274C ${paths.length} files specified exceeds the safety limit of ${MAX_FILES_FAILSAFE}.`
349
- );
350
- console.error(` Pass --no-failsafe to bypass this check and process all files.`);
351
- process.exit(1);
352
- }
353
- if (!options.failsafe && paths.length > MAX_FILES_FAILSAFE) {
354
- console.error(
355
- `
356
- \u26A0\uFE0F Failsafe bypassed: processing ${paths.length} files (limit is ${MAX_FILES_FAILSAFE}).`
357
- );
358
- }
359
- console.error(`
360
- \u2705 Processing ${paths.length} file(s)...`);
361
- const fileFetcher = new FileFetcher();
362
- const responses = await fileFetcher.fetchAll(paths);
363
- const pageMetadatas = await buildPageMetadata(
364
- responses.map(({ path, content, error }) => ({ path, content, error }))
365
- );
366
- await printer.print(...pageMetadatas);
396
+ await runFileCommand(paths, options);
367
397
  } catch (error) {
368
398
  console.error("\n\u274C An error occurred:", error instanceof Error ? error.message : error);
369
399
  process.exit(1);
package/bin/main.js.map CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "version": 3,
3
3
  "sources": ["../src/main.ts", "../src/extractors/AbstractExtractor.ts", "../src/extractors/PageExtractor.ts", "../src/resource.ts", "../src/extractors/ResourceExtractor.ts", "../src/page/PageFetcher.ts", "../src/page/FileFetcher.ts", "../src/printers/AbstractResourcePrinter.ts", "../src/printers/JSONStylePrinter.ts", "../src/security.ts"],
4
- "sourcesContent": ["#!/usr/bin/env node\nimport { Command, createArgument, Option } from 'commander';\nimport { createRequire } from 'node:module';\nimport { resolve } from 'node:path';\nimport { fileURLToPath } from 'node:url';\n\nimport { PageExtractor, ResourceExtractor } from './extractors/index.js';\nimport { FileFetcher, MAX_FILES_FAILSAFE, PageFetcher, type PageMetadata } from './page/index.js';\nimport { JSONStylePrinter } from './printers/index.js';\nimport { validateUrls } from './security.js';\n\nconst require = createRequire(import.meta.url);\nconst pkg = require('../package.json') as {\n description: string;\n name: string;\n version: string;\n};\n\nconst { description, name, version } = pkg;\n\nconst program = new Command();\n\nconst urlArg = createArgument('<url...>', 'remote https://URL to extract from');\nconst fileArg = createArgument('<paths...>', 'local file paths to extract from');\n\n// Shared extractor instances.\nconst pageExtractor = new PageExtractor();\nconst resourceExtractor = new ResourceExtractor(['a', 'meta', 'link', 'embed', 'script']);\nconst printer = new JSONStylePrinter();\n\nasync function buildPageMetadata(\n responses: Array<{\n url?: string;\n path?: string;\n content?: import('./page/index.js').DOMResult;\n error?: string;\n }>\n): Promise<PageMetadata[]> {\n const pageMetadatas: PageMetadata[] = [];\n\n for (const { content, url: responseUrl, path, error } of responses) {\n const resolvedUrl = responseUrl ?? path ?? '';\n const resources =\n error !== undefined || !content ? [] : await resourceExtractor.extract(content);\n const descriptor =\n error !== undefined || !content\n ? { url: resolvedUrl, error: error ?? 'Unknown error', resources }\n : await pageExtractor.extract(content);\n pageMetadatas.push({ ...descriptor, resources });\n }\n\n return pageMetadatas;\n}\n\nfunction isCliEntrypoint(): boolean {\n const invokedPath = process.argv[1];\n if (!invokedPath) {\n return false;\n }\n\n return fileURLToPath(import.meta.url) === resolve(invokedPath);\n}\n\nexport async function runCli(argv: string[] = process.argv): Promise<void> {\n program.name(name).version(version, '-v, --version').description(description);\n\n // \u2500\u2500 fetch subcommand (default remote URL mode) \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n program\n .command('fetch', { isDefault: true })\n .description('fetch and extract resources from remote URL(s)')\n .addArgument(urlArg)\n .addOption(\n new Option(\n '--watch',\n 'keep running: SIGWINCH re-fetches after resize, Ctrl-D releases in-flight requests, Ctrl-C exits'\n )\n )\n .action(async (urls: string[], options: { watch: boolean }) => {\n try {\n const { validUrls, errors } = validateUrls(urls);\n\n if (errors.length > 0) {\n console.error('\\n\u274C URL Validation Errors:');\n errors.forEach(({ url: invalidUrl, error }) => {\n console.error(` - ${invalidUrl}: ${error}`);\n });\n }\n\n if (validUrls.length === 0) {\n console.error('\\n\u274C No valid URLs to process. Exiting.');\n process.exit(1);\n }\n\n console.error(`\\n\u2705 Processing ${validUrls.length} valid URL(s)...`);\n\n const pageFetcher = new PageFetcher(options.watch ? 0 : 10000, 2);\n\n const execute = async (): Promise<void> => {\n const responses = await pageFetcher.fetchAll(validUrls);\n const pageMetadatas = await buildPageMetadata(responses);\n await printer.print(...pageMetadatas);\n };\n\n if (options.watch) {\n process.stdin.resume();\n process.on('SIGINT', () => process.exit(0));\n\n let activeExecution: Promise<void> | null = null;\n process.stdin.on('end', () => {\n activeExecution = null;\n });\n\n let winchTimer: ReturnType<typeof setTimeout> | null = null;\n process.on('SIGWINCH', () => {\n if (winchTimer !== null) clearTimeout(winchTimer);\n winchTimer = setTimeout(() => {\n winchTimer = null;\n activeExecution = execute().catch((err: unknown) => {\n console.error('\\n\u274C An error occurred:', err instanceof Error ? err.message : err);\n });\n }, 150);\n });\n\n activeExecution = execute();\n await activeExecution;\n } else {\n await execute();\n }\n } catch (error) {\n console.error('\\n\u274C An error occurred:', error instanceof Error ? error.message : error);\n process.exit(1);\n }\n });\n\n // \u2500\u2500 file subcommand (local filesystem access) \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n program\n .command('file')\n .description('extract resources from local file(s) via direct filesystem access')\n .addArgument(fileArg)\n .addOption(\n new Option('--no-failsafe', `bypass the ${MAX_FILES_FAILSAFE}-file limit safety check`)\n )\n .action(async (paths: string[], options: { failsafe: boolean }) => {\n try {\n if (options.failsafe && paths.length > MAX_FILES_FAILSAFE) {\n console.error(\n `\\n\u274C ${paths.length} files specified exceeds the safety limit of ${MAX_FILES_FAILSAFE}.`\n );\n console.error(` Pass --no-failsafe to bypass this check and process all files.`);\n process.exit(1);\n }\n\n if (!options.failsafe && paths.length > MAX_FILES_FAILSAFE) {\n console.error(\n `\\n\u26A0\uFE0F Failsafe bypassed: processing ${paths.length} files (limit is ${MAX_FILES_FAILSAFE}).`\n );\n }\n\n console.error(`\\n\u2705 Processing ${paths.length} file(s)...`);\n\n const fileFetcher = new FileFetcher();\n const responses = await fileFetcher.fetchAll(paths);\n const pageMetadatas = await buildPageMetadata(\n responses.map(({ path, content, error }) => ({ path, content, error }))\n );\n\n await printer.print(...pageMetadatas);\n } catch (error) {\n console.error('\\n\u274C An error occurred:', error instanceof Error ? error.message : error);\n process.exit(1);\n }\n });\n\n await program.parseAsync(argv);\n}\n\nif (isCliEntrypoint()) {\n runCli().catch((error: unknown) => {\n console.error('\\n\u274C An error occurred:', error instanceof Error ? error.message : error);\n process.exit(1);\n });\n}\n", "export abstract class AbstractExtractor<V, R> {\n constructor(readonly name: string) {}\n abstract extract(value: V): Promise<R>;\n}\n", "import type { Page } from '../page/index.js';\nimport type { DOMResult } from '../page/index.js';\nimport { AbstractExtractor } from './AbstractExtractor.js';\n\nexport class PageExtractor extends AbstractExtractor<DOMResult, Page> {\n constructor() {\n super('page-extractor');\n }\n\n async extract(value: DOMResult): Promise<Page> {\n const {\n window: { document },\n url,\n } = value;\n return { title: document.title, url };\n }\n}\n", "/**\n * @license MIT\n * We are interested in visualising a page as a collection of tags.\n *\n * We wish to work with tags that can be compactly previewed on a webpage.\n * Here we must declare all of the element types that can be used to represent\n * a resource that can be hyperlinked off a webpage.\n */\ntype Tags = HTMLElementTagNameMap;\n\nexport const RESOURCE_DISPLAYABLE_KEYS = [\n 'id',\n 'innerText',\n 'textContent',\n 'class',\n 'ariaLabel',\n 'ariaDescription',\n 'alt',\n] as const;\n\nexport type DisplayableKey = (typeof RESOURCE_DISPLAYABLE_KEYS)[number];\n\nexport const RESOURCE_LINK_KEYS = ['href', 'data-src', 'target', 'action', 'src', 'url'] as const;\n\nexport type LinkKey = (typeof RESOURCE_LINK_KEYS)[number];\n\nexport type AttributeKey = DisplayableKey | LinkKey;\n\nexport type ResourceKey = { key: AttributeKey; value: string };\nexport type ResourceLink = { key: LinkKey; value: string };\n\nexport type ExternalResource = {\n text: ResourceKey;\n link: ResourceLink;\n};\n\nexport type Tag = keyof Tags;\n\nexport type Resource = HTMLElement & {\n [K in AttributeKey]?: string | null;\n};\n\nexport type ResourceByName<T extends keyof Tags> = Tags[T];\n\n// --- adapters ---\n\nconst readAttr = (element: Resource, key: AttributeKey): string | undefined => {\n const v = element.getAttribute(key);\n return v != null && v.trim() !== '' ? v : undefined;\n};\n\nexport function findResourceText(element: Resource): ResourceKey | undefined {\n for (const key of RESOURCE_DISPLAYABLE_KEYS) {\n const value = readAttr(element, key);\n if (value !== undefined) return { key, value };\n }\n return undefined;\n}\n\nexport function findResourceLink(element: Resource): ResourceLink | undefined {\n for (const key of RESOURCE_LINK_KEYS) {\n const value = readAttr(element, key);\n if (value !== undefined) return { key, value };\n }\n return undefined;\n}\n\nexport const isResourceKey = (key: string): key is AttributeKey =>\n (RESOURCE_DISPLAYABLE_KEYS as readonly string[]).includes(key) ||\n (RESOURCE_LINK_KEYS as readonly string[]).includes(key);\n", "import type { DOMResult } from '../page/index.js';\nimport {\n findResourceLink,\n findResourceText,\n type ExternalResource,\n type Resource,\n type Tag,\n} from '../resource.js';\nimport { AbstractExtractor } from './AbstractExtractor.js';\n\nexport class ResourceExtractor extends AbstractExtractor<DOMResult, ExternalResource[]> {\n constructor(private readonly tags: Tag[]) {\n super('page-extractor');\n }\n async extract(value: DOMResult): Promise<ExternalResource[]> {\n const { document } = value.window;\n return this.tags.flatMap((tag) =>\n Array.from(document.querySelectorAll<Resource>(tag)).flatMap((element) => {\n const link = findResourceLink(element);\n if (!link) return [];\n const text = findResourceText(element) ?? { key: 'src' as const, value: link.value };\n return [{ text, link }];\n })\n );\n }\n}\n", "import { parseHTML } from 'linkedom';\n\ntype ParseHTMLResult = {\n document: Document;\n};\n\nexport interface DOMResult {\n window: { document: Document };\n url: string;\n}\n\ninterface PageResponse {\n url: string;\n content?: DOMResult;\n error?: string;\n}\n\nexport class PageFetcher {\n private readonly timeout: number;\n private readonly maxRetries: number;\n\n constructor(timeout = 10000, maxRetries = 2) {\n this.timeout = timeout;\n this.maxRetries = maxRetries;\n }\n\n private buildDOMResult(html: string, url: string): DOMResult {\n const { document } = parseHTML(html) as ParseHTMLResult;\n return { window: { document }, url };\n }\n\n private decodeHtml(buffer: ArrayBuffer, charset: string): string {\n try {\n return new TextDecoder(charset).decode(new Uint8Array(buffer));\n } catch {\n return new TextDecoder('utf-8').decode(new Uint8Array(buffer));\n }\n }\n\n private async fetchPage(url: string, retryCount = 0): Promise<PageResponse> {\n const controller = new AbortController();\n let timeoutId: ReturnType<typeof setTimeout> | null = null;\n\n try {\n if (this.timeout > 0) {\n timeoutId = setTimeout(() => {\n controller.abort(new Error('Request timeout'));\n }, this.timeout);\n }\n\n const content = await fetch(url, { signal: controller.signal }).then(async (response) => {\n const buffer = await response.arrayBuffer();\n const contentType = response.headers.get('content-type') ?? '';\n const charsetMatch = /charset=([^\\s;]+)/i.exec(contentType);\n const html = this.decodeHtml(buffer, charsetMatch?.[1] ?? 'utf-8');\n return this.buildDOMResult(html, url);\n });\n\n return { url, content };\n } catch (error) {\n const abortTimeout = error instanceof Error && error.name === 'AbortError';\n const message = abortTimeout\n ? 'Request timeout'\n : error instanceof Error\n ? error.message\n : 'Unknown error';\n\n // Retry logic for transient errors\n if (retryCount < this.maxRetries && this.isRetryableError(message)) {\n process.stderr.write(`Retrying ${url} (attempt ${retryCount + 1}/${this.maxRetries})...\\n`);\n await this.delay(1000 * (retryCount + 1)); // Exponential backoff\n return this.fetchPage(url, retryCount + 1);\n }\n\n return { url, error: `Failed to fetch: ${message}` };\n } finally {\n if (timeoutId !== null) {\n clearTimeout(timeoutId);\n }\n }\n }\n\n private isRetryableError(message: string): boolean {\n const retryablePatterns = [/timeout/i, /ECONNRESET/i, /ETIMEDOUT/i, /ENOTFOUND/i, /network/i];\n return retryablePatterns.some((pattern) => pattern.test(message));\n }\n\n private delay(ms: number): Promise<void> {\n return new Promise((resolve) => setTimeout(resolve, ms));\n }\n\n async fetchAll(urls: string[]): Promise<PageResponse[]> {\n const responses = await Promise.all(urls.map((url) => this.fetchPage(url)));\n return responses.filter((response) => response.content !== undefined || response.error);\n }\n}\n", "import { readFile } from 'node:fs/promises';\nimport { parseHTML } from 'linkedom';\n\nimport type { DOMResult } from './PageFetcher.js';\n\nexport const MAX_FILES_FAILSAFE = 254;\n\ntype ParseHTMLResult = {\n document: Document;\n};\n\nexport interface FileResponse {\n path: string;\n content?: DOMResult;\n error?: string;\n}\n\nexport class FileFetcher {\n private buildDOMResult(html: string, filePath: string): DOMResult {\n const { document } = parseHTML(html) as ParseHTMLResult;\n return { window: { document }, url: `file://${filePath}` };\n }\n\n async fetchFile(filePath: string): Promise<FileResponse> {\n try {\n // filePath is supplied directly by the CLI user, not derived from network input.\n // eslint-disable-next-line security/detect-non-literal-fs-filename\n const html = await readFile(filePath, 'utf-8');\n return { path: filePath, content: this.buildDOMResult(html, filePath) };\n } catch (error) {\n return {\n path: filePath,\n error: error instanceof Error ? error.message : 'Unknown error',\n };\n }\n }\n\n async fetchAll(filePaths: string[]): Promise<FileResponse[]> {\n return Promise.all(filePaths.map((p) => this.fetchFile(p)));\n }\n}\n", "import type { PageMetadata } from '../page/index.js';\n\nexport abstract class AbstractResourcePrinter {\n constructor() {}\n abstract print(...pages: PageMetadata[]): void | Promise<void>;\n}\n", "import type { PageMetadata } from '../page/index.js';\nimport { AbstractResourcePrinter } from './AbstractResourcePrinter.js';\n\nexport class JSONStylePrinter extends AbstractResourcePrinter {\n print(...pages: PageMetadata[]): void | Promise<void> {\n const json = JSON.stringify(pages);\n process.stdout.write(json + '\\n');\n }\n}\n", "/**\n * Security utilities for URL validation and sanitization\n */\n\nconst ALLOWED_PROTOCOLS = ['http:', 'https:'];\nconst MAX_URL_LENGTH = 2048;\nconst SUSPICIOUS_PATTERNS = [\n /javascript:/i,\n /data:/i,\n /vbscript:/i,\n /<script/i,\n /on\\w+=/i, // Event handlers like onclick=\n];\n\nexport interface ValidationResult {\n isValid: boolean;\n error?: string;\n sanitizedUrl?: string;\n}\n\n/**\n * Validates a URL for security concerns\n * @param url - The URL to validate\n * @returns ValidationResult object with validation status\n */\nexport function validateUrl(url: string): ValidationResult {\n // Check if URL is empty or whitespace\n if (!url || !url.trim()) {\n return {\n isValid: false,\n error: 'URL cannot be empty',\n };\n }\n\n const trimmedUrl = url.trim();\n\n // Check URL length to prevent DoS\n if (trimmedUrl.length > MAX_URL_LENGTH) {\n return {\n isValid: false,\n error: `URL exceeds maximum length of ${MAX_URL_LENGTH} characters`,\n };\n }\n\n // Check for suspicious patterns\n for (const pattern of SUSPICIOUS_PATTERNS) {\n if (pattern.test(trimmedUrl)) {\n return {\n isValid: false,\n error: 'URL contains suspicious patterns',\n };\n }\n }\n\n // Parse the URL\n let parsedUrl: URL;\n try {\n parsedUrl = new URL(trimmedUrl);\n } catch {\n return {\n isValid: false,\n error: 'Invalid URL format',\n };\n }\n\n // Check protocol\n if (!ALLOWED_PROTOCOLS.includes(parsedUrl.protocol)) {\n return {\n isValid: false,\n error: `Protocol ${parsedUrl.protocol} is not allowed. Allowed protocols: ${ALLOWED_PROTOCOLS.join(', ')}`,\n };\n }\n\n // Check for localhost/internal IPs in production (security consideration)\n const hostname = parsedUrl.hostname.toLowerCase();\n const isLocalhost =\n hostname === 'localhost' ||\n hostname === '127.0.0.1' ||\n hostname === '::1' ||\n hostname.startsWith('192.168.') ||\n hostname.startsWith('10.') ||\n /^172\\.(1[6-9]|2\\d|3[01])\\./.test(hostname);\n\n if (isLocalhost) {\n // Allow but warn about localhost URLs\n console.warn(`Warning: Accessing local network resource: ${trimmedUrl}`);\n }\n\n return {\n isValid: true,\n sanitizedUrl: parsedUrl.toString(),\n };\n}\n\n/**\n * Validates an array of URLs\n * @param urls - Array of URLs to validate\n * @returns Object with valid URLs and errors\n */\nexport function validateUrls(urls: string[]): {\n validUrls: string[];\n errors: Array<{ url: string; error: string }>;\n} {\n const validUrls: string[] = [];\n const errors: Array<{ url: string; error: string }> = [];\n\n for (const url of urls) {\n const result = validateUrl(url);\n if (result.isValid && result.sanitizedUrl) {\n validUrls.push(result.sanitizedUrl);\n } else {\n errors.push({\n url,\n error: result.error || 'Unknown validation error',\n });\n }\n }\n\n return { validUrls, errors };\n}\n\n/**\n * Rate limiter to prevent abuse\n */\nexport class RateLimiter {\n private requests: number[] = [];\n private readonly maxRequests: number;\n private readonly windowMs: number;\n\n constructor(maxRequests = 10, windowMs = 60000) {\n this.maxRequests = maxRequests;\n this.windowMs = windowMs;\n }\n\n /**\n * Check if a request is allowed under rate limiting\n * @returns true if request is allowed, false otherwise\n */\n public isAllowed(): boolean {\n const now = Date.now();\n\n // Remove old requests outside the time window\n this.requests = this.requests.filter((time) => now - time < this.windowMs);\n\n if (this.requests.length >= this.maxRequests) {\n return false;\n }\n\n this.requests.push(now);\n return true;\n }\n\n /**\n * Get remaining requests in current window\n */\n public getRemainingRequests(): number {\n const now = Date.now();\n this.requests = this.requests.filter((time) => now - time < this.windowMs);\n return Math.max(0, this.maxRequests - this.requests.length);\n }\n}\n\n/**\n * Sanitizes HTML content to prevent XSS attacks\n * @param text - Text to sanitize\n * @returns Sanitized text\n */\nexport function sanitizeText(text: string): string {\n if (!text) return '';\n\n return text\n .replace(/</g, '&lt;')\n .replace(/>/g, '&gt;')\n .replace(/\"/g, '&quot;')\n .replace(/'/g, '&#x27;')\n .replace(/\\//g, '&#x2F;');\n}\n"],
5
- "mappings": ";;;AACA,SAAS,SAAS,gBAAgB,cAAc;AAChD,SAAS,qBAAqB;AAC9B,SAAS,eAAe;AACxB,SAAS,qBAAqB;;;ACJvB,IAAe,oBAAf,MAAuC;AAAA,EAC5C,YAAqBA,OAAc;AAAd,gBAAAA;AAAA,EAAe;AAEtC;;;ACCO,IAAM,gBAAN,cAA4B,kBAAmC;AAAA,EACpE,cAAc;AACZ,UAAM,gBAAgB;AAAA,EACxB;AAAA,EAEA,MAAM,QAAQ,OAAiC;AAC7C,UAAM;AAAA,MACJ,QAAQ,EAAE,SAAS;AAAA,MACnB;AAAA,IACF,IAAI;AACJ,WAAO,EAAE,OAAO,SAAS,OAAO,IAAI;AAAA,EACtC;AACF;;;ACNO,IAAM,4BAA4B;AAAA,EACvC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAIO,IAAM,qBAAqB,CAAC,QAAQ,YAAY,UAAU,UAAU,OAAO,KAAK;AAwBvF,IAAM,WAAW,CAAC,SAAmB,QAA0C;AAC7E,QAAM,IAAI,QAAQ,aAAa,GAAG;AAClC,SAAO,KAAK,QAAQ,EAAE,KAAK,MAAM,KAAK,IAAI;AAC5C;AAEO,SAAS,iBAAiB,SAA4C;AAC3E,aAAW,OAAO,2BAA2B;AAC3C,UAAM,QAAQ,SAAS,SAAS,GAAG;AACnC,QAAI,UAAU,OAAW,QAAO,EAAE,KAAK,MAAM;AAAA,EAC/C;AACA,SAAO;AACT;AAEO,SAAS,iBAAiB,SAA6C;AAC5E,aAAW,OAAO,oBAAoB;AACpC,UAAM,QAAQ,SAAS,SAAS,GAAG;AACnC,QAAI,UAAU,OAAW,QAAO,EAAE,KAAK,MAAM;AAAA,EAC/C;AACA,SAAO;AACT;;;ACvDO,IAAM,oBAAN,cAAgC,kBAAiD;AAAA,EACtF,YAA6B,MAAa;AACxC,UAAM,gBAAgB;AADK;AAAA,EAE7B;AAAA,EACA,MAAM,QAAQ,OAA+C;AAC3D,UAAM,EAAE,SAAS,IAAI,MAAM;AAC3B,WAAO,KAAK,KAAK;AAAA,MAAQ,CAAC,QACxB,MAAM,KAAK,SAAS,iBAA2B,GAAG,CAAC,EAAE,QAAQ,CAAC,YAAY;AACxE,cAAM,OAAO,iBAAiB,OAAO;AACrC,YAAI,CAAC,KAAM,QAAO,CAAC;AACnB,cAAM,OAAO,iBAAiB,OAAO,KAAK,EAAE,KAAK,OAAgB,OAAO,KAAK,MAAM;AACnF,eAAO,CAAC,EAAE,MAAM,KAAK,CAAC;AAAA,MACxB,CAAC;AAAA,IACH;AAAA,EACF;AACF;;;ACzBA,SAAS,iBAAiB;AAiBnB,IAAM,cAAN,MAAkB;AAAA,EACN;AAAA,EACA;AAAA,EAEjB,YAAY,UAAU,KAAO,aAAa,GAAG;AAC3C,SAAK,UAAU;AACf,SAAK,aAAa;AAAA,EACpB;AAAA,EAEQ,eAAe,MAAc,KAAwB;AAC3D,UAAM,EAAE,SAAS,IAAI,UAAU,IAAI;AACnC,WAAO,EAAE,QAAQ,EAAE,SAAS,GAAG,IAAI;AAAA,EACrC;AAAA,EAEQ,WAAW,QAAqB,SAAyB;AAC/D,QAAI;AACF,aAAO,IAAI,YAAY,OAAO,EAAE,OAAO,IAAI,WAAW,MAAM,CAAC;AAAA,IAC/D,QAAQ;AACN,aAAO,IAAI,YAAY,OAAO,EAAE,OAAO,IAAI,WAAW,MAAM,CAAC;AAAA,IAC/D;AAAA,EACF;AAAA,EAEA,MAAc,UAAU,KAAa,aAAa,GAA0B;AAC1E,UAAM,aAAa,IAAI,gBAAgB;AACvC,QAAI,YAAkD;AAEtD,QAAI;AACF,UAAI,KAAK,UAAU,GAAG;AACpB,oBAAY,WAAW,MAAM;AAC3B,qBAAW,MAAM,IAAI,MAAM,iBAAiB,CAAC;AAAA,QAC/C,GAAG,KAAK,OAAO;AAAA,MACjB;AAEA,YAAM,UAAU,MAAM,MAAM,KAAK,EAAE,QAAQ,WAAW,OAAO,CAAC,EAAE,KAAK,OAAO,aAAa;AACvF,cAAM,SAAS,MAAM,SAAS,YAAY;AAC1C,cAAM,cAAc,SAAS,QAAQ,IAAI,cAAc,KAAK;AAC5D,cAAM,eAAe,qBAAqB,KAAK,WAAW;AAC1D,cAAM,OAAO,KAAK,WAAW,QAAQ,eAAe,CAAC,KAAK,OAAO;AACjE,eAAO,KAAK,eAAe,MAAM,GAAG;AAAA,MACtC,CAAC;AAED,aAAO,EAAE,KAAK,QAAQ;AAAA,IACxB,SAAS,OAAO;AACd,YAAM,eAAe,iBAAiB,SAAS,MAAM,SAAS;AAC9D,YAAM,UAAU,eACZ,oBACA,iBAAiB,QACf,MAAM,UACN;AAGN,UAAI,aAAa,KAAK,cAAc,KAAK,iBAAiB,OAAO,GAAG;AAClE,gBAAQ,OAAO,MAAM,YAAY,GAAG,aAAa,aAAa,CAAC,IAAI,KAAK,UAAU;AAAA,CAAQ;AAC1F,cAAM,KAAK,MAAM,OAAQ,aAAa,EAAE;AACxC,eAAO,KAAK,UAAU,KAAK,aAAa,CAAC;AAAA,MAC3C;AAEA,aAAO,EAAE,KAAK,OAAO,oBAAoB,OAAO,GAAG;AAAA,IACrD,UAAE;AACA,UAAI,cAAc,MAAM;AACtB,qBAAa,SAAS;AAAA,MACxB;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,iBAAiB,SAA0B;AACjD,UAAM,oBAAoB,CAAC,YAAY,eAAe,cAAc,cAAc,UAAU;AAC5F,WAAO,kBAAkB,KAAK,CAAC,YAAY,QAAQ,KAAK,OAAO,CAAC;AAAA,EAClE;AAAA,EAEQ,MAAM,IAA2B;AACvC,WAAO,IAAI,QAAQ,CAACC,aAAY,WAAWA,UAAS,EAAE,CAAC;AAAA,EACzD;AAAA,EAEA,MAAM,SAAS,MAAyC;AACtD,UAAM,YAAY,MAAM,QAAQ,IAAI,KAAK,IAAI,CAAC,QAAQ,KAAK,UAAU,GAAG,CAAC,CAAC;AAC1E,WAAO,UAAU,OAAO,CAAC,aAAa,SAAS,YAAY,UAAa,SAAS,KAAK;AAAA,EACxF;AACF;;;AC/FA,SAAS,gBAAgB;AACzB,SAAS,aAAAC,kBAAiB;AAInB,IAAM,qBAAqB;AAY3B,IAAM,cAAN,MAAkB;AAAA,EACf,eAAe,MAAc,UAA6B;AAChE,UAAM,EAAE,SAAS,IAAIA,WAAU,IAAI;AACnC,WAAO,EAAE,QAAQ,EAAE,SAAS,GAAG,KAAK,UAAU,QAAQ,GAAG;AAAA,EAC3D;AAAA,EAEA,MAAM,UAAU,UAAyC;AACvD,QAAI;AAGF,YAAM,OAAO,MAAM,SAAS,UAAU,OAAO;AAC7C,aAAO,EAAE,MAAM,UAAU,SAAS,KAAK,eAAe,MAAM,QAAQ,EAAE;AAAA,IACxE,SAAS,OAAO;AACd,aAAO;AAAA,QACL,MAAM;AAAA,QACN,OAAO,iBAAiB,QAAQ,MAAM,UAAU;AAAA,MAClD;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,SAAS,WAA8C;AAC3D,WAAO,QAAQ,IAAI,UAAU,IAAI,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC;AAAA,EAC5D;AACF;;;ACtCO,IAAe,0BAAf,MAAuC;AAAA,EAC5C,cAAc;AAAA,EAAC;AAEjB;;;ACFO,IAAM,mBAAN,cAA+B,wBAAwB;AAAA,EAC5D,SAAS,OAA6C;AACpD,UAAM,OAAO,KAAK,UAAU,KAAK;AACjC,YAAQ,OAAO,MAAM,OAAO,IAAI;AAAA,EAClC;AACF;;;ACJA,IAAM,oBAAoB,CAAC,SAAS,QAAQ;AAC5C,IAAM,iBAAiB;AACvB,IAAM,sBAAsB;AAAA,EAC1B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AACF;AAaO,SAAS,YAAY,KAA+B;AAEzD,MAAI,CAAC,OAAO,CAAC,IAAI,KAAK,GAAG;AACvB,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO;AAAA,IACT;AAAA,EACF;AAEA,QAAM,aAAa,IAAI,KAAK;AAG5B,MAAI,WAAW,SAAS,gBAAgB;AACtC,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO,iCAAiC,cAAc;AAAA,IACxD;AAAA,EACF;AAGA,aAAW,WAAW,qBAAqB;AACzC,QAAI,QAAQ,KAAK,UAAU,GAAG;AAC5B,aAAO;AAAA,QACL,SAAS;AAAA,QACT,OAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAGA,MAAI;AACJ,MAAI;AACF,gBAAY,IAAI,IAAI,UAAU;AAAA,EAChC,QAAQ;AACN,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO;AAAA,IACT;AAAA,EACF;AAGA,MAAI,CAAC,kBAAkB,SAAS,UAAU,QAAQ,GAAG;AACnD,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO,YAAY,UAAU,QAAQ,uCAAuC,kBAAkB,KAAK,IAAI,CAAC;AAAA,IAC1G;AAAA,EACF;AAGA,QAAM,WAAW,UAAU,SAAS,YAAY;AAChD,QAAM,cACJ,aAAa,eACb,aAAa,eACb,aAAa,SACb,SAAS,WAAW,UAAU,KAC9B,SAAS,WAAW,KAAK,KACzB,6BAA6B,KAAK,QAAQ;AAE5C,MAAI,aAAa;AAEf,YAAQ,KAAK,8CAA8C,UAAU,EAAE;AAAA,EACzE;AAEA,SAAO;AAAA,IACL,SAAS;AAAA,IACT,cAAc,UAAU,SAAS;AAAA,EACnC;AACF;AAOO,SAAS,aAAa,MAG3B;AACA,QAAM,YAAsB,CAAC;AAC7B,QAAM,SAAgD,CAAC;AAEvD,aAAW,OAAO,MAAM;AACtB,UAAM,SAAS,YAAY,GAAG;AAC9B,QAAI,OAAO,WAAW,OAAO,cAAc;AACzC,gBAAU,KAAK,OAAO,YAAY;AAAA,IACpC,OAAO;AACL,aAAO,KAAK;AAAA,QACV;AAAA,QACA,OAAO,OAAO,SAAS;AAAA,MACzB,CAAC;AAAA,IACH;AAAA,EACF;AAEA,SAAO,EAAE,WAAW,OAAO;AAC7B;;;AT5GA,IAAMC,WAAU,cAAc,YAAY,GAAG;AAC7C,IAAM,MAAMA,SAAQ,iBAAiB;AAMrC,IAAM,EAAE,aAAa,MAAM,QAAQ,IAAI;AAEvC,IAAM,UAAU,IAAI,QAAQ;AAE5B,IAAM,SAAS,eAAe,YAAY,oCAAoC;AAC9E,IAAM,UAAU,eAAe,cAAc,kCAAkC;AAG/E,IAAM,gBAAgB,IAAI,cAAc;AACxC,IAAM,oBAAoB,IAAI,kBAAkB,CAAC,KAAK,QAAQ,QAAQ,SAAS,QAAQ,CAAC;AACxF,IAAM,UAAU,IAAI,iBAAiB;AAErC,eAAe,kBACb,WAMyB;AACzB,QAAM,gBAAgC,CAAC;AAEvC,aAAW,EAAE,SAAS,KAAK,aAAa,MAAM,MAAM,KAAK,WAAW;AAClE,UAAM,cAAc,eAAe,QAAQ;AAC3C,UAAM,YACJ,UAAU,UAAa,CAAC,UAAU,CAAC,IAAI,MAAM,kBAAkB,QAAQ,OAAO;AAChF,UAAM,aACJ,UAAU,UAAa,CAAC,UACpB,EAAE,KAAK,aAAa,OAAO,SAAS,iBAAiB,UAAU,IAC/D,MAAM,cAAc,QAAQ,OAAO;AACzC,kBAAc,KAAK,EAAE,GAAG,YAAY,UAAU,CAAC;AAAA,EACjD;AAEA,SAAO;AACT;AAEA,SAAS,kBAA2B;AAClC,QAAM,cAAc,QAAQ,KAAK,CAAC;AAClC,MAAI,CAAC,aAAa;AAChB,WAAO;AAAA,EACT;AAEA,SAAO,cAAc,YAAY,GAAG,MAAM,QAAQ,WAAW;AAC/D;AAEA,eAAsB,OAAO,OAAiB,QAAQ,MAAqB;AACzE,UAAQ,KAAK,IAAI,EAAE,QAAQ,SAAS,eAAe,EAAE,YAAY,WAAW;AAG5E,UACG,QAAQ,SAAS,EAAE,WAAW,KAAK,CAAC,EACpC,YAAY,gDAAgD,EAC5D,YAAY,MAAM,EAClB;AAAA,IACC,IAAI;AAAA,MACF;AAAA,MACA;AAAA,IACF;AAAA,EACF,EACC,OAAO,OAAO,MAAgB,YAAgC;AAC7D,QAAI;AACF,YAAM,EAAE,WAAW,OAAO,IAAI,aAAa,IAAI;AAE/C,UAAI,OAAO,SAAS,GAAG;AACrB,gBAAQ,MAAM,iCAA4B;AAC1C,eAAO,QAAQ,CAAC,EAAE,KAAK,YAAY,MAAM,MAAM;AAC7C,kBAAQ,MAAM,OAAO,UAAU,KAAK,KAAK,EAAE;AAAA,QAC7C,CAAC;AAAA,MACH;AAEA,UAAI,UAAU,WAAW,GAAG;AAC1B,gBAAQ,MAAM,6CAAwC;AACtD,gBAAQ,KAAK,CAAC;AAAA,MAChB;AAEA,cAAQ,MAAM;AAAA,oBAAkB,UAAU,MAAM,kBAAkB;AAElE,YAAM,cAAc,IAAI,YAAY,QAAQ,QAAQ,IAAI,KAAO,CAAC;AAEhE,YAAM,UAAU,YAA2B;AACzC,cAAM,YAAY,MAAM,YAAY,SAAS,SAAS;AACtD,cAAM,gBAAgB,MAAM,kBAAkB,SAAS;AACvD,cAAM,QAAQ,MAAM,GAAG,aAAa;AAAA,MACtC;AAEA,UAAI,QAAQ,OAAO;AACjB,gBAAQ,MAAM,OAAO;AACrB,gBAAQ,GAAG,UAAU,MAAM,QAAQ,KAAK,CAAC,CAAC;AAE1C,YAAI,kBAAwC;AAC5C,gBAAQ,MAAM,GAAG,OAAO,MAAM;AAC5B,4BAAkB;AAAA,QACpB,CAAC;AAED,YAAI,aAAmD;AACvD,gBAAQ,GAAG,YAAY,MAAM;AAC3B,cAAI,eAAe,KAAM,cAAa,UAAU;AAChD,uBAAa,WAAW,MAAM;AAC5B,yBAAa;AACb,8BAAkB,QAAQ,EAAE,MAAM,CAAC,QAAiB;AAClD,sBAAQ,MAAM,+BAA0B,eAAe,QAAQ,IAAI,UAAU,GAAG;AAAA,YAClF,CAAC;AAAA,UACH,GAAG,GAAG;AAAA,QACR,CAAC;AAED,0BAAkB,QAAQ;AAC1B,cAAM;AAAA,MACR,OAAO;AACL,cAAM,QAAQ;AAAA,MAChB;AAAA,IACF,SAAS,OAAO;AACd,cAAQ,MAAM,+BAA0B,iBAAiB,QAAQ,MAAM,UAAU,KAAK;AACtF,cAAQ,KAAK,CAAC;AAAA,IAChB;AAAA,EACF,CAAC;AAGH,UACG,QAAQ,MAAM,EACd,YAAY,mEAAmE,EAC/E,YAAY,OAAO,EACnB;AAAA,IACC,IAAI,OAAO,iBAAiB,cAAc,kBAAkB,0BAA0B;AAAA,EACxF,EACC,OAAO,OAAO,OAAiB,YAAmC;AACjE,QAAI;AACF,UAAI,QAAQ,YAAY,MAAM,SAAS,oBAAoB;AACzD,gBAAQ;AAAA,UACN;AAAA,SAAO,MAAM,MAAM,gDAAgD,kBAAkB;AAAA,QACvF;AACA,gBAAQ,MAAM,mEAAmE;AACjF,gBAAQ,KAAK,CAAC;AAAA,MAChB;AAEA,UAAI,CAAC,QAAQ,YAAY,MAAM,SAAS,oBAAoB;AAC1D,gBAAQ;AAAA,UACN;AAAA,8CAAuC,MAAM,MAAM,oBAAoB,kBAAkB;AAAA,QAC3F;AAAA,MACF;AAEA,cAAQ,MAAM;AAAA,oBAAkB,MAAM,MAAM,aAAa;AAEzD,YAAM,cAAc,IAAI,YAAY;AACpC,YAAM,YAAY,MAAM,YAAY,SAAS,KAAK;AAClD,YAAM,gBAAgB,MAAM;AAAA,QAC1B,UAAU,IAAI,CAAC,EAAE,MAAM,SAAS,MAAM,OAAO,EAAE,MAAM,SAAS,MAAM,EAAE;AAAA,MACxE;AAEA,YAAM,QAAQ,MAAM,GAAG,aAAa;AAAA,IACtC,SAAS,OAAO;AACd,cAAQ,MAAM,+BAA0B,iBAAiB,QAAQ,MAAM,UAAU,KAAK;AACtF,cAAQ,KAAK,CAAC;AAAA,IAChB;AAAA,EACF,CAAC;AAEH,QAAM,QAAQ,WAAW,IAAI;AAC/B;AAEA,IAAI,gBAAgB,GAAG;AACrB,SAAO,EAAE,MAAM,CAAC,UAAmB;AACjC,YAAQ,MAAM,+BAA0B,iBAAiB,QAAQ,MAAM,UAAU,KAAK;AACtF,YAAQ,KAAK,CAAC;AAAA,EAChB,CAAC;AACH;",
4
+ "sourcesContent": ["#!/usr/bin/env node\nimport { Command, createArgument, Option } from 'commander';\nimport { createRequire } from 'node:module';\nimport { resolve } from 'node:path';\nimport { fileURLToPath } from 'node:url';\nimport { realpathSync } from 'node:fs';\n\nimport { PageExtractor, ResourceExtractor } from './extractors/index.js';\nimport { FileFetcher, MAX_FILES_FAILSAFE, PageFetcher, type PageMetadata } from './page/index.js';\nimport { JSONStylePrinter } from './printers/index.js';\nimport { validateUrls } from './security.js';\n\nconst require = createRequire(import.meta.url);\nconst pkg = require('../package.json') as {\n description: string;\n name: string;\n version: string;\n};\n\nconst { description, name, version } = pkg;\n\nconst program = new Command();\n\nconst urlArg = createArgument('<url...>', 'remote https://URL to extract from');\nconst fileArg = createArgument('<paths...>', 'local file paths to extract from');\n\n// Shared extractor instances.\nconst pageExtractor = new PageExtractor();\nconst resourceExtractor = new ResourceExtractor(['a', 'meta', 'link', 'embed', 'script']);\nconst printer = new JSONStylePrinter();\n\nasync function buildPageMetadata(\n responses: Array<{\n url?: string;\n path?: string;\n content?: import('./page/index.js').DOMResult;\n error?: string;\n }>\n): Promise<PageMetadata[]> {\n const pageMetadatas: PageMetadata[] = [];\n\n for (const { content, url: responseUrl, path, error } of responses) {\n const resolvedUrl = responseUrl ?? path ?? '';\n const resources =\n error !== undefined || !content ? [] : await resourceExtractor.extract(content);\n const descriptor =\n error !== undefined || !content\n ? { url: resolvedUrl, error: error ?? 'Unknown error', resources }\n : await pageExtractor.extract(content);\n pageMetadatas.push({ ...descriptor, resources });\n }\n\n return pageMetadatas;\n}\n\nfunction normalizeLocalPath(value: string): string {\n if (value.startsWith('file://')) {\n return fileURLToPath(value);\n }\n\n return value;\n}\n\nasync function runFileCommand(paths: string[], options: { failsafe: boolean }): Promise<void> {\n if (options.failsafe && paths.length > MAX_FILES_FAILSAFE) {\n console.error(\n `\\n\u274C ${paths.length} files specified exceeds the safety limit of ${MAX_FILES_FAILSAFE}.`\n );\n console.error(` Pass --no-failsafe to bypass this check and process all files.`);\n process.exit(1);\n }\n\n if (!options.failsafe && paths.length > MAX_FILES_FAILSAFE) {\n console.error(\n `\\n\u26A0\uFE0F Failsafe bypassed: processing ${paths.length} files (limit is ${MAX_FILES_FAILSAFE}).`\n );\n }\n\n console.error(`\\n\u2705 Processing ${paths.length} file(s)...`);\n\n const fileFetcher = new FileFetcher();\n const normalizedPaths = paths.map((pathValue) => normalizeLocalPath(pathValue));\n const responses = await fileFetcher.fetchAll(normalizedPaths);\n const pageMetadatas = await buildPageMetadata(\n responses.map(({ path, content, error }) => ({ path, content, error }))\n );\n\n await printer.print(...pageMetadatas);\n}\n\nfunction isCliEntrypoint(): boolean {\n const invokedPath = process.argv[1];\n if (!invokedPath) {\n return false;\n }\n\n try {\n return realpathSync(fileURLToPath(import.meta.url)) === realpathSync(resolve(invokedPath));\n } catch {\n return false;\n }\n}\n\nexport async function runCli(argv: string[] = process.argv): Promise<void> {\n program.name(name).version(version, '-v, --version').description(description);\n\n program\n .addArgument(fileArg)\n .addOption(\n new Option('--no-failsafe', `bypass the ${MAX_FILES_FAILSAFE}-file limit safety check`)\n )\n .action(async (paths: string[], options: { failsafe: boolean }) => {\n try {\n await runFileCommand(paths, options);\n } catch (error) {\n console.error('\\n\u274C An error occurred:', error instanceof Error ? error.message : error);\n process.exit(1);\n }\n });\n\n // \u2500\u2500 fetch subcommand (remote URL mode only) \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n program\n .command('fetch')\n .description('fetch and extract resources from remote URL(s)')\n .addArgument(urlArg)\n .addOption(\n new Option(\n '--watch',\n 'keep running: SIGWINCH re-fetches after resize, Ctrl-D releases in-flight requests, Ctrl-C exits'\n )\n )\n .addOption(new Option('-A, --user-agent <value>', 'override the HTTP User-Agent header'))\n .action(async (urls: string[], options: { watch: boolean; userAgent?: string }) => {\n try {\n const { validUrls, errors } = validateUrls(urls);\n\n if (errors.length > 0) {\n console.error('\\n\u274C URL Validation Errors:');\n errors.forEach(({ url: invalidUrl, error }) => {\n console.error(` - ${invalidUrl}: ${error}`);\n });\n }\n\n if (validUrls.length === 0) {\n console.error('\\n\u274C No valid URLs to process. Exiting.');\n process.exit(1);\n }\n\n console.error(`\\n\u2705 Processing ${validUrls.length} valid URL(s)...`);\n\n const pageFetcher = new PageFetcher(options.watch ? 0 : 10000, 2, options.userAgent);\n\n const execute = async (): Promise<void> => {\n const responses = await pageFetcher.fetchAll(validUrls);\n const pageMetadatas = await buildPageMetadata(responses);\n await printer.print(...pageMetadatas);\n };\n\n if (options.watch) {\n process.stdin.resume();\n process.on('SIGINT', () => process.exit(0));\n\n let activeExecution: Promise<void> | null = null;\n process.stdin.on('end', () => {\n activeExecution = null;\n });\n\n let winchTimer: ReturnType<typeof setTimeout> | null = null;\n process.on('SIGWINCH', () => {\n if (winchTimer !== null) clearTimeout(winchTimer);\n winchTimer = setTimeout(() => {\n winchTimer = null;\n activeExecution = execute().catch((err: unknown) => {\n console.error('\\n\u274C An error occurred:', err instanceof Error ? err.message : err);\n });\n }, 150);\n });\n\n activeExecution = execute();\n await activeExecution;\n } else {\n await execute();\n }\n } catch (error) {\n console.error('\\n\u274C An error occurred:', error instanceof Error ? error.message : error);\n process.exit(1);\n }\n });\n\n // \u2500\u2500 file subcommand (local filesystem access) \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n program\n .command('file')\n .description('extract resources from local file(s) via direct filesystem access')\n .addArgument(fileArg)\n .addOption(\n new Option('--no-failsafe', `bypass the ${MAX_FILES_FAILSAFE}-file limit safety check`)\n )\n .action(async (paths: string[], options: { failsafe: boolean }) => {\n try {\n await runFileCommand(paths, options);\n } catch (error) {\n console.error('\\n\u274C An error occurred:', error instanceof Error ? error.message : error);\n process.exit(1);\n }\n });\n\n await program.parseAsync(argv);\n}\n\nif (isCliEntrypoint()) {\n runCli().catch((error: unknown) => {\n console.error('\\n\u274C An error occurred:', error instanceof Error ? error.message : error);\n process.exit(1);\n });\n}\n", "export abstract class AbstractExtractor<V, R> {\n constructor(readonly name: string) {}\n abstract extract(value: V): Promise<R>;\n}\n", "import type { Page } from '../page/index.js';\nimport type { DOMResult } from '../page/index.js';\nimport { AbstractExtractor } from './AbstractExtractor.js';\n\nexport class PageExtractor extends AbstractExtractor<DOMResult, Page> {\n constructor() {\n super('page-extractor');\n }\n\n async extract(value: DOMResult): Promise<Page> {\n const {\n window: { document },\n url,\n } = value;\n return { title: document.title, url };\n }\n}\n", "/**\n * @license MIT\n * We are interested in visualising a page as a collection of tags.\n *\n * We wish to work with tags that can be compactly previewed on a webpage.\n * Here we must declare all of the element types that can be used to represent\n * a resource that can be hyperlinked off a webpage.\n */\ntype Tags = HTMLElementTagNameMap;\n\nexport const RESOURCE_DISPLAYABLE_KEYS = [\n 'id',\n 'innerText',\n 'textContent',\n 'class',\n 'ariaLabel',\n 'ariaDescription',\n 'alt',\n] as const;\n\nexport type DisplayableKey = (typeof RESOURCE_DISPLAYABLE_KEYS)[number];\n\nexport const RESOURCE_LINK_KEYS = ['href', 'data-src', 'target', 'action', 'src', 'url'] as const;\n\nexport type LinkKey = (typeof RESOURCE_LINK_KEYS)[number];\n\nexport type AttributeKey = DisplayableKey | LinkKey;\n\nexport type ResourceKey = { key: AttributeKey; value: string };\nexport type ResourceLink = { key: LinkKey; value: string };\n\nexport type ExternalResource = {\n text: ResourceKey;\n link: ResourceLink;\n};\n\nexport type Tag = keyof Tags;\n\nexport type Resource = HTMLElement & {\n [K in AttributeKey]?: string | null;\n};\n\nexport type ResourceByName<T extends keyof Tags> = Tags[T];\n\n// --- adapters ---\n\nconst readAttr = (element: Resource, key: AttributeKey): string | undefined => {\n const v = element.getAttribute(key);\n return v != null && v.trim() !== '' ? v : undefined;\n};\n\nexport function findResourceText(element: Resource): ResourceKey | undefined {\n for (const key of RESOURCE_DISPLAYABLE_KEYS) {\n const value = readAttr(element, key);\n if (value !== undefined) return { key, value };\n }\n return undefined;\n}\n\nexport function findResourceLink(element: Resource): ResourceLink | undefined {\n for (const key of RESOURCE_LINK_KEYS) {\n const value = readAttr(element, key);\n if (value !== undefined) return { key, value };\n }\n return undefined;\n}\n\nexport const isResourceKey = (key: string): key is AttributeKey =>\n (RESOURCE_DISPLAYABLE_KEYS as readonly string[]).includes(key) ||\n (RESOURCE_LINK_KEYS as readonly string[]).includes(key);\n", "import type { DOMResult } from '../page/index.js';\nimport {\n findResourceLink,\n findResourceText,\n type ExternalResource,\n type Resource,\n type Tag,\n} from '../resource.js';\nimport { AbstractExtractor } from './AbstractExtractor.js';\n\nexport class ResourceExtractor extends AbstractExtractor<DOMResult, ExternalResource[]> {\n constructor(private readonly tags: Tag[]) {\n super('page-extractor');\n }\n async extract(value: DOMResult): Promise<ExternalResource[]> {\n const { document } = value.window;\n return this.tags.flatMap((tag) =>\n Array.from(document.querySelectorAll<Resource>(tag)).flatMap((element) => {\n const link = findResourceLink(element);\n if (!link) return [];\n const text = findResourceText(element) ?? { key: 'src' as const, value: link.value };\n return [{ text, link }];\n })\n );\n }\n}\n", "import { parseHTML } from 'linkedom';\n\ntype ParseHTMLResult = {\n document: Document;\n};\n\nexport interface DOMResult {\n window: { document: Document };\n url: string;\n}\n\ninterface PageResponse {\n url: string;\n content?: DOMResult;\n error?: string;\n}\n\nexport class PageFetcher {\n private readonly timeout: number;\n private readonly maxRetries: number;\n private readonly userAgent?: string;\n\n constructor(timeout = 10000, maxRetries = 2, userAgent?: string) {\n this.timeout = timeout;\n this.maxRetries = maxRetries;\n this.userAgent = userAgent;\n }\n\n private buildDOMResult(html: string, url: string): DOMResult {\n const { document } = parseHTML(html) as ParseHTMLResult;\n return { window: { document }, url };\n }\n\n private decodeHtml(buffer: ArrayBuffer, charset: string): string {\n try {\n return new TextDecoder(charset).decode(new Uint8Array(buffer));\n } catch {\n return new TextDecoder('utf-8').decode(new Uint8Array(buffer));\n }\n }\n\n private async fetchPage(url: string, retryCount = 0): Promise<PageResponse> {\n const controller = new AbortController();\n let timeoutId: ReturnType<typeof setTimeout> | null = null;\n\n try {\n if (this.timeout > 0) {\n timeoutId = setTimeout(() => {\n controller.abort(new Error('Request timeout'));\n }, this.timeout);\n }\n\n const headers = this.userAgent ? { 'user-agent': this.userAgent } : undefined;\n const content = await fetch(url, { headers, signal: controller.signal }).then(\n async (response) => {\n const buffer = await response.arrayBuffer();\n const contentType = response.headers.get('content-type') ?? '';\n const charsetMatch = /charset=([^\\s;]+)/i.exec(contentType);\n const html = this.decodeHtml(buffer, charsetMatch?.[1] ?? 'utf-8');\n return this.buildDOMResult(html, url);\n }\n );\n\n return { url, content };\n } catch (error) {\n const abortTimeout = error instanceof Error && error.name === 'AbortError';\n const message = abortTimeout\n ? 'Request timeout'\n : error instanceof Error\n ? error.message\n : 'Unknown error';\n\n // Retry logic for transient errors\n if (retryCount < this.maxRetries && this.isRetryableError(message)) {\n process.stderr.write(`Retrying ${url} (attempt ${retryCount + 1}/${this.maxRetries})...\\n`);\n await this.delay(1000 * (retryCount + 1)); // Exponential backoff\n return this.fetchPage(url, retryCount + 1);\n }\n\n return { url, error: `Failed to fetch: ${message}` };\n } finally {\n if (timeoutId !== null) {\n clearTimeout(timeoutId);\n }\n }\n }\n\n private isRetryableError(message: string): boolean {\n const retryablePatterns = [/timeout/i, /ECONNRESET/i, /ETIMEDOUT/i, /ENOTFOUND/i, /network/i];\n return retryablePatterns.some((pattern) => pattern.test(message));\n }\n\n private delay(ms: number): Promise<void> {\n return new Promise((resolve) => setTimeout(resolve, ms));\n }\n\n async fetchAll(urls: string[]): Promise<PageResponse[]> {\n const responses = await Promise.all(urls.map((url) => this.fetchPage(url)));\n return responses.filter((response) => response.content !== undefined || response.error);\n }\n}\n", "import { readFile } from 'node:fs/promises';\nimport { parseHTML } from 'linkedom';\n\nimport type { DOMResult } from './PageFetcher.js';\n\nexport const MAX_FILES_FAILSAFE = 254;\n\ntype ParseHTMLResult = {\n document: Document;\n};\n\nexport interface FileResponse {\n path: string;\n content?: DOMResult;\n error?: string;\n}\n\nexport class FileFetcher {\n private buildDOMResult(html: string, filePath: string): DOMResult {\n const { document } = parseHTML(html) as ParseHTMLResult;\n return { window: { document }, url: `file://${filePath}` };\n }\n\n async fetchFile(filePath: string): Promise<FileResponse> {\n try {\n // filePath is supplied directly by the CLI user, not derived from network input.\n // eslint-disable-next-line security/detect-non-literal-fs-filename\n const html = await readFile(filePath, 'utf-8');\n return { path: filePath, content: this.buildDOMResult(html, filePath) };\n } catch (error) {\n return {\n path: filePath,\n error: error instanceof Error ? error.message : 'Unknown error',\n };\n }\n }\n\n async fetchAll(filePaths: string[]): Promise<FileResponse[]> {\n return Promise.all(filePaths.map((p) => this.fetchFile(p)));\n }\n}\n", "import type { PageMetadata } from '../page/index.js';\n\nexport abstract class AbstractResourcePrinter {\n constructor() {}\n abstract print(...pages: PageMetadata[]): void | Promise<void>;\n}\n", "import type { PageMetadata } from '../page/index.js';\nimport { AbstractResourcePrinter } from './AbstractResourcePrinter.js';\n\nexport class JSONStylePrinter extends AbstractResourcePrinter {\n print(...pages: PageMetadata[]): void | Promise<void> {\n const json = JSON.stringify(pages);\n process.stdout.write(json + '\\n');\n }\n}\n", "/**\n * Security utilities for URL validation and sanitization\n */\n\nconst ALLOWED_PROTOCOLS = ['http:', 'https:'];\nconst MAX_URL_LENGTH = 2048;\nconst SUSPICIOUS_PATTERNS = [\n /javascript:/i,\n /data:/i,\n /vbscript:/i,\n /<script/i,\n /on\\w+=/i, // Event handlers like onclick=\n];\n\nexport interface ValidationResult {\n isValid: boolean;\n error?: string;\n sanitizedUrl?: string;\n}\n\n/**\n * Validates a URL for security concerns\n * @param url - The URL to validate\n * @returns ValidationResult object with validation status\n */\nexport function validateUrl(url: string): ValidationResult {\n // Check if URL is empty or whitespace\n if (!url || !url.trim()) {\n return {\n isValid: false,\n error: 'URL cannot be empty',\n };\n }\n\n const trimmedUrl = url.trim();\n\n // Check URL length to prevent DoS\n if (trimmedUrl.length > MAX_URL_LENGTH) {\n return {\n isValid: false,\n error: `URL exceeds maximum length of ${MAX_URL_LENGTH} characters`,\n };\n }\n\n // Check for suspicious patterns\n for (const pattern of SUSPICIOUS_PATTERNS) {\n if (pattern.test(trimmedUrl)) {\n return {\n isValid: false,\n error: 'URL contains suspicious patterns',\n };\n }\n }\n\n // Parse the URL\n let parsedUrl: URL;\n try {\n parsedUrl = new URL(trimmedUrl);\n } catch {\n return {\n isValid: false,\n error: 'Invalid URL format',\n };\n }\n\n // Check protocol\n if (!ALLOWED_PROTOCOLS.includes(parsedUrl.protocol)) {\n return {\n isValid: false,\n error: `Protocol ${parsedUrl.protocol} is not allowed. Allowed protocols: ${ALLOWED_PROTOCOLS.join(', ')}`,\n };\n }\n\n // Check for localhost/internal IPs in production (security consideration)\n const hostname = parsedUrl.hostname.toLowerCase();\n const isLocalhost =\n hostname === 'localhost' ||\n hostname === '127.0.0.1' ||\n hostname === '::1' ||\n hostname.startsWith('192.168.') ||\n hostname.startsWith('10.') ||\n /^172\\.(1[6-9]|2\\d|3[01])\\./.test(hostname);\n\n if (isLocalhost) {\n // Allow but warn about localhost URLs\n console.warn(`Warning: Accessing local network resource: ${trimmedUrl}`);\n }\n\n return {\n isValid: true,\n sanitizedUrl: parsedUrl.toString(),\n };\n}\n\n/**\n * Validates an array of URLs\n * @param urls - Array of URLs to validate\n * @returns Object with valid URLs and errors\n */\nexport function validateUrls(urls: string[]): {\n validUrls: string[];\n errors: Array<{ url: string; error: string }>;\n} {\n const validUrls: string[] = [];\n const errors: Array<{ url: string; error: string }> = [];\n\n for (const url of urls) {\n const result = validateUrl(url);\n if (result.isValid && result.sanitizedUrl) {\n validUrls.push(result.sanitizedUrl);\n } else {\n errors.push({\n url,\n error: result.error || 'Unknown validation error',\n });\n }\n }\n\n return { validUrls, errors };\n}\n\n/**\n * Rate limiter to prevent abuse\n */\nexport class RateLimiter {\n private requests: number[] = [];\n private readonly maxRequests: number;\n private readonly windowMs: number;\n\n constructor(maxRequests = 10, windowMs = 60000) {\n this.maxRequests = maxRequests;\n this.windowMs = windowMs;\n }\n\n /**\n * Check if a request is allowed under rate limiting\n * @returns true if request is allowed, false otherwise\n */\n public isAllowed(): boolean {\n const now = Date.now();\n\n // Remove old requests outside the time window\n this.requests = this.requests.filter((time) => now - time < this.windowMs);\n\n if (this.requests.length >= this.maxRequests) {\n return false;\n }\n\n this.requests.push(now);\n return true;\n }\n\n /**\n * Get remaining requests in current window\n */\n public getRemainingRequests(): number {\n const now = Date.now();\n this.requests = this.requests.filter((time) => now - time < this.windowMs);\n return Math.max(0, this.maxRequests - this.requests.length);\n }\n}\n\n/**\n * Sanitizes HTML content to prevent XSS attacks\n * @param text - Text to sanitize\n * @returns Sanitized text\n */\nexport function sanitizeText(text: string): string {\n if (!text) return '';\n\n return text\n .replace(/</g, '&lt;')\n .replace(/>/g, '&gt;')\n .replace(/\"/g, '&quot;')\n .replace(/'/g, '&#x27;')\n .replace(/\\//g, '&#x2F;');\n}\n"],
5
+ "mappings": ";;;AACA,SAAS,SAAS,gBAAgB,cAAc;AAChD,SAAS,qBAAqB;AAC9B,SAAS,eAAe;AACxB,SAAS,qBAAqB;AAC9B,SAAS,oBAAoB;;;ACLtB,IAAe,oBAAf,MAAuC;AAAA,EAC5C,YAAqBA,OAAc;AAAd,gBAAAA;AAAA,EAAe;AAEtC;;;ACCO,IAAM,gBAAN,cAA4B,kBAAmC;AAAA,EACpE,cAAc;AACZ,UAAM,gBAAgB;AAAA,EACxB;AAAA,EAEA,MAAM,QAAQ,OAAiC;AAC7C,UAAM;AAAA,MACJ,QAAQ,EAAE,SAAS;AAAA,MACnB;AAAA,IACF,IAAI;AACJ,WAAO,EAAE,OAAO,SAAS,OAAO,IAAI;AAAA,EACtC;AACF;;;ACNO,IAAM,4BAA4B;AAAA,EACvC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAIO,IAAM,qBAAqB,CAAC,QAAQ,YAAY,UAAU,UAAU,OAAO,KAAK;AAwBvF,IAAM,WAAW,CAAC,SAAmB,QAA0C;AAC7E,QAAM,IAAI,QAAQ,aAAa,GAAG;AAClC,SAAO,KAAK,QAAQ,EAAE,KAAK,MAAM,KAAK,IAAI;AAC5C;AAEO,SAAS,iBAAiB,SAA4C;AAC3E,aAAW,OAAO,2BAA2B;AAC3C,UAAM,QAAQ,SAAS,SAAS,GAAG;AACnC,QAAI,UAAU,OAAW,QAAO,EAAE,KAAK,MAAM;AAAA,EAC/C;AACA,SAAO;AACT;AAEO,SAAS,iBAAiB,SAA6C;AAC5E,aAAW,OAAO,oBAAoB;AACpC,UAAM,QAAQ,SAAS,SAAS,GAAG;AACnC,QAAI,UAAU,OAAW,QAAO,EAAE,KAAK,MAAM;AAAA,EAC/C;AACA,SAAO;AACT;;;ACvDO,IAAM,oBAAN,cAAgC,kBAAiD;AAAA,EACtF,YAA6B,MAAa;AACxC,UAAM,gBAAgB;AADK;AAAA,EAE7B;AAAA,EACA,MAAM,QAAQ,OAA+C;AAC3D,UAAM,EAAE,SAAS,IAAI,MAAM;AAC3B,WAAO,KAAK,KAAK;AAAA,MAAQ,CAAC,QACxB,MAAM,KAAK,SAAS,iBAA2B,GAAG,CAAC,EAAE,QAAQ,CAAC,YAAY;AACxE,cAAM,OAAO,iBAAiB,OAAO;AACrC,YAAI,CAAC,KAAM,QAAO,CAAC;AACnB,cAAM,OAAO,iBAAiB,OAAO,KAAK,EAAE,KAAK,OAAgB,OAAO,KAAK,MAAM;AACnF,eAAO,CAAC,EAAE,MAAM,KAAK,CAAC;AAAA,MACxB,CAAC;AAAA,IACH;AAAA,EACF;AACF;;;ACzBA,SAAS,iBAAiB;AAiBnB,IAAM,cAAN,MAAkB;AAAA,EACN;AAAA,EACA;AAAA,EACA;AAAA,EAEjB,YAAY,UAAU,KAAO,aAAa,GAAG,WAAoB;AAC/D,SAAK,UAAU;AACf,SAAK,aAAa;AAClB,SAAK,YAAY;AAAA,EACnB;AAAA,EAEQ,eAAe,MAAc,KAAwB;AAC3D,UAAM,EAAE,SAAS,IAAI,UAAU,IAAI;AACnC,WAAO,EAAE,QAAQ,EAAE,SAAS,GAAG,IAAI;AAAA,EACrC;AAAA,EAEQ,WAAW,QAAqB,SAAyB;AAC/D,QAAI;AACF,aAAO,IAAI,YAAY,OAAO,EAAE,OAAO,IAAI,WAAW,MAAM,CAAC;AAAA,IAC/D,QAAQ;AACN,aAAO,IAAI,YAAY,OAAO,EAAE,OAAO,IAAI,WAAW,MAAM,CAAC;AAAA,IAC/D;AAAA,EACF;AAAA,EAEA,MAAc,UAAU,KAAa,aAAa,GAA0B;AAC1E,UAAM,aAAa,IAAI,gBAAgB;AACvC,QAAI,YAAkD;AAEtD,QAAI;AACF,UAAI,KAAK,UAAU,GAAG;AACpB,oBAAY,WAAW,MAAM;AAC3B,qBAAW,MAAM,IAAI,MAAM,iBAAiB,CAAC;AAAA,QAC/C,GAAG,KAAK,OAAO;AAAA,MACjB;AAEA,YAAM,UAAU,KAAK,YAAY,EAAE,cAAc,KAAK,UAAU,IAAI;AACpE,YAAM,UAAU,MAAM,MAAM,KAAK,EAAE,SAAS,QAAQ,WAAW,OAAO,CAAC,EAAE;AAAA,QACvE,OAAO,aAAa;AAClB,gBAAM,SAAS,MAAM,SAAS,YAAY;AAC1C,gBAAM,cAAc,SAAS,QAAQ,IAAI,cAAc,KAAK;AAC5D,gBAAM,eAAe,qBAAqB,KAAK,WAAW;AAC1D,gBAAM,OAAO,KAAK,WAAW,QAAQ,eAAe,CAAC,KAAK,OAAO;AACjE,iBAAO,KAAK,eAAe,MAAM,GAAG;AAAA,QACtC;AAAA,MACF;AAEA,aAAO,EAAE,KAAK,QAAQ;AAAA,IACxB,SAAS,OAAO;AACd,YAAM,eAAe,iBAAiB,SAAS,MAAM,SAAS;AAC9D,YAAM,UAAU,eACZ,oBACA,iBAAiB,QACf,MAAM,UACN;AAGN,UAAI,aAAa,KAAK,cAAc,KAAK,iBAAiB,OAAO,GAAG;AAClE,gBAAQ,OAAO,MAAM,YAAY,GAAG,aAAa,aAAa,CAAC,IAAI,KAAK,UAAU;AAAA,CAAQ;AAC1F,cAAM,KAAK,MAAM,OAAQ,aAAa,EAAE;AACxC,eAAO,KAAK,UAAU,KAAK,aAAa,CAAC;AAAA,MAC3C;AAEA,aAAO,EAAE,KAAK,OAAO,oBAAoB,OAAO,GAAG;AAAA,IACrD,UAAE;AACA,UAAI,cAAc,MAAM;AACtB,qBAAa,SAAS;AAAA,MACxB;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,iBAAiB,SAA0B;AACjD,UAAM,oBAAoB,CAAC,YAAY,eAAe,cAAc,cAAc,UAAU;AAC5F,WAAO,kBAAkB,KAAK,CAAC,YAAY,QAAQ,KAAK,OAAO,CAAC;AAAA,EAClE;AAAA,EAEQ,MAAM,IAA2B;AACvC,WAAO,IAAI,QAAQ,CAACC,aAAY,WAAWA,UAAS,EAAE,CAAC;AAAA,EACzD;AAAA,EAEA,MAAM,SAAS,MAAyC;AACtD,UAAM,YAAY,MAAM,QAAQ,IAAI,KAAK,IAAI,CAAC,QAAQ,KAAK,UAAU,GAAG,CAAC,CAAC;AAC1E,WAAO,UAAU,OAAO,CAAC,aAAa,SAAS,YAAY,UAAa,SAAS,KAAK;AAAA,EACxF;AACF;;;ACpGA,SAAS,gBAAgB;AACzB,SAAS,aAAAC,kBAAiB;AAInB,IAAM,qBAAqB;AAY3B,IAAM,cAAN,MAAkB;AAAA,EACf,eAAe,MAAc,UAA6B;AAChE,UAAM,EAAE,SAAS,IAAIA,WAAU,IAAI;AACnC,WAAO,EAAE,QAAQ,EAAE,SAAS,GAAG,KAAK,UAAU,QAAQ,GAAG;AAAA,EAC3D;AAAA,EAEA,MAAM,UAAU,UAAyC;AACvD,QAAI;AAGF,YAAM,OAAO,MAAM,SAAS,UAAU,OAAO;AAC7C,aAAO,EAAE,MAAM,UAAU,SAAS,KAAK,eAAe,MAAM,QAAQ,EAAE;AAAA,IACxE,SAAS,OAAO;AACd,aAAO;AAAA,QACL,MAAM;AAAA,QACN,OAAO,iBAAiB,QAAQ,MAAM,UAAU;AAAA,MAClD;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,SAAS,WAA8C;AAC3D,WAAO,QAAQ,IAAI,UAAU,IAAI,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC;AAAA,EAC5D;AACF;;;ACtCO,IAAe,0BAAf,MAAuC;AAAA,EAC5C,cAAc;AAAA,EAAC;AAEjB;;;ACFO,IAAM,mBAAN,cAA+B,wBAAwB;AAAA,EAC5D,SAAS,OAA6C;AACpD,UAAM,OAAO,KAAK,UAAU,KAAK;AACjC,YAAQ,OAAO,MAAM,OAAO,IAAI;AAAA,EAClC;AACF;;;ACJA,IAAM,oBAAoB,CAAC,SAAS,QAAQ;AAC5C,IAAM,iBAAiB;AACvB,IAAM,sBAAsB;AAAA,EAC1B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AACF;AAaO,SAAS,YAAY,KAA+B;AAEzD,MAAI,CAAC,OAAO,CAAC,IAAI,KAAK,GAAG;AACvB,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO;AAAA,IACT;AAAA,EACF;AAEA,QAAM,aAAa,IAAI,KAAK;AAG5B,MAAI,WAAW,SAAS,gBAAgB;AACtC,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO,iCAAiC,cAAc;AAAA,IACxD;AAAA,EACF;AAGA,aAAW,WAAW,qBAAqB;AACzC,QAAI,QAAQ,KAAK,UAAU,GAAG;AAC5B,aAAO;AAAA,QACL,SAAS;AAAA,QACT,OAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAGA,MAAI;AACJ,MAAI;AACF,gBAAY,IAAI,IAAI,UAAU;AAAA,EAChC,QAAQ;AACN,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO;AAAA,IACT;AAAA,EACF;AAGA,MAAI,CAAC,kBAAkB,SAAS,UAAU,QAAQ,GAAG;AACnD,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO,YAAY,UAAU,QAAQ,uCAAuC,kBAAkB,KAAK,IAAI,CAAC;AAAA,IAC1G;AAAA,EACF;AAGA,QAAM,WAAW,UAAU,SAAS,YAAY;AAChD,QAAM,cACJ,aAAa,eACb,aAAa,eACb,aAAa,SACb,SAAS,WAAW,UAAU,KAC9B,SAAS,WAAW,KAAK,KACzB,6BAA6B,KAAK,QAAQ;AAE5C,MAAI,aAAa;AAEf,YAAQ,KAAK,8CAA8C,UAAU,EAAE;AAAA,EACzE;AAEA,SAAO;AAAA,IACL,SAAS;AAAA,IACT,cAAc,UAAU,SAAS;AAAA,EACnC;AACF;AAOO,SAAS,aAAa,MAG3B;AACA,QAAM,YAAsB,CAAC;AAC7B,QAAM,SAAgD,CAAC;AAEvD,aAAW,OAAO,MAAM;AACtB,UAAM,SAAS,YAAY,GAAG;AAC9B,QAAI,OAAO,WAAW,OAAO,cAAc;AACzC,gBAAU,KAAK,OAAO,YAAY;AAAA,IACpC,OAAO;AACL,aAAO,KAAK;AAAA,QACV;AAAA,QACA,OAAO,OAAO,SAAS;AAAA,MACzB,CAAC;AAAA,IACH;AAAA,EACF;AAEA,SAAO,EAAE,WAAW,OAAO;AAC7B;;;AT3GA,IAAMC,WAAU,cAAc,YAAY,GAAG;AAC7C,IAAM,MAAMA,SAAQ,iBAAiB;AAMrC,IAAM,EAAE,aAAa,MAAM,QAAQ,IAAI;AAEvC,IAAM,UAAU,IAAI,QAAQ;AAE5B,IAAM,SAAS,eAAe,YAAY,oCAAoC;AAC9E,IAAM,UAAU,eAAe,cAAc,kCAAkC;AAG/E,IAAM,gBAAgB,IAAI,cAAc;AACxC,IAAM,oBAAoB,IAAI,kBAAkB,CAAC,KAAK,QAAQ,QAAQ,SAAS,QAAQ,CAAC;AACxF,IAAM,UAAU,IAAI,iBAAiB;AAErC,eAAe,kBACb,WAMyB;AACzB,QAAM,gBAAgC,CAAC;AAEvC,aAAW,EAAE,SAAS,KAAK,aAAa,MAAM,MAAM,KAAK,WAAW;AAClE,UAAM,cAAc,eAAe,QAAQ;AAC3C,UAAM,YACJ,UAAU,UAAa,CAAC,UAAU,CAAC,IAAI,MAAM,kBAAkB,QAAQ,OAAO;AAChF,UAAM,aACJ,UAAU,UAAa,CAAC,UACpB,EAAE,KAAK,aAAa,OAAO,SAAS,iBAAiB,UAAU,IAC/D,MAAM,cAAc,QAAQ,OAAO;AACzC,kBAAc,KAAK,EAAE,GAAG,YAAY,UAAU,CAAC;AAAA,EACjD;AAEA,SAAO;AACT;AAEA,SAAS,mBAAmB,OAAuB;AACjD,MAAI,MAAM,WAAW,SAAS,GAAG;AAC/B,WAAO,cAAc,KAAK;AAAA,EAC5B;AAEA,SAAO;AACT;AAEA,eAAe,eAAe,OAAiB,SAA+C;AAC5F,MAAI,QAAQ,YAAY,MAAM,SAAS,oBAAoB;AACzD,YAAQ;AAAA,MACN;AAAA,SAAO,MAAM,MAAM,gDAAgD,kBAAkB;AAAA,IACvF;AACA,YAAQ,MAAM,mEAAmE;AACjF,YAAQ,KAAK,CAAC;AAAA,EAChB;AAEA,MAAI,CAAC,QAAQ,YAAY,MAAM,SAAS,oBAAoB;AAC1D,YAAQ;AAAA,MACN;AAAA,8CAAuC,MAAM,MAAM,oBAAoB,kBAAkB;AAAA,IAC3F;AAAA,EACF;AAEA,UAAQ,MAAM;AAAA,oBAAkB,MAAM,MAAM,aAAa;AAEzD,QAAM,cAAc,IAAI,YAAY;AACpC,QAAM,kBAAkB,MAAM,IAAI,CAAC,cAAc,mBAAmB,SAAS,CAAC;AAC9E,QAAM,YAAY,MAAM,YAAY,SAAS,eAAe;AAC5D,QAAM,gBAAgB,MAAM;AAAA,IAC1B,UAAU,IAAI,CAAC,EAAE,MAAM,SAAS,MAAM,OAAO,EAAE,MAAM,SAAS,MAAM,EAAE;AAAA,EACxE;AAEA,QAAM,QAAQ,MAAM,GAAG,aAAa;AACtC;AAEA,SAAS,kBAA2B;AAClC,QAAM,cAAc,QAAQ,KAAK,CAAC;AAClC,MAAI,CAAC,aAAa;AAChB,WAAO;AAAA,EACT;AAEA,MAAI;AACF,WAAO,aAAa,cAAc,YAAY,GAAG,CAAC,MAAM,aAAa,QAAQ,WAAW,CAAC;AAAA,EAC3F,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,eAAsB,OAAO,OAAiB,QAAQ,MAAqB;AACzE,UAAQ,KAAK,IAAI,EAAE,QAAQ,SAAS,eAAe,EAAE,YAAY,WAAW;AAE5E,UACG,YAAY,OAAO,EACnB;AAAA,IACC,IAAI,OAAO,iBAAiB,cAAc,kBAAkB,0BAA0B;AAAA,EACxF,EACC,OAAO,OAAO,OAAiB,YAAmC;AACjE,QAAI;AACF,YAAM,eAAe,OAAO,OAAO;AAAA,IACrC,SAAS,OAAO;AACd,cAAQ,MAAM,+BAA0B,iBAAiB,QAAQ,MAAM,UAAU,KAAK;AACtF,cAAQ,KAAK,CAAC;AAAA,IAChB;AAAA,EACF,CAAC;AAGH,UACG,QAAQ,OAAO,EACf,YAAY,gDAAgD,EAC5D,YAAY,MAAM,EAClB;AAAA,IACC,IAAI;AAAA,MACF;AAAA,MACA;AAAA,IACF;AAAA,EACF,EACC,UAAU,IAAI,OAAO,4BAA4B,qCAAqC,CAAC,EACvF,OAAO,OAAO,MAAgB,YAAoD;AACjF,QAAI;AACF,YAAM,EAAE,WAAW,OAAO,IAAI,aAAa,IAAI;AAE/C,UAAI,OAAO,SAAS,GAAG;AACrB,gBAAQ,MAAM,iCAA4B;AAC1C,eAAO,QAAQ,CAAC,EAAE,KAAK,YAAY,MAAM,MAAM;AAC7C,kBAAQ,MAAM,OAAO,UAAU,KAAK,KAAK,EAAE;AAAA,QAC7C,CAAC;AAAA,MACH;AAEA,UAAI,UAAU,WAAW,GAAG;AAC1B,gBAAQ,MAAM,6CAAwC;AACtD,gBAAQ,KAAK,CAAC;AAAA,MAChB;AAEA,cAAQ,MAAM;AAAA,oBAAkB,UAAU,MAAM,kBAAkB;AAElE,YAAM,cAAc,IAAI,YAAY,QAAQ,QAAQ,IAAI,KAAO,GAAG,QAAQ,SAAS;AAEnF,YAAM,UAAU,YAA2B;AACzC,cAAM,YAAY,MAAM,YAAY,SAAS,SAAS;AACtD,cAAM,gBAAgB,MAAM,kBAAkB,SAAS;AACvD,cAAM,QAAQ,MAAM,GAAG,aAAa;AAAA,MACtC;AAEA,UAAI,QAAQ,OAAO;AACjB,gBAAQ,MAAM,OAAO;AACrB,gBAAQ,GAAG,UAAU,MAAM,QAAQ,KAAK,CAAC,CAAC;AAE1C,YAAI,kBAAwC;AAC5C,gBAAQ,MAAM,GAAG,OAAO,MAAM;AAC5B,4BAAkB;AAAA,QACpB,CAAC;AAED,YAAI,aAAmD;AACvD,gBAAQ,GAAG,YAAY,MAAM;AAC3B,cAAI,eAAe,KAAM,cAAa,UAAU;AAChD,uBAAa,WAAW,MAAM;AAC5B,yBAAa;AACb,8BAAkB,QAAQ,EAAE,MAAM,CAAC,QAAiB;AAClD,sBAAQ,MAAM,+BAA0B,eAAe,QAAQ,IAAI,UAAU,GAAG;AAAA,YAClF,CAAC;AAAA,UACH,GAAG,GAAG;AAAA,QACR,CAAC;AAED,0BAAkB,QAAQ;AAC1B,cAAM;AAAA,MACR,OAAO;AACL,cAAM,QAAQ;AAAA,MAChB;AAAA,IACF,SAAS,OAAO;AACd,cAAQ,MAAM,+BAA0B,iBAAiB,QAAQ,MAAM,UAAU,KAAK;AACtF,cAAQ,KAAK,CAAC;AAAA,IAChB;AAAA,EACF,CAAC;AAGH,UACG,QAAQ,MAAM,EACd,YAAY,mEAAmE,EAC/E,YAAY,OAAO,EACnB;AAAA,IACC,IAAI,OAAO,iBAAiB,cAAc,kBAAkB,0BAA0B;AAAA,EACxF,EACC,OAAO,OAAO,OAAiB,YAAmC;AACjE,QAAI;AACF,YAAM,eAAe,OAAO,OAAO;AAAA,IACrC,SAAS,OAAO;AACd,cAAQ,MAAM,+BAA0B,iBAAiB,QAAQ,MAAM,UAAU,KAAK;AACtF,cAAQ,KAAK,CAAC;AAAA,IAChB;AAAA,EACF,CAAC;AAEH,QAAM,QAAQ,WAAW,IAAI;AAC/B;AAEA,IAAI,gBAAgB,GAAG;AACrB,SAAO,EAAE,MAAM,CAAC,UAAmB;AACjC,YAAQ,MAAM,+BAA0B,iBAAiB,QAAQ,MAAM,UAAU,KAAK;AACtF,YAAQ,KAAK,CAAC;AAAA,EAChB,CAAC;AACH;",
6
6
  "names": ["name", "resolve", "parseHTML", "require"]
7
7
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "pagerts",
3
3
  "description": "A tool for viewing external relations in a webpage",
4
- "version": "1.5.1",
4
+ "version": "1.5.4",
5
5
  "type": "module",
6
6
  "main": "main.js",
7
7
  "bin": {