pagerts 1.4.3 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -9
- package/bin/main.js +36 -12
- package/bin/main.js.map +3 -3
- package/package.json +11 -5
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# PagerTS
|
|
2
2
|
|
|
3
|
-
[](https://github.com/akinevz2/pagerts/actions/workflows/ci.yml)
|
|
4
4
|
[](./SECURITY.md)
|
|
5
5
|
[](https://nodejs.org)
|
|
6
6
|
[](./LICENSE)
|
|
@@ -33,7 +33,7 @@ npx pagerts <url>
|
|
|
33
33
|
### From Source
|
|
34
34
|
|
|
35
35
|
```bash
|
|
36
|
-
git clone https://github.com/
|
|
36
|
+
git clone https://github.com/akinevz2/pagerts.git
|
|
37
37
|
cd pagerts
|
|
38
38
|
npm install
|
|
39
39
|
npm run build
|
|
@@ -117,7 +117,7 @@ PagerTS takes security seriously. See [SECURITY.md](./SECURITY.md) for:
|
|
|
117
117
|
|
|
118
118
|
```bash
|
|
119
119
|
# Clone the repository
|
|
120
|
-
git clone https://github.com/
|
|
120
|
+
git clone https://github.com/akinevz2/pagerts.git
|
|
121
121
|
cd pagerts
|
|
122
122
|
|
|
123
123
|
# Install dependencies
|
|
@@ -186,8 +186,8 @@ pagerts/
|
|
|
186
186
|
├── .github/workflows/ # CI/CD pipelines
|
|
187
187
|
├── package.json
|
|
188
188
|
├── tsconfig.json
|
|
189
|
-
├── jest.config.
|
|
190
|
-
├── eslint.config.
|
|
189
|
+
├── jest.config.cjs
|
|
190
|
+
├── eslint.config.mjs
|
|
191
191
|
└── SECURITY.md
|
|
192
192
|
```
|
|
193
193
|
|
|
@@ -220,13 +220,13 @@ This project is licensed under the MIT License - see the [LICENSE](./LICENSE) fi
|
|
|
220
220
|
|
|
221
221
|
## Support
|
|
222
222
|
|
|
223
|
-
- 🐛 [Report bugs](https://github.com/
|
|
224
|
-
- 💡 [Request features](https://github.com/
|
|
223
|
+
- 🐛 [Report bugs](https://github.com/akinevz2/pagerts/issues)
|
|
224
|
+
- 💡 [Request features](https://github.com/akinevz2/pagerts/issues)
|
|
225
225
|
- 🔒 [Report security issues](./SECURITY.md)
|
|
226
226
|
|
|
227
227
|
## Changelog
|
|
228
228
|
|
|
229
|
-
### v0.3.0 -> v1.4.
|
|
229
|
+
### v0.3.0 -> v1.4.3 summary
|
|
230
230
|
|
|
231
231
|
Key changes in this range:
|
|
232
232
|
|
|
@@ -234,7 +234,7 @@ Key changes in this range:
|
|
|
234
234
|
- CI/security gate tightening and scan-noise cleanup (`da73bdb`, `46875e8`).
|
|
235
235
|
- Packaging/runtime interoperability fixes for CJS/ESM builds and publishes (`4054ab9`, `74d3f98`, `64b2a2f`, `e67acd6`).
|
|
236
236
|
- Regression fix for ignored script resources (`bc13b55`).
|
|
237
|
-
- Dependency tree refresh/stabilization (`1f8f86d`) and release
|
|
237
|
+
- Dependency tree refresh/stabilization (`1f8f86d`) and release bumps through `v1.4.3`.
|
|
238
238
|
- General code hardening and cleanup across extractors/fetching/printers, plus lockfile and build artifact maintenance in the same span.
|
|
239
239
|
|
|
240
240
|
### v0.2.0
|
package/bin/main.js
CHANGED
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
// src/main.ts
|
|
4
4
|
import { Command, createArgument, Option } from "commander";
|
|
5
5
|
import { createRequire } from "node:module";
|
|
6
|
+
import { resolve } from "node:path";
|
|
7
|
+
import { fileURLToPath } from "node:url";
|
|
6
8
|
|
|
7
9
|
// src/extractors/AbstractExtractor.ts
|
|
8
10
|
var AbstractExtractor = class {
|
|
@@ -95,23 +97,25 @@ var PageFetcher = class {
|
|
|
95
97
|
}
|
|
96
98
|
}
|
|
97
99
|
async fetchPage(url, retryCount = 0) {
|
|
100
|
+
const controller = new AbortController();
|
|
101
|
+
let timeoutId = null;
|
|
98
102
|
try {
|
|
99
|
-
|
|
103
|
+
if (this.timeout > 0) {
|
|
104
|
+
timeoutId = setTimeout(() => {
|
|
105
|
+
controller.abort(new Error("Request timeout"));
|
|
106
|
+
}, this.timeout);
|
|
107
|
+
}
|
|
108
|
+
const content = await fetch(url, { signal: controller.signal }).then(async (response) => {
|
|
100
109
|
const buffer = await response.arrayBuffer();
|
|
101
110
|
const contentType = response.headers.get("content-type") ?? "";
|
|
102
111
|
const charsetMatch = /charset=([^\s;]+)/i.exec(contentType);
|
|
103
112
|
const html = this.decodeHtml(buffer, charsetMatch?.[1] ?? "utf-8");
|
|
104
113
|
return this.buildDOMResult(html, url);
|
|
105
114
|
});
|
|
106
|
-
const content = await (this.timeout > 0 ? Promise.race([
|
|
107
|
-
domPromise,
|
|
108
|
-
new Promise(
|
|
109
|
-
(_, reject) => setTimeout(() => reject(new Error("Request timeout")), this.timeout)
|
|
110
|
-
)
|
|
111
|
-
]) : domPromise);
|
|
112
115
|
return { url, content };
|
|
113
116
|
} catch (error) {
|
|
114
|
-
const
|
|
117
|
+
const abortTimeout = error instanceof Error && error.name === "AbortError";
|
|
118
|
+
const message = abortTimeout ? "Request timeout" : error instanceof Error ? error.message : "Unknown error";
|
|
115
119
|
if (retryCount < this.maxRetries && this.isRetryableError(message)) {
|
|
116
120
|
process.stderr.write(`Retrying ${url} (attempt ${retryCount + 1}/${this.maxRetries})...
|
|
117
121
|
`);
|
|
@@ -119,6 +123,10 @@ var PageFetcher = class {
|
|
|
119
123
|
return this.fetchPage(url, retryCount + 1);
|
|
120
124
|
}
|
|
121
125
|
return { url, error: `Failed to fetch: ${message}` };
|
|
126
|
+
} finally {
|
|
127
|
+
if (timeoutId !== null) {
|
|
128
|
+
clearTimeout(timeoutId);
|
|
129
|
+
}
|
|
122
130
|
}
|
|
123
131
|
}
|
|
124
132
|
isRetryableError(message) {
|
|
@@ -126,7 +134,7 @@ var PageFetcher = class {
|
|
|
126
134
|
return retryablePatterns.some((pattern) => pattern.test(message));
|
|
127
135
|
}
|
|
128
136
|
delay(ms) {
|
|
129
|
-
return new Promise((
|
|
137
|
+
return new Promise((resolve2) => setTimeout(resolve2, ms));
|
|
130
138
|
}
|
|
131
139
|
async fetchAll(urls) {
|
|
132
140
|
const responses = await Promise.all(urls.map((url) => this.fetchPage(url)));
|
|
@@ -268,7 +276,14 @@ async function buildPageMetadata(responses) {
|
|
|
268
276
|
}
|
|
269
277
|
return pageMetadatas;
|
|
270
278
|
}
|
|
271
|
-
|
|
279
|
+
function isCliEntrypoint() {
|
|
280
|
+
const invokedPath = process.argv[1];
|
|
281
|
+
if (!invokedPath) {
|
|
282
|
+
return false;
|
|
283
|
+
}
|
|
284
|
+
return fileURLToPath(import.meta.url) === resolve(invokedPath);
|
|
285
|
+
}
|
|
286
|
+
async function runCli(argv = process.argv) {
|
|
272
287
|
program.name(name).version(version, "-v, --version").description(description);
|
|
273
288
|
program.command("fetch", { isDefault: true }).description("fetch and extract resources from remote URL(s)").addArgument(urlArg).addOption(
|
|
274
289
|
new Option(
|
|
@@ -354,8 +369,17 @@ async function buildPageMetadata(responses) {
|
|
|
354
369
|
process.exit(1);
|
|
355
370
|
}
|
|
356
371
|
});
|
|
357
|
-
await program.parseAsync(
|
|
358
|
-
}
|
|
372
|
+
await program.parseAsync(argv);
|
|
373
|
+
}
|
|
374
|
+
if (isCliEntrypoint()) {
|
|
375
|
+
runCli().catch((error) => {
|
|
376
|
+
console.error("\n\u274C An error occurred:", error instanceof Error ? error.message : error);
|
|
377
|
+
process.exit(1);
|
|
378
|
+
});
|
|
379
|
+
}
|
|
380
|
+
export {
|
|
381
|
+
runCli
|
|
382
|
+
};
|
|
359
383
|
/**
|
|
360
384
|
* @license MIT
|
|
361
385
|
* We are interested in visualising a page as a collection of tags.
|
package/bin/main.js.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../src/main.ts", "../src/extractors/AbstractExtractor.ts", "../src/extractors/PageExtractor.ts", "../src/resource.ts", "../src/extractors/ResourceExtractor.ts", "../src/page/PageFetcher.ts", "../src/page/FileFetcher.ts", "../src/printers/AbstractResourcePrinter.ts", "../src/printers/JSONStylePrinter.ts", "../src/security.ts"],
|
|
4
|
-
"sourcesContent": ["#!/usr/bin/env node\nimport { Command, createArgument, Option } from 'commander';\nimport { createRequire } from 'node:module';\n\nimport { PageExtractor, ResourceExtractor } from './extractors/index.js';\nimport { FileFetcher, MAX_FILES_FAILSAFE, PageFetcher, type PageMetadata } from './page/index.js';\nimport { JSONStylePrinter } from './printers/index.js';\nimport { validateUrls } from './security.js';\n\nconst require = createRequire(import.meta.url);\nconst pkg = require('../package.json') as {\n description: string;\n name: string;\n version: string;\n};\n\nconst { description, name, version } = pkg;\n\nconst program = new Command();\n\nconst urlArg = createArgument('<url...>', 'remote https://URL to extract from');\nconst fileArg = createArgument('<paths...>', 'local file paths to extract from');\n\n// Shared extractor instances.\nconst pageExtractor = new PageExtractor();\nconst resourceExtractor = new ResourceExtractor(['a', 'meta', 'link', 'embed', 'script']);\nconst printer = new JSONStylePrinter();\n\nasync function buildPageMetadata(\n responses: Array<{\n url?: string;\n path?: string;\n content?: import('./page/index.js').DOMResult;\n error?: string;\n }>\n): Promise<PageMetadata[]> {\n const pageMetadatas: PageMetadata[] = [];\n\n for (const { content, url: responseUrl, path, error } of responses) {\n const resolvedUrl = responseUrl ?? path ?? '';\n const resources =\n error !== undefined || !content ? [] : await resourceExtractor.extract(content);\n const descriptor =\n error !== undefined || !content\n ? { url: resolvedUrl, error: error ?? 'Unknown error', resources }\n : await pageExtractor.extract(content);\n pageMetadatas.push({ ...descriptor, resources });\n }\n\n return pageMetadatas;\n}\n\n(async (): Promise<void> => {\n program.name(name).version(version, '-v, --version').description(description);\n\n // \u2500\u2500 fetch subcommand (default remote URL mode) \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n program\n .command('fetch', { isDefault: true })\n .description('fetch and extract resources from remote URL(s)')\n .addArgument(urlArg)\n .addOption(\n new Option(\n '--watch',\n 'keep running: SIGWINCH re-fetches after resize, Ctrl-D releases in-flight requests, Ctrl-C exits'\n )\n )\n .action(async (urls: string[], options: { watch: boolean }) => {\n try {\n const { validUrls, errors } = validateUrls(urls);\n\n if (errors.length > 0) {\n console.error('\\n\u274C URL Validation Errors:');\n errors.forEach(({ url: invalidUrl, error }) => {\n console.error(` - ${invalidUrl}: ${error}`);\n });\n }\n\n if (validUrls.length === 0) {\n console.error('\\n\u274C No valid URLs to process. Exiting.');\n process.exit(1);\n }\n\n console.error(`\\n\u2705 Processing ${validUrls.length} valid URL(s)...`);\n\n const pageFetcher = new PageFetcher(options.watch ? 0 : 10000, 2);\n\n const execute = async (): Promise<void> => {\n const responses = await pageFetcher.fetchAll(validUrls);\n const pageMetadatas = await buildPageMetadata(responses);\n await printer.print(...pageMetadatas);\n };\n\n if (options.watch) {\n process.stdin.resume();\n process.on('SIGINT', () => process.exit(0));\n\n let activeExecution: Promise<void> | null = null;\n process.stdin.on('end', () => {\n activeExecution = null;\n });\n\n let winchTimer: ReturnType<typeof setTimeout> | null = null;\n process.on('SIGWINCH', () => {\n if (winchTimer !== null) clearTimeout(winchTimer);\n winchTimer = setTimeout(() => {\n winchTimer = null;\n activeExecution = execute().catch((err: unknown) => {\n console.error('\\n\u274C An error occurred:', err instanceof Error ? err.message : err);\n });\n }, 150);\n });\n\n activeExecution = execute();\n await activeExecution;\n } else {\n await execute();\n }\n } catch (error) {\n console.error('\\n\u274C An error occurred:', error instanceof Error ? error.message : error);\n process.exit(1);\n }\n });\n\n // \u2500\u2500 file subcommand (local filesystem access) \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n program\n .command('file')\n .description('extract resources from local file(s) via direct filesystem access')\n .addArgument(fileArg)\n .addOption(\n new Option('--no-failsafe', `bypass the ${MAX_FILES_FAILSAFE}-file limit safety check`)\n )\n .action(async (paths: string[], options: { failsafe: boolean }) => {\n try {\n if (options.failsafe && paths.length > MAX_FILES_FAILSAFE) {\n console.error(\n `\\n\u274C ${paths.length} files specified exceeds the safety limit of ${MAX_FILES_FAILSAFE}.`\n );\n console.error(` Pass --no-failsafe to bypass this check and process all files.`);\n process.exit(1);\n }\n\n if (!options.failsafe && paths.length > MAX_FILES_FAILSAFE) {\n console.error(\n `\\n\u26A0\uFE0F Failsafe bypassed: processing ${paths.length} files (limit is ${MAX_FILES_FAILSAFE}).`\n );\n }\n\n console.error(`\\n\u2705 Processing ${paths.length} file(s)...`);\n\n const fileFetcher = new FileFetcher();\n const responses = await fileFetcher.fetchAll(paths);\n const pageMetadatas = await buildPageMetadata(\n responses.map(({ path, content, error }) => ({ path, content, error }))\n );\n\n await printer.print(...pageMetadatas);\n } catch (error) {\n console.error('\\n\u274C An error occurred:', error instanceof Error ? error.message : error);\n process.exit(1);\n }\n });\n\n await program.parseAsync(process.argv);\n})();\n", "export abstract class AbstractExtractor<V, R> {\n constructor(readonly name: string) {}\n abstract extract(value: V): Promise<R>;\n}\n", "import type { Page } from '../page/index.js';\nimport type { DOMResult } from '../page/index.js';\nimport { AbstractExtractor } from './AbstractExtractor.js';\n\nexport class PageExtractor extends AbstractExtractor<DOMResult, Page> {\n constructor() {\n super('page-extractor');\n }\n\n async extract(value: DOMResult): Promise<Page> {\n const {\n window: { document },\n url,\n } = value;\n return { title: document.title, url };\n }\n}\n", "/**\n * @license MIT\n * We are interested in visualising a page as a collection of tags.\n *\n * We wish to work with tags that can be compactly previewed on a webpage.\n * Here we must declare all of the element types that can be used to represent\n * a resource that can be hyperlinked off a webpage.\n */\ntype Tags = HTMLElementTagNameMap;\n\nexport const RESOURCE_DISPLAYABLE_KEYS = [\n 'id',\n 'innerText',\n 'textContent',\n 'class',\n 'ariaLabel',\n 'ariaDescription',\n 'alt',\n] as const;\n\nexport type DisplayableKey = (typeof RESOURCE_DISPLAYABLE_KEYS)[number];\n\nexport const RESOURCE_LINK_KEYS = ['href', 'data-src', 'target', 'action', 'src', 'url'] as const;\n\nexport type LinkKey = (typeof RESOURCE_LINK_KEYS)[number];\n\nexport type AttributeKey = DisplayableKey | LinkKey;\n\nexport type ResourceKey = { key: AttributeKey; value: string };\nexport type ResourceLink = { key: LinkKey; value: string };\n\nexport type ExternalResource = {\n text: ResourceKey;\n link: ResourceLink;\n};\n\nexport type Tag = keyof Tags;\n\nexport type Resource = HTMLElement & {\n [K in AttributeKey]?: string | null;\n};\n\nexport type ResourceByName<T extends keyof Tags> = Tags[T];\n\n// --- adapters ---\n\nconst readAttr = (element: Resource, key: AttributeKey): string | undefined => {\n const v = element.getAttribute(key);\n return v != null && v.trim() !== '' ? v : undefined;\n};\n\nexport function findResourceText(element: Resource): ResourceKey | undefined {\n for (const key of RESOURCE_DISPLAYABLE_KEYS) {\n const value = readAttr(element, key);\n if (value !== undefined) return { key, value };\n }\n return undefined;\n}\n\nexport function findResourceLink(element: Resource): ResourceLink | undefined {\n for (const key of RESOURCE_LINK_KEYS) {\n const value = readAttr(element, key);\n if (value !== undefined) return { key, value };\n }\n return undefined;\n}\n\nexport const isResourceKey = (key: string): key is AttributeKey =>\n (RESOURCE_DISPLAYABLE_KEYS as readonly string[]).includes(key) ||\n (RESOURCE_LINK_KEYS as readonly string[]).includes(key);\n", "import type { DOMResult } from '../page/index.js';\nimport {\n findResourceLink,\n findResourceText,\n type ExternalResource,\n type Resource,\n type Tag,\n} from '../resource.js';\nimport { AbstractExtractor } from './AbstractExtractor.js';\n\nexport class ResourceExtractor extends AbstractExtractor<DOMResult, ExternalResource[]> {\n constructor(private readonly tags: Tag[]) {\n super('page-extractor');\n }\n async extract(value: DOMResult): Promise<ExternalResource[]> {\n const { document } = value.window;\n return this.tags.flatMap((tag) =>\n Array.from(document.querySelectorAll<Resource>(tag)).flatMap((element) => {\n const link = findResourceLink(element);\n if (!link) return [];\n const text = findResourceText(element) ?? { key: 'src' as const, value: link.value };\n return [{ text, link }];\n })\n );\n }\n}\n", "import { parseHTML } from 'linkedom';\n\ntype ParseHTMLResult = {\n document: Document;\n};\n\nexport interface DOMResult {\n window: { document: Document };\n url: string;\n}\n\ninterface PageResponse {\n url: string;\n content?: DOMResult;\n error?: string;\n}\n\nexport class PageFetcher {\n private readonly timeout: number;\n private readonly maxRetries: number;\n\n constructor(timeout = 10000, maxRetries = 2) {\n this.timeout = timeout;\n this.maxRetries = maxRetries;\n }\n\n private buildDOMResult(html: string, url: string): DOMResult {\n const { document } = parseHTML(html) as ParseHTMLResult;\n return { window: { document }, url };\n }\n\n private decodeHtml(buffer: ArrayBuffer, charset: string): string {\n try {\n return new TextDecoder(charset).decode(new Uint8Array(buffer));\n } catch {\n return new TextDecoder('utf-8').decode(new Uint8Array(buffer));\n }\n }\n\n private async fetchPage(url: string, retryCount = 0): Promise<PageResponse> {\n try {\n const domPromise = fetch(url).then(async (response) => {\n const buffer = await response.arrayBuffer();\n const contentType = response.headers.get('content-type') ?? '';\n const charsetMatch = /charset=([^\\s;]+)/i.exec(contentType);\n const html = this.decodeHtml(buffer, charsetMatch?.[1] ?? 'utf-8');\n return this.buildDOMResult(html, url);\n });\n\n const content = await (this.timeout > 0\n ? Promise.race([\n domPromise,\n new Promise<never>((_, reject) =>\n setTimeout(() => reject(new Error('Request timeout')), this.timeout)\n ),\n ])\n : domPromise);\n\n return { url, content };\n } catch (error) {\n const message = error instanceof Error ? error.message : 'Unknown error';\n\n // Retry logic for transient errors\n if (retryCount < this.maxRetries && this.isRetryableError(message)) {\n process.stderr.write(`Retrying ${url} (attempt ${retryCount + 1}/${this.maxRetries})...\\n`);\n await this.delay(1000 * (retryCount + 1)); // Exponential backoff\n return this.fetchPage(url, retryCount + 1);\n }\n\n return { url, error: `Failed to fetch: ${message}` };\n }\n }\n\n private isRetryableError(message: string): boolean {\n const retryablePatterns = [/timeout/i, /ECONNRESET/i, /ETIMEDOUT/i, /ENOTFOUND/i, /network/i];\n return retryablePatterns.some((pattern) => pattern.test(message));\n }\n\n private delay(ms: number): Promise<void> {\n return new Promise((resolve) => setTimeout(resolve, ms));\n }\n\n async fetchAll(urls: string[]): Promise<PageResponse[]> {\n const responses = await Promise.all(urls.map((url) => this.fetchPage(url)));\n return responses.filter((response) => response.content !== undefined || response.error);\n }\n}\n", "import { readFile } from 'node:fs/promises';\nimport { parseHTML } from 'linkedom';\n\nimport type { DOMResult } from './PageFetcher.js';\n\nexport const MAX_FILES_FAILSAFE = 254;\n\ntype ParseHTMLResult = {\n document: Document;\n};\n\nexport interface FileResponse {\n path: string;\n content?: DOMResult;\n error?: string;\n}\n\nexport class FileFetcher {\n private buildDOMResult(html: string, filePath: string): DOMResult {\n const { document } = parseHTML(html) as ParseHTMLResult;\n return { window: { document }, url: `file://${filePath}` };\n }\n\n async fetchFile(filePath: string): Promise<FileResponse> {\n try {\n // filePath is supplied directly by the CLI user, not derived from network input.\n // eslint-disable-next-line security/detect-non-literal-fs-filename\n const html = await readFile(filePath, 'utf-8');\n return { path: filePath, content: this.buildDOMResult(html, filePath) };\n } catch (error) {\n return {\n path: filePath,\n error: error instanceof Error ? error.message : 'Unknown error',\n };\n }\n }\n\n async fetchAll(filePaths: string[]): Promise<FileResponse[]> {\n return Promise.all(filePaths.map((p) => this.fetchFile(p)));\n }\n}\n", "import type { PageMetadata } from '../page/index.js';\n\nexport abstract class AbstractResourcePrinter {\n constructor() {}\n abstract print(...pages: PageMetadata[]): void | Promise<void>;\n}\n", "import type { PageMetadata } from '../page/index.js';\nimport { AbstractResourcePrinter } from './AbstractResourcePrinter.js';\n\nexport class JSONStylePrinter extends AbstractResourcePrinter {\n print(...pages: PageMetadata[]): void | Promise<void> {\n const json = JSON.stringify(pages);\n process.stdout.write(json + '\\n');\n }\n}\n", "/**\n * Security utilities for URL validation and sanitization\n */\n\nconst ALLOWED_PROTOCOLS = ['http:', 'https:'];\nconst MAX_URL_LENGTH = 2048;\nconst SUSPICIOUS_PATTERNS = [\n /javascript:/i,\n /data:/i,\n /vbscript:/i,\n /<script/i,\n /on\\w+=/i, // Event handlers like onclick=\n];\n\nexport interface ValidationResult {\n isValid: boolean;\n error?: string;\n sanitizedUrl?: string;\n}\n\n/**\n * Validates a URL for security concerns\n * @param url - The URL to validate\n * @returns ValidationResult object with validation status\n */\nexport function validateUrl(url: string): ValidationResult {\n // Check if URL is empty or whitespace\n if (!url || !url.trim()) {\n return {\n isValid: false,\n error: 'URL cannot be empty',\n };\n }\n\n const trimmedUrl = url.trim();\n\n // Check URL length to prevent DoS\n if (trimmedUrl.length > MAX_URL_LENGTH) {\n return {\n isValid: false,\n error: `URL exceeds maximum length of ${MAX_URL_LENGTH} characters`,\n };\n }\n\n // Check for suspicious patterns\n for (const pattern of SUSPICIOUS_PATTERNS) {\n if (pattern.test(trimmedUrl)) {\n return {\n isValid: false,\n error: 'URL contains suspicious patterns',\n };\n }\n }\n\n // Parse the URL\n let parsedUrl: URL;\n try {\n parsedUrl = new URL(trimmedUrl);\n } catch {\n return {\n isValid: false,\n error: 'Invalid URL format',\n };\n }\n\n // Check protocol\n if (!ALLOWED_PROTOCOLS.includes(parsedUrl.protocol)) {\n return {\n isValid: false,\n error: `Protocol ${parsedUrl.protocol} is not allowed. Allowed protocols: ${ALLOWED_PROTOCOLS.join(', ')}`,\n };\n }\n\n // Check for localhost/internal IPs in production (security consideration)\n const hostname = parsedUrl.hostname.toLowerCase();\n const isLocalhost =\n hostname === 'localhost' ||\n hostname === '127.0.0.1' ||\n hostname === '::1' ||\n hostname.startsWith('192.168.') ||\n hostname.startsWith('10.') ||\n /^172\\.(1[6-9]|2\\d|3[01])\\./.test(hostname);\n\n if (isLocalhost) {\n // Allow but warn about localhost URLs\n console.warn(`Warning: Accessing local network resource: ${trimmedUrl}`);\n }\n\n return {\n isValid: true,\n sanitizedUrl: parsedUrl.toString(),\n };\n}\n\n/**\n * Validates an array of URLs\n * @param urls - Array of URLs to validate\n * @returns Object with valid URLs and errors\n */\nexport function validateUrls(urls: string[]): {\n validUrls: string[];\n errors: Array<{ url: string; error: string }>;\n} {\n const validUrls: string[] = [];\n const errors: Array<{ url: string; error: string }> = [];\n\n for (const url of urls) {\n const result = validateUrl(url);\n if (result.isValid && result.sanitizedUrl) {\n validUrls.push(result.sanitizedUrl);\n } else {\n errors.push({\n url,\n error: result.error || 'Unknown validation error',\n });\n }\n }\n\n return { validUrls, errors };\n}\n\n/**\n * Rate limiter to prevent abuse\n */\nexport class RateLimiter {\n private requests: number[] = [];\n private readonly maxRequests: number;\n private readonly windowMs: number;\n\n constructor(maxRequests = 10, windowMs = 60000) {\n this.maxRequests = maxRequests;\n this.windowMs = windowMs;\n }\n\n /**\n * Check if a request is allowed under rate limiting\n * @returns true if request is allowed, false otherwise\n */\n public isAllowed(): boolean {\n const now = Date.now();\n\n // Remove old requests outside the time window\n this.requests = this.requests.filter((time) => now - time < this.windowMs);\n\n if (this.requests.length >= this.maxRequests) {\n return false;\n }\n\n this.requests.push(now);\n return true;\n }\n\n /**\n * Get remaining requests in current window\n */\n public getRemainingRequests(): number {\n const now = Date.now();\n this.requests = this.requests.filter((time) => now - time < this.windowMs);\n return Math.max(0, this.maxRequests - this.requests.length);\n }\n}\n\n/**\n * Sanitizes HTML content to prevent XSS attacks\n * @param text - Text to sanitize\n * @returns Sanitized text\n */\nexport function sanitizeText(text: string): string {\n if (!text) return '';\n\n return text\n .replace(/</g, '<')\n .replace(/>/g, '>')\n .replace(/\"/g, '"')\n .replace(/'/g, ''')\n .replace(/\\//g, '/');\n}\n"],
|
|
5
|
-
"mappings": ";;;AACA,SAAS,SAAS,gBAAgB,cAAc;AAChD,SAAS,qBAAqB;;;
|
|
6
|
-
"names": ["name", "parseHTML", "require"]
|
|
4
|
+
"sourcesContent": ["#!/usr/bin/env node\nimport { Command, createArgument, Option } from 'commander';\nimport { createRequire } from 'node:module';\nimport { resolve } from 'node:path';\nimport { fileURLToPath } from 'node:url';\n\nimport { PageExtractor, ResourceExtractor } from './extractors/index.js';\nimport { FileFetcher, MAX_FILES_FAILSAFE, PageFetcher, type PageMetadata } from './page/index.js';\nimport { JSONStylePrinter } from './printers/index.js';\nimport { validateUrls } from './security.js';\n\nconst require = createRequire(import.meta.url);\nconst pkg = require('../package.json') as {\n description: string;\n name: string;\n version: string;\n};\n\nconst { description, name, version } = pkg;\n\nconst program = new Command();\n\nconst urlArg = createArgument('<url...>', 'remote https://URL to extract from');\nconst fileArg = createArgument('<paths...>', 'local file paths to extract from');\n\n// Shared extractor instances.\nconst pageExtractor = new PageExtractor();\nconst resourceExtractor = new ResourceExtractor(['a', 'meta', 'link', 'embed', 'script']);\nconst printer = new JSONStylePrinter();\n\nasync function buildPageMetadata(\n responses: Array<{\n url?: string;\n path?: string;\n content?: import('./page/index.js').DOMResult;\n error?: string;\n }>\n): Promise<PageMetadata[]> {\n const pageMetadatas: PageMetadata[] = [];\n\n for (const { content, url: responseUrl, path, error } of responses) {\n const resolvedUrl = responseUrl ?? path ?? '';\n const resources =\n error !== undefined || !content ? [] : await resourceExtractor.extract(content);\n const descriptor =\n error !== undefined || !content\n ? { url: resolvedUrl, error: error ?? 'Unknown error', resources }\n : await pageExtractor.extract(content);\n pageMetadatas.push({ ...descriptor, resources });\n }\n\n return pageMetadatas;\n}\n\nfunction isCliEntrypoint(): boolean {\n const invokedPath = process.argv[1];\n if (!invokedPath) {\n return false;\n }\n\n return fileURLToPath(import.meta.url) === resolve(invokedPath);\n}\n\nexport async function runCli(argv: string[] = process.argv): Promise<void> {\n program.name(name).version(version, '-v, --version').description(description);\n\n // \u2500\u2500 fetch subcommand (default remote URL mode) \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n program\n .command('fetch', { isDefault: true })\n .description('fetch and extract resources from remote URL(s)')\n .addArgument(urlArg)\n .addOption(\n new Option(\n '--watch',\n 'keep running: SIGWINCH re-fetches after resize, Ctrl-D releases in-flight requests, Ctrl-C exits'\n )\n )\n .action(async (urls: string[], options: { watch: boolean }) => {\n try {\n const { validUrls, errors } = validateUrls(urls);\n\n if (errors.length > 0) {\n console.error('\\n\u274C URL Validation Errors:');\n errors.forEach(({ url: invalidUrl, error }) => {\n console.error(` - ${invalidUrl}: ${error}`);\n });\n }\n\n if (validUrls.length === 0) {\n console.error('\\n\u274C No valid URLs to process. Exiting.');\n process.exit(1);\n }\n\n console.error(`\\n\u2705 Processing ${validUrls.length} valid URL(s)...`);\n\n const pageFetcher = new PageFetcher(options.watch ? 0 : 10000, 2);\n\n const execute = async (): Promise<void> => {\n const responses = await pageFetcher.fetchAll(validUrls);\n const pageMetadatas = await buildPageMetadata(responses);\n await printer.print(...pageMetadatas);\n };\n\n if (options.watch) {\n process.stdin.resume();\n process.on('SIGINT', () => process.exit(0));\n\n let activeExecution: Promise<void> | null = null;\n process.stdin.on('end', () => {\n activeExecution = null;\n });\n\n let winchTimer: ReturnType<typeof setTimeout> | null = null;\n process.on('SIGWINCH', () => {\n if (winchTimer !== null) clearTimeout(winchTimer);\n winchTimer = setTimeout(() => {\n winchTimer = null;\n activeExecution = execute().catch((err: unknown) => {\n console.error('\\n\u274C An error occurred:', err instanceof Error ? err.message : err);\n });\n }, 150);\n });\n\n activeExecution = execute();\n await activeExecution;\n } else {\n await execute();\n }\n } catch (error) {\n console.error('\\n\u274C An error occurred:', error instanceof Error ? error.message : error);\n process.exit(1);\n }\n });\n\n // \u2500\u2500 file subcommand (local filesystem access) \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n program\n .command('file')\n .description('extract resources from local file(s) via direct filesystem access')\n .addArgument(fileArg)\n .addOption(\n new Option('--no-failsafe', `bypass the ${MAX_FILES_FAILSAFE}-file limit safety check`)\n )\n .action(async (paths: string[], options: { failsafe: boolean }) => {\n try {\n if (options.failsafe && paths.length > MAX_FILES_FAILSAFE) {\n console.error(\n `\\n\u274C ${paths.length} files specified exceeds the safety limit of ${MAX_FILES_FAILSAFE}.`\n );\n console.error(` Pass --no-failsafe to bypass this check and process all files.`);\n process.exit(1);\n }\n\n if (!options.failsafe && paths.length > MAX_FILES_FAILSAFE) {\n console.error(\n `\\n\u26A0\uFE0F Failsafe bypassed: processing ${paths.length} files (limit is ${MAX_FILES_FAILSAFE}).`\n );\n }\n\n console.error(`\\n\u2705 Processing ${paths.length} file(s)...`);\n\n const fileFetcher = new FileFetcher();\n const responses = await fileFetcher.fetchAll(paths);\n const pageMetadatas = await buildPageMetadata(\n responses.map(({ path, content, error }) => ({ path, content, error }))\n );\n\n await printer.print(...pageMetadatas);\n } catch (error) {\n console.error('\\n\u274C An error occurred:', error instanceof Error ? error.message : error);\n process.exit(1);\n }\n });\n\n await program.parseAsync(argv);\n}\n\nif (isCliEntrypoint()) {\n runCli().catch((error: unknown) => {\n console.error('\\n\u274C An error occurred:', error instanceof Error ? error.message : error);\n process.exit(1);\n });\n}\n", "export abstract class AbstractExtractor<V, R> {\n constructor(readonly name: string) {}\n abstract extract(value: V): Promise<R>;\n}\n", "import type { Page } from '../page/index.js';\nimport type { DOMResult } from '../page/index.js';\nimport { AbstractExtractor } from './AbstractExtractor.js';\n\nexport class PageExtractor extends AbstractExtractor<DOMResult, Page> {\n constructor() {\n super('page-extractor');\n }\n\n async extract(value: DOMResult): Promise<Page> {\n const {\n window: { document },\n url,\n } = value;\n return { title: document.title, url };\n }\n}\n", "/**\n * @license MIT\n * We are interested in visualising a page as a collection of tags.\n *\n * We wish to work with tags that can be compactly previewed on a webpage.\n * Here we must declare all of the element types that can be used to represent\n * a resource that can be hyperlinked off a webpage.\n */\ntype Tags = HTMLElementTagNameMap;\n\nexport const RESOURCE_DISPLAYABLE_KEYS = [\n 'id',\n 'innerText',\n 'textContent',\n 'class',\n 'ariaLabel',\n 'ariaDescription',\n 'alt',\n] as const;\n\nexport type DisplayableKey = (typeof RESOURCE_DISPLAYABLE_KEYS)[number];\n\nexport const RESOURCE_LINK_KEYS = ['href', 'data-src', 'target', 'action', 'src', 'url'] as const;\n\nexport type LinkKey = (typeof RESOURCE_LINK_KEYS)[number];\n\nexport type AttributeKey = DisplayableKey | LinkKey;\n\nexport type ResourceKey = { key: AttributeKey; value: string };\nexport type ResourceLink = { key: LinkKey; value: string };\n\nexport type ExternalResource = {\n text: ResourceKey;\n link: ResourceLink;\n};\n\nexport type Tag = keyof Tags;\n\nexport type Resource = HTMLElement & {\n [K in AttributeKey]?: string | null;\n};\n\nexport type ResourceByName<T extends keyof Tags> = Tags[T];\n\n// --- adapters ---\n\nconst readAttr = (element: Resource, key: AttributeKey): string | undefined => {\n const v = element.getAttribute(key);\n return v != null && v.trim() !== '' ? v : undefined;\n};\n\nexport function findResourceText(element: Resource): ResourceKey | undefined {\n for (const key of RESOURCE_DISPLAYABLE_KEYS) {\n const value = readAttr(element, key);\n if (value !== undefined) return { key, value };\n }\n return undefined;\n}\n\nexport function findResourceLink(element: Resource): ResourceLink | undefined {\n for (const key of RESOURCE_LINK_KEYS) {\n const value = readAttr(element, key);\n if (value !== undefined) return { key, value };\n }\n return undefined;\n}\n\nexport const isResourceKey = (key: string): key is AttributeKey =>\n (RESOURCE_DISPLAYABLE_KEYS as readonly string[]).includes(key) ||\n (RESOURCE_LINK_KEYS as readonly string[]).includes(key);\n", "import type { DOMResult } from '../page/index.js';\nimport {\n findResourceLink,\n findResourceText,\n type ExternalResource,\n type Resource,\n type Tag,\n} from '../resource.js';\nimport { AbstractExtractor } from './AbstractExtractor.js';\n\nexport class ResourceExtractor extends AbstractExtractor<DOMResult, ExternalResource[]> {\n constructor(private readonly tags: Tag[]) {\n super('page-extractor');\n }\n async extract(value: DOMResult): Promise<ExternalResource[]> {\n const { document } = value.window;\n return this.tags.flatMap((tag) =>\n Array.from(document.querySelectorAll<Resource>(tag)).flatMap((element) => {\n const link = findResourceLink(element);\n if (!link) return [];\n const text = findResourceText(element) ?? { key: 'src' as const, value: link.value };\n return [{ text, link }];\n })\n );\n }\n}\n", "import { parseHTML } from 'linkedom';\n\ntype ParseHTMLResult = {\n document: Document;\n};\n\nexport interface DOMResult {\n window: { document: Document };\n url: string;\n}\n\ninterface PageResponse {\n url: string;\n content?: DOMResult;\n error?: string;\n}\n\nexport class PageFetcher {\n private readonly timeout: number;\n private readonly maxRetries: number;\n\n constructor(timeout = 10000, maxRetries = 2) {\n this.timeout = timeout;\n this.maxRetries = maxRetries;\n }\n\n private buildDOMResult(html: string, url: string): DOMResult {\n const { document } = parseHTML(html) as ParseHTMLResult;\n return { window: { document }, url };\n }\n\n private decodeHtml(buffer: ArrayBuffer, charset: string): string {\n try {\n return new TextDecoder(charset).decode(new Uint8Array(buffer));\n } catch {\n return new TextDecoder('utf-8').decode(new Uint8Array(buffer));\n }\n }\n\n private async fetchPage(url: string, retryCount = 0): Promise<PageResponse> {\n const controller = new AbortController();\n let timeoutId: ReturnType<typeof setTimeout> | null = null;\n\n try {\n if (this.timeout > 0) {\n timeoutId = setTimeout(() => {\n controller.abort(new Error('Request timeout'));\n }, this.timeout);\n }\n\n const content = await fetch(url, { signal: controller.signal }).then(async (response) => {\n const buffer = await response.arrayBuffer();\n const contentType = response.headers.get('content-type') ?? '';\n const charsetMatch = /charset=([^\\s;]+)/i.exec(contentType);\n const html = this.decodeHtml(buffer, charsetMatch?.[1] ?? 'utf-8');\n return this.buildDOMResult(html, url);\n });\n\n return { url, content };\n } catch (error) {\n const abortTimeout = error instanceof Error && error.name === 'AbortError';\n const message = abortTimeout\n ? 'Request timeout'\n : error instanceof Error\n ? error.message\n : 'Unknown error';\n\n // Retry logic for transient errors\n if (retryCount < this.maxRetries && this.isRetryableError(message)) {\n process.stderr.write(`Retrying ${url} (attempt ${retryCount + 1}/${this.maxRetries})...\\n`);\n await this.delay(1000 * (retryCount + 1)); // Exponential backoff\n return this.fetchPage(url, retryCount + 1);\n }\n\n return { url, error: `Failed to fetch: ${message}` };\n } finally {\n if (timeoutId !== null) {\n clearTimeout(timeoutId);\n }\n }\n }\n\n private isRetryableError(message: string): boolean {\n const retryablePatterns = [/timeout/i, /ECONNRESET/i, /ETIMEDOUT/i, /ENOTFOUND/i, /network/i];\n return retryablePatterns.some((pattern) => pattern.test(message));\n }\n\n private delay(ms: number): Promise<void> {\n return new Promise((resolve) => setTimeout(resolve, ms));\n }\n\n async fetchAll(urls: string[]): Promise<PageResponse[]> {\n const responses = await Promise.all(urls.map((url) => this.fetchPage(url)));\n return responses.filter((response) => response.content !== undefined || response.error);\n }\n}\n", "import { readFile } from 'node:fs/promises';\nimport { parseHTML } from 'linkedom';\n\nimport type { DOMResult } from './PageFetcher.js';\n\nexport const MAX_FILES_FAILSAFE = 254;\n\ntype ParseHTMLResult = {\n document: Document;\n};\n\nexport interface FileResponse {\n path: string;\n content?: DOMResult;\n error?: string;\n}\n\nexport class FileFetcher {\n private buildDOMResult(html: string, filePath: string): DOMResult {\n const { document } = parseHTML(html) as ParseHTMLResult;\n return { window: { document }, url: `file://${filePath}` };\n }\n\n async fetchFile(filePath: string): Promise<FileResponse> {\n try {\n // filePath is supplied directly by the CLI user, not derived from network input.\n // eslint-disable-next-line security/detect-non-literal-fs-filename\n const html = await readFile(filePath, 'utf-8');\n return { path: filePath, content: this.buildDOMResult(html, filePath) };\n } catch (error) {\n return {\n path: filePath,\n error: error instanceof Error ? error.message : 'Unknown error',\n };\n }\n }\n\n async fetchAll(filePaths: string[]): Promise<FileResponse[]> {\n return Promise.all(filePaths.map((p) => this.fetchFile(p)));\n }\n}\n", "import type { PageMetadata } from '../page/index.js';\n\nexport abstract class AbstractResourcePrinter {\n constructor() {}\n abstract print(...pages: PageMetadata[]): void | Promise<void>;\n}\n", "import type { PageMetadata } from '../page/index.js';\nimport { AbstractResourcePrinter } from './AbstractResourcePrinter.js';\n\nexport class JSONStylePrinter extends AbstractResourcePrinter {\n print(...pages: PageMetadata[]): void | Promise<void> {\n const json = JSON.stringify(pages);\n process.stdout.write(json + '\\n');\n }\n}\n", "/**\n * Security utilities for URL validation and sanitization\n */\n\nconst ALLOWED_PROTOCOLS = ['http:', 'https:'];\nconst MAX_URL_LENGTH = 2048;\nconst SUSPICIOUS_PATTERNS = [\n /javascript:/i,\n /data:/i,\n /vbscript:/i,\n /<script/i,\n /on\\w+=/i, // Event handlers like onclick=\n];\n\nexport interface ValidationResult {\n isValid: boolean;\n error?: string;\n sanitizedUrl?: string;\n}\n\n/**\n * Validates a URL for security concerns\n * @param url - The URL to validate\n * @returns ValidationResult object with validation status\n */\nexport function validateUrl(url: string): ValidationResult {\n // Check if URL is empty or whitespace\n if (!url || !url.trim()) {\n return {\n isValid: false,\n error: 'URL cannot be empty',\n };\n }\n\n const trimmedUrl = url.trim();\n\n // Check URL length to prevent DoS\n if (trimmedUrl.length > MAX_URL_LENGTH) {\n return {\n isValid: false,\n error: `URL exceeds maximum length of ${MAX_URL_LENGTH} characters`,\n };\n }\n\n // Check for suspicious patterns\n for (const pattern of SUSPICIOUS_PATTERNS) {\n if (pattern.test(trimmedUrl)) {\n return {\n isValid: false,\n error: 'URL contains suspicious patterns',\n };\n }\n }\n\n // Parse the URL\n let parsedUrl: URL;\n try {\n parsedUrl = new URL(trimmedUrl);\n } catch {\n return {\n isValid: false,\n error: 'Invalid URL format',\n };\n }\n\n // Check protocol\n if (!ALLOWED_PROTOCOLS.includes(parsedUrl.protocol)) {\n return {\n isValid: false,\n error: `Protocol ${parsedUrl.protocol} is not allowed. Allowed protocols: ${ALLOWED_PROTOCOLS.join(', ')}`,\n };\n }\n\n // Check for localhost/internal IPs in production (security consideration)\n const hostname = parsedUrl.hostname.toLowerCase();\n const isLocalhost =\n hostname === 'localhost' ||\n hostname === '127.0.0.1' ||\n hostname === '::1' ||\n hostname.startsWith('192.168.') ||\n hostname.startsWith('10.') ||\n /^172\\.(1[6-9]|2\\d|3[01])\\./.test(hostname);\n\n if (isLocalhost) {\n // Allow but warn about localhost URLs\n console.warn(`Warning: Accessing local network resource: ${trimmedUrl}`);\n }\n\n return {\n isValid: true,\n sanitizedUrl: parsedUrl.toString(),\n };\n}\n\n/**\n * Validates an array of URLs\n * @param urls - Array of URLs to validate\n * @returns Object with valid URLs and errors\n */\nexport function validateUrls(urls: string[]): {\n validUrls: string[];\n errors: Array<{ url: string; error: string }>;\n} {\n const validUrls: string[] = [];\n const errors: Array<{ url: string; error: string }> = [];\n\n for (const url of urls) {\n const result = validateUrl(url);\n if (result.isValid && result.sanitizedUrl) {\n validUrls.push(result.sanitizedUrl);\n } else {\n errors.push({\n url,\n error: result.error || 'Unknown validation error',\n });\n }\n }\n\n return { validUrls, errors };\n}\n\n/**\n * Rate limiter to prevent abuse\n */\nexport class RateLimiter {\n private requests: number[] = [];\n private readonly maxRequests: number;\n private readonly windowMs: number;\n\n constructor(maxRequests = 10, windowMs = 60000) {\n this.maxRequests = maxRequests;\n this.windowMs = windowMs;\n }\n\n /**\n * Check if a request is allowed under rate limiting\n * @returns true if request is allowed, false otherwise\n */\n public isAllowed(): boolean {\n const now = Date.now();\n\n // Remove old requests outside the time window\n this.requests = this.requests.filter((time) => now - time < this.windowMs);\n\n if (this.requests.length >= this.maxRequests) {\n return false;\n }\n\n this.requests.push(now);\n return true;\n }\n\n /**\n * Get remaining requests in current window\n */\n public getRemainingRequests(): number {\n const now = Date.now();\n this.requests = this.requests.filter((time) => now - time < this.windowMs);\n return Math.max(0, this.maxRequests - this.requests.length);\n }\n}\n\n/**\n * Sanitizes HTML content to prevent XSS attacks\n * @param text - Text to sanitize\n * @returns Sanitized text\n */\nexport function sanitizeText(text: string): string {\n if (!text) return '';\n\n return text\n .replace(/</g, '<')\n .replace(/>/g, '>')\n .replace(/\"/g, '"')\n .replace(/'/g, ''')\n .replace(/\\//g, '/');\n}\n"],
|
|
5
|
+
"mappings": ";;;AACA,SAAS,SAAS,gBAAgB,cAAc;AAChD,SAAS,qBAAqB;AAC9B,SAAS,eAAe;AACxB,SAAS,qBAAqB;;;ACJvB,IAAe,oBAAf,MAAuC;AAAA,EAC5C,YAAqBA,OAAc;AAAd,gBAAAA;AAAA,EAAe;AAEtC;;;ACCO,IAAM,gBAAN,cAA4B,kBAAmC;AAAA,EACpE,cAAc;AACZ,UAAM,gBAAgB;AAAA,EACxB;AAAA,EAEA,MAAM,QAAQ,OAAiC;AAC7C,UAAM;AAAA,MACJ,QAAQ,EAAE,SAAS;AAAA,MACnB;AAAA,IACF,IAAI;AACJ,WAAO,EAAE,OAAO,SAAS,OAAO,IAAI;AAAA,EACtC;AACF;;;ACNO,IAAM,4BAA4B;AAAA,EACvC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAIO,IAAM,qBAAqB,CAAC,QAAQ,YAAY,UAAU,UAAU,OAAO,KAAK;AAwBvF,IAAM,WAAW,CAAC,SAAmB,QAA0C;AAC7E,QAAM,IAAI,QAAQ,aAAa,GAAG;AAClC,SAAO,KAAK,QAAQ,EAAE,KAAK,MAAM,KAAK,IAAI;AAC5C;AAEO,SAAS,iBAAiB,SAA4C;AAC3E,aAAW,OAAO,2BAA2B;AAC3C,UAAM,QAAQ,SAAS,SAAS,GAAG;AACnC,QAAI,UAAU,OAAW,QAAO,EAAE,KAAK,MAAM;AAAA,EAC/C;AACA,SAAO;AACT;AAEO,SAAS,iBAAiB,SAA6C;AAC5E,aAAW,OAAO,oBAAoB;AACpC,UAAM,QAAQ,SAAS,SAAS,GAAG;AACnC,QAAI,UAAU,OAAW,QAAO,EAAE,KAAK,MAAM;AAAA,EAC/C;AACA,SAAO;AACT;;;ACvDO,IAAM,oBAAN,cAAgC,kBAAiD;AAAA,EACtF,YAA6B,MAAa;AACxC,UAAM,gBAAgB;AADK;AAAA,EAE7B;AAAA,EACA,MAAM,QAAQ,OAA+C;AAC3D,UAAM,EAAE,SAAS,IAAI,MAAM;AAC3B,WAAO,KAAK,KAAK;AAAA,MAAQ,CAAC,QACxB,MAAM,KAAK,SAAS,iBAA2B,GAAG,CAAC,EAAE,QAAQ,CAAC,YAAY;AACxE,cAAM,OAAO,iBAAiB,OAAO;AACrC,YAAI,CAAC,KAAM,QAAO,CAAC;AACnB,cAAM,OAAO,iBAAiB,OAAO,KAAK,EAAE,KAAK,OAAgB,OAAO,KAAK,MAAM;AACnF,eAAO,CAAC,EAAE,MAAM,KAAK,CAAC;AAAA,MACxB,CAAC;AAAA,IACH;AAAA,EACF;AACF;;;ACzBA,SAAS,iBAAiB;AAiBnB,IAAM,cAAN,MAAkB;AAAA,EACN;AAAA,EACA;AAAA,EAEjB,YAAY,UAAU,KAAO,aAAa,GAAG;AAC3C,SAAK,UAAU;AACf,SAAK,aAAa;AAAA,EACpB;AAAA,EAEQ,eAAe,MAAc,KAAwB;AAC3D,UAAM,EAAE,SAAS,IAAI,UAAU,IAAI;AACnC,WAAO,EAAE,QAAQ,EAAE,SAAS,GAAG,IAAI;AAAA,EACrC;AAAA,EAEQ,WAAW,QAAqB,SAAyB;AAC/D,QAAI;AACF,aAAO,IAAI,YAAY,OAAO,EAAE,OAAO,IAAI,WAAW,MAAM,CAAC;AAAA,IAC/D,QAAQ;AACN,aAAO,IAAI,YAAY,OAAO,EAAE,OAAO,IAAI,WAAW,MAAM,CAAC;AAAA,IAC/D;AAAA,EACF;AAAA,EAEA,MAAc,UAAU,KAAa,aAAa,GAA0B;AAC1E,UAAM,aAAa,IAAI,gBAAgB;AACvC,QAAI,YAAkD;AAEtD,QAAI;AACF,UAAI,KAAK,UAAU,GAAG;AACpB,oBAAY,WAAW,MAAM;AAC3B,qBAAW,MAAM,IAAI,MAAM,iBAAiB,CAAC;AAAA,QAC/C,GAAG,KAAK,OAAO;AAAA,MACjB;AAEA,YAAM,UAAU,MAAM,MAAM,KAAK,EAAE,QAAQ,WAAW,OAAO,CAAC,EAAE,KAAK,OAAO,aAAa;AACvF,cAAM,SAAS,MAAM,SAAS,YAAY;AAC1C,cAAM,cAAc,SAAS,QAAQ,IAAI,cAAc,KAAK;AAC5D,cAAM,eAAe,qBAAqB,KAAK,WAAW;AAC1D,cAAM,OAAO,KAAK,WAAW,QAAQ,eAAe,CAAC,KAAK,OAAO;AACjE,eAAO,KAAK,eAAe,MAAM,GAAG;AAAA,MACtC,CAAC;AAED,aAAO,EAAE,KAAK,QAAQ;AAAA,IACxB,SAAS,OAAO;AACd,YAAM,eAAe,iBAAiB,SAAS,MAAM,SAAS;AAC9D,YAAM,UAAU,eACZ,oBACA,iBAAiB,QACf,MAAM,UACN;AAGN,UAAI,aAAa,KAAK,cAAc,KAAK,iBAAiB,OAAO,GAAG;AAClE,gBAAQ,OAAO,MAAM,YAAY,GAAG,aAAa,aAAa,CAAC,IAAI,KAAK,UAAU;AAAA,CAAQ;AAC1F,cAAM,KAAK,MAAM,OAAQ,aAAa,EAAE;AACxC,eAAO,KAAK,UAAU,KAAK,aAAa,CAAC;AAAA,MAC3C;AAEA,aAAO,EAAE,KAAK,OAAO,oBAAoB,OAAO,GAAG;AAAA,IACrD,UAAE;AACA,UAAI,cAAc,MAAM;AACtB,qBAAa,SAAS;AAAA,MACxB;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,iBAAiB,SAA0B;AACjD,UAAM,oBAAoB,CAAC,YAAY,eAAe,cAAc,cAAc,UAAU;AAC5F,WAAO,kBAAkB,KAAK,CAAC,YAAY,QAAQ,KAAK,OAAO,CAAC;AAAA,EAClE;AAAA,EAEQ,MAAM,IAA2B;AACvC,WAAO,IAAI,QAAQ,CAACC,aAAY,WAAWA,UAAS,EAAE,CAAC;AAAA,EACzD;AAAA,EAEA,MAAM,SAAS,MAAyC;AACtD,UAAM,YAAY,MAAM,QAAQ,IAAI,KAAK,IAAI,CAAC,QAAQ,KAAK,UAAU,GAAG,CAAC,CAAC;AAC1E,WAAO,UAAU,OAAO,CAAC,aAAa,SAAS,YAAY,UAAa,SAAS,KAAK;AAAA,EACxF;AACF;;;AC/FA,SAAS,gBAAgB;AACzB,SAAS,aAAAC,kBAAiB;AAInB,IAAM,qBAAqB;AAY3B,IAAM,cAAN,MAAkB;AAAA,EACf,eAAe,MAAc,UAA6B;AAChE,UAAM,EAAE,SAAS,IAAIA,WAAU,IAAI;AACnC,WAAO,EAAE,QAAQ,EAAE,SAAS,GAAG,KAAK,UAAU,QAAQ,GAAG;AAAA,EAC3D;AAAA,EAEA,MAAM,UAAU,UAAyC;AACvD,QAAI;AAGF,YAAM,OAAO,MAAM,SAAS,UAAU,OAAO;AAC7C,aAAO,EAAE,MAAM,UAAU,SAAS,KAAK,eAAe,MAAM,QAAQ,EAAE;AAAA,IACxE,SAAS,OAAO;AACd,aAAO;AAAA,QACL,MAAM;AAAA,QACN,OAAO,iBAAiB,QAAQ,MAAM,UAAU;AAAA,MAClD;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,SAAS,WAA8C;AAC3D,WAAO,QAAQ,IAAI,UAAU,IAAI,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC;AAAA,EAC5D;AACF;;;ACtCO,IAAe,0BAAf,MAAuC;AAAA,EAC5C,cAAc;AAAA,EAAC;AAEjB;;;ACFO,IAAM,mBAAN,cAA+B,wBAAwB;AAAA,EAC5D,SAAS,OAA6C;AACpD,UAAM,OAAO,KAAK,UAAU,KAAK;AACjC,YAAQ,OAAO,MAAM,OAAO,IAAI;AAAA,EAClC;AACF;;;ACJA,IAAM,oBAAoB,CAAC,SAAS,QAAQ;AAC5C,IAAM,iBAAiB;AACvB,IAAM,sBAAsB;AAAA,EAC1B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AACF;AAaO,SAAS,YAAY,KAA+B;AAEzD,MAAI,CAAC,OAAO,CAAC,IAAI,KAAK,GAAG;AACvB,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO;AAAA,IACT;AAAA,EACF;AAEA,QAAM,aAAa,IAAI,KAAK;AAG5B,MAAI,WAAW,SAAS,gBAAgB;AACtC,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO,iCAAiC,cAAc;AAAA,IACxD;AAAA,EACF;AAGA,aAAW,WAAW,qBAAqB;AACzC,QAAI,QAAQ,KAAK,UAAU,GAAG;AAC5B,aAAO;AAAA,QACL,SAAS;AAAA,QACT,OAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAGA,MAAI;AACJ,MAAI;AACF,gBAAY,IAAI,IAAI,UAAU;AAAA,EAChC,QAAQ;AACN,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO;AAAA,IACT;AAAA,EACF;AAGA,MAAI,CAAC,kBAAkB,SAAS,UAAU,QAAQ,GAAG;AACnD,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO,YAAY,UAAU,QAAQ,uCAAuC,kBAAkB,KAAK,IAAI,CAAC;AAAA,IAC1G;AAAA,EACF;AAGA,QAAM,WAAW,UAAU,SAAS,YAAY;AAChD,QAAM,cACJ,aAAa,eACb,aAAa,eACb,aAAa,SACb,SAAS,WAAW,UAAU,KAC9B,SAAS,WAAW,KAAK,KACzB,6BAA6B,KAAK,QAAQ;AAE5C,MAAI,aAAa;AAEf,YAAQ,KAAK,8CAA8C,UAAU,EAAE;AAAA,EACzE;AAEA,SAAO;AAAA,IACL,SAAS;AAAA,IACT,cAAc,UAAU,SAAS;AAAA,EACnC;AACF;AAOO,SAAS,aAAa,MAG3B;AACA,QAAM,YAAsB,CAAC;AAC7B,QAAM,SAAgD,CAAC;AAEvD,aAAW,OAAO,MAAM;AACtB,UAAM,SAAS,YAAY,GAAG;AAC9B,QAAI,OAAO,WAAW,OAAO,cAAc;AACzC,gBAAU,KAAK,OAAO,YAAY;AAAA,IACpC,OAAO;AACL,aAAO,KAAK;AAAA,QACV;AAAA,QACA,OAAO,OAAO,SAAS;AAAA,MACzB,CAAC;AAAA,IACH;AAAA,EACF;AAEA,SAAO,EAAE,WAAW,OAAO;AAC7B;;;AT5GA,IAAMC,WAAU,cAAc,YAAY,GAAG;AAC7C,IAAM,MAAMA,SAAQ,iBAAiB;AAMrC,IAAM,EAAE,aAAa,MAAM,QAAQ,IAAI;AAEvC,IAAM,UAAU,IAAI,QAAQ;AAE5B,IAAM,SAAS,eAAe,YAAY,oCAAoC;AAC9E,IAAM,UAAU,eAAe,cAAc,kCAAkC;AAG/E,IAAM,gBAAgB,IAAI,cAAc;AACxC,IAAM,oBAAoB,IAAI,kBAAkB,CAAC,KAAK,QAAQ,QAAQ,SAAS,QAAQ,CAAC;AACxF,IAAM,UAAU,IAAI,iBAAiB;AAErC,eAAe,kBACb,WAMyB;AACzB,QAAM,gBAAgC,CAAC;AAEvC,aAAW,EAAE,SAAS,KAAK,aAAa,MAAM,MAAM,KAAK,WAAW;AAClE,UAAM,cAAc,eAAe,QAAQ;AAC3C,UAAM,YACJ,UAAU,UAAa,CAAC,UAAU,CAAC,IAAI,MAAM,kBAAkB,QAAQ,OAAO;AAChF,UAAM,aACJ,UAAU,UAAa,CAAC,UACpB,EAAE,KAAK,aAAa,OAAO,SAAS,iBAAiB,UAAU,IAC/D,MAAM,cAAc,QAAQ,OAAO;AACzC,kBAAc,KAAK,EAAE,GAAG,YAAY,UAAU,CAAC;AAAA,EACjD;AAEA,SAAO;AACT;AAEA,SAAS,kBAA2B;AAClC,QAAM,cAAc,QAAQ,KAAK,CAAC;AAClC,MAAI,CAAC,aAAa;AAChB,WAAO;AAAA,EACT;AAEA,SAAO,cAAc,YAAY,GAAG,MAAM,QAAQ,WAAW;AAC/D;AAEA,eAAsB,OAAO,OAAiB,QAAQ,MAAqB;AACzE,UAAQ,KAAK,IAAI,EAAE,QAAQ,SAAS,eAAe,EAAE,YAAY,WAAW;AAG5E,UACG,QAAQ,SAAS,EAAE,WAAW,KAAK,CAAC,EACpC,YAAY,gDAAgD,EAC5D,YAAY,MAAM,EAClB;AAAA,IACC,IAAI;AAAA,MACF;AAAA,MACA;AAAA,IACF;AAAA,EACF,EACC,OAAO,OAAO,MAAgB,YAAgC;AAC7D,QAAI;AACF,YAAM,EAAE,WAAW,OAAO,IAAI,aAAa,IAAI;AAE/C,UAAI,OAAO,SAAS,GAAG;AACrB,gBAAQ,MAAM,iCAA4B;AAC1C,eAAO,QAAQ,CAAC,EAAE,KAAK,YAAY,MAAM,MAAM;AAC7C,kBAAQ,MAAM,OAAO,UAAU,KAAK,KAAK,EAAE;AAAA,QAC7C,CAAC;AAAA,MACH;AAEA,UAAI,UAAU,WAAW,GAAG;AAC1B,gBAAQ,MAAM,6CAAwC;AACtD,gBAAQ,KAAK,CAAC;AAAA,MAChB;AAEA,cAAQ,MAAM;AAAA,oBAAkB,UAAU,MAAM,kBAAkB;AAElE,YAAM,cAAc,IAAI,YAAY,QAAQ,QAAQ,IAAI,KAAO,CAAC;AAEhE,YAAM,UAAU,YAA2B;AACzC,cAAM,YAAY,MAAM,YAAY,SAAS,SAAS;AACtD,cAAM,gBAAgB,MAAM,kBAAkB,SAAS;AACvD,cAAM,QAAQ,MAAM,GAAG,aAAa;AAAA,MACtC;AAEA,UAAI,QAAQ,OAAO;AACjB,gBAAQ,MAAM,OAAO;AACrB,gBAAQ,GAAG,UAAU,MAAM,QAAQ,KAAK,CAAC,CAAC;AAE1C,YAAI,kBAAwC;AAC5C,gBAAQ,MAAM,GAAG,OAAO,MAAM;AAC5B,4BAAkB;AAAA,QACpB,CAAC;AAED,YAAI,aAAmD;AACvD,gBAAQ,GAAG,YAAY,MAAM;AAC3B,cAAI,eAAe,KAAM,cAAa,UAAU;AAChD,uBAAa,WAAW,MAAM;AAC5B,yBAAa;AACb,8BAAkB,QAAQ,EAAE,MAAM,CAAC,QAAiB;AAClD,sBAAQ,MAAM,+BAA0B,eAAe,QAAQ,IAAI,UAAU,GAAG;AAAA,YAClF,CAAC;AAAA,UACH,GAAG,GAAG;AAAA,QACR,CAAC;AAED,0BAAkB,QAAQ;AAC1B,cAAM;AAAA,MACR,OAAO;AACL,cAAM,QAAQ;AAAA,MAChB;AAAA,IACF,SAAS,OAAO;AACd,cAAQ,MAAM,+BAA0B,iBAAiB,QAAQ,MAAM,UAAU,KAAK;AACtF,cAAQ,KAAK,CAAC;AAAA,IAChB;AAAA,EACF,CAAC;AAGH,UACG,QAAQ,MAAM,EACd,YAAY,mEAAmE,EAC/E,YAAY,OAAO,EACnB;AAAA,IACC,IAAI,OAAO,iBAAiB,cAAc,kBAAkB,0BAA0B;AAAA,EACxF,EACC,OAAO,OAAO,OAAiB,YAAmC;AACjE,QAAI;AACF,UAAI,QAAQ,YAAY,MAAM,SAAS,oBAAoB;AACzD,gBAAQ;AAAA,UACN;AAAA,SAAO,MAAM,MAAM,gDAAgD,kBAAkB;AAAA,QACvF;AACA,gBAAQ,MAAM,mEAAmE;AACjF,gBAAQ,KAAK,CAAC;AAAA,MAChB;AAEA,UAAI,CAAC,QAAQ,YAAY,MAAM,SAAS,oBAAoB;AAC1D,gBAAQ;AAAA,UACN;AAAA,8CAAuC,MAAM,MAAM,oBAAoB,kBAAkB;AAAA,QAC3F;AAAA,MACF;AAEA,cAAQ,MAAM;AAAA,oBAAkB,MAAM,MAAM,aAAa;AAEzD,YAAM,cAAc,IAAI,YAAY;AACpC,YAAM,YAAY,MAAM,YAAY,SAAS,KAAK;AAClD,YAAM,gBAAgB,MAAM;AAAA,QAC1B,UAAU,IAAI,CAAC,EAAE,MAAM,SAAS,MAAM,OAAO,EAAE,MAAM,SAAS,MAAM,EAAE;AAAA,MACxE;AAEA,YAAM,QAAQ,MAAM,GAAG,aAAa;AAAA,IACtC,SAAS,OAAO;AACd,cAAQ,MAAM,+BAA0B,iBAAiB,QAAQ,MAAM,UAAU,KAAK;AACtF,cAAQ,KAAK,CAAC;AAAA,IAChB;AAAA,EACF,CAAC;AAEH,QAAM,QAAQ,WAAW,IAAI;AAC/B;AAEA,IAAI,gBAAgB,GAAG;AACrB,SAAO,EAAE,MAAM,CAAC,UAAmB;AACjC,YAAQ,MAAM,+BAA0B,iBAAiB,QAAQ,MAAM,UAAU,KAAK;AACtF,YAAQ,KAAK,CAAC;AAAA,EAChB,CAAC;AACH;",
|
|
6
|
+
"names": ["name", "resolve", "parseHTML", "require"]
|
|
7
7
|
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pagerts",
|
|
3
3
|
"description": "A tool for viewing external relations in a webpage",
|
|
4
|
-
"version": "1.
|
|
4
|
+
"version": "1.5.1",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "main.js",
|
|
7
7
|
"bin": {
|
|
@@ -48,18 +48,24 @@
|
|
|
48
48
|
"linkedom": "^0.18.9"
|
|
49
49
|
},
|
|
50
50
|
"devDependencies": {
|
|
51
|
-
"@
|
|
52
|
-
"@
|
|
51
|
+
"@swc/core": "^1.15.33",
|
|
52
|
+
"@swc/jest": "^0.2.39",
|
|
53
|
+
"@types/jest": "^30.0.0",
|
|
54
|
+
"@types/node": "^25.8.0",
|
|
53
55
|
"@typescript-eslint/eslint-plugin": "^8.20.0",
|
|
54
56
|
"@typescript-eslint/parser": "^8.20.0",
|
|
55
57
|
"esbuild": "^0.25.1",
|
|
56
58
|
"eslint": "^9.18.0",
|
|
57
59
|
"eslint-config-prettier": "^9.1.0",
|
|
58
60
|
"eslint-plugin-security": "^3.0.1",
|
|
59
|
-
"jest": "^
|
|
61
|
+
"jest": "^30.4.2",
|
|
60
62
|
"prettier": "^3.4.2",
|
|
61
|
-
"ts-jest": "^29.2.5",
|
|
62
63
|
"tsx": "^4.19.2",
|
|
63
64
|
"typescript": "^5.7.2"
|
|
65
|
+
},
|
|
66
|
+
"overrides": {
|
|
67
|
+
"babel-plugin-istanbul": "^8.0.0",
|
|
68
|
+
"test-exclude": "^8.0.0",
|
|
69
|
+
"glob": "^13.0.6"
|
|
64
70
|
}
|
|
65
71
|
}
|