pagerts 1.0.3 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/bin/main.js +6 -6
- package/bin/main.js.map +3 -3
- package/package.json +6 -6
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# PagerTS
|
|
2
2
|
|
|
3
|
-
[](https://github.com/akinevz2/pagerts/actions/workflows/ci.yml)
|
|
4
4
|
[](./SECURITY.md)
|
|
5
5
|
[](https://nodejs.org)
|
|
6
6
|
[](./LICENSE)
|
|
@@ -216,7 +216,7 @@ This project is licensed under the MIT License - see the [LICENSE](./LICENSE) fi
|
|
|
216
216
|
|
|
217
217
|
## Author
|
|
218
218
|
|
|
219
|
-
**Kirill
|
|
219
|
+
**Kirill <kine> Nevzorov**
|
|
220
220
|
|
|
221
221
|
## Support
|
|
222
222
|
|
package/bin/main.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import{Command as
|
|
3
|
-
`)
|
|
4
|
-
`)
|
|
5
|
-
|
|
6
|
-
\u274C URL Validation Errors:`),r.forEach(({url:n,error:l})=>{console.error(` - ${n}: ${l}`)})),e.length===0&&(console.error(`
|
|
2
|
+
import{Command as q,createArgument as _,Option as $}from"commander";var w={name:"pagerts",description:"A tool for viewing external relations in a webpage",version:"1.3.0",type:"module",main:"main.js",bin:{pagerts:"bin/main.js"},files:["bin"],engines:{node:">=18.0.0"},scripts:{test:"jest --coverage","test:watch":"jest --watch",build:"esbuild src/main.ts --bundle --packages=external --outdir=bin --minify --sourcemap --platform=node --format=esm",lint:"eslint src/**/*.ts","lint:fix":"eslint src/**/*.ts --fix","type-check":"tsc --noEmit",format:'prettier --write "src/**/*.ts"',"format:check":'prettier --check "src/**/*.ts"',"security:audit":"npm audit --audit-level=moderate","security:check":"npm run security:audit && npm run lint",start:"node ./bin/main.js",dev:"tsx src/main.ts",prepare:"npm run build"},keywords:["webpage","hierarchy","management","web-scraping","cli","url-extraction"],author:"Kirill <kine> Nevzorov",license:"MIT",bugs:{url:"https://github.com/akinevz2/pagerts/issues"},homepage:"https://github.com/akinevz2/pagerts",dependencies:{"@exodus/bytes":"^1.15.0",commander:"^12.1.0",linkedom:"^0.18.9"},devDependencies:{"@types/jest":"^29.5.14","@types/node":"^22.10.5","@typescript-eslint/eslint-plugin":"^8.20.0","@typescript-eslint/parser":"^8.20.0",esbuild:"^0.25.1",eslint:"^9.18.0","eslint-config-prettier":"^9.1.0","eslint-plugin-security":"^3.0.1",jest:"^29.7.0",prettier:"^3.4.2","ts-jest":"^29.2.5",tsx:"^4.19.2",typescript:"^5.7.2"}};var u=class{constructor(t){this.name=t}};var d=class extends u{constructor(){super("page-extractor")}async extract(t){let{window:{document:e},url:r}=t;return{title:e.title,url:r}}};var L=["id","innerText","textContent","class","ariaLabel","ariaDescription","alt"],k=["href","data-src","target","action","src","url"],P=(s,t)=>{let e=s.getAttribute(t);return e!=null&&e.trim()!==""?e:void 0};function b(s){for(let t of L){let e=P(s,t);if(e!==void 0)return{key:t,value:e}}}function v(s){for(let t of k){let e=P(s,t);if(e!==void 0)return{key:t,value:e}}}var g=class extends u{constructor(e){super("page-extractor");this.tags=e}async extract(e){let{document:r}=e.window;return this.tags.flatMap(o=>Array.from(r.querySelectorAll(o)).flatMap(i=>{let a=v(i);return a?[{text:b(i)??{key:"src",value:a.value},link:a}]:[]}))}};import{readFile as O}from"fs/promises";import{parseHTML as D}from"linkedom";import{legacyHookDecode as S}from"@exodus/bytes/encoding.js";var f=class{timeout;maxRetries;constructor(t=1e4,e=2){this.timeout=t,this.maxRetries=e}buildDOMResult(t,e){let{document:r}=D(t);return{window:{document:r},url:e}}async fetchPage(t,e=0){try{let r;t.startsWith("file://")?r=O(t.substring(7),"utf-8").then(i=>this.buildDOMResult(i,t)):r=fetch(t).then(async i=>{let a=await i.arrayBuffer(),x=i.headers.get("content-type")??"",m=/charset=([^\s;]+)/i.exec(x),n=S(new Uint8Array(a),m?.[1]??"utf-8");return this.buildDOMResult(n,t)});let o=await(this.timeout>0?Promise.race([r,new Promise((i,a)=>setTimeout(()=>a(new Error("Request timeout")),this.timeout))]):r);return{url:t,content:o}}catch(r){let o=r instanceof Error?r.message:"Unknown error";return e<this.maxRetries&&this.isRetryableError(o)?(process.stderr.write(`Retrying ${t} (attempt ${e+1}/${this.maxRetries})...
|
|
3
|
+
`),await this.delay(1e3*(e+1)),this.fetchPage(t,e+1)):{url:t,error:`Failed to fetch: ${o}`}}}isRetryableError(t){return[/timeout/i,/ECONNRESET/i,/ETIMEDOUT/i,/ENOTFOUND/i,/network/i].some(r=>r.test(t))}delay(t){return new Promise(e=>setTimeout(e,t))}async fetchAll(t){return(await Promise.all(t.map(r=>this.fetchPage(r)))).filter(r=>r.content!==void 0||r.error)}};var p=class{constructor(){}};var y=class extends p{print(...t){let e=JSON.stringify(t);process.stdout.write(e+`
|
|
4
|
+
`)}};var E=["http:","https:","file:"];var K=[/javascript:/i,/data:/i,/vbscript:/i,/<script/i,/on\w+=/i];function N(s){if(!s||!s.trim())return{isValid:!1,error:"URL cannot be empty"};let t=s.trim();if(t.length>2048)return{isValid:!1,error:"URL exceeds maximum length of 2048 characters"};for(let i of K)if(i.test(t))return{isValid:!1,error:"URL contains suspicious patterns"};let e;try{e=new URL(t)}catch{return t.startsWith("file://")?{isValid:!0,sanitizedUrl:t}:{isValid:!1,error:"Invalid URL format"}}if(!E.includes(e.protocol))return{isValid:!1,error:`Protocol ${e.protocol} is not allowed. Allowed protocols: ${E.join(", ")}`};let r=e.hostname.toLowerCase();return(r==="localhost"||r==="127.0.0.1"||r==="::1"||r.startsWith("192.168.")||r.startsWith("10.")||/^172\.(1[6-9]|2\d|3[01])\./.test(r))&&e.protocol!=="file:"&&console.warn(`Warning: Accessing local network resource: ${t}`),{isValid:!0,sanitizedUrl:e.toString()}}function M(s){let t=[],e=[];for(let r of s){let o=N(r);o.isValid&&o.sanitizedUrl?t.push(o.sanitizedUrl):e.push({url:r,error:o.error||"Unknown validation error"})}return{validUrls:t,errors:e}}var{description:I,name:V,version:C}=w,z=new q,F=_("<url | file...>","remote https://URL or local file://resource.html to extract from");(async()=>await z.name(V).version(C,"-v, --version").description(I).addArgument(F).addOption(new $("--watch","keep running: SIGWINCH re-fetches after resize, Ctrl-D releases in-flight requests, Ctrl-C exits")).action(async(s,t)=>{try{let{validUrls:e,errors:r}=M(s);r.length>0&&(console.error(`
|
|
5
|
+
\u274C URL Validation Errors:`),r.forEach(({url:n,error:c})=>{console.error(` - ${n}: ${c}`)})),e.length===0&&(console.error(`
|
|
7
6
|
\u274C No valid URLs to process. Exiting.`),process.exit(1)),console.error(`
|
|
8
|
-
\u2705 Processing ${e.length} valid URL(s)...`);let
|
|
7
|
+
\u2705 Processing ${e.length} valid URL(s)...`);let o=new y,i=new f(t.watch?0:1e4,2),a=new d,x=new g(["a","meta","link","embed","script"]),m=async()=>{let n=await i.fetchAll(e),c=[];for(let{content:l,url:T,error:h}of n){let R=h!==void 0||!l?[]:await x.extract(l),U=h!==void 0||!l?{url:T,error:h??"Unknown error",resources:R}:await a.extract(l);c.push({...U,resources:R})}await o.print(...c)};if(t.watch){process.stdin.resume(),process.on("SIGINT",()=>{process.exit(0)});let n=null;process.stdin.on("end",()=>{n=null});let c=null;process.on("SIGWINCH",()=>{c!==null&&clearTimeout(c),c=setTimeout(()=>{c=null,n=m().catch(l=>{console.error(`
|
|
8
|
+
\u274C An error occurred:`,l instanceof Error?l.message:l)})},150)}),n=m(),await n}else await m()}catch(e){console.error(`
|
|
9
9
|
\u274C An error occurred:`,e instanceof Error?e.message:e),process.exit(1)}}).parseAsync(process.argv))();
|
|
10
10
|
/**
|
|
11
11
|
* @license MIT
|
package/bin/main.js.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../src/main.ts", "../package.json", "../src/extractors/AbstractExtractor.ts", "../src/extractors/PageExtractor.ts", "../src/resource.ts", "../src/extractors/ResourceExtractor.ts", "../src/page/PageFetcher.ts", "../src/printers/AbstractResourcePrinter.ts", "../src/printers/JSONStylePrinter.ts", "../src/security.ts"],
|
|
4
|
-
"sourcesContent": ["#!/usr/bin/env node\nimport { Command, createArgument } from 'commander';\n\nimport pkg from '../package.json' with { type: 'json' };\nimport { PageExtractor, ResourceExtractor } from './extractors/index.js';\nimport { PageFetcher, type PageMetadata } from './page/index.js';\nimport { JSONStylePrinter } from './printers/index.js';\nimport { validateUrls } from './security.js';\n\nconst { description, name, version } = pkg;\n\nconst program = new Command();\n\nconst url = createArgument(\n '<url | file...>',\n 'remote https://URL or local file://resource.html to extract from'\n);\n\n(async (): Promise<void> => {\n await program\n .name(name)\n .version(version, '-v, --version')\n .description(description)\n .addArgument(url)\n .action(async (urls: string[]) => {\n try {\n // Validate URLs first\n const { validUrls, errors } = validateUrls(urls);\n\n // Report validation errors\n if (errors.length > 0) {\n console.error('\\n\u274C URL Validation Errors:');\n errors.forEach(({ url: invalidUrl, error }) => {\n console.error(` - ${invalidUrl}: ${error}`);\n });\n }\n\n // Exit if no valid URLs\n if (validUrls.length === 0) {\n console.error('\\n\u274C No valid URLs to process. Exiting.');\n process.exit(1);\n }\n\n console.error(`\\n\u2705 Processing ${validUrls.length} valid URL(s)...`);\n\n const printer = new JSONStylePrinter();\n const pageFetcher = new PageFetcher();\n const pageExtractor = new PageExtractor();\n const resourceExtractor = new ResourceExtractor(['a', 'meta', 'link', 'embed']);\n\n const pageResponses = await pageFetcher.fetchAll(validUrls);\n const pageMetadatas: PageMetadata[] = [];\n\n for (const { content, url: responseUrl, error } of pageResponses) {\n const resources =\n error !== undefined || !content ? [] : await resourceExtractor.extract(content);\n const descriptor =\n error !== undefined || !content\n ? { url: responseUrl, error: error ?? 'Unknown error', resources }\n : await pageExtractor.extract(content);\n pageMetadatas.push({ ...descriptor, resources });\n }\n\n await printer.print(...pageMetadatas);\n } catch (error) {\n console.error('\\n\u274C An error occurred:', error instanceof Error ? error.message : error);\n process.exit(1);\n }\n })\n .parseAsync(process.argv);\n})();\n", "{\r\n \"name\": \"pagerts\",\r\n \"description\": \"A tool for viewing external relations in a webpage\",\r\n \"version\": \"1.0.3\",\r\n \"type\": \"module\",\r\n \"main\": \"main.js\",\r\n \"bin\": {\r\n \"pagerts\": \"bin/main.js\"\r\n },\r\n \"files\": [\r\n \"bin\"\r\n ],\r\n \"engines\": {\r\n \"node\": \">=18.0.0\"\r\n },\r\n \"scripts\": {\r\n \"test\": \"jest --coverage\",\r\n \"test:watch\": \"jest --watch\",\r\n \"build\": \"esbuild src/main.ts --bundle --packages=external --outdir=bin --minify --sourcemap --platform=node --format=esm\",\r\n \"lint\": \"eslint src/**/*.ts\",\r\n \"lint:fix\": \"eslint src/**/*.ts --fix\",\r\n \"type-check\": \"tsc --noEmit\",\r\n \"format\": \"prettier --write \\\"src/**/*.ts\\\"\",\r\n \"format:check\": \"prettier --check \\\"src/**/*.ts\\\"\",\r\n \"security:audit\": \"npm audit --audit-level=moderate\",\r\n \"security:check\": \"npm run security:audit && npm run lint\",\r\n \"start\": \"node ./bin/main.js\",\r\n \"dev\": \"tsx src/main.ts\",\r\n \"prepare\": \"npm run build\"\r\n },\r\n \"keywords\": [\r\n \"webpage\",\r\n \"hierarchy\",\r\n \"management\",\r\n \"web-scraping\",\r\n \"cli\",\r\n \"url-extraction\"\r\n ],\r\n \"author\": \"Kirill kn253 Nevzorov\",\r\n \"license\": \"MIT\",\r\n \"bugs\": {\r\n \"url\": \"https://github.com/akinevz0/pagerts/issues\"\r\n },\r\n \"homepage\": \"https://github.com/akinevz0/pagerts\",\r\n \"dependencies\": {\r\n \"commander\": \"^12.1.0\",\r\n \"jsdom\": \"^25.0.1\"\r\n },\r\n \"devDependencies\": {\r\n \"@types/jest\": \"^29.5.14\",\r\n \"@types/jsdom\": \"^21.1.7\",\r\n \"@types/node\": \"^22.10.5\",\r\n \"@typescript-eslint/eslint-plugin\": \"^8.20.0\",\r\n \"@typescript-eslint/parser\": \"^8.20.0\",\r\n \"esbuild\": \"^0.25.1\",\r\n \"eslint\": \"^9.18.0\",\r\n \"eslint-config-prettier\": \"^9.1.0\",\r\n \"eslint-plugin-security\": \"^3.0.1\",\r\n \"jest\": \"^29.7.0\",\r\n \"prettier\": \"^3.4.2\",\r\n \"ts-jest\": \"^29.2.5\",\r\n \"tsx\": \"^4.19.2\",\r\n \"typescript\": \"^5.7.2\"\r\n }\r\n}", "export abstract class AbstractExtractor<V, R> {\n constructor(readonly name: string) {}\n abstract extract(value: V): Promise<R>;\n}\n", "import type { Page } from '../page/index.js';\nimport { JSDOM } from 'jsdom';\nimport { AbstractExtractor } from './AbstractExtractor.js';\n\nexport class PageExtractor extends AbstractExtractor<JSDOM, Page> {\n constructor() {\n super('page-extractor');\n }\n\n async extract(value: JSDOM): Promise<Page> {\n const {\n window: {\n document: {\n title,\n location: { href: url },\n },\n },\n } = value;\n return { title, url };\n }\n}\n", "/**\n * @license MIT\n * We are interested in visualising a page as a collection of tags.\n *\n * We wish to work with tags that can be compactly previewed on a webpage.\n * Here we must declare all of the element types that can be used to represent\n * a resource that can be hyperlinked off a webpage.\n */\ntype Tags = HTMLElementTagNameMap;\n\nfunction findDefinedKey(element: Resource, keys: LinkKey[]): LinkKey | undefined {\n for (const key of keys) {\n if (isKeyDefined(key, element)) {\n return key;\n }\n }\n\n return undefined;\n}\n\nexport const RESOURCE_DISPLAYABLE_KEYS = [\n 'id',\n 'innerText',\n 'textContent',\n 'class',\n 'ariaLabel',\n 'ariaDescription',\n 'alt',\n] as const;\n\nexport type DisplayableKey = (typeof RESOURCE_DISPLAYABLE_KEYS)[number];\n\nexport type ResourceKey = {\n key: DisplayableKey;\n value: string;\n};\n\nexport const RESOURCE_LINK_KEYS = ['href', 'data-src', 'target', 'action', 'src', 'url'] as const;\n\nexport type LinkKey = (typeof RESOURCE_LINK_KEYS)[number];\n\nexport type ResourceLink = {\n key: LinkKey;\n url: string;\n};\n\nexport function findResourceText(element: Resource): ResourceKey | undefined {\n for (const key of RESOURCE_DISPLAYABLE_KEYS) {\n const value = element[key];\n if (value && typeof value === 'string' && value.trim() !== '') return { key, value };\n }\n\n return undefined;\n}\n\nexport function findResourceLink(element: Resource): ResourceLink | undefined {\n const key = findDefinedKey(element, [...RESOURCE_LINK_KEYS]);\n if (!key) {\n return undefined;\n }\n\n const url = element[key];\n if (url && typeof url === 'string' && url.trim() !== '') return { key, url };\n\n return undefined;\n}\n\nexport type ExternalResource = {\n text: ResourceKey;\n link: ResourceLink;\n};\n\nexport const isResourceKey = (key: string): key is LinkKey => key in RESOURCE_LINK_KEYS;\n\nexport const isKeyDefined = (key: DisplayableKey | LinkKey, element: Resource): boolean =>\n key in element && element[key] !== undefined;\n\nexport type ResourceElement<T, U> = {\n [K in keyof T]: U extends keyof T[K] ? T[K] : never;\n}[keyof T];\n\nexport type Tag = keyof Tags;\n\nexport type Resource = HTMLElement & {\n [K in DisplayableKey | LinkKey]?: string | null;\n};\n\nexport type ResourceByName<T extends keyof Tags> = Tags[T];\n", "import type { JSDOM } from 'jsdom';\nimport {\n findResourceLink,\n findResourceText,\n type ExternalResource,\n type Resource,\n type Tag,\n} from '../resource.js';\nimport { AbstractExtractor } from './AbstractExtractor.js';\n\nexport class ResourceExtractor extends AbstractExtractor<JSDOM, ExternalResource[]> {\n constructor(private readonly tags: Tag[]) {\n super('page-extractor');\n }\n async extract(value: JSDOM): Promise<ExternalResource[]> {\n const { document } = value.window;\n const externalResources: ExternalResource[] = [];\n for (const tag of this.tags) {\n const selector = document.querySelectorAll<Resource>(tag);\n const elements = Array.from(selector);\n for (const element of elements) {\n const text = findResourceText(element);\n const link = findResourceLink(element);\n if (!text || !link) continue;\n if (!link.url.startsWith('http')) continue;\n externalResources.push({ text, link });\n }\n }\n return externalResources;\n }\n}\n", "import { JSDOM, VirtualConsole } from 'jsdom';\n\ninterface PageResponse {\n url: string;\n content?: JSDOM;\n error?: string;\n}\n\nexport class PageFetcher {\n private readonly timeout: number;\n private readonly maxRetries: number;\n\n constructor(timeout = 10000, maxRetries = 2) {\n this.timeout = timeout;\n this.maxRetries = maxRetries;\n }\n\n private async fetchPage(url: string, retryCount = 0): Promise<PageResponse> {\n const virtualConsole = new VirtualConsole().on('jsdomError', (error: Error) => {\n process.stderr.write(`Error parsing ${url}: ${error.message}\\n`);\n });\n\n try {\n let dom: Promise<JSDOM>;\n\n if (url.startsWith('file://')) {\n dom = JSDOM.fromFile(url.substring(7), { virtualConsole });\n } else {\n // Add timeout and security options for remote URLs\n dom = JSDOM.fromURL(url, {\n virtualConsole,\n resources: 'usable',\n runScripts: 'outside-only', // More secure - don't execute page scripts\n beforeParse(window) {\n // Prevent infinite loops and resource exhaustion\n window.setTimeout = (() => {\n throw new Error('setTimeout disabled for security');\n }) as typeof window.setTimeout;\n window.setInterval = (() => {\n throw new Error('setInterval disabled for security');\n }) as typeof window.setInterval;\n },\n });\n }\n\n const content = await Promise.race([\n dom,\n new Promise<never>((_, reject) =>\n setTimeout(() => reject(new Error('Request timeout')), this.timeout)\n ),\n ]);\n\n return { url, content };\n } catch (error) {\n const message = error instanceof Error ? error.message : 'Unknown error';\n\n // Retry logic for transient errors\n if (retryCount < this.maxRetries && this.isRetryableError(message)) {\n process.stderr.write(`Retrying ${url} (attempt ${retryCount + 1}/${this.maxRetries})...\\n`);\n await this.delay(1000 * (retryCount + 1)); // Exponential backoff\n return this.fetchPage(url, retryCount + 1);\n }\n\n return { url, error: `Failed to fetch: ${message}` };\n }\n }\n\n private isRetryableError(message: string): boolean {\n const retryablePatterns = [/timeout/i, /ECONNRESET/i, /ETIMEDOUT/i, /ENOTFOUND/i, /network/i];\n return retryablePatterns.some((pattern) => pattern.test(message));\n }\n\n private delay(ms: number): Promise<void> {\n return new Promise((resolve) => setTimeout(resolve, ms));\n }\n\n async fetchAll(urls: string[]): Promise<PageResponse[]> {\n const responses = await Promise.all(urls.map((url) => this.fetchPage(url)));\n return responses.filter((response) => response.content !== undefined || response.error);\n }\n}\n", "import type { PageMetadata } from '../page/index.js';\n\nexport abstract class AbstractResourcePrinter {\n constructor() {}\n abstract print(...pages: PageMetadata[]): void | Promise<void>;\n}\n", "import type { PageMetadata } from '../page/index.js';\nimport { AbstractResourcePrinter } from './AbstractResourcePrinter.js';\n\nexport class JSONStylePrinter extends AbstractResourcePrinter {\n print(...pages: PageMetadata[]): void | Promise<void> {\n const json = JSON.stringify(pages);\n process.stdout.write(json + '\\n');\n }\n}\n", "/**\n * Security utilities for URL validation and sanitization\n */\n\nconst ALLOWED_PROTOCOLS = ['http:', 'https:', 'file:'];\nconst MAX_URL_LENGTH = 2048;\nconst SUSPICIOUS_PATTERNS = [\n /javascript:/i,\n /data:/i,\n /vbscript:/i,\n /<script/i,\n /on\\w+=/i, // Event handlers like onclick=\n];\n\nexport interface ValidationResult {\n isValid: boolean;\n error?: string;\n sanitizedUrl?: string;\n}\n\n/**\n * Validates a URL for security concerns\n * @param url - The URL to validate\n * @returns ValidationResult object with validation status\n */\nexport function validateUrl(url: string): ValidationResult {\n // Check if URL is empty or whitespace\n if (!url || !url.trim()) {\n return {\n isValid: false,\n error: 'URL cannot be empty',\n };\n }\n\n const trimmedUrl = url.trim();\n\n // Check URL length to prevent DoS\n if (trimmedUrl.length > MAX_URL_LENGTH) {\n return {\n isValid: false,\n error: `URL exceeds maximum length of ${MAX_URL_LENGTH} characters`,\n };\n }\n\n // Check for suspicious patterns\n for (const pattern of SUSPICIOUS_PATTERNS) {\n if (pattern.test(trimmedUrl)) {\n return {\n isValid: false,\n error: 'URL contains suspicious patterns',\n };\n }\n }\n\n // Parse the URL\n let parsedUrl: URL;\n try {\n parsedUrl = new URL(trimmedUrl);\n } catch (error) {\n // If URL parsing fails, it might be a file path\n if (trimmedUrl.startsWith('file://')) {\n return {\n isValid: true,\n sanitizedUrl: trimmedUrl,\n };\n }\n return {\n isValid: false,\n error: 'Invalid URL format',\n };\n }\n\n // Check protocol\n if (!ALLOWED_PROTOCOLS.includes(parsedUrl.protocol)) {\n return {\n isValid: false,\n error: `Protocol ${parsedUrl.protocol} is not allowed. Allowed protocols: ${ALLOWED_PROTOCOLS.join(', ')}`,\n };\n }\n\n // Check for localhost/internal IPs in production (security consideration)\n const hostname = parsedUrl.hostname.toLowerCase();\n const isLocalhost =\n hostname === 'localhost' ||\n hostname === '127.0.0.1' ||\n hostname === '::1' ||\n hostname.startsWith('192.168.') ||\n hostname.startsWith('10.') ||\n /^172\\.(1[6-9]|2\\d|3[01])\\./.test(hostname);\n\n if (isLocalhost && parsedUrl.protocol !== 'file:') {\n // Allow but warn about localhost URLs\n console.warn(`Warning: Accessing local network resource: ${trimmedUrl}`);\n }\n\n return {\n isValid: true,\n sanitizedUrl: parsedUrl.toString(),\n };\n}\n\n/**\n * Validates an array of URLs\n * @param urls - Array of URLs to validate\n * @returns Object with valid URLs and errors\n */\nexport function validateUrls(urls: string[]): {\n validUrls: string[];\n errors: Array<{ url: string; error: string }>;\n} {\n const validUrls: string[] = [];\n const errors: Array<{ url: string; error: string }> = [];\n\n for (const url of urls) {\n const result = validateUrl(url);\n if (result.isValid && result.sanitizedUrl) {\n validUrls.push(result.sanitizedUrl);\n } else {\n errors.push({\n url,\n error: result.error || 'Unknown validation error',\n });\n }\n }\n\n return { validUrls, errors };\n}\n\n/**\n * Rate limiter to prevent abuse\n */\nexport class RateLimiter {\n private requests: number[] = [];\n private readonly maxRequests: number;\n private readonly windowMs: number;\n\n constructor(maxRequests = 10, windowMs = 60000) {\n this.maxRequests = maxRequests;\n this.windowMs = windowMs;\n }\n\n /**\n * Check if a request is allowed under rate limiting\n * @returns true if request is allowed, false otherwise\n */\n public isAllowed(): boolean {\n const now = Date.now();\n\n // Remove old requests outside the time window\n this.requests = this.requests.filter((time) => now - time < this.windowMs);\n\n if (this.requests.length >= this.maxRequests) {\n return false;\n }\n\n this.requests.push(now);\n return true;\n }\n\n /**\n * Get remaining requests in current window\n */\n public getRemainingRequests(): number {\n const now = Date.now();\n this.requests = this.requests.filter((time) => now - time < this.windowMs);\n return Math.max(0, this.maxRequests - this.requests.length);\n }\n}\n\n/**\n * Sanitizes HTML content to prevent XSS attacks\n * @param text - Text to sanitize\n * @returns Sanitized text\n */\nexport function sanitizeText(text: string): string {\n if (!text) return '';\n\n return text\n .replace(/</g, '<')\n .replace(/>/g, '>')\n .replace(/\"/g, '"')\n .replace(/'/g, ''')\n .replace(/\\//g, '/');\n}\n"],
|
|
5
|
-
"mappings": ";AACA,OAAS,WAAAA,EAAS,kBAAAC,
|
|
6
|
-
"names": ["Command", "createArgument", "package_default", "AbstractExtractor", "name", "PageExtractor", "AbstractExtractor", "value", "
|
|
4
|
+
"sourcesContent": ["#!/usr/bin/env node\nimport { Command, createArgument, Option } from 'commander';\n\nimport pkg from '../package.json' with { type: 'json' };\nimport { PageExtractor, ResourceExtractor } from './extractors/index.js';\nimport { PageFetcher, type PageMetadata } from './page/index.js';\nimport { JSONStylePrinter } from './printers/index.js';\nimport { validateUrls } from './security.js';\n\nconst { description, name, version } = pkg;\n\nconst program = new Command();\n\nconst url = createArgument(\n '<url | file...>',\n 'remote https://URL or local file://resource.html to extract from'\n);\n\n(async (): Promise<void> => {\n await program\n .name(name)\n .version(version, '-v, --version')\n .description(description)\n .addArgument(url)\n .addOption(new Option('--watch', 'keep running: SIGWINCH re-fetches after resize, Ctrl-D releases in-flight requests, Ctrl-C exits'))\n .action(async (urls: string[], options: { watch: boolean }) => {\n try {\n // Validate URLs first\n const { validUrls, errors } = validateUrls(urls);\n\n // Report validation errors\n if (errors.length > 0) {\n console.error('\\n\u274C URL Validation Errors:');\n errors.forEach(({ url: invalidUrl, error }) => {\n console.error(` - ${invalidUrl}: ${error}`);\n });\n }\n\n // Exit if no valid URLs\n if (validUrls.length === 0) {\n console.error('\\n\u274C No valid URLs to process. Exiting.');\n process.exit(1);\n }\n\n console.error(`\\n\u2705 Processing ${validUrls.length} valid URL(s)...`);\n\n const printer = new JSONStylePrinter();\n // watch mode is unbounded (timeout=0); default mode uses 10s timeout\n const pageFetcher = new PageFetcher(options.watch ? 0 : 10000, 2);\n const pageExtractor = new PageExtractor();\n const resourceExtractor = new ResourceExtractor(['a', 'meta', 'link', 'embed', 'script']);\n\n const execute = async (): Promise<void> => {\n const pageResponses = await pageFetcher.fetchAll(validUrls);\n const pageMetadatas: PageMetadata[] = [];\n\n for (const { content, url: responseUrl, error } of pageResponses) {\n const resources =\n error !== undefined || !content ? [] : await resourceExtractor.extract(content);\n const descriptor =\n error !== undefined || !content\n ? { url: responseUrl, error: error ?? 'Unknown error', resources }\n : await pageExtractor.extract(content);\n pageMetadatas.push({ ...descriptor, resources });\n\n\n }\n\n await printer.print(...pageMetadatas);\n };\n\n if (options.watch) {\n process.stdin.resume();\n\n process.on('SIGINT', () => {\n process.exit(0);\n });\n\n let activeExecution: Promise<void> | null = null;\n\n process.stdin.on('end', () => {\n // Ctrl-D: detach in-flight requests and let them fly off\n activeExecution = null;\n });\n\n let winchTimer: ReturnType<typeof setTimeout> | null = null;\n process.on('SIGWINCH', () => {\n if (winchTimer !== null) clearTimeout(winchTimer);\n winchTimer = setTimeout(() => {\n winchTimer = null;\n activeExecution = execute().catch((err: unknown) => {\n console.error('\\n\u274C An error occurred:', err instanceof Error ? err.message : err);\n });\n }, 150);\n });\n\n activeExecution = execute();\n await activeExecution;\n } else {\n await execute();\n }\n } catch (error) {\n console.error('\\n\u274C An error occurred:', error instanceof Error ? error.message : error);\n process.exit(1);\n }\n })\n .parseAsync(process.argv);\n})();\n", "{\r\n \"name\": \"pagerts\",\r\n \"description\": \"A tool for viewing external relations in a webpage\",\r\n \"version\": \"1.3.0\",\r\n \"type\": \"module\",\r\n \"main\": \"main.js\",\r\n \"bin\": {\r\n \"pagerts\": \"bin/main.js\"\r\n },\r\n \"files\": [\r\n \"bin\"\r\n ],\r\n \"engines\": {\r\n \"node\": \">=18.0.0\"\r\n },\r\n \"scripts\": {\r\n \"test\": \"jest --coverage\",\r\n \"test:watch\": \"jest --watch\",\r\n \"build\": \"esbuild src/main.ts --bundle --packages=external --outdir=bin --minify --sourcemap --platform=node --format=esm\",\r\n \"lint\": \"eslint src/**/*.ts\",\r\n \"lint:fix\": \"eslint src/**/*.ts --fix\",\r\n \"type-check\": \"tsc --noEmit\",\r\n \"format\": \"prettier --write \\\"src/**/*.ts\\\"\",\r\n \"format:check\": \"prettier --check \\\"src/**/*.ts\\\"\",\r\n \"security:audit\": \"npm audit --audit-level=moderate\",\r\n \"security:check\": \"npm run security:audit && npm run lint\",\r\n \"start\": \"node ./bin/main.js\",\r\n \"dev\": \"tsx src/main.ts\",\r\n \"prepare\": \"npm run build\"\r\n },\r\n \"keywords\": [\r\n \"webpage\",\r\n \"hierarchy\",\r\n \"management\",\r\n \"web-scraping\",\r\n \"cli\",\r\n \"url-extraction\"\r\n ],\r\n \"author\": \"Kirill <kine> Nevzorov\",\r\n \"license\": \"MIT\",\r\n \"bugs\": {\r\n \"url\": \"https://github.com/akinevz2/pagerts/issues\"\r\n },\r\n \"homepage\": \"https://github.com/akinevz2/pagerts\",\r\n \"dependencies\": {\r\n \"@exodus/bytes\": \"^1.15.0\",\r\n \"commander\": \"^12.1.0\",\r\n \"linkedom\": \"^0.18.9\"\r\n },\r\n \"devDependencies\": {\r\n \"@types/jest\": \"^29.5.14\",\r\n \"@types/node\": \"^22.10.5\",\r\n \"@typescript-eslint/eslint-plugin\": \"^8.20.0\",\r\n \"@typescript-eslint/parser\": \"^8.20.0\",\r\n \"esbuild\": \"^0.25.1\",\r\n \"eslint\": \"^9.18.0\",\r\n \"eslint-config-prettier\": \"^9.1.0\",\r\n \"eslint-plugin-security\": \"^3.0.1\",\r\n \"jest\": \"^29.7.0\",\r\n \"prettier\": \"^3.4.2\",\r\n \"ts-jest\": \"^29.2.5\",\r\n \"tsx\": \"^4.19.2\",\r\n \"typescript\": \"^5.7.2\"\r\n }\r\n}", "export abstract class AbstractExtractor<V, R> {\n constructor(readonly name: string) {}\n abstract extract(value: V): Promise<R>;\n}\n", "import type { Page } from '../page/index.js';\nimport type { DOMResult } from '../page/index.js';\nimport { AbstractExtractor } from './AbstractExtractor.js';\n\nexport class PageExtractor extends AbstractExtractor<DOMResult, Page> {\n constructor() {\n super('page-extractor');\n }\n\n async extract(value: DOMResult): Promise<Page> {\n const { window: { document }, url } = value;\n return { title: document.title, url };\n }\n}\n", "/**\n * @license MIT\n * We are interested in visualising a page as a collection of tags.\n *\n * We wish to work with tags that can be compactly previewed on a webpage.\n * Here we must declare all of the element types that can be used to represent\n * a resource that can be hyperlinked off a webpage.\n */\ntype Tags = HTMLElementTagNameMap;\n\nexport const RESOURCE_DISPLAYABLE_KEYS = [\n 'id',\n 'innerText',\n 'textContent',\n 'class',\n 'ariaLabel',\n 'ariaDescription',\n 'alt',\n] as const;\n\nexport type DisplayableKey = (typeof RESOURCE_DISPLAYABLE_KEYS)[number];\n\nexport const RESOURCE_LINK_KEYS = ['href', 'data-src', 'target', 'action', 'src', 'url'] as const;\n\nexport type LinkKey = (typeof RESOURCE_LINK_KEYS)[number];\n\nexport type AttributeKey = DisplayableKey | LinkKey;\n\nexport type ResourceKey = { key: AttributeKey; value: string };\nexport type ResourceLink = { key: LinkKey; value: string };\n\nexport type ExternalResource = {\n text: ResourceKey;\n link: ResourceLink;\n};\n\nexport type Tag = keyof Tags;\n\nexport type Resource = HTMLElement & {\n [K in AttributeKey]?: string | null;\n};\n\nexport type ResourceByName<T extends keyof Tags> = Tags[T];\n\n// --- adapters ---\n\nconst readAttr = (element: Resource, key: AttributeKey): string | undefined => {\n const v = element.getAttribute(key);\n return v != null && v.trim() !== '' ? v : undefined;\n};\n\nexport function findResourceText(element: Resource): ResourceKey | undefined {\n for (const key of RESOURCE_DISPLAYABLE_KEYS) {\n const value = readAttr(element, key);\n if (value !== undefined) return { key, value };\n }\n return undefined;\n}\n\nexport function findResourceLink(element: Resource): ResourceLink | undefined {\n for (const key of RESOURCE_LINK_KEYS) {\n const value = readAttr(element, key);\n if (value !== undefined) return { key, value };\n }\n return undefined;\n}\n\nexport const isResourceKey = (key: string): key is AttributeKey =>\n (RESOURCE_DISPLAYABLE_KEYS as readonly string[]).includes(key) ||\n (RESOURCE_LINK_KEYS as readonly string[]).includes(key);\n", "import type { DOMResult } from '../page/index.js';\nimport {\n findResourceLink,\n findResourceText,\n type ExternalResource,\n type Resource,\n type Tag,\n} from '../resource.js';\nimport { AbstractExtractor } from './AbstractExtractor.js';\n\nexport class ResourceExtractor extends AbstractExtractor<DOMResult, ExternalResource[]> {\n constructor(private readonly tags: Tag[]) {\n super('page-extractor');\n }\n async extract(value: DOMResult): Promise<ExternalResource[]> {\n const { document } = value.window;\n return this.tags.flatMap((tag) =>\n Array.from(document.querySelectorAll<Resource>(tag)).flatMap((element) => {\n const link = findResourceLink(element);\n if (!link) return [];\n const text = findResourceText(element) ?? { key: 'src' as const, value: link.value };\n return [{ text, link }];\n })\n );\n }\n}\n", "import { readFile } from 'fs/promises';\nimport { parseHTML } from 'linkedom';\nimport { legacyHookDecode } from '@exodus/bytes/encoding.js';\n\nexport interface DOMResult {\n window: { document: Document };\n url: string;\n}\n\ninterface PageResponse {\n url: string;\n content?: DOMResult;\n error?: string;\n}\n\nexport class PageFetcher {\n private readonly timeout: number;\n private readonly maxRetries: number;\n\n constructor(timeout = 10000, maxRetries = 2) {\n this.timeout = timeout;\n this.maxRetries = maxRetries;\n }\n\n private buildDOMResult(html: string, url: string): DOMResult {\n const { document } = parseHTML(html) as { document: Document };\n return { window: { document }, url };\n }\n\n private async fetchPage(url: string, retryCount = 0): Promise<PageResponse> {\n try {\n let domPromise: Promise<DOMResult>;\n\n if (url.startsWith('file://')) {\n domPromise = readFile(url.substring(7), 'utf-8').then((html) =>\n this.buildDOMResult(html, url)\n );\n } else {\n domPromise = fetch(url).then(async (response) => {\n const buffer = await response.arrayBuffer();\n const contentType = response.headers.get('content-type') ?? '';\n const charsetMatch = /charset=([^\\s;]+)/i.exec(contentType);\n const html = legacyHookDecode(new Uint8Array(buffer), charsetMatch?.[1] ?? 'utf-8');\n return this.buildDOMResult(html, url);\n });\n }\n\n const content = await (this.timeout > 0\n ? Promise.race([\n domPromise,\n new Promise<never>((_, reject) =>\n setTimeout(() => reject(new Error('Request timeout')), this.timeout)\n ),\n ])\n : domPromise);\n\n return { url, content };\n } catch (error) {\n const message = error instanceof Error ? error.message : 'Unknown error';\n\n // Retry logic for transient errors\n if (retryCount < this.maxRetries && this.isRetryableError(message)) {\n process.stderr.write(`Retrying ${url} (attempt ${retryCount + 1}/${this.maxRetries})...\\n`);\n await this.delay(1000 * (retryCount + 1)); // Exponential backoff\n return this.fetchPage(url, retryCount + 1);\n }\n\n return { url, error: `Failed to fetch: ${message}` };\n }\n }\n\n private isRetryableError(message: string): boolean {\n const retryablePatterns = [/timeout/i, /ECONNRESET/i, /ETIMEDOUT/i, /ENOTFOUND/i, /network/i];\n return retryablePatterns.some((pattern) => pattern.test(message));\n }\n\n private delay(ms: number): Promise<void> {\n return new Promise((resolve) => setTimeout(resolve, ms));\n }\n\n async fetchAll(urls: string[]): Promise<PageResponse[]> {\n const responses = await Promise.all(urls.map((url) => this.fetchPage(url)));\n return responses.filter((response) => response.content !== undefined || response.error);\n }\n}\n", "import type { PageMetadata } from '../page/index.js';\n\nexport abstract class AbstractResourcePrinter {\n constructor() {}\n abstract print(...pages: PageMetadata[]): void | Promise<void>;\n}\n", "import type { PageMetadata } from '../page/index.js';\nimport { AbstractResourcePrinter } from './AbstractResourcePrinter.js';\n\nexport class JSONStylePrinter extends AbstractResourcePrinter {\n print(...pages: PageMetadata[]): void | Promise<void> {\n const json = JSON.stringify(pages);\n process.stdout.write(json + '\\n');\n }\n}\n", "/**\n * Security utilities for URL validation and sanitization\n */\n\nconst ALLOWED_PROTOCOLS = ['http:', 'https:', 'file:'];\nconst MAX_URL_LENGTH = 2048;\nconst SUSPICIOUS_PATTERNS = [\n /javascript:/i,\n /data:/i,\n /vbscript:/i,\n /<script/i,\n /on\\w+=/i, // Event handlers like onclick=\n];\n\nexport interface ValidationResult {\n isValid: boolean;\n error?: string;\n sanitizedUrl?: string;\n}\n\n/**\n * Validates a URL for security concerns\n * @param url - The URL to validate\n * @returns ValidationResult object with validation status\n */\nexport function validateUrl(url: string): ValidationResult {\n // Check if URL is empty or whitespace\n if (!url || !url.trim()) {\n return {\n isValid: false,\n error: 'URL cannot be empty',\n };\n }\n\n const trimmedUrl = url.trim();\n\n // Check URL length to prevent DoS\n if (trimmedUrl.length > MAX_URL_LENGTH) {\n return {\n isValid: false,\n error: `URL exceeds maximum length of ${MAX_URL_LENGTH} characters`,\n };\n }\n\n // Check for suspicious patterns\n for (const pattern of SUSPICIOUS_PATTERNS) {\n if (pattern.test(trimmedUrl)) {\n return {\n isValid: false,\n error: 'URL contains suspicious patterns',\n };\n }\n }\n\n // Parse the URL\n let parsedUrl: URL;\n try {\n parsedUrl = new URL(trimmedUrl);\n } catch (error) {\n // If URL parsing fails, it might be a file path\n if (trimmedUrl.startsWith('file://')) {\n return {\n isValid: true,\n sanitizedUrl: trimmedUrl,\n };\n }\n return {\n isValid: false,\n error: 'Invalid URL format',\n };\n }\n\n // Check protocol\n if (!ALLOWED_PROTOCOLS.includes(parsedUrl.protocol)) {\n return {\n isValid: false,\n error: `Protocol ${parsedUrl.protocol} is not allowed. Allowed protocols: ${ALLOWED_PROTOCOLS.join(', ')}`,\n };\n }\n\n // Check for localhost/internal IPs in production (security consideration)\n const hostname = parsedUrl.hostname.toLowerCase();\n const isLocalhost =\n hostname === 'localhost' ||\n hostname === '127.0.0.1' ||\n hostname === '::1' ||\n hostname.startsWith('192.168.') ||\n hostname.startsWith('10.') ||\n /^172\\.(1[6-9]|2\\d|3[01])\\./.test(hostname);\n\n if (isLocalhost && parsedUrl.protocol !== 'file:') {\n // Allow but warn about localhost URLs\n console.warn(`Warning: Accessing local network resource: ${trimmedUrl}`);\n }\n\n return {\n isValid: true,\n sanitizedUrl: parsedUrl.toString(),\n };\n}\n\n/**\n * Validates an array of URLs\n * @param urls - Array of URLs to validate\n * @returns Object with valid URLs and errors\n */\nexport function validateUrls(urls: string[]): {\n validUrls: string[];\n errors: Array<{ url: string; error: string }>;\n} {\n const validUrls: string[] = [];\n const errors: Array<{ url: string; error: string }> = [];\n\n for (const url of urls) {\n const result = validateUrl(url);\n if (result.isValid && result.sanitizedUrl) {\n validUrls.push(result.sanitizedUrl);\n } else {\n errors.push({\n url,\n error: result.error || 'Unknown validation error',\n });\n }\n }\n\n return { validUrls, errors };\n}\n\n/**\n * Rate limiter to prevent abuse\n */\nexport class RateLimiter {\n private requests: number[] = [];\n private readonly maxRequests: number;\n private readonly windowMs: number;\n\n constructor(maxRequests = 10, windowMs = 60000) {\n this.maxRequests = maxRequests;\n this.windowMs = windowMs;\n }\n\n /**\n * Check if a request is allowed under rate limiting\n * @returns true if request is allowed, false otherwise\n */\n public isAllowed(): boolean {\n const now = Date.now();\n\n // Remove old requests outside the time window\n this.requests = this.requests.filter((time) => now - time < this.windowMs);\n\n if (this.requests.length >= this.maxRequests) {\n return false;\n }\n\n this.requests.push(now);\n return true;\n }\n\n /**\n * Get remaining requests in current window\n */\n public getRemainingRequests(): number {\n const now = Date.now();\n this.requests = this.requests.filter((time) => now - time < this.windowMs);\n return Math.max(0, this.maxRequests - this.requests.length);\n }\n}\n\n/**\n * Sanitizes HTML content to prevent XSS attacks\n * @param text - Text to sanitize\n * @returns Sanitized text\n */\nexport function sanitizeText(text: string): string {\n if (!text) return '';\n\n return text\n .replace(/</g, '<')\n .replace(/>/g, '>')\n .replace(/\"/g, '"')\n .replace(/'/g, ''')\n .replace(/\\//g, '/');\n}\n"],
|
|
5
|
+
"mappings": ";AACA,OAAS,WAAAA,EAAS,kBAAAC,EAAgB,UAAAC,MAAc,YCDhD,IAAAC,EAAA,CACE,KAAQ,UACR,YAAe,qDACf,QAAW,QACX,KAAQ,SACR,KAAQ,UACR,IAAO,CACL,QAAW,aACb,EACA,MAAS,CACP,KACF,EACA,QAAW,CACT,KAAQ,UACV,EACA,QAAW,CACT,KAAQ,kBACR,aAAc,eACd,MAAS,kHACT,KAAQ,qBACR,WAAY,2BACZ,aAAc,eACd,OAAU,iCACV,eAAgB,iCAChB,iBAAkB,mCAClB,iBAAkB,yCAClB,MAAS,qBACT,IAAO,kBACP,QAAW,eACb,EACA,SAAY,CACV,UACA,YACA,aACA,eACA,MACA,gBACF,EACA,OAAU,yBACV,QAAW,MACX,KAAQ,CACN,IAAO,4CACT,EACA,SAAY,sCACZ,aAAgB,CACd,gBAAiB,UACjB,UAAa,UACb,SAAY,SACd,EACA,gBAAmB,CACjB,cAAe,WACf,cAAe,WACf,mCAAoC,UACpC,4BAA6B,UAC7B,QAAW,UACX,OAAU,UACV,yBAA0B,SAC1B,yBAA0B,SAC1B,KAAQ,UACR,SAAY,SACZ,UAAW,UACX,IAAO,UACP,WAAc,QAChB,CACF,EChEO,IAAeC,EAAf,KAAuC,CAC5C,YAAqBC,EAAc,CAAd,UAAAA,CAAe,CAEtC,ECCO,IAAMC,EAAN,cAA4BC,CAAmC,CACpE,aAAc,CACZ,MAAM,gBAAgB,CACxB,CAEA,MAAM,QAAQC,EAAiC,CAC7C,GAAM,CAAE,OAAQ,CAAE,SAAAC,CAAS,EAAG,IAAAC,CAAI,EAAIF,EACtC,MAAO,CAAE,MAAOC,EAAS,MAAO,IAAAC,CAAI,CACtC,CACF,ECHO,IAAMC,EAA4B,CACvC,KACA,YACA,cACA,QACA,YACA,kBACA,KACF,EAIaC,EAAqB,CAAC,OAAQ,WAAY,SAAU,SAAU,MAAO,KAAK,EAwBjFC,EAAW,CAACC,EAAmBC,IAA0C,CAC7E,IAAMC,EAAIF,EAAQ,aAAaC,CAAG,EAClC,OAAOC,GAAK,MAAQA,EAAE,KAAK,IAAM,GAAKA,EAAI,MAC5C,EAEO,SAASC,EAAiBH,EAA4C,CAC3E,QAAWC,KAAOJ,EAA2B,CAC3C,IAAMO,EAAQL,EAASC,EAASC,CAAG,EACnC,GAAIG,IAAU,OAAW,MAAO,CAAE,IAAAH,EAAK,MAAAG,CAAM,CAC/C,CAEF,CAEO,SAASC,EAAiBL,EAA6C,CAC5E,QAAWC,KAAOH,EAAoB,CACpC,IAAMM,EAAQL,EAASC,EAASC,CAAG,EACnC,GAAIG,IAAU,OAAW,MAAO,CAAE,IAAAH,EAAK,MAAAG,CAAM,CAC/C,CAEF,CCvDO,IAAME,EAAN,cAAgCC,CAAiD,CACtF,YAA6BC,EAAa,CACxC,MAAM,gBAAgB,EADK,UAAAA,CAE7B,CACA,MAAM,QAAQC,EAA+C,CAC3D,GAAM,CAAE,SAAAC,CAAS,EAAID,EAAM,OAC3B,OAAO,KAAK,KAAK,QAASE,GACxB,MAAM,KAAKD,EAAS,iBAA2BC,CAAG,CAAC,EAAE,QAASC,GAAY,CACxE,IAAMC,EAAOC,EAAiBF,CAAO,EACrC,OAAKC,EAEE,CAAC,CAAE,KADGE,EAAiBH,CAAO,GAAK,CAAE,IAAK,MAAgB,MAAOC,EAAK,KAAM,EACnE,KAAAA,CAAK,CAAC,EAFJ,CAAC,CAGrB,CAAC,CACH,CACF,CACF,ECzBA,OAAS,YAAAG,MAAgB,cACzB,OAAS,aAAAC,MAAiB,WAC1B,OAAS,oBAAAC,MAAwB,4BAa1B,IAAMC,EAAN,KAAkB,CACN,QACA,WAEjB,YAAYC,EAAU,IAAOC,EAAa,EAAG,CAC3C,KAAK,QAAUD,EACf,KAAK,WAAaC,CACpB,CAEQ,eAAeC,EAAcC,EAAwB,CAC3D,GAAM,CAAE,SAAAC,CAAS,EAAIP,EAAUK,CAAI,EACnC,MAAO,CAAE,OAAQ,CAAE,SAAAE,CAAS,EAAG,IAAAD,CAAI,CACrC,CAEA,MAAc,UAAUA,EAAaE,EAAa,EAA0B,CAC1E,GAAI,CACF,IAAIC,EAEAH,EAAI,WAAW,SAAS,EAC1BG,EAAaV,EAASO,EAAI,UAAU,CAAC,EAAG,OAAO,EAAE,KAAMD,GACrD,KAAK,eAAeA,EAAMC,CAAG,CAC/B,EAEAG,EAAa,MAAMH,CAAG,EAAE,KAAK,MAAOI,GAAa,CAC/C,IAAMC,EAAS,MAAMD,EAAS,YAAY,EACpCE,EAAcF,EAAS,QAAQ,IAAI,cAAc,GAAK,GACtDG,EAAe,qBAAqB,KAAKD,CAAW,EACpDP,EAAOJ,EAAiB,IAAI,WAAWU,CAAM,EAAGE,IAAe,CAAC,GAAK,OAAO,EAClF,OAAO,KAAK,eAAeR,EAAMC,CAAG,CACtC,CAAC,EAGH,IAAMQ,EAAU,MAAO,KAAK,QAAU,EAClC,QAAQ,KAAK,CACXL,EACA,IAAI,QAAe,CAACM,EAAGC,IACrB,WAAW,IAAMA,EAAO,IAAI,MAAM,iBAAiB,CAAC,EAAG,KAAK,OAAO,CACrE,CACF,CAAC,EACDP,GAEJ,MAAO,CAAE,IAAAH,EAAK,QAAAQ,CAAQ,CACxB,OAASG,EAAO,CACd,IAAMC,EAAUD,aAAiB,MAAQA,EAAM,QAAU,gBAGzD,OAAIT,EAAa,KAAK,YAAc,KAAK,iBAAiBU,CAAO,GAC/D,QAAQ,OAAO,MAAM,YAAYZ,CAAG,aAAaE,EAAa,CAAC,IAAI,KAAK,UAAU;AAAA,CAAQ,EAC1F,MAAM,KAAK,MAAM,KAAQA,EAAa,EAAE,EACjC,KAAK,UAAUF,EAAKE,EAAa,CAAC,GAGpC,CAAE,IAAAF,EAAK,MAAO,oBAAoBY,CAAO,EAAG,CACrD,CACF,CAEQ,iBAAiBA,EAA0B,CAEjD,MAD0B,CAAC,WAAY,cAAe,aAAc,aAAc,UAAU,EACnE,KAAMC,GAAYA,EAAQ,KAAKD,CAAO,CAAC,CAClE,CAEQ,MAAME,EAA2B,CACvC,OAAO,IAAI,QAASC,GAAY,WAAWA,EAASD,CAAE,CAAC,CACzD,CAEA,MAAM,SAASE,EAAyC,CAEtD,OADkB,MAAM,QAAQ,IAAIA,EAAK,IAAKhB,GAAQ,KAAK,UAAUA,CAAG,CAAC,CAAC,GACzD,OAAQI,GAAaA,EAAS,UAAY,QAAaA,EAAS,KAAK,CACxF,CACF,EClFO,IAAea,EAAf,KAAuC,CAC5C,aAAc,CAAC,CAEjB,ECFO,IAAMC,EAAN,cAA+BC,CAAwB,CAC5D,SAASC,EAA6C,CACpD,IAAMC,EAAO,KAAK,UAAUD,CAAK,EACjC,QAAQ,OAAO,MAAMC,EAAO;AAAA,CAAI,CAClC,CACF,ECJA,IAAMC,EAAoB,CAAC,QAAS,SAAU,OAAO,EAErD,IAAMC,EAAsB,CAC1B,eACA,SACA,aACA,WACA,SACF,EAaO,SAASC,EAAYC,EAA+B,CAEzD,GAAI,CAACA,GAAO,CAACA,EAAI,KAAK,EACpB,MAAO,CACL,QAAS,GACT,MAAO,qBACT,EAGF,IAAMC,EAAaD,EAAI,KAAK,EAG5B,GAAIC,EAAW,OAAS,KACtB,MAAO,CACL,QAAS,GACT,MAAO,+CACT,EAIF,QAAWC,KAAWJ,EACpB,GAAII,EAAQ,KAAKD,CAAU,EACzB,MAAO,CACL,QAAS,GACT,MAAO,kCACT,EAKJ,IAAIE,EACJ,GAAI,CACFA,EAAY,IAAI,IAAIF,CAAU,CAChC,MAAgB,CAEd,OAAIA,EAAW,WAAW,SAAS,EAC1B,CACL,QAAS,GACT,aAAcA,CAChB,EAEK,CACL,QAAS,GACT,MAAO,oBACT,CACF,CAGA,GAAI,CAACG,EAAkB,SAASD,EAAU,QAAQ,EAChD,MAAO,CACL,QAAS,GACT,MAAO,YAAYA,EAAU,QAAQ,uCAAuCC,EAAkB,KAAK,IAAI,CAAC,EAC1G,EAIF,IAAMC,EAAWF,EAAU,SAAS,YAAY,EAShD,OAPEE,IAAa,aACbA,IAAa,aACbA,IAAa,OACbA,EAAS,WAAW,UAAU,GAC9BA,EAAS,WAAW,KAAK,GACzB,6BAA6B,KAAKA,CAAQ,IAEzBF,EAAU,WAAa,SAExC,QAAQ,KAAK,8CAA8CF,CAAU,EAAE,EAGlE,CACL,QAAS,GACT,aAAcE,EAAU,SAAS,CACnC,CACF,CAOO,SAASG,EAAaC,EAG3B,CACA,IAAMC,EAAsB,CAAC,EACvBC,EAAgD,CAAC,EAEvD,QAAWT,KAAOO,EAAM,CACtB,IAAMG,EAASX,EAAYC,CAAG,EAC1BU,EAAO,SAAWA,EAAO,aAC3BF,EAAU,KAAKE,EAAO,YAAY,EAElCD,EAAO,KAAK,CACV,IAAAT,EACA,MAAOU,EAAO,OAAS,0BACzB,CAAC,CAEL,CAEA,MAAO,CAAE,UAAAF,EAAW,OAAAC,CAAO,CAC7B,CTrHA,GAAM,CAAE,YAAAE,EAAa,KAAAC,EAAM,QAAAC,CAAQ,EAAIC,EAEjCC,EAAU,IAAIC,EAEdC,EAAMC,EACV,kBACA,kEACF,GAEC,SACC,MAAMH,EACH,KAAKH,CAAI,EACT,QAAQC,EAAS,eAAe,EAChC,YAAYF,CAAW,EACvB,YAAYM,CAAG,EACf,UAAU,IAAIE,EAAO,UAAW,kGAAkG,CAAC,EACnI,OAAO,MAAOC,EAAgBC,IAAgC,CAC7D,GAAI,CAEF,GAAM,CAAE,UAAAC,EAAW,OAAAC,CAAO,EAAIC,EAAaJ,CAAI,EAG3CG,EAAO,OAAS,IAClB,QAAQ,MAAM;AAAA,8BAA4B,EAC1CA,EAAO,QAAQ,CAAC,CAAE,IAAKE,EAAY,MAAAC,CAAM,IAAM,CAC7C,QAAQ,MAAM,OAAOD,CAAU,KAAKC,CAAK,EAAE,CAC7C,CAAC,GAICJ,EAAU,SAAW,IACvB,QAAQ,MAAM;AAAA,0CAAwC,EACtD,QAAQ,KAAK,CAAC,GAGhB,QAAQ,MAAM;AAAA,oBAAkBA,EAAU,MAAM,kBAAkB,EAElE,IAAMK,EAAU,IAAIC,EAEdC,EAAc,IAAIC,EAAYT,EAAQ,MAAQ,EAAI,IAAO,CAAC,EAC1DU,EAAgB,IAAIC,EACpBC,EAAoB,IAAIC,EAAkB,CAAC,IAAK,OAAQ,OAAQ,QAAS,QAAQ,CAAC,EAElFC,EAAU,SAA2B,CACzC,IAAMC,EAAgB,MAAMP,EAAY,SAASP,CAAS,EACpDe,EAAgC,CAAC,EAEvC,OAAW,CAAE,QAAAC,EAAS,IAAKC,EAAa,MAAAb,CAAM,IAAKU,EAAe,CAChE,IAAMI,EACJd,IAAU,QAAa,CAACY,EAAU,CAAC,EAAI,MAAML,EAAkB,QAAQK,CAAO,EAC1EG,EACJf,IAAU,QAAa,CAACY,EACpB,CAAE,IAAKC,EAAa,MAAOb,GAAS,gBAAiB,UAAAc,CAAU,EAC/D,MAAMT,EAAc,QAAQO,CAAO,EACzCD,EAAc,KAAK,CAAE,GAAGI,EAAY,UAAAD,CAAU,CAAC,CAGjD,CAEA,MAAMb,EAAQ,MAAM,GAAGU,CAAa,CACtC,EAEA,GAAIhB,EAAQ,MAAO,CACjB,QAAQ,MAAM,OAAO,EAErB,QAAQ,GAAG,SAAU,IAAM,CACzB,QAAQ,KAAK,CAAC,CAChB,CAAC,EAED,IAAIqB,EAAwC,KAE5C,QAAQ,MAAM,GAAG,MAAO,IAAM,CAE5BA,EAAkB,IACpB,CAAC,EAED,IAAIC,EAAmD,KACvD,QAAQ,GAAG,WAAY,IAAM,CACvBA,IAAe,MAAM,aAAaA,CAAU,EAChDA,EAAa,WAAW,IAAM,CAC5BA,EAAa,KACbD,EAAkBP,EAAQ,EAAE,MAAOS,GAAiB,CAClD,QAAQ,MAAM;AAAA,2BAA0BA,aAAe,MAAQA,EAAI,QAAUA,CAAG,CAClF,CAAC,CACH,EAAG,GAAG,CACR,CAAC,EAEDF,EAAkBP,EAAQ,EAC1B,MAAMO,CACR,MACE,MAAMP,EAAQ,CAElB,OAAST,EAAO,CACd,QAAQ,MAAM;AAAA,2BAA0BA,aAAiB,MAAQA,EAAM,QAAUA,CAAK,EACtF,QAAQ,KAAK,CAAC,CAChB,CACF,CAAC,EACA,WAAW,QAAQ,IAAI",
|
|
6
|
+
"names": ["Command", "createArgument", "Option", "package_default", "AbstractExtractor", "name", "PageExtractor", "AbstractExtractor", "value", "document", "url", "RESOURCE_DISPLAYABLE_KEYS", "RESOURCE_LINK_KEYS", "readAttr", "element", "key", "v", "findResourceText", "value", "findResourceLink", "ResourceExtractor", "AbstractExtractor", "tags", "value", "document", "tag", "element", "link", "findResourceLink", "findResourceText", "readFile", "parseHTML", "legacyHookDecode", "PageFetcher", "timeout", "maxRetries", "html", "url", "document", "retryCount", "domPromise", "response", "buffer", "contentType", "charsetMatch", "content", "_", "reject", "error", "message", "pattern", "ms", "resolve", "urls", "AbstractResourcePrinter", "JSONStylePrinter", "AbstractResourcePrinter", "pages", "json", "ALLOWED_PROTOCOLS", "SUSPICIOUS_PATTERNS", "validateUrl", "url", "trimmedUrl", "pattern", "parsedUrl", "ALLOWED_PROTOCOLS", "hostname", "validateUrls", "urls", "validUrls", "errors", "result", "description", "name", "version", "package_default", "program", "Command", "url", "createArgument", "Option", "urls", "options", "validUrls", "errors", "validateUrls", "invalidUrl", "error", "printer", "JSONStylePrinter", "pageFetcher", "PageFetcher", "pageExtractor", "PageExtractor", "resourceExtractor", "ResourceExtractor", "execute", "pageResponses", "pageMetadatas", "content", "responseUrl", "resources", "descriptor", "activeExecution", "winchTimer", "err"]
|
|
7
7
|
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pagerts",
|
|
3
3
|
"description": "A tool for viewing external relations in a webpage",
|
|
4
|
-
"version": "1.0
|
|
4
|
+
"version": "1.3.0",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "main.js",
|
|
7
7
|
"bin": {
|
|
@@ -36,19 +36,19 @@
|
|
|
36
36
|
"cli",
|
|
37
37
|
"url-extraction"
|
|
38
38
|
],
|
|
39
|
-
"author": "Kirill
|
|
39
|
+
"author": "Kirill <kine> Nevzorov",
|
|
40
40
|
"license": "MIT",
|
|
41
41
|
"bugs": {
|
|
42
|
-
"url": "https://github.com/
|
|
42
|
+
"url": "https://github.com/akinevz2/pagerts/issues"
|
|
43
43
|
},
|
|
44
|
-
"homepage": "https://github.com/
|
|
44
|
+
"homepage": "https://github.com/akinevz2/pagerts",
|
|
45
45
|
"dependencies": {
|
|
46
|
+
"@exodus/bytes": "^1.15.0",
|
|
46
47
|
"commander": "^12.1.0",
|
|
47
|
-
"
|
|
48
|
+
"linkedom": "^0.18.9"
|
|
48
49
|
},
|
|
49
50
|
"devDependencies": {
|
|
50
51
|
"@types/jest": "^29.5.14",
|
|
51
|
-
"@types/jsdom": "^21.1.7",
|
|
52
52
|
"@types/node": "^22.10.5",
|
|
53
53
|
"@typescript-eslint/eslint-plugin": "^8.20.0",
|
|
54
54
|
"@typescript-eslint/parser": "^8.20.0",
|