pagerts 0.2.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/.github/codeql/codeql-config.yml +7 -0
  2. package/.github/workflows/ci.yml +146 -0
  3. package/.github/workflows/dependency-update.yml +52 -0
  4. package/.prettierignore +5 -0
  5. package/.prettierrc.json +10 -0
  6. package/MAINTAINERS.md +30 -0
  7. package/POST-INSTALL.md +205 -0
  8. package/README.md +220 -16
  9. package/SECURITY.md +160 -0
  10. package/bin/main.js +24 -19
  11. package/bin/main.js.map +4 -4
  12. package/eslint.config.mjs +83 -0
  13. package/{jest.config.js → jest.config.cjs} +45 -30
  14. package/package.json +34 -13
  15. package/src/__tests__/PageFetcher.test.ts +48 -0
  16. package/src/__tests__/security.test.ts +153 -0
  17. package/src/extractors/AbstractExtractor.ts +4 -5
  18. package/src/extractors/PageExtractor.ts +21 -12
  19. package/src/extractors/ResourceExtractor.ts +31 -25
  20. package/src/extractors/TagExtractor.ts +13 -14
  21. package/src/extractors/index.ts +4 -0
  22. package/src/main.ts +71 -43
  23. package/src/page/Page.ts +24 -19
  24. package/src/page/PageFetcher.ts +81 -30
  25. package/src/page/index.ts +3 -0
  26. package/src/printers/AbstractResourcePrinter.ts +6 -6
  27. package/src/printers/JSONStylePrinter.ts +9 -12
  28. package/src/printers/LogStylePrinter.ts +30 -28
  29. package/src/printers/index.ts +3 -0
  30. package/src/resource.ts +88 -96
  31. package/src/security.ts +184 -0
  32. package/tsconfig.eslint.json +5 -0
  33. package/tsconfig.json +27 -11
  34. package/bin/package.json +0 -40
  35. package/bin/src/extractors/AbstractExtractor.js +0 -11
  36. package/bin/src/extractors/AbstractExtractor.js.map +0 -1
  37. package/bin/src/extractors/PageExtractor.js +0 -13
  38. package/bin/src/extractors/PageExtractor.js.map +0 -1
  39. package/bin/src/extractors/ResourceExtractor.js +0 -32
  40. package/bin/src/extractors/ResourceExtractor.js.map +0 -1
  41. package/bin/src/main.js +0 -36
  42. package/bin/src/main.js.map +0 -1
  43. package/bin/src/page/Page.js +0 -8
  44. package/bin/src/page/Page.js.map +0 -1
  45. package/bin/src/page/PageFetcher.js +0 -26
  46. package/bin/src/page/PageFetcher.js.map +0 -1
  47. package/bin/src/printers/AbstractResourcePrinter.js +0 -8
  48. package/bin/src/printers/AbstractResourcePrinter.js.map +0 -1
  49. package/bin/src/printers/JSONStylePrinter.js +0 -12
  50. package/bin/src/printers/JSONStylePrinter.js.map +0 -1
  51. package/bin/src/printers/LogStylePrinter.js +0 -27
  52. package/bin/src/printers/LogStylePrinter.js.map +0 -1
  53. package/bin/src/resource.js +0 -56
  54. package/bin/src/resource.js.map +0 -1
@@ -0,0 +1,184 @@
1
+ /**
2
+ * Security utilities for URL validation and sanitization
3
+ */
4
+
5
// Protocols (as reported by `URL.protocol`, trailing colon included) that may be fetched.
const ALLOWED_PROTOCOLS = ['http:', 'https:', 'file:'];
// Upper bound on the trimmed URL length, in characters.
const MAX_URL_LENGTH = 2048;
// Substrings that cause a URL to be rejected wherever they appear in it.
const SUSPICIOUS_PATTERNS = [
  /javascript:/i,
  /data:/i,
  /vbscript:/i,
  /<script/i,
  /on\w+=/i, // Event handlers like onclick=
];

/**
 * Outcome of validating a single URL.
 */
export interface ValidationResult {
  /** True when the URL passed every check. */
  isValid: boolean;
  /** Reason for rejection; set only when `isValid` is false. */
  error?: string;
  /** URL to use downstream; set only when `isValid` is true. */
  sanitizedUrl?: string;
}

/**
 * Reports whether a hostname refers to the local machine or a private IPv4 range.
 *
 * Recognises `localhost`, the IPv6 loopback `::1`, 127.0.0.0/8, 10.0.0.0/8,
 * 172.16.0.0/12 and 192.168.0.0/16. Only dotted-quad hosts are range-checked,
 * so domain names that merely start with digits (e.g. `10.example.com`) are
 * not reported.
 *
 * @param hostname - Hostname as returned by `URL.hostname`
 * @returns true if the host is loopback or a private IPv4 address
 */
function isLocalNetworkHost(hostname: string): boolean {
  // `URL.hostname` wraps IPv6 literals in brackets ("[::1]"); strip them before comparing.
  const host = hostname.toLowerCase().replace(/^\[|\]$/g, '');
  if (host === 'localhost' || host === '::1') {
    return true;
  }

  const quad = /^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(host);
  if (!quad) {
    return false;
  }

  const first = Number(quad[1]);
  const second = Number(quad[2]);
  return (
    first === 127 ||
    first === 10 ||
    (first === 192 && second === 168) ||
    (first === 172 && second >= 16 && second <= 31)
  );
}

/**
 * Validates a URL for security concerns.
 *
 * Checks run in this order: non-empty, length limit, suspicious substrings,
 * parseability, protocol allowlist. URLs pointing at the local machine or a
 * private network are accepted, but a warning is written via `console.warn`
 * (except for `file:` URLs).
 *
 * @param url - The URL to validate; surrounding whitespace is ignored
 * @returns ValidationResult with `sanitizedUrl` on success or `error` on failure
 */
export function validateUrl(url: string): ValidationResult {
  // Check if URL is empty or whitespace
  if (!url || !url.trim()) {
    return { isValid: false, error: 'URL cannot be empty' };
  }

  const trimmedUrl = url.trim();

  // Check URL length to prevent DoS
  if (trimmedUrl.length > MAX_URL_LENGTH) {
    return {
      isValid: false,
      error: `URL exceeds maximum length of ${MAX_URL_LENGTH} characters`,
    };
  }

  // Check for suspicious patterns anywhere in the raw string
  if (SUSPICIOUS_PATTERNS.some((pattern) => pattern.test(trimmedUrl))) {
    return { isValid: false, error: 'URL contains suspicious patterns' };
  }

  // Parse the URL
  let parsedUrl: URL;
  try {
    parsedUrl = new URL(trimmedUrl);
  } catch {
    // A string the URL parser rejects is still accepted verbatim when it
    // carries the file:// prefix; anything else is malformed.
    if (trimmedUrl.startsWith('file://')) {
      return { isValid: true, sanitizedUrl: trimmedUrl };
    }
    return { isValid: false, error: 'Invalid URL format' };
  }

  // Check protocol
  if (!ALLOWED_PROTOCOLS.includes(parsedUrl.protocol)) {
    return {
      isValid: false,
      error: `Protocol ${parsedUrl.protocol} is not allowed. Allowed protocols: ${ALLOWED_PROTOCOLS.join(', ')}`,
    };
  }

  // Allow but warn about loopback / private-network targets
  if (parsedUrl.protocol !== 'file:' && isLocalNetworkHost(parsedUrl.hostname)) {
    console.warn(`Warning: Accessing local network resource: ${trimmedUrl}`);
  }

  return { isValid: true, sanitizedUrl: parsedUrl.toString() };
}
101
+
102
/**
 * Runs `validateUrl` over every entry and partitions the outcomes.
 *
 * @param urls - Array of URLs to validate
 * @returns `validUrls` holds the sanitized form of each accepted URL;
 *          `errors` pairs each rejected input with its rejection reason
 */
export function validateUrls(urls: string[]): {
  validUrls: string[];
  errors: Array<{ url: string; error: string }>;
} {
  const accepted: string[] = [];
  const rejected: Array<{ url: string; error: string }> = [];

  urls.forEach((candidate) => {
    const { isValid, sanitizedUrl, error } = validateUrl(candidate);

    // A result counts as accepted only when it is valid AND carries a URL.
    if (!isValid || !sanitizedUrl) {
      rejected.push({ url: candidate, error: error || 'Unknown validation error' });
      return;
    }

    accepted.push(sanitizedUrl);
  });

  return { validUrls: accepted, errors: rejected };
}
128
+
129
/**
 * Sliding-window rate limiter to prevent abuse.
 *
 * Keeps the timestamps of accepted requests and allows at most `maxRequests`
 * of them within any `windowMs`-millisecond span, measured with `Date.now()`.
 */
export class RateLimiter {
  // Timestamps (ms since epoch) of accepted requests still inside the window.
  private requests: number[] = [];

  /**
   * @param maxRequests - Maximum accepted requests per window (default 10)
   * @param windowMs - Window length in milliseconds (default 60000)
   */
  constructor(
    private readonly maxRequests = 10,
    private readonly windowMs = 60000,
  ) {}

  /**
   * Check if a request is allowed under rate limiting.
   * An allowed request is recorded and counts against the window.
   * @returns true if request is allowed, false otherwise
   */
  public isAllowed(): boolean {
    const now = Date.now();
    this.dropExpired(now);

    if (this.requests.length >= this.maxRequests) {
      return false;
    }

    this.requests.push(now);
    return true;
  }

  /**
   * Get remaining requests in current window.
   * @returns number of further requests that would currently be allowed (never negative)
   */
  public getRemainingRequests(): number {
    this.dropExpired(Date.now());
    return Math.max(0, this.maxRequests - this.requests.length);
  }

  /** Discards timestamps that are `windowMs` or more older than `now`. */
  private dropExpired(now: number): void {
    this.requests = this.requests.filter((time) => now - time < this.windowMs);
  }
}
169
+
170
// Replacement entity for each character that sanitizeText escapes.
const HTML_ESCAPES: Readonly<Record<string, string>> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#x27;',
  '/': '&#x2F;',
};

/**
 * Sanitizes text for embedding in HTML to prevent XSS attacks.
 *
 * Escapes `&`, `<`, `>`, `"`, `'` and `/` in a single pass. The ampersand is
 * included so that input which already contains entities (e.g. `&lt;`) is
 * shown literally instead of being decoded back into markup.
 *
 * @param text - Text to sanitize
 * @returns Sanitized text; an empty string for empty (or otherwise falsy) input
 */
export function sanitizeText(text: string): string {
  if (!text) return '';

  return text.replace(/[&<>"'/]/g, (char) => HTML_ESCAPES[char] ?? char);
}
@@ -0,0 +1,5 @@
1
+ {
2
+ "extends": "./tsconfig.json",
3
+ "include": ["src/**/*.ts"],
4
+ "exclude": ["node_modules", "bin", "coverage"]
5
+ }
package/tsconfig.json CHANGED
@@ -1,12 +1,28 @@
1
1
  {
2
- "compilerOptions": {
3
- "module": "NodeNext",
4
- "target": "ESNext",
5
- "resolveJsonModule": true,
6
- "outDir": "bin",
7
- "sourceMap": true,
8
- },
9
- "include": [
10
- "src/**.*",
11
- ],
12
- }
2
+ "compilerOptions": {
3
+ "module": "NodeNext",
4
+ "target": "ES2022",
5
+ "lib": ["ES2022"],
6
+ "moduleResolution": "NodeNext",
7
+ "resolveJsonModule": true,
8
+ "outDir": "bin",
9
+ "sourceMap": true,
10
+ "strict": true,
11
+ "noImplicitAny": true,
12
+ "strictNullChecks": true,
13
+ "strictFunctionTypes": true,
14
+ "strictBindCallApply": true,
15
+ "strictPropertyInitialization": true,
16
+ "noImplicitThis": true,
17
+ "alwaysStrict": true,
18
+ "noUnusedLocals": true,
19
+ "noUnusedParameters": true,
20
+ "noImplicitReturns": true,
21
+ "noFallthroughCasesInSwitch": true,
22
+ "esModuleInterop": true,
23
+ "skipLibCheck": true,
24
+ "forceConsistentCasingInFileNames": true
25
+ },
26
+ "include": ["src/**/*"],
27
+ "exclude": ["node_modules", "bin", "coverage", "**/*.test.ts", "**/*.spec.ts"]
28
+ }
package/bin/package.json DELETED
@@ -1,40 +0,0 @@
1
- {
2
- "name": "pagerts",
3
- "description": "A tool for viewing external relations in a webpage",
4
- "version": "0.1.9",
5
- "main": "main.js",
6
- "bin": {
7
- "pagerts": "bin/main.js"
8
- },
9
- "scripts": {
10
- "test": "jest",
11
- "build": "esbuild src/main.ts --external:jsdom --bundle --outdir=bin --minify --sourcemap --platform=node",
12
- "lint": "tsc",
13
- "start": "node ./bin/main.js",
14
- "dev": "npx tsx src/main.ts"
15
- },
16
- "keywords": [
17
- "webpage",
18
- "hierarchy",
19
- "management"
20
- ],
21
- "author": "Kirill kn253 Nevzorov",
22
- "license": "MIT",
23
- "bugs": {
24
- "url": "https://github.com/akinevz0/pagerts/issues"
25
- },
26
- "homepage": "https://github.com/akinevz0/pagerts",
27
- "dependencies": {
28
- "blessed": "^0.1.81",
29
- "commander": "^12.1.0",
30
- "dotenv": "^16.4.5",
31
- "jsdom": "^26.0.0"
32
- },
33
- "devDependencies": {
34
- "@types/blessed": "^0.1.25",
35
- "@types/jsdom": "^21.1.7",
36
- "@types/node": "^22.8.2",
37
- "esbuild": "^0.25.1",
38
- "ts-node": "^10.9.2"
39
- }
40
- }
@@ -1,11 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.AbstractExtractor = void 0;
4
- class AbstractExtractor {
5
- name;
6
- constructor(name) {
7
- this.name = name;
8
- }
9
- }
10
- exports.AbstractExtractor = AbstractExtractor;
11
- //# sourceMappingURL=AbstractExtractor.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"AbstractExtractor.js","sourceRoot":"","sources":["../../../src/extractors/AbstractExtractor.ts"],"names":[],"mappings":";;;AACA,MAAsB,iBAAiB;IACd;IAArB,YAAqB,IAAW;QAAX,SAAI,GAAJ,IAAI,CAAO;IAAI,CAAC;CAExC;AAHD,8CAGC"}
@@ -1,13 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.PageExtractor = void 0;
4
- const AbstractExtractor_1 = require("./AbstractExtractor");
5
- class PageExtractor extends AbstractExtractor_1.AbstractExtractor {
6
- constructor() { super("page-extractor"); }
7
- async extract(value) {
8
- const { window: { document: { title, location: { href: url } } } } = value;
9
- return { title, url };
10
- }
11
- }
12
- exports.PageExtractor = PageExtractor;
13
- //# sourceMappingURL=PageExtractor.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"PageExtractor.js","sourceRoot":"","sources":["../../../src/extractors/PageExtractor.ts"],"names":[],"mappings":";;;AAEA,2DAAwD;AAExD,MAAa,aAAc,SAAQ,qCAA8B;IAC7D,gBAAgB,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC;IAE1C,KAAK,CAAC,OAAO,CAAC,KAAY;QACtB,MAAM,EAAE,MAAM,EAAE,EAAE,QAAQ,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,EAAE,EAAE,EAAE,GAAG,KAAK,CAAA;QAC1E,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,CAAA;IACzB,CAAC;CACJ;AAPD,sCAOC"}
@@ -1,32 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.ResourceExtractor = void 0;
4
- const resource_1 = require("../resource");
5
- const AbstractExtractor_1 = require("./AbstractExtractor");
6
- class ResourceExtractor extends AbstractExtractor_1.AbstractExtractor {
7
- tags;
8
- constructor(tags) {
9
- super("page-extractor");
10
- this.tags = tags;
11
- }
12
- async extract(value) {
13
- const { document } = value.window;
14
- const externalResources = [];
15
- for (const tag of this.tags) {
16
- const selector = document.querySelectorAll(tag);
17
- const elements = Array.from(selector);
18
- for (const element of elements) {
19
- const text = (0, resource_1.findResourceText)(element);
20
- const link = (0, resource_1.findResourceLink)(element);
21
- if (!text || !link)
22
- continue;
23
- if (!link.url.startsWith("http"))
24
- continue;
25
- externalResources.push({ text, link });
26
- }
27
- }
28
- return externalResources;
29
- }
30
- }
31
- exports.ResourceExtractor = ResourceExtractor;
32
- //# sourceMappingURL=ResourceExtractor.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"ResourceExtractor.js","sourceRoot":"","sources":["../../../src/extractors/ResourceExtractor.ts"],"names":[],"mappings":";;;AACA,0CAAiH;AACjH,2DAAwD;AAExD,MAAa,iBAAkB,SAAQ,qCAA4C;IAClD;IAA7B,YAA6B,IAAW;QACpC,KAAK,CAAC,gBAAgB,CAAC,CAAC;QADC,SAAI,GAAJ,IAAI,CAAO;IAExC,CAAC;IACD,KAAK,CAAC,OAAO,CAAC,KAAY;QACtB,MAAM,EAAE,QAAQ,EAAE,GAAG,KAAK,CAAC,MAAM,CAAC;QAClC,MAAM,iBAAiB,GAAuB,EAAE,CAAC;QACjD,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YAC1B,MAAM,QAAQ,GAAG,QAAQ,CAAC,gBAAgB,CAAW,GAAG,CAAC,CAAA;YACzD,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;YACrC,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;gBAC7B,MAAM,IAAI,GAAG,IAAA,2BAAgB,EAAC,OAAO,CAAC,CAAC;gBACvC,MAAM,IAAI,GAAG,IAAA,2BAAgB,EAAC,OAAO,CAAC,CAAC;gBACvC,IAAG,CAAC,IAAI,IAAI,CAAC,IAAI;oBAAE,SAAQ;gBAC3B,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC;oBAAE,SAAQ;gBAC1C,iBAAiB,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAA;YAC1C,CAAC;QACL,CAAC;QACD,OAAO,iBAAiB,CAAC;IAC7B,CAAC;CACJ;AApBD,8CAoBC"}
package/bin/src/main.js DELETED
@@ -1,36 +0,0 @@
1
- #!/usr/bin/env node
2
- "use strict";
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- const commander_1 = require("commander");
5
- const package_json_1 = require("../package.json");
6
- const PageExtractor_1 = require("./extractors/PageExtractor");
7
- const ResourceExtractor_1 = require("./extractors/ResourceExtractor");
8
- const PageFetcher_1 = require("./page/PageFetcher");
9
- const JSONStylePrinter_1 = require("./printers/JSONStylePrinter");
10
- const program = new commander_1.Command();
11
- const url = (0, commander_1.createArgument)("<url | file...>", "remote https://URL or local file://resource.html to extract from");
12
- (async () => {
13
- await program
14
- .name(package_json_1.name)
15
- .version(package_json_1.version, "-v, --version")
16
- .description(package_json_1.description)
17
- .addArgument(url)
18
- .action(async (urls) => {
19
- const printer = new JSONStylePrinter_1.JSONStylePrinter();
20
- // simple log style printer
21
- // const printer = new LogStylePrinter();
22
- const pageFetcher = new PageFetcher_1.PageFetcher();
23
- const pageExtractor = new PageExtractor_1.PageExtractor();
24
- const resourceExtractor = new ResourceExtractor_1.ResourceExtractor(["a", "meta", "link", "embed"]);
25
- const pageResponses = await pageFetcher.fetchAll(urls);
26
- const pageMetadatas = [];
27
- for (const { content, url, error } of pageResponses) {
28
- const resources = error in (content) ? [] : await resourceExtractor.extract(content);
29
- const descriptor = error in content ? { url, error } : await pageExtractor.extract(content);
30
- pageMetadatas.push({ ...descriptor, resources });
31
- }
32
- await printer.print(...pageMetadatas);
33
- })
34
- .parseAsync(process.argv);
35
- })();
36
- //# sourceMappingURL=main.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"main.js","sourceRoot":"","sources":["../../src/main.ts"],"names":[],"mappings":";;;AACA,yCAAoD;AAEpD,kDAA6D;AAC7D,8DAA2D;AAC3D,sEAAmE;AACnE,oDAAiD;AAEjD,kEAA+D;AAG/D,MAAM,OAAO,GAAG,IAAI,mBAAO,EAAE,CAAC;AAE9B,MAAM,GAAG,GAAG,IAAA,0BAAc,EAAC,iBAAiB,EAAE,kEAAkE,CAAC,CAAC;AAElH,CAAC,KAAK,IAAI,EAAE;IACV,MAAM,OAAO;SACV,IAAI,CAAC,mBAAI,CAAC;SACV,OAAO,CAAC,sBAAO,EAAE,eAAe,CAAC;SACjC,WAAW,CAAC,0BAAW,CAAC;SACxB,WAAW,CAAC,GAAG,CAAC;SAChB,MAAM,CAAC,KAAK,EAAE,IAAc,EAAE,EAAE;QAC/B,MAAM,OAAO,GAAG,IAAI,mCAAgB,EAAE,CAAC;QACvC,2BAA2B;QAC3B,yCAAyC;QAEzC,MAAM,WAAW,GAAG,IAAI,yBAAW,EAAE,CAAA;QACrC,MAAM,aAAa,GAAG,IAAI,6BAAa,EAAE,CAAA;QACzC,MAAM,iBAAiB,GAAG,IAAI,qCAAiB,CAAC,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,CAAA;QAE/E,MAAM,aAAa,GAAG,MAAM,WAAW,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,aAAa,GAAmB,EAAE,CAAC;QAEzC,KAAK,MAAM,EAAE,OAAO,EAAE,GAAG,EAAE,KAAK,EAAE,IAAI,aAAa,EAAE,CAAC;YACpD,MAAM,SAAS,GAAG,KAAK,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,iBAAiB,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YACrF,MAAM,UAAU,GAAG,KAAK,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,MAAM,aAAa,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YAC5F,aAAa,CAAC,IAAI,CAAC,EAAE,GAAG,UAAU,EAAE,SAAS,EAAE,CAAC,CAAC;QACnD,CAAC;QAED,MAAM,OAAO,CAAC,KAAK,CAAC,GAAG,aAAa,CAAC,CAAC;IACxC,CAAC,CAAC;SACD,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;AAC9B,CAAC,CAAC,EAAE,CAAC"}
@@ -1,8 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.isPage = exports.isError = void 0;
4
- const isError = (page) => 'error' in page;
5
- exports.isError = isError;
6
- const isPage = (page) => "resources" in page && Array.isArray(page.resources);
7
- exports.isPage = isPage;
8
- //# sourceMappingURL=Page.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"Page.js","sourceRoot":"","sources":["../../../src/page/Page.ts"],"names":[],"mappings":";;;AAgBO,MAAM,OAAO,GAAG,CAAC,IAAkB,EAA6B,EAAE,CAAC,OAAO,IAAI,IAAI,CAAC;AAA7E,QAAA,OAAO,WAAsE;AACnF,MAAM,MAAM,GAAG,CAAC,IAAS,EAAgB,EAAE,CAC9C,WAAW,IAAI,IAAI,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;AAD5C,QAAA,MAAM,UACsC"}
@@ -1,26 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.PageFetcher = void 0;
4
- const jsdom_1 = require("jsdom");
5
- class PageFetcher {
6
- async fetchPage(url) {
7
- let dom;
8
- const virtualConsole = new jsdom_1.VirtualConsole().on('jsdomError', (error) => {
9
- process.stderr.write(`Error parsing ${url}:${error.message}\n`);
10
- });
11
- if (url.startsWith("file://")) {
12
- dom = jsdom_1.JSDOM.fromFile(url, { virtualConsole });
13
- }
14
- else {
15
- dom = jsdom_1.JSDOM.fromURL(url, { virtualConsole });
16
- }
17
- return dom.then(content => ({ url, content }))
18
- .catch(({ message }) => ({ url, error: `JSDOM failed to parse: ${message}` }));
19
- }
20
- async fetchAll(urls) {
21
- const responses = await Promise.all(urls.map(url => this.fetchPage(url)));
22
- return responses.filter(response => response.content !== undefined);
23
- }
24
- }
25
- exports.PageFetcher = PageFetcher;
26
- //# sourceMappingURL=PageFetcher.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"PageFetcher.js","sourceRoot":"","sources":["../../../src/page/PageFetcher.ts"],"names":[],"mappings":";;;AAAA,iCAA8C;AAS9C,MAAa,WAAW;IACZ,KAAK,CAAC,SAAS,CAAC,GAAW;QAC/B,IAAI,GAAmB,CAAC;QACxB,MAAM,cAAc,GAAG,IAAI,sBAAc,EAAE,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC,KAAK,EAAE,EAAE;YACnE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,iBAAiB,GAAG,IAAI,KAAK,CAAC,OAAO,IAAI,CAAC,CAAC;QACpE,CAAC,CAAC,CAAC;QACH,IAAI,GAAG,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YAC5B,GAAG,GAAG,aAAK,CAAC,QAAQ,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,CAAC,CAAC;QAClD,CAAC;aAAM,CAAC;YACJ,GAAG,GAAG,aAAK,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,CAAC,CAAC;QACjD,CAAC;QAED,OAAO,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;aACzC,KAAK,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,0BAA0B,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;IACvF,CAAC;IACD,KAAK,CAAC,QAAQ,CAAC,IAAc;QACzB,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAC1E,OAAO,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,QAAQ,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC;IACxE,CAAC;CAEJ;AApBD,kCAoBC"}
@@ -1,8 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.AbstractResourcePrinter = void 0;
4
- class AbstractResourcePrinter {
5
- constructor() { }
6
- }
7
- exports.AbstractResourcePrinter = AbstractResourcePrinter;
8
- //# sourceMappingURL=AbstractResourcePrinter.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"AbstractResourcePrinter.js","sourceRoot":"","sources":["../../../src/printers/AbstractResourcePrinter.ts"],"names":[],"mappings":";;;AAEA,MAAsB,uBAAuB;IACzC,gBAAiB,CAAC;CAErB;AAHD,0DAGC"}
@@ -1,12 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.JSONStylePrinter = void 0;
4
- const AbstractResourcePrinter_1 = require("./AbstractResourcePrinter");
5
- class JSONStylePrinter extends AbstractResourcePrinter_1.AbstractResourcePrinter {
6
- print(...pages) {
7
- const json = JSON.stringify(pages);
8
- process.stdout.write(json + "\n");
9
- }
10
- }
11
- exports.JSONStylePrinter = JSONStylePrinter;
12
- //# sourceMappingURL=JSONStylePrinter.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"JSONStylePrinter.js","sourceRoot":"","sources":["../../../src/printers/JSONStylePrinter.ts"],"names":[],"mappings":";;;AACA,uEAAoE;AAGpE,MAAa,gBAAiB,SAAQ,iDAAuB;IACzD,KAAK,CAAC,GAAG,KAAqB;QAC1B,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QACnC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,GAAG,IAAI,CAAC,CAAA;IACrC,CAAC;CAGJ;AAPD,4CAOC"}
@@ -1,27 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.LogStylePrinter = void 0;
4
- const Page_1 = require("../page/Page");
5
- const AbstractResourcePrinter_1 = require("./AbstractResourcePrinter");
6
- class LogStylePrinter extends AbstractResourcePrinter_1.AbstractResourcePrinter {
7
- write(str) {
8
- process.stdout.write(str);
9
- }
10
- async print(...pages) {
11
- for (const page of pages) {
12
- if (!(0, Page_1.isPage)(page)) {
13
- this.write(page.error);
14
- continue;
15
- }
16
- const { resources, title, url } = page;
17
- this.write(`Title: ${title}\n`);
18
- this.write(`URL: ${url}\n\n`);
19
- for (const resource of resources) {
20
- const { link: { url }, text: { value } } = resource;
21
- this.write(`${value}: ${url}\n`);
22
- }
23
- }
24
- }
25
- }
26
- exports.LogStylePrinter = LogStylePrinter;
27
- //# sourceMappingURL=LogStylePrinter.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"LogStylePrinter.js","sourceRoot":"","sources":["../../../src/printers/LogStylePrinter.ts"],"names":[],"mappings":";;;AAAA,uCAAoE;AACpE,uEAAoE;AAEpE,MAAa,eAAgB,SAAQ,iDAAuB;IAExD,KAAK,CAAC,GAAW;QACb,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;IAC7B,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,GAAG,KAAqB;QAChC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACvB,IAAI,CAAC,IAAA,aAAM,EAAC,IAAI,CAAC,EAAE,CAAC;gBAChB,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;gBACtB,SAAQ;YACZ,CAAC;YAED,MAAM,EAAC,SAAS,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,IAAI,CAAA;YAErC,IAAI,CAAC,KAAK,CAAC,UAAU,KAAK,IAAI,CAAC,CAAA;YAC/B,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,MAAM,CAAC,CAAA;YAE7B,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;gBAC/B,MAAM,EAAE,IAAI,EAAE,EAAE,GAAG,EAAE,EAAE,IAAI,EAAE,EAAE,KAAK,EAAE,EAAE,GAAG,QAAQ,CAAA;gBACnD,IAAI,CAAC,KAAK,CAAC,GAAG,KAAK,KAAK,GAAG,IAAI,CAAC,CAAA;YACpC,CAAC;QACL,CAAC;IACL,CAAC;CACJ;AAxBD,0CAwBC"}
@@ -1,56 +0,0 @@
1
- "use strict";
2
- /**
3
- * @license MIT
4
- * We are interested in visualising a page as a collection of tags.
5
- *
6
- * We wish to work with tags that can be compactly previewed on a webpage.
7
- * Here we must declare all of the element types that can be used to represent
8
- * a resource that can be hyperlinked off a webpage.
9
- */
10
- Object.defineProperty(exports, "__esModule", { value: true });
11
- exports.isKeyDefined = exports.isResourceKey = exports.RESOURCE_LINK_KEYS = exports.RESOURCE_DISPLAYABLE_KEYS = void 0;
12
- exports.findResourceText = findResourceText;
13
- exports.findResourceLink = findResourceLink;
14
- function findDefinedKey(element, keys) {
15
- for (const key of keys) {
16
- if ((0, exports.isKeyDefined)(key, element)) {
17
- return key;
18
- }
19
- }
20
- }
21
- exports.RESOURCE_DISPLAYABLE_KEYS = [
22
- 'id',
23
- 'innerText',
24
- 'textContent',
25
- 'class',
26
- 'ariaLabel',
27
- 'ariaDescription',
28
- 'alt',
29
- 'rel'
30
- ];
31
- exports.RESOURCE_LINK_KEYS = [
32
- "href",
33
- "data-src",
34
- "target",
35
- "action",
36
- "src",
37
- "url"
38
- ];
39
- function findResourceText(element) {
40
- for (const key of exports.RESOURCE_DISPLAYABLE_KEYS) {
41
- const value = element[key];
42
- if (value && typeof value === 'string' && value.trim() !== '')
43
- return { key, value };
44
- }
45
- }
46
- function findResourceLink(element) {
47
- const key = findDefinedKey(element, [...exports.RESOURCE_LINK_KEYS]);
48
- const url = element[key];
49
- if (url && typeof url === 'string' && url.trim() !== '')
50
- return { key, url };
51
- }
52
- const isResourceKey = (key) => key in exports.RESOURCE_LINK_KEYS;
53
- exports.isResourceKey = isResourceKey;
54
- const isKeyDefined = (key, element) => key in element && element[key] !== undefined;
55
- exports.isKeyDefined = isKeyDefined;
56
- //# sourceMappingURL=resource.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"resource.js","sourceRoot":"","sources":["../../src/resource.ts"],"names":[],"mappings":";AAAA;;;;;;;GAOG;;;AA8CH,4CAMC;AAED,4CAKC;AAvDD,SAAS,cAAc,CAAC,OAAiB,EAAE,IAAe;IACtD,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACrB,IAAI,IAAA,oBAAY,EAAC,GAAG,EAAE,OAAO,CAAC,EAAE,CAAC;YAC7B,OAAO,GAAG,CAAC;QACf,CAAC;IACL,CAAC;AACL,CAAC;AAEY,QAAA,yBAAyB,GAAG;IACrC,IAAI;IACJ,WAAW;IACX,aAAa;IACb,OAAO;IACP,WAAW;IACX,iBAAiB;IACjB,KAAK;IACL,KAAK;CACC,CAAC;AASE,QAAA,kBAAkB,GAAG;IAC9B,MAAM;IACN,UAAU;IACV,QAAQ;IACR,QAAQ;IACR,KAAK;IACL,KAAK;CACC,CAAC;AASX,SAAgB,gBAAgB,CAAC,OAAiB;IAC9C,KAAK,MAAM,GAAG,IAAI,iCAAyB,EAAE,CAAC;QAC1C,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,CAAA;QAC1B,IAAI,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,IAAI,EAAE,KAAK,EAAE;YACzD,OAAO,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC;IAC9B,CAAC;AACL,CAAC;AAED,SAAgB,gBAAgB,CAAC,OAAiB;IAC9C,MAAM,GAAG,GAAG,cAAc,CAAC,OAAO,EAAE,CAAC,GAAG,0BAAkB,CAAC,CAAC,CAAC;IAC7D,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC;IACzB,IAAI,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,IAAI,EAAE,KAAK,EAAE;QACnD,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;AAC5B,CAAC;AAOM,MAAM,aAAa,GAAG,CAAC,GAAW,EAAkB,EAAE,CAAC,GAAG,IAAI,0BAAkB,CAAC;AAA3E,QAAA,aAAa,iBAA8D;AAEjF,MAAM,YAAY,GAAG,CAA6B,GAAW,EAAE,OAAU,EAAW,EAAE,CACzF,GAAG,IAAI,OAAO,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK,SAAS,CAAC;AADpC,QAAA,YAAY,gBACwB"}