pagerts 0.2.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/.github/codeql/codeql-config.yml +7 -0
  2. package/.github/workflows/ci.yml +146 -0
  3. package/.github/workflows/dependency-update.yml +52 -0
  4. package/.prettierignore +5 -0
  5. package/.prettierrc.json +10 -0
  6. package/MAINTAINERS.md +30 -0
  7. package/POST-INSTALL.md +205 -0
  8. package/README.md +220 -16
  9. package/SECURITY.md +160 -0
  10. package/bin/main.js +24 -19
  11. package/bin/main.js.map +4 -4
  12. package/eslint.config.mjs +83 -0
  13. package/{jest.config.js → jest.config.cjs} +45 -30
  14. package/package.json +34 -13
  15. package/src/__tests__/PageFetcher.test.ts +48 -0
  16. package/src/__tests__/security.test.ts +153 -0
  17. package/src/extractors/AbstractExtractor.ts +4 -5
  18. package/src/extractors/PageExtractor.ts +21 -12
  19. package/src/extractors/ResourceExtractor.ts +31 -25
  20. package/src/extractors/TagExtractor.ts +13 -14
  21. package/src/extractors/index.ts +4 -0
  22. package/src/main.ts +71 -43
  23. package/src/page/Page.ts +24 -19
  24. package/src/page/PageFetcher.ts +81 -30
  25. package/src/page/index.ts +3 -0
  26. package/src/printers/AbstractResourcePrinter.ts +6 -6
  27. package/src/printers/JSONStylePrinter.ts +9 -12
  28. package/src/printers/LogStylePrinter.ts +30 -28
  29. package/src/printers/index.ts +3 -0
  30. package/src/resource.ts +88 -96
  31. package/src/security.ts +184 -0
  32. package/tsconfig.eslint.json +5 -0
  33. package/tsconfig.json +27 -11
  34. package/bin/package.json +0 -40
  35. package/bin/src/extractors/AbstractExtractor.js +0 -11
  36. package/bin/src/extractors/AbstractExtractor.js.map +0 -1
  37. package/bin/src/extractors/PageExtractor.js +0 -13
  38. package/bin/src/extractors/PageExtractor.js.map +0 -1
  39. package/bin/src/extractors/ResourceExtractor.js +0 -32
  40. package/bin/src/extractors/ResourceExtractor.js.map +0 -1
  41. package/bin/src/main.js +0 -36
  42. package/bin/src/main.js.map +0 -1
  43. package/bin/src/page/Page.js +0 -8
  44. package/bin/src/page/Page.js.map +0 -1
  45. package/bin/src/page/PageFetcher.js +0 -26
  46. package/bin/src/page/PageFetcher.js.map +0 -1
  47. package/bin/src/printers/AbstractResourcePrinter.js +0 -8
  48. package/bin/src/printers/AbstractResourcePrinter.js.map +0 -1
  49. package/bin/src/printers/JSONStylePrinter.js +0 -12
  50. package/bin/src/printers/JSONStylePrinter.js.map +0 -1
  51. package/bin/src/printers/LogStylePrinter.js +0 -27
  52. package/bin/src/printers/LogStylePrinter.js.map +0 -1
  53. package/bin/src/resource.js +0 -56
  54. package/bin/src/resource.js.map +0 -1
@@ -0,0 +1,83 @@
1
+ import eslint from '@eslint/js';
2
+ import tseslint from '@typescript-eslint/eslint-plugin';
3
+ import tsparser from '@typescript-eslint/parser';
4
+ import security from 'eslint-plugin-security';
5
+ import prettier from 'eslint-config-prettier';
6
+
7
+ export default [
8
+ {
9
+ ignores: ['bin/**', 'coverage/**', 'node_modules/**'],
10
+ },
11
+ eslint.configs.recommended,
12
+ {
13
+ files: ['src/**/*.ts'],
14
+ languageOptions: {
15
+ parser: tsparser,
16
+ parserOptions: {
17
+ ecmaVersion: 2022,
18
+ sourceType: 'module',
19
+ project: './tsconfig.eslint.json',
20
+ },
21
+ globals: {
22
+ console: 'readonly',
23
+ process: 'readonly',
24
+ __dirname: 'readonly',
25
+ __filename: 'readonly',
26
+ Buffer: 'readonly',
27
+ },
28
+ },
29
+ plugins: {
30
+ '@typescript-eslint': tseslint,
31
+ security: security,
32
+ },
33
+ rules: {
34
+ // Disable base JS rules in favor of TS-aware equivalents
35
+ 'no-unused-vars': 'off',
36
+ 'no-undef': 'off',
37
+
38
+ // TypeScript rules
39
+ '@typescript-eslint/no-explicit-any': 'error',
40
+ '@typescript-eslint/explicit-function-return-type': 'warn',
41
+ '@typescript-eslint/no-unused-vars': ['error', { argsIgnorePattern: '^_' }],
42
+ '@typescript-eslint/no-non-null-assertion': 'error',
43
+ '@typescript-eslint/prefer-nullish-coalescing': 'warn',
44
+ '@typescript-eslint/prefer-optional-chain': 'warn',
45
+
46
+ // Security rules
47
+ 'security/detect-object-injection': 'warn',
48
+ 'security/detect-non-literal-regexp': 'warn',
49
+ 'security/detect-unsafe-regex': 'error',
50
+ 'security/detect-buffer-noassert': 'error',
51
+ 'security/detect-child-process': 'warn',
52
+ 'security/detect-disable-mustache-escape': 'error',
53
+ 'security/detect-eval-with-expression': 'error',
54
+ 'security/detect-no-csrf-before-method-override': 'error',
55
+ 'security/detect-non-literal-fs-filename': 'warn',
56
+ 'security/detect-non-literal-require': 'warn',
57
+ 'security/detect-possible-timing-attacks': 'warn',
58
+ 'security/detect-pseudoRandomBytes': 'error',
59
+
60
+ // General rules
61
+ 'no-console': ['warn', { allow: ['warn', 'error'] }],
62
+ 'no-debugger': 'error',
63
+ 'no-eval': 'error',
64
+ 'no-implied-eval': 'error',
65
+ 'no-new-func': 'error',
66
+ 'prefer-const': 'error',
67
+ 'no-var': 'error',
68
+ },
69
+ },
70
+ {
71
+ files: ['src/__tests__/**/*.ts'],
72
+ languageOptions: {
73
+ globals: {
74
+ describe: 'readonly',
75
+ it: 'readonly',
76
+ expect: 'readonly',
77
+ beforeEach: 'readonly',
78
+ setTimeout: 'readonly',
79
+ },
80
+ },
81
+ },
82
+ prettier,
83
+ ];
@@ -5,48 +5,63 @@
5
5
 
6
6
  /** @type {import('jest').Config} */
7
7
  const config = {
8
- // All imported modules in your tests should be mocked automatically
9
- // automock: false,
8
+ preset: 'ts-jest',
9
+ testEnvironment: 'node',
10
10
 
11
- // Stop running tests after `n` failures
12
- // bail: 0,
11
+ // Support for ES modules
12
+ extensionsToTreatAsEsm: ['.ts'],
13
13
 
14
- // The directory where Jest should store its cached dependency information
15
- // cacheDirectory: "/tmp/jest_rs",
14
+ // Strip the `.js` suffix from relative imports so ESM-style specifiers (e.g. './foo.js') resolve without the extension
15
+ moduleNameMapper: {
16
+ '^(\\.{1,2}/.*)\\.js$': '$1',
17
+ },
16
18
 
17
19
  // Automatically clear mock calls, instances, contexts and results before every test
18
- // clearMocks: false,
20
+ clearMocks: true,
19
21
 
20
22
  // Indicates whether the coverage information should be collected while executing the test
21
23
  collectCoverage: true,
22
24
 
23
- // An array of glob patterns indicating a set of files for which coverage information should be collected
24
- // collectCoverageFrom: undefined,
25
-
26
25
  // The directory where Jest should output its coverage files
27
- coverageDirectory: "coverage",
28
-
29
- // An array of regexp pattern strings used to skip coverage collection
30
- // coveragePathIgnorePatterns: [
31
- // "/node_modules/"
32
- // ],
26
+ coverageDirectory: 'coverage',
33
27
 
34
28
  // Indicates which provider should be used to instrument code for coverage
35
- coverageProvider: "v8",
29
+ coverageProvider: 'v8',
36
30
 
37
- // A list of reporter names that Jest uses when writing coverage reports
38
- // coverageReporters: [
39
- // "json",
40
- // "text",
41
- // "lcov",
42
- // "clover"
43
- // ],
44
-
45
- // An object that configures minimum threshold enforcement for coverage results
46
- // coverageThreshold: undefined,
47
-
48
- // A path to a custom dependency extractor
49
- // dependencyExtractor: undefined,
31
+ // An array of glob patterns indicating a set of files for which coverage information should be collected
32
+ collectCoverageFrom: ['src/**/*.ts', '!src/**/*.test.ts', '!src/**/*.spec.ts'],
33
+
34
+ // Coverage thresholds - realistic for current state, will improve over time
35
+ coverageThreshold: {
36
+ global: {
37
+ branches: 30,
38
+ functions: 35,
39
+ lines: 30,
40
+ statements: 30,
41
+ },
42
+ },
43
+
44
+ // Test match patterns
45
+ testMatch: ['**/__tests__/**/*.ts', '**/?(*.)+(spec|test).ts'],
46
+
47
+ // Module file extensions
48
+ moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'],
49
+
50
+ // Transform files with ts-jest
51
+ transform: {
52
+ '^.+\\.ts$': [
53
+ 'ts-jest',
54
+ {
55
+ useESM: true,
56
+ tsconfig: {
57
+ module: 'ES2022',
58
+ target: 'ES2022',
59
+ esModuleInterop: true,
60
+ moduleResolution: 'node',
61
+ },
62
+ },
63
+ ],
64
+ },
50
65
 
51
66
  // Make calling deprecated APIs throw helpful error messages
52
67
  // errorOnDeprecated: false,
package/package.json CHANGED
@@ -1,22 +1,36 @@
1
1
  {
2
2
  "name": "pagerts",
3
3
  "description": "A tool for viewing external relations in a webpage",
4
- "version": "0.2.0",
4
+ "version": "0.4.1",
5
5
  "main": "main.js",
6
6
  "bin": {
7
7
  "pagerts": "bin/main.js"
8
8
  },
9
+ "engines": {
10
+ "node": ">=18.0.0"
11
+ },
9
12
  "scripts": {
10
- "test": "jest",
11
- "build": "esbuild src/main.ts --external:jsdom --bundle --outdir=bin --minify --sourcemap --platform=node",
12
- "lint": "tsc",
13
+ "test": "jest --coverage",
14
+ "test:watch": "jest --watch",
15
+ "build": "esbuild src/main.ts --external:jsdom --bundle --outdir=bin --minify --sourcemap --platform=node --format=esm",
16
+ "lint": "eslint src/**/*.ts",
17
+ "lint:fix": "eslint src/**/*.ts --fix",
18
+ "type-check": "tsc --noEmit",
19
+ "format": "prettier --write \"src/**/*.ts\"",
20
+ "format:check": "prettier --check \"src/**/*.ts\"",
21
+ "security:audit": "npm audit --audit-level=moderate",
22
+ "security:check": "npm run security:audit && npm run lint",
13
23
  "start": "node ./bin/main.js",
14
- "dev": "npx tsx src/main.ts"
24
+ "dev": "tsx src/main.ts",
25
+ "prepare": "npm run build"
15
26
  },
16
27
  "keywords": [
17
28
  "webpage",
18
29
  "hierarchy",
19
- "management"
30
+ "management",
31
+ "web-scraping",
32
+ "cli",
33
+ "url-extraction"
20
34
  ],
21
35
  "author": "Kirill kn253 Nevzorov",
22
36
  "license": "MIT",
@@ -25,16 +39,23 @@
25
39
  },
26
40
  "homepage": "https://github.com/akinevz0/pagerts",
27
41
  "dependencies": {
28
- "blessed": "^0.1.81",
29
42
  "commander": "^12.1.0",
30
- "dotenv": "^16.4.5",
31
- "jsdom": "^26.0.0"
43
+ "jsdom": "^25.0.1"
32
44
  },
33
45
  "devDependencies": {
34
- "@types/blessed": "^0.1.25",
46
+ "@types/jest": "^29.5.14",
35
47
  "@types/jsdom": "^21.1.7",
36
- "@types/node": "^22.8.2",
48
+ "@types/node": "^22.10.5",
49
+ "@typescript-eslint/eslint-plugin": "^8.20.0",
50
+ "@typescript-eslint/parser": "^8.20.0",
37
51
  "esbuild": "^0.25.1",
38
- "ts-node": "^10.9.2"
52
+ "eslint": "^9.18.0",
53
+ "eslint-config-prettier": "^9.1.0",
54
+ "eslint-plugin-security": "^3.0.1",
55
+ "jest": "^29.7.0",
56
+ "prettier": "^3.4.2",
57
+ "ts-jest": "^29.2.5",
58
+ "tsx": "^4.19.2",
59
+ "typescript": "^5.7.2"
39
60
  }
40
- }
61
+ }
@@ -0,0 +1,48 @@
1
+ import { PageFetcher } from '../page/PageFetcher';
2
+
3
+ describe('PageFetcher', () => {
4
+ let pageFetcher: PageFetcher;
5
+
6
+ beforeEach(() => {
7
+ pageFetcher = new PageFetcher();
8
+ });
9
+
10
+ describe('fetchAll', () => {
11
+ it('should fetch valid URLs', async () => {
12
+ const urls = ['https://example.com'];
13
+ const responses = await pageFetcher.fetchAll(urls);
14
+
15
+ expect(responses.length).toBeGreaterThan(0);
16
+ expect(responses[0].url).toBe('https://example.com');
17
+ });
18
+
19
+ it('should handle invalid URLs gracefully', async () => {
20
+ const urls = ['https://this-domain-definitely-does-not-exist-12345.com'];
21
+ const responses = await pageFetcher.fetchAll(urls);
22
+
23
+ expect(responses.length).toBeGreaterThan(0);
24
+ if (responses[0].error) {
25
+ expect(responses[0].error).toContain('Failed to fetch');
26
+ }
27
+ });
28
+
29
+ it('should handle multiple URLs', async () => {
30
+ const urls = ['https://example.com', 'https://example.org'];
31
+ const responses = await pageFetcher.fetchAll(urls);
32
+
33
+ expect(responses.length).toBe(2);
34
+ });
35
+
36
+ it('should have timeout for slow requests', async () => {
37
+ const slowFetcher = new PageFetcher(100, 0); // 100ms timeout, no retries
38
+ const urls = ['https://httpbin.org/delay/5']; // This will time out
39
+
40
+ const responses = await slowFetcher.fetchAll(urls);
41
+ expect(responses.length).toBeGreaterThan(0);
42
+
43
+ if (responses[0].error) {
44
+ expect(responses[0].error).toContain('timeout');
45
+ }
46
+ }, 10000);
47
+ });
48
+ });
@@ -0,0 +1,153 @@
1
+ import { validateUrl, validateUrls, RateLimiter, sanitizeText } from '../security';
2
+
3
+ describe('Security Module', () => {
4
+ describe('validateUrl', () => {
5
+ it('should validate a proper HTTPS URL', () => {
6
+ const result = validateUrl('https://example.com');
7
+ expect(result.isValid).toBe(true);
8
+ expect(result.sanitizedUrl).toBe('https://example.com/');
9
+ });
10
+
11
+ it('should validate a proper HTTP URL', () => {
12
+ const result = validateUrl('http://example.com');
13
+ expect(result.isValid).toBe(true);
14
+ });
15
+
16
+ it('should validate a file:// URL', () => {
17
+ const result = validateUrl('file:///path/to/file.html');
18
+ expect(result.isValid).toBe(true);
19
+ });
20
+
21
+ it('should reject empty URLs', () => {
22
+ const result = validateUrl('');
23
+ expect(result.isValid).toBe(false);
24
+ expect(result.error).toContain('empty');
25
+ });
26
+
27
+ it('should reject URLs with javascript: protocol', () => {
28
+ const result = validateUrl('javascript:alert(1)');
29
+ expect(result.isValid).toBe(false);
30
+ expect(result.error).toContain('suspicious');
31
+ });
32
+
33
+ it('should reject URLs with data: protocol', () => {
34
+ const result = validateUrl('data:text/html,<script>alert(1)</script>');
35
+ expect(result.isValid).toBe(false);
36
+ expect(result.error).toContain('suspicious');
37
+ });
38
+
39
+ it('should reject URLs exceeding maximum length', () => {
40
+ const longUrl = 'https://example.com/' + 'a'.repeat(3000);
41
+ const result = validateUrl(longUrl);
42
+ expect(result.isValid).toBe(false);
43
+ expect(result.error).toContain('exceeds maximum length');
44
+ });
45
+
46
+ it('should reject URLs with script tags', () => {
47
+ const result = validateUrl('https://example.com/<script>alert(1)</script>');
48
+ expect(result.isValid).toBe(false);
49
+ expect(result.error).toContain('suspicious');
50
+ });
51
+
52
+ it('should reject invalid URL formats', () => {
53
+ const result = validateUrl('not-a-valid-url');
54
+ expect(result.isValid).toBe(false);
55
+ expect(result.error).toContain('Invalid URL format');
56
+ });
57
+
58
+ it('should trim whitespace from URLs', () => {
59
+ const result = validateUrl(' https://example.com ');
60
+ expect(result.isValid).toBe(true);
61
+ expect(result.sanitizedUrl).toBe('https://example.com/');
62
+ });
63
+ });
64
+
65
+ describe('validateUrls', () => {
66
+ it('should validate multiple URLs and separate valid from invalid', () => {
67
+ const urls = ['https://example.com', 'javascript:alert(1)', 'http://test.com', 'invalid-url'];
68
+ const result = validateUrls(urls);
69
+
70
+ expect(result.validUrls.length).toBe(2);
71
+ expect(result.errors.length).toBe(2);
72
+ expect(result.validUrls).toContain('https://example.com/');
73
+ expect(result.validUrls).toContain('http://test.com/');
74
+ });
75
+
76
+ it('should return empty arrays for empty input', () => {
77
+ const result = validateUrls([]);
78
+ expect(result.validUrls.length).toBe(0);
79
+ expect(result.errors.length).toBe(0);
80
+ });
81
+ });
82
+
83
+ describe('RateLimiter', () => {
84
+ it('should allow requests within the limit', () => {
85
+ const limiter = new RateLimiter(5, 1000);
86
+
87
+ for (let i = 0; i < 5; i++) {
88
+ expect(limiter.isAllowed()).toBe(true);
89
+ }
90
+ });
91
+
92
+ it('should block requests exceeding the limit', () => {
93
+ const limiter = new RateLimiter(3, 1000);
94
+
95
+ // Use up all allowed requests
96
+ for (let i = 0; i < 3; i++) {
97
+ limiter.isAllowed();
98
+ }
99
+
100
+ // Next request should be blocked
101
+ expect(limiter.isAllowed()).toBe(false);
102
+ });
103
+
104
+ it('should reset after the time window', async () => {
105
+ const limiter = new RateLimiter(2, 100); // 100ms window
106
+
107
+ limiter.isAllowed();
108
+ limiter.isAllowed();
109
+ expect(limiter.isAllowed()).toBe(false);
110
+
111
+ // Wait for window to expire
112
+ await new Promise((resolve) => setTimeout(resolve, 150));
113
+
114
+ // Should be allowed again
115
+ expect(limiter.isAllowed()).toBe(true);
116
+ });
117
+
118
+ it('should correctly report remaining requests', () => {
119
+ const limiter = new RateLimiter(5, 1000);
120
+
121
+ expect(limiter.getRemainingRequests()).toBe(5);
122
+ limiter.isAllowed();
123
+ expect(limiter.getRemainingRequests()).toBe(4);
124
+ limiter.isAllowed();
125
+ expect(limiter.getRemainingRequests()).toBe(3);
126
+ });
127
+ });
128
+
129
+ describe('sanitizeText', () => {
130
+ it('should sanitize HTML special characters', () => {
131
+ const input = '<script>alert("XSS")</script>';
132
+ const output = sanitizeText(input);
133
+ expect(output).toBe('&lt;script&gt;alert(&quot;XSS&quot;)&lt;&#x2F;script&gt;');
134
+ });
135
+
136
+ it('should handle empty strings', () => {
137
+ expect(sanitizeText('')).toBe('');
138
+ });
139
+
140
+ it('should escape quotes and apostrophes', () => {
141
+ const input = `It's a "test"`;
142
+ const output = sanitizeText(input);
143
+ expect(output).toContain('&#x27;');
144
+ expect(output).toContain('&quot;');
145
+ });
146
+
147
+ it('should escape forward slashes', () => {
148
+ const input = '</script>';
149
+ const output = sanitizeText(input);
150
+ expect(output).toBe('&lt;&#x2F;script&gt;');
151
+ });
152
+ });
153
+ });
@@ -1,5 +1,4 @@
1
-
2
- export abstract class AbstractExtractor<V, R> {
3
- constructor(readonly name:string) { }
4
- abstract extract(value: V): Promise<R>;
5
- }
1
+ export abstract class AbstractExtractor<V, R> {
2
+ constructor(readonly name: string) {}
3
+ abstract extract(value: V): Promise<R>;
4
+ }
@@ -1,12 +1,21 @@
1
- import { isError, type Page } from '../page/Page';
2
- import { JSDOM } from 'jsdom';
3
- import { AbstractExtractor } from './AbstractExtractor';
4
-
5
- export class PageExtractor extends AbstractExtractor<JSDOM, Page> {
6
- constructor() { super("page-extractor"); }
7
-
8
- async extract(value: JSDOM): Promise<Page> {
9
- const { window: { document: { title, location: { href: url } } } } = value
10
- return { title, url }
11
- }
12
- }
1
+ import type { Page } from '../page/index.js';
2
+ import { JSDOM } from 'jsdom';
3
+ import { AbstractExtractor } from './AbstractExtractor.js';
4
+
5
+ export class PageExtractor extends AbstractExtractor<JSDOM, Page> {
6
+ constructor() {
7
+ super('page-extractor');
8
+ }
9
+
10
+ async extract(value: JSDOM): Promise<Page> {
11
+ const {
12
+ window: {
13
+ document: {
14
+ title,
15
+ location: { href: url },
16
+ },
17
+ },
18
+ } = value;
19
+ return { title, url };
20
+ }
21
+ }
@@ -1,25 +1,31 @@
1
- import type { JSDOM } from "jsdom";
2
- import { findResourceLink, findResourceText, type ExternalResource, type Resource, type Tag } from "../resource";
3
- import { AbstractExtractor } from './AbstractExtractor';
4
-
5
- export class ResourceExtractor extends AbstractExtractor<JSDOM, ExternalResource[]> {
6
- constructor(private readonly tags: Tag[]) {
7
- super("page-extractor");
8
- }
9
- async extract(value: JSDOM): Promise<ExternalResource[]> {
10
- const { document } = value.window;
11
- const externalResources: ExternalResource[] = [];
12
- for (const tag of this.tags) {
13
- const selector = document.querySelectorAll<Resource>(tag)
14
- const elements = Array.from(selector)
15
- for (const element of elements) {
16
- const text = findResourceText(element);
17
- const link = findResourceLink(element);
18
- if(!text || !link) continue
19
- if (!link.url.startsWith("http")) continue
20
- externalResources.push({ text, link })
21
- }
22
- }
23
- return externalResources;
24
- }
25
- }
1
+ import type { JSDOM } from 'jsdom';
2
+ import {
3
+ findResourceLink,
4
+ findResourceText,
5
+ type ExternalResource,
6
+ type Resource,
7
+ type Tag,
8
+ } from '../resource.js';
9
+ import { AbstractExtractor } from './AbstractExtractor.js';
10
+
11
+ export class ResourceExtractor extends AbstractExtractor<JSDOM, ExternalResource[]> {
12
+ constructor(private readonly tags: Tag[]) {
13
+ super('page-extractor');
14
+ }
15
+ async extract(value: JSDOM): Promise<ExternalResource[]> {
16
+ const { document } = value.window;
17
+ const externalResources: ExternalResource[] = [];
18
+ for (const tag of this.tags) {
19
+ const selector = document.querySelectorAll<Resource>(tag);
20
+ const elements = Array.from(selector);
21
+ for (const element of elements) {
22
+ const text = findResourceText(element);
23
+ const link = findResourceLink(element);
24
+ if (!text || !link) continue;
25
+ if (!link.url.startsWith('http')) continue;
26
+ externalResources.push({ text, link });
27
+ }
28
+ }
29
+ return externalResources;
30
+ }
31
+ }
@@ -1,14 +1,13 @@
1
- import { JSDOM } from 'jsdom';
2
- import type { Resource, Tag } from '../resource';
3
- import { AbstractExtractor } from './AbstractExtractor';
4
-
5
- export class TagExtractor<T extends Tag> extends AbstractExtractor<JSDOM, Resource[]> {
6
- extract(value: JSDOM): Promise<Resource[]> {
7
- const linkNodes = value.window.document.querySelectorAll<Resource>(this.tagName);
8
- return Promise.resolve(Array.from(linkNodes));
9
- }
10
- constructor(private readonly tagName: T) {
11
- super(`extract <${tagName}>`)
12
- };
13
-
14
- }
1
+ import { JSDOM } from 'jsdom';
2
+ import type { Resource, Tag } from '../resource.js';
3
+ import { AbstractExtractor } from './AbstractExtractor.js';
4
+
5
+ export class TagExtractor<T extends Tag> extends AbstractExtractor<JSDOM, Resource[]> {
6
+ extract(value: JSDOM): Promise<Resource[]> {
7
+ const linkNodes = value.window.document.querySelectorAll<Resource>(this.tagName);
8
+ return Promise.resolve(Array.from(linkNodes));
9
+ }
10
+ constructor(private readonly tagName: T) {
11
+ super(`extract <${tagName}>`);
12
+ }
13
+ }
@@ -0,0 +1,4 @@
1
+ export { AbstractExtractor } from './AbstractExtractor.js';
2
+ export { PageExtractor } from './PageExtractor.js';
3
+ export { ResourceExtractor } from './ResourceExtractor.js';
4
+ export { TagExtractor } from './TagExtractor.js';