pagerts 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/jest.config.js ADDED
@@ -0,0 +1,198 @@
1
+ /**
2
+ * For a detailed explanation regarding each configuration property, visit:
3
+ * https://jestjs.io/docs/configuration
4
+ */
5
+
6
/** @type {import('jest').Config} */
const config = {
  // Only the options below differ from Jest's defaults; every other option
  // is left unset. See https://jestjs.io/docs/configuration for the full list.

  // Collect coverage information while the tests execute.
  collectCoverage: true,

  // Directory where Jest writes its coverage files.
  coverageDirectory: "coverage",

  // Provider used to instrument code for coverage.
  coverageProvider: "v8",
};
197
+
198
+ module.exports = config;
package/package.json ADDED
@@ -0,0 +1,40 @@
1
+ {
2
+ "name": "pagerts",
3
+ "description": "A tool for viewing external relations in a webpage",
4
+ "version": "0.1.0",
5
+ "main": "./bin/main.js",
6
+ "bin": {
7
+ "pagerts": "./bin/main.js"
8
+ },
9
+ "scripts": {
10
+ "test": "jest",
11
+ "build": "esbuild src/main.ts --external:jsdom --bundle --outdir=bin --minify --sourcemap --platform=node",
12
+ "tscbuild": "tsc",
13
+ "start": "node ./bin/main.js",
14
+ "dev": "npx tsx src/main.ts"
15
+ },
16
+ "keywords": [
17
+ "webpage",
18
+ "hierarchy",
19
+ "management"
20
+ ],
21
+ "author": "Kirill kn253 Nevzorov",
22
+ "license": "MIT",
23
+ "bugs": {
24
+ "url": "https://github.com/akinevz0/treepage/issues"
25
+ },
26
+ "homepage": "https://github.com/akinevz0/treepage",
27
+ "dependencies": {
28
+ "blessed": "^0.1.81",
29
+ "commander": "^12.1.0",
30
+ "dotenv": "^16.4.5",
31
+ "jsdom": "^26.0.0"
32
+ },
33
+ "devDependencies": {
34
+ "@types/blessed": "^0.1.25",
35
+ "@types/jsdom": "^21.1.7",
36
+ "@types/node": "^22.8.2",
37
+ "esbuild": "^0.25.1",
38
+ "ts-node": "^10.9.2"
39
+ }
40
+ }
@@ -0,0 +1,5 @@
1
+
2
/**
 * Base class for extractors: objects that take an input of type `V` and
 * asynchronously produce a result of type `R`.
 */
export abstract class AbstractExtractor<V, R> {
  /** Label given to this extractor at construction time. */
  readonly name: string;

  constructor(name: string) {
    this.name = name;
  }

  /**
   * Produce a result from the given input.
   *
   * @param value - The input to extract from.
   * @returns A promise for the extracted result.
   */
  abstract extract(value: V): Promise<R>;
}
@@ -0,0 +1,15 @@
1
+ import { Page } from '../page/Page';
2
+ import type { PageDescriptor } from '../page/PageMetadata';
3
+ import { AbstractExtractor } from './AbstractExtractor';
4
+
5
/**
 * Extractor that describes a page by its document title and URL.
 */
export class PageExtractor extends AbstractExtractor<Page, PageDescriptor> {
  constructor() {
    super("page-extractor");
  }

  /**
   * Read the title and location of the page's document.
   *
   * @param value - The loaded page.
   * @returns The page's `url` (the document location's `href`) and `title`.
   */
  async extract(value: Page): Promise<PageDescriptor> {
    const doc = value.window.document;
    const pageTitle = doc.title;
    const pageUrl = doc.location.href;
    return { url: pageUrl, title: pageTitle };
  }
}
@@ -0,0 +1,25 @@
1
+ import { Page } from '../page/Page';
2
+ import { findResourceLink, findResourceText, type ExternalResource, type Resource, type Tag } from "../resource";
3
+ import { AbstractExtractor } from './AbstractExtractor';
4
+
5
/**
 * Extractor that collects, for a configured list of tag names, every element
 * of the page that yields both a display text and a link.
 */
export class ResourceExtractor extends AbstractExtractor<Page, ExternalResource[]> {
  /**
   * @param tags - Tag names whose elements are inspected, in the order given.
   */
  constructor(private readonly tags: Tag[]) {
    // Own label; previously this reused the "page-extractor" name.
    super("resource-extractor");
  }

  /**
   * Scan the page's document for elements matching the configured tags.
   *
   * @param value - The loaded page.
   * @returns One entry per element for which both `findResourceText` and
   *   `findResourceLink` return a value; other elements are skipped.
   */
  async extract(value: Page): Promise<ExternalResource[]> {
    const { document } = value.window;
    const externalResources: ExternalResource[] = [];
    for (const tag of this.tags) {
      const elements = Array.from(document.querySelectorAll<Resource>(tag));
      for (const element of elements) {
        const text = findResourceText(element);
        const link = findResourceLink(element);
        // An entry needs both a label and a target.
        if (!text || !link) continue;
        externalResources.push({ text, link });
      }
    }
    return externalResources;
  }
}
@@ -0,0 +1,14 @@
1
+ import { JSDOM } from 'jsdom';
2
+ import type { Resource, Tag } from '../resource';
3
+ import { AbstractExtractor } from './AbstractExtractor';
4
+
5
/**
 * Extractor that returns every element of a JSDOM document matching a single
 * tag name.
 */
export class TagExtractor<T extends Tag> extends AbstractExtractor<JSDOM, Resource[]> {
  /**
   * @param tagName - Tag name passed to `querySelectorAll`.
   */
  constructor(private readonly tagName: T) {
    super(`extract <${tagName}>`);
  }

  /**
   * Query the document for the configured tag.
   *
   * @param value - The JSDOM instance to search.
   * @returns A resolved promise holding the matching elements as an array.
   */
  extract(value: JSDOM): Promise<Resource[]> {
    const { document } = value.window;
    const matches = document.querySelectorAll<Resource>(this.tagName);
    const elements = Array.from(matches);
    return Promise.resolve(elements);
  }
}
package/src/main.ts ADDED
@@ -0,0 +1,37 @@
1
+ import { Command, createArgument } from "commander";
2
+
3
+ import { description, name, version } from '../package.json';
4
+ import { PageExtractor } from "./extractors/PageExtractor";
5
+ import { ResourceExtractor } from "./extractors/ResourceExtractor";
6
+ import { PageFetcher } from "./page/PageFetcher";
7
+ import type { PageMetadata } from "./page/PageMetadata";
8
+ import { JSONStylePrinter } from "./printers/JSONStylePrinter";
9
+
10
const program = new Command();

const url = createArgument("<url|file...>", "remote URL or local file to extract remote resources from");

/**
 * Command action: fetch every requested page, extract its descriptor and
 * resources, and print all results as one JSON document.
 *
 * @param urls - The positional arguments given on the command line.
 */
async function run(urls: string[]): Promise<void> {
  const printer = new JSONStylePrinter();
  const pageFetcher = new PageFetcher();
  const pageExtractor = new PageExtractor();
  const resourceExtractor = new ResourceExtractor(["a", "meta", "link", "embed"]);

  const pagesFetched = await pageFetcher.fetchAll(urls);
  const metadataPages: PageMetadata[] = [];
  for (const page of pagesFetched) {
    const resources = await resourceExtractor.extract(page);
    const descriptor = await pageExtractor.extract(page);
    metadataPages.push({ ...descriptor, resources });
  }
  await printer.print(...metadataPages);
}

/**
 * Configure the CLI and parse `process.argv`.
 *
 * Failures from the action (for example a page that cannot be fetched) are
 * reported as a single line on stderr and turned into a non-zero exit code,
 * instead of escaping as an unhandled promise rejection.
 */
async function main(): Promise<void> {
  try {
    await program
      .name(name)
      .version(version, "-v, --version")
      .description(description)
      .addArgument(url)
      .action(async (urls: string[]) => run(urls))
      .parseAsync(process.argv);
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`${name}: ${reason}`);
    process.exitCode = 1;
  }
}

// `main` handles its own errors, so the returned promise never rejects.
void main();
@@ -0,0 +1,3 @@
1
+ import { JSDOM } from 'jsdom';
2
+
3
/** A loaded page; currently an alias for a JSDOM instance. */
export type Page = JSDOM;
@@ -0,0 +1,16 @@
1
+ import { JSDOM, VirtualConsole } from 'jsdom';
2
+
3
/**
 * Loads pages into JSDOM, either from a URL or from a file on disk.
 */
export class PageFetcher {
  /**
   * Decide whether a location should be loaded with `JSDOM.fromURL`.
   *
   * @param location - The user-supplied URL or path.
   * @returns `true` when the string parses as an absolute URL and is not a
   *   Windows drive path; `false` otherwise.
   */
  private static isUrl(location: string): boolean {
    // "C:\page.html" / "C:/page.html" parse as URLs with a one-letter scheme.
    if (/^[a-zA-Z]:[\\/]/.test(location)) return false;
    try {
      new URL(location);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Load a single page.
   *
   * Strings that parse as absolute URLs are fetched with `JSDOM.fromURL`,
   * exactly as before. Anything else (relative or absolute file paths, which
   * `fromURL` always rejected) is read with `JSDOM.fromFile`.
   *
   * @param url - Remote URL or local file path.
   * @returns The loaded JSDOM instance.
   * @throws Rejects with the underlying jsdom error when loading fails.
   */
  async fetchPage(url: string): Promise<JSDOM> {
    // Report jsdom's own errors on stderr, tagged with the page they came from.
    const virtualConsole = new VirtualConsole().on('jsdomError', (error) => {
      console.error(`Error parsing ${url}:`, error.message);
    });
    const options = { virtualConsole };

    if (PageFetcher.isUrl(url)) {
      return await JSDOM.fromURL(url, options);
    }
    return await JSDOM.fromFile(url, options);
  }

  /**
   * Load several pages concurrently.
   *
   * @param urls - Remote URLs or local file paths.
   * @returns The loaded pages, in the same order as `urls`.
   * @throws Rejects as soon as any single page fails to load.
   */
  async fetchAll(urls: string[]): Promise<JSDOM[]> {
    return await Promise.all(urls.map(url => this.fetchPage(url)));
  }
}
@@ -0,0 +1,15 @@
1
+ import type { ExternalResource } from "../resource";
2
+
3
/** Identifying details of a page: its title and its URL. */
export type PageDescriptor = {
  title: string;
  url: string;
};

/** A page descriptor together with the resources found on that page. */
export type PageMetadata = PageDescriptor & {
  resources: ExternalResource[];
};
@@ -0,0 +1,6 @@
1
+ import type { PageMetadata } from "../page/PageMetadata";
2
+
3
/**
 * Base class for printers that output page metadata.
 */
export abstract class AbstractResourcePrinter {
  /**
   * Output the given pages.
   *
   * @param pages - The pages to print.
   * @returns Nothing, or a promise that settles when printing is done.
   */
  abstract print(...pages: PageMetadata[]): void | Promise<void>;
}
@@ -0,0 +1,12 @@
1
+ import type { PageMetadata } from "../page/PageMetadata";
2
+ import { AbstractResourcePrinter } from "./AbstractResourcePrinter";
3
+
4
+
5
/**
 * Printer that writes all pages to stdout as one JSON array on a single line.
 */
export class JSONStylePrinter extends AbstractResourcePrinter {
  /**
   * Serialise the pages with `JSON.stringify` and write the result, followed
   * by a newline, to `process.stdout`.
   *
   * @param pages - The pages to print.
   */
  print(...pages: PageMetadata[]): void | Promise<void> {
    const serialised = JSON.stringify(pages);
    const line = serialised + "\n";
    process.stdout.write(line);
  }
}
@@ -0,0 +1,23 @@
1
+ import type { PageMetadata } from '../page/PageMetadata';
2
+ import { AbstractResourcePrinter } from './AbstractResourcePrinter';
3
+
4
/**
 * Printer that writes pages to stdout as plain text: a title line, a URL
 * line, then one "text: link" line per resource.
 */
export class LogStylePrinter extends AbstractResourcePrinter {
  /**
   * Write a string to `process.stdout` as-is.
   *
   * @param str - The text to write.
   */
  write(str: string): void {
    process.stdout.write(str);
  }

  /**
   * Print each page's title and URL followed by its resources.
   *
   * @param pages - The pages to print.
   */
  async print(...pages: PageMetadata[]): Promise<void> {
    for (const { title, url: pageUrl, resources } of pages) {
      this.write(`Title: ${title}\n`);
      this.write(`URL: ${pageUrl}\n\n`);

      for (const entry of resources) {
        const target = entry.link.url;
        const label = entry.text.value;
        this.write(`${label}: ${target}\n`);
      }
    }
  }
}
@@ -0,0 +1,96 @@
1
+ /**
2
+ * @license MIT
3
+ * We are interested in visualising a page as a collection of tags.
4
+ *
5
+ * We wish to work with tags that can be compactly previewed on a webpage.
6
+ * Here we must declare all of the element types that can be used to represent
7
+ * a resource that can be hyperlinked off a webpage.
8
+ */
9
+
10
type Tags = HTMLElementTagNameMap;

/**
 * Names consulted, in priority order, when looking for a human-readable
 * label on an element.
 */
export const RESOURCE_DISPLAYABLE_KEYS = [
  'id',
  'innerText',
  'textContent',
  'class',
  'ariaLabel',
  'ariaDescription',
  'alt',
  'rel'
] as const;

export type DisplayableKey = (typeof RESOURCE_DISPLAYABLE_KEYS)[number];

/** A label found on an element, together with the key it was read from. */
export type ResourceKey = {
  key: DisplayableKey;
  value: string;
};

/**
 * Names consulted, in priority order, when looking for the link an element
 * points to.
 */
export const RESOURCE_LINK_KEYS = [
  "href",
  "data-src",
  "target",
  "action",
  "src",
  "url"
] as const;

export type LinkKey = (typeof RESOURCE_LINK_KEYS)[number];

/** A link found on an element, together with the key it was read from. */
export type ResourceLink = {
  key: LinkKey;
  url: string;
};

/** A labelled link extracted from a page element. */
export type ExternalResource = {
  text: ResourceKey;
  link: ResourceLink;
};

/** Union of the members of `T` that declare at least one key from `U`. */
export type ResourceElement<T, U> = {
  [K in keyof T]: U extends keyof T[K] ? T[K] : never
}[keyof T];

export type Tag = keyof Tags;

export type Resource = ResourceElement<Tags, LinkKey>;

export type ResourceByName<T extends keyof Tags> = Tags[T];

/**
 * Attribute names for keys whose DOM property name differs from the
 * attribute name; keys not listed use the key itself as attribute name.
 */
const ATTRIBUTE_NAMES: Partial<Record<DisplayableKey | LinkKey, string>> = {
  ariaLabel: 'aria-label',
  ariaDescription: 'aria-description',
};

/**
 * Read a non-blank string for `key` from an element.
 *
 * The DOM property is tried first. Keys such as `class` and `data-src` only
 * exist as attributes, so the attribute of the same (or mapped) name is used
 * as a fallback.
 *
 * @returns The untrimmed value, or `undefined` when both are missing or blank.
 */
function readNonBlank(element: Element, key: DisplayableKey | LinkKey): string | undefined {
  const property: unknown = Reflect.get(element, key);
  if (typeof property === 'string' && property.trim() !== '') return property;

  const attribute: unknown = element.getAttribute(ATTRIBUTE_NAMES[key] ?? key);
  if (typeof attribute === 'string' && attribute.trim() !== '') return attribute;

  return undefined;
}

/**
 * Find a display text for an element.
 *
 * @param element - The element to inspect.
 * @returns The first non-blank value among `RESOURCE_DISPLAYABLE_KEYS`, with
 *   the key it came from, or `undefined` when none is set.
 */
export function findResourceText(element: Resource): ResourceKey | undefined {
  for (const key of RESOURCE_DISPLAYABLE_KEYS) {
    const value = readNonBlank(element, key);
    if (value !== undefined) return { key, value };
  }
  return undefined;
}

/**
 * Find the link an element points to.
 *
 * Every key in `RESOURCE_LINK_KEYS` is tried in order, so a defined-but-empty
 * earlier key (for example a blank `target`) no longer hides a later one
 * (for example `action`).
 *
 * @param element - The element to inspect.
 * @returns The first non-blank value with the key it came from, or
 *   `undefined` when the element carries no link.
 */
export function findResourceLink(element: Resource): ResourceLink | undefined {
  for (const key of RESOURCE_LINK_KEYS) {
    const url = readNonBlank(element, key);
    if (url !== undefined) return { key, url };
  }
  return undefined;
}

/**
 * Type guard: is `key` one of `RESOURCE_LINK_KEYS`?
 *
 * Uses `includes`; the `in` operator on an array tests indices, not values.
 */
export const isResourceKey = (key: string): key is LinkKey =>
  (RESOURCE_LINK_KEYS as readonly string[]).includes(key);

/**
 * Does `element` have a property named `key` whose value is not `undefined`?
 */
export const isKeyDefined = <E extends Tags[keyof Tags]>(key: string, element: E): boolean =>
  key in element && Reflect.get(element, key) !== undefined;

/** tests **/

type test1 = HTMLAnchorElement extends Resource ? true : false // true
type test2 = HTMLImageElement extends Resource ? true : false // true
type test3 = HTMLDivElement extends Resource ? true : false // false

type test4 = ResourceElement<Tags, "src">
96
+
package/tsconfig.json ADDED
@@ -0,0 +1,12 @@
1
{
  "compilerOptions": {
    "module": "NodeNext",
    "target": "ESNext",
    "resolveJsonModule": true,
    "outDir": "dist",
    "sourceMap": true,
  },
  // "**/" is the recursive directory wildcard; "src/**.*" did not descend into subfolders.
  "include": [
    "src/**/*",
  ],
}