pagerts 0.2.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +220 -16
- package/bin/main.js +9 -25
- package/bin/main.js.map +4 -4
- package/package.json +37 -13
- package/bin/package.json +0 -40
- package/bin/src/extractors/AbstractExtractor.js +0 -11
- package/bin/src/extractors/AbstractExtractor.js.map +0 -1
- package/bin/src/extractors/PageExtractor.js +0 -13
- package/bin/src/extractors/PageExtractor.js.map +0 -1
- package/bin/src/extractors/ResourceExtractor.js +0 -32
- package/bin/src/extractors/ResourceExtractor.js.map +0 -1
- package/bin/src/main.js +0 -36
- package/bin/src/main.js.map +0 -1
- package/bin/src/page/Page.js +0 -8
- package/bin/src/page/Page.js.map +0 -1
- package/bin/src/page/PageFetcher.js +0 -26
- package/bin/src/page/PageFetcher.js.map +0 -1
- package/bin/src/printers/AbstractResourcePrinter.js +0 -8
- package/bin/src/printers/AbstractResourcePrinter.js.map +0 -1
- package/bin/src/printers/JSONStylePrinter.js +0 -12
- package/bin/src/printers/JSONStylePrinter.js.map +0 -1
- package/bin/src/printers/LogStylePrinter.js +0 -27
- package/bin/src/printers/LogStylePrinter.js.map +0 -1
- package/bin/src/resource.js +0 -56
- package/bin/src/resource.js.map +0 -1
- package/jest.config.js +0 -198
- package/src/extractors/AbstractExtractor.ts +0 -5
- package/src/extractors/PageExtractor.ts +0 -12
- package/src/extractors/ResourceExtractor.ts +0 -25
- package/src/extractors/TagExtractor.ts +0 -14
- package/src/main.ts +0 -43
- package/src/page/Page.ts +0 -19
- package/src/page/PageFetcher.ts +0 -30
- package/src/printers/AbstractResourcePrinter.ts +0 -6
- package/src/printers/JSONStylePrinter.ts +0 -12
- package/src/printers/LogStylePrinter.ts +0 -28
- package/src/resource.ts +0 -96
- package/tsconfig.json +0 -12
package/src/page/PageFetcher.ts
DELETED
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
import { fileURLToPath } from 'node:url';

import { JSDOM, VirtualConsole } from 'jsdom';
import type { Page, PageMetadata } from './Page';
|
|
3
|
-
|
|
4
|
-
/**
 * Outcome of loading a single page: `content` is set when JSDOM produced a
 * document, `error` is set when loading failed.
 */
interface PageResponse {
  url: string;
  content?: JSDOM;
  error?: string;
}

/** Loads pages into JSDOM documents from `file://` URLs or remote URLs. */
export class PageFetcher {
  /**
   * Loads one page and never rejects: any failure is converted into a
   * response carrying an `error` message instead of `content`.
   *
   * @param url - A `file://` URL for a local document, or any URL accepted by
   *   `JSDOM.fromURL`.
   * @returns The parsed document, or a description of why loading failed.
   */
  private async fetchPage(url: string): Promise<PageResponse> {
    // Errors raised by jsdom while processing the document go to stderr so
    // they do not pollute the printed output.
    const virtualConsole = new VirtualConsole().on('jsdomError', (error) => {
      process.stderr.write(`Error parsing ${url}:${error.message}\n`);
    });

    try {
      // `JSDOM.fromFile` expects a filesystem path, not a URL, so the
      // `file://` URL has to be converted before it is handed over.
      const content = url.startsWith('file://')
        ? await JSDOM.fromFile(fileURLToPath(url), { virtualConsole })
        : await JSDOM.fromURL(url, { virtualConsole });
      return { url, content };
    } catch (cause: unknown) {
      const message = cause instanceof Error ? cause.message : String(cause);
      return { url, error: `JSDOM failed to parse: ${message}` };
    }
  }

  /**
   * Loads every URL concurrently and returns only the pages that loaded.
   * Responses without `content` (failed loads) are dropped from the result.
   *
   * @param urls - URLs to load.
   * @returns Successful responses, in the same relative order as `urls`.
   */
  async fetchAll(urls: string[]): Promise<PageResponse[]> {
    const responses = await Promise.all(urls.map((url) => this.fetchPage(url)));
    return responses.filter((response) => response.content !== undefined);
  }
}
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
import type { PageMetadata } from "../page/Page";
|
|
2
|
-
import { AbstractResourcePrinter } from "./AbstractResourcePrinter";
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
/** Printer that emits all pages as a single JSON array on one line of stdout. */
export class JSONStylePrinter extends AbstractResourcePrinter {
  /**
   * Serialises the given pages with `JSON.stringify` and writes the result,
   * followed by a newline, to stdout.
   *
   * @param pages - Page metadata records to print.
   */
  print(...pages: PageMetadata[]): void | Promise<void> {
    process.stdout.write(`${JSON.stringify(pages)}\n`);
  }
}
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
import { isPage, type Page, type PageMetadata } from '../page/Page';
|
|
2
|
-
import { AbstractResourcePrinter } from './AbstractResourcePrinter';
|
|
3
|
-
|
|
4
|
-
/** Printer that writes pages to stdout as human-readable text lines. */
export class LogStylePrinter extends AbstractResourcePrinter {
  /**
   * Writes `str` to stdout verbatim; callers supply their own line endings.
   *
   * @param str - Text to emit.
   */
  write(str: string): void {
    process.stdout.write(str);
  }

  /**
   * Prints each entry in order. Entries that are not pages have their error
   * text printed on its own line; pages are printed as a title line, a URL
   * line, a blank line, and then one `text: link` line per resource.
   *
   * @param pages - Page metadata records to print.
   */
  async print(...pages: PageMetadata[]): Promise<void> {
    for (const page of pages) {
      if (!isPage(page)) {
        // Terminate the error line so the next entry's "Title:" does not run
        // on from it; an already-terminated message is left untouched.
        const { error } = page;
        this.write(error.endsWith('\n') ? error : `${error}\n`);
        continue;
      }

      const { resources, title, url } = page;

      this.write(`Title: ${title}\n`);
      this.write(`URL: ${url}\n\n`);

      for (const resource of resources) {
        const label = resource.text.value;
        const target = resource.link.url;
        this.write(`${label}: ${target}\n`);
      }
    }
  }
}
|
package/src/resource.ts
DELETED
|
@@ -1,96 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @license MIT
|
|
3
|
-
* We are interested in visualising a page as a collection of tags.
|
|
4
|
-
*
|
|
5
|
-
* We wish to work with tags that can be compactly previewed on a webpage.
|
|
6
|
-
* Here we must declare all of the element types that can be used to represent
|
|
7
|
-
* a resource that can be hyperlinked off a webpage.
|
|
8
|
-
*/
|
|
9
|
-
|
|
10
|
-
/** Map from HTML tag name to its DOM element interface. */
type Tags = HTMLElementTagNameMap;

/**
 * Returns the first key in `keys` whose property on `element` holds a
 * non-blank string, together with that string.
 *
 * Values are read as plain properties via `Reflect.get`, so the lookup works
 * for any key name without indexing the element type directly.
 */
function firstNonBlankProperty<K extends string>(
  element: object,
  keys: readonly K[],
): { key: K; value: string } | undefined {
  for (const key of keys) {
    const value: unknown = Reflect.get(element, key);
    if (typeof value === 'string' && value.trim() !== '') {
      return { key, value };
    }
  }
  return undefined;
}

/**
 * Property names tried, in priority order, when looking for text that can
 * label a resource.
 */
export const RESOURCE_DISPLAYABLE_KEYS = [
  'id',
  'innerText',
  'textContent',
  'class',
  'ariaLabel',
  'ariaDescription',
  'alt',
  'rel'
] as const;

export type DisplayableKey = (typeof RESOURCE_DISPLAYABLE_KEYS)[number];

/** The label chosen for a resource and the key it was read from. */
export type ResourceKey = {
  key: DisplayableKey;
  value: string;
};

/**
 * Property names tried, in priority order, when looking for the location a
 * resource points at.
 */
// NOTE(review): "data-src" and "class" look like attribute names rather than
// DOM property names; confirm whether an attribute lookup was intended.
export const RESOURCE_LINK_KEYS = [
  "href",
  "data-src",
  "target",
  "action",
  "src",
  "url"
] as const;

export type LinkKey = typeof RESOURCE_LINK_KEYS[number];

/** The location chosen for a resource and the key it was read from. */
export type ResourceLink = {
  key: LinkKey;
  url: string;
}

/**
 * Finds display text for `element`.
 *
 * @returns The first key of `RESOURCE_DISPLAYABLE_KEYS` whose value is a
 *   non-blank string, with that value; `undefined` when there is none.
 */
export function findResourceText(element: Resource): ResourceKey | undefined {
  return firstNonBlankProperty(element, RESOURCE_DISPLAYABLE_KEYS);
}

/**
 * Finds the link carried by `element`.
 *
 * Keys holding an empty or non-string value are skipped rather than ending
 * the search, so e.g. an empty `target` does not hide a populated `action`.
 *
 * @returns The first key of `RESOURCE_LINK_KEYS` whose value is a non-blank
 *   string, with that value as `url`; `undefined` when there is none.
 */
export function findResourceLink(element: Resource): ResourceLink | undefined {
  const found = firstNonBlankProperty(element, RESOURCE_LINK_KEYS);
  return found === undefined ? undefined : { key: found.key, url: found.value };
}

/** A resource found on a page: its display text and where it links to. */
export type ExternalResource = {
  text: ResourceKey;
  link: ResourceLink;
};

/**
 * Type guard: is `key` one of `RESOURCE_LINK_KEYS`?
 *
 * Membership is tested against the array's values; the `in` operator would
 * test its indices ("0", "1", ...) instead.
 */
export const isResourceKey = (key: string): key is LinkKey =>
  (RESOURCE_LINK_KEYS as readonly string[]).includes(key);

/**
 * Reports whether `key` exists on `element` (own or inherited) with a value
 * other than `undefined`.
 */
export const isKeyDefined = <E extends Tags[keyof Tags]>(key: string, element: E): boolean =>
  key in element && Reflect.get(element, key) !== undefined;

/**
 * Union of the member types of `T` that declare at least one of the keys in
 * `U` (the conditional distributes over a union `U`).
 */
export type ResourceElement<T, U> = {
  [K in keyof T]: U extends keyof T[K] ? T[K] : never
}[keyof T];

export type Tag = keyof Tags;

/** Every HTML element type that declares at least one link key. */
export type Resource = ResourceElement<Tags, (typeof RESOURCE_LINK_KEYS)[number]>;

export type ResourceByName<T extends keyof Tags> = Tags[T];

/** tests **/

type test1 = HTMLAnchorElement extends Resource ? true : false // true
type test2 = HTMLImageElement extends Resource ? true : false // true
type test3 = HTMLDivElement extends Resource ? true : false // false

type test4 = ResourceElement<Tags, "src">
|
|
96
|
-
|