pagerts 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/main.js +13 -13
- package/bin/main.js.map +4 -4
- package/package.json +1 -1
- package/src/main.ts +9 -6
- package/src/page/PageFetcher.ts +22 -9
- package/src/page/PageMetadata.ts +12 -2
- package/src/printers/AbstractResourcePrinter.ts +2 -2
- package/src/printers/JSONStylePrinter.ts +2 -2
package/package.json
CHANGED
package/src/main.ts
CHANGED
|
@@ -4,8 +4,8 @@ import { Command, createArgument } from "commander";
|
|
|
4
4
|
import { description, name, version } from '../package.json';
|
|
5
5
|
import { PageExtractor } from "./extractors/PageExtractor";
|
|
6
6
|
import { ResourceExtractor } from "./extractors/ResourceExtractor";
|
|
7
|
-
import { PageFetcher } from "./page/PageFetcher";
|
|
8
|
-
import type
|
|
7
|
+
import { isJSDOM, PageFetcher, type PageResponse } from "./page/PageFetcher";
|
|
8
|
+
import { isPage, type Page, type PageMetadata } from "./page/PageMetadata";
|
|
9
9
|
import { JSONStylePrinter } from "./printers/JSONStylePrinter";
|
|
10
10
|
|
|
11
11
|
const program = new Command();
|
|
@@ -24,13 +24,16 @@ const url = createArgument("<url|file...>", "remote URL or local file to extract
|
|
|
24
24
|
const pageExtractor = new PageExtractor()
|
|
25
25
|
const resourceExtractor = new ResourceExtractor(["a", "meta", "link", "embed"])
|
|
26
26
|
const pagesFetched = await pageFetcher.fetchAll(urls);
|
|
27
|
-
const metadataPages:
|
|
27
|
+
const metadataPages: Page[] = []
|
|
28
28
|
for (const page of pagesFetched) {
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
29
|
+
// check if page has an error
|
|
30
|
+
if (isJSDOM(page)) {
|
|
31
|
+
const resources = await resourceExtractor.extract(page);
|
|
32
|
+
const descriptor = await pageExtractor.extract(page);
|
|
33
|
+
metadataPages.push({
|
|
32
34
|
...descriptor, resources
|
|
33
35
|
});
|
|
36
|
+
} else metadataPages.push(page);
|
|
34
37
|
}
|
|
35
38
|
await printer.print(...metadataPages);
|
|
36
39
|
})
|
package/src/page/PageFetcher.ts
CHANGED
|
@@ -1,16 +1,29 @@
|
|
|
1
1
|
import { JSDOM, VirtualConsole } from 'jsdom';
|
|
2
|
+
import type { Page } from './PageMetadata';
|
|
3
|
+
|
|
4
|
+
export type PageResponse = JSDOM | Page
|
|
5
|
+
|
|
6
|
+
export const isJSDOM = (page: PageResponse): page is JSDOM => 'window' in page;
|
|
2
7
|
|
|
3
8
|
export class PageFetcher {
|
|
4
|
-
async fetchPage(url: string): Promise<
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
async fetchPage(url: string): Promise<PageResponse> {
|
|
10
|
+
try {
|
|
11
|
+
const dom = await JSDOM.fromURL(url, {
|
|
12
|
+
virtualConsole: new VirtualConsole().on('jsdomError', (error) => {
|
|
13
|
+
console.error(`Error parsing ${url}:`, error.message);
|
|
14
|
+
})
|
|
15
|
+
});
|
|
16
|
+
return dom;
|
|
17
|
+
} catch (error) {
|
|
18
|
+
return {
|
|
19
|
+
url,
|
|
20
|
+
title: "Plain resource",
|
|
21
|
+
error
|
|
22
|
+
}
|
|
23
|
+
}
|
|
11
24
|
}
|
|
12
|
-
async fetchAll(urls:string[]): Promise<
|
|
25
|
+
async fetchAll(urls: string[]): Promise<PageResponse[]> {
|
|
13
26
|
return await Promise.all(urls.map(url => this.fetchPage(url)));
|
|
14
27
|
}
|
|
15
|
-
constructor() {}
|
|
28
|
+
constructor() { }
|
|
16
29
|
}
|
package/src/page/PageMetadata.ts
CHANGED
|
@@ -1,4 +1,9 @@
|
|
|
1
1
|
import type { ExternalResource } from "../resource";
|
|
2
|
+
import type { PageResponse } from "./PageFetcher";
|
|
3
|
+
|
|
4
|
+
type mightHaveError = {
|
|
5
|
+
error: Error
|
|
6
|
+
}
|
|
2
7
|
|
|
3
8
|
type hasTitle = {
|
|
4
9
|
title: string;
|
|
@@ -8,8 +13,13 @@ type hasUrl = {
|
|
|
8
13
|
url: string;
|
|
9
14
|
};
|
|
10
15
|
|
|
11
|
-
export type PageDescriptor = hasTitle & hasUrl
|
|
16
|
+
export type PageDescriptor = hasTitle & hasUrl
|
|
12
17
|
|
|
13
18
|
export type PageMetadata = {
|
|
14
19
|
resources: ExternalResource[];
|
|
15
|
-
}
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
export type Page = (PageDescriptor & (PageMetadata | mightHaveError))
|
|
23
|
+
|
|
24
|
+
export const isPage = (page: any): page is Page =>
|
|
25
|
+
"resources" in page && Array.isArray(page.resources);
|
package/src/printers/AbstractResourcePrinter.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { Page } from "../page/PageMetadata";
|
|
2
2
|
|
|
3
3
|
export abstract class AbstractResourcePrinter {
|
|
4
4
|
constructor() { }
|
|
5
|
-
abstract print(...pages:
|
|
5
|
+
abstract print(...pages: Page[]): void | Promise<void>;
|
|
6
6
|
}
|
package/src/printers/JSONStylePrinter.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { Page } from "../page/PageMetadata";
|
|
2
2
|
import { AbstractResourcePrinter } from "./AbstractResourcePrinter";
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
export class JSONStylePrinter extends AbstractResourcePrinter {
|
|
6
|
-
print(...pages:
|
|
6
|
+
print(...pages: Page[]): void | Promise<void> {
|
|
7
7
|
const json = JSON.stringify(pages);
|
|
8
8
|
process.stdout.write(json + "\n")
|
|
9
9
|
}
|