pagerts 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/main.ts +22 -16
- package/src/page/PageFetcher.ts +1 -5
- package/src/page/PageMetadata.ts +3 -5
package/package.json
CHANGED
package/src/main.ts
CHANGED
|
@@ -5,7 +5,7 @@ import { description, name, version } from '../package.json';
|
|
|
5
5
|
import { PageExtractor } from "./extractors/PageExtractor";
|
|
6
6
|
import { ResourceExtractor } from "./extractors/ResourceExtractor";
|
|
7
7
|
import { isJSDOM, PageFetcher, type PageResponse } from "./page/PageFetcher";
|
|
8
|
-
import {
|
|
8
|
+
import { type Page } from "./page/PageMetadata";
|
|
9
9
|
import { JSONStylePrinter } from "./printers/JSONStylePrinter";
|
|
10
10
|
|
|
11
11
|
const program = new Command();
|
|
@@ -20,22 +20,28 @@ const url = createArgument("<url|file...>", "remote URL or local file to extract
|
|
|
20
20
|
.addArgument(url)
|
|
21
21
|
.action(async (urls: string[]) => {
|
|
22
22
|
const printer = new JSONStylePrinter();
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
23
|
+
try {
|
|
24
|
+
const pageFetcher = new PageFetcher()
|
|
25
|
+
const pageExtractor = new PageExtractor()
|
|
26
|
+
const resourceExtractor = new ResourceExtractor(["a", "meta", "link", "embed"])
|
|
27
|
+
const pagesFetched: PageResponse[] = await pageFetcher.fetchAll(urls);
|
|
28
|
+
const metadataPages: Page[] = []
|
|
29
|
+
for (const page of pagesFetched) {
|
|
30
|
+
// check if page has an error
|
|
31
|
+
if (isJSDOM(page)) {
|
|
32
|
+
const resources = await resourceExtractor.extract(page);
|
|
33
|
+
const descriptor = await pageExtractor.extract(page);
|
|
34
|
+
metadataPages.push({
|
|
35
|
+
...descriptor, resources
|
|
36
|
+
});
|
|
37
|
+
} else metadataPages.push(page);
|
|
38
|
+
}
|
|
39
|
+
await printer.print(...metadataPages);
|
|
40
|
+
} catch (error) {
|
|
41
|
+
await printer.print({
|
|
42
|
+
error: error.message
|
|
43
|
+
})
|
|
37
44
|
}
|
|
38
|
-
await printer.print(...metadataPages);
|
|
39
45
|
})
|
|
40
46
|
.parseAsync(process.argv);
|
|
41
47
|
})();
|
package/src/page/PageFetcher.ts
CHANGED
package/src/page/PageMetadata.ts
CHANGED
|
@@ -1,10 +1,6 @@
|
|
|
1
1
|
import type { ExternalResource } from "../resource";
|
|
2
2
|
import type { PageResponse } from "./PageFetcher";
|
|
3
3
|
|
|
4
|
-
type mightHaveError = {
|
|
5
|
-
error: Error
|
|
6
|
-
}
|
|
7
|
-
|
|
8
4
|
type hasTitle = {
|
|
9
5
|
title: string;
|
|
10
6
|
};
|
|
@@ -19,7 +15,9 @@ export type PageMetadata = {
|
|
|
19
15
|
resources: ExternalResource[];
|
|
20
16
|
}
|
|
21
17
|
|
|
22
|
-
export type Page = (PageDescriptor &
|
|
18
|
+
export type Page = (PageDescriptor & PageMetadata) | {
|
|
19
|
+
error: Error
|
|
20
|
+
}
|
|
23
21
|
|
|
24
22
|
export const isPage = (page: any): page is Page =>
|
|
25
23
|
"resources" in page && Array.isArray(page.resources);
|