pagerts 0.1.1 → 0.1.2

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "pagerts",
3
3
  "description": "A tool for viewing external relations in a webpage",
4
- "version": "0.1.1",
4
+ "version": "0.1.2",
5
5
  "main": "main.js",
6
6
  "bin": {
7
7
  "pagerts": "bin/main.js"
package/src/main.ts CHANGED
@@ -4,8 +4,8 @@ import { Command, createArgument } from "commander";
4
4
  import { description, name, version } from '../package.json';
5
5
  import { PageExtractor } from "./extractors/PageExtractor";
6
6
  import { ResourceExtractor } from "./extractors/ResourceExtractor";
7
- import { PageFetcher } from "./page/PageFetcher";
8
- import type { PageMetadata } from "./page/PageMetadata";
7
+ import { isJSDOM, PageFetcher, type PageResponse } from "./page/PageFetcher";
8
+ import { isPage, type Page, type PageMetadata } from "./page/PageMetadata";
9
9
  import { JSONStylePrinter } from "./printers/JSONStylePrinter";
10
10
 
11
11
  const program = new Command();
@@ -24,13 +24,16 @@ const url = createArgument("<url|file...>", "remote URL or local file to extract
24
24
  const pageExtractor = new PageExtractor()
25
25
  const resourceExtractor = new ResourceExtractor(["a", "meta", "link", "embed"])
26
26
  const pagesFetched = await pageFetcher.fetchAll(urls);
27
- const metadataPages:PageMetadata[] = []
27
+ const metadataPages: Page[] = []
28
28
  for (const page of pagesFetched) {
29
- const resources = await resourceExtractor.extract(page);
30
- const descriptor = await pageExtractor.extract(page);
31
- metadataPages.push({
29
+ // check if page has an error
30
+ if (isJSDOM(page)) {
31
+ const resources = await resourceExtractor.extract(page);
32
+ const descriptor = await pageExtractor.extract(page);
33
+ metadataPages.push({
32
34
  ...descriptor, resources
33
35
  });
36
+ } else metadataPages.push(page);
34
37
  }
35
38
  await printer.print(...metadataPages);
36
39
  })
@@ -1,16 +1,29 @@
1
1
  import { JSDOM, VirtualConsole } from 'jsdom';
2
+ import type { Page } from './PageMetadata';
3
+
4
+ export type PageResponse = JSDOM | Page
5
+
6
+ export const isJSDOM = (page: PageResponse): page is JSDOM => 'window' in page;
2
7
 
3
8
  export class PageFetcher {
4
- async fetchPage(url: string): Promise<JSDOM> {
5
- const dom = await JSDOM.fromURL(url, {
6
- virtualConsole: new VirtualConsole().on('jsdomError', (error) => {
7
- console.error(`Error parsing ${url}:`, error.message);
8
- })
9
- });
10
- return dom;
9
+ async fetchPage(url: string): Promise<PageResponse> {
10
+ try {
11
+ const dom = await JSDOM.fromURL(url, {
12
+ virtualConsole: new VirtualConsole().on('jsdomError', (error) => {
13
+ console.error(`Error parsing ${url}:`, error.message);
14
+ })
15
+ });
16
+ return dom;
17
+ } catch (error) {
18
+ return {
19
+ url,
20
+ title: "Plain resource",
21
+ error
22
+ }
23
+ }
11
24
  }
12
- async fetchAll(urls:string[]): Promise<JSDOM[]> {
25
+ async fetchAll(urls: string[]): Promise<PageResponse[]> {
13
26
  return await Promise.all(urls.map(url => this.fetchPage(url)));
14
27
  }
15
- constructor() {}
28
+ constructor() { }
16
29
  }
@@ -1,4 +1,9 @@
1
1
  import type { ExternalResource } from "../resource";
2
+ import type { PageResponse } from "./PageFetcher";
3
+
4
+ type mightHaveError = {
5
+ error: Error
6
+ }
2
7
 
3
8
  type hasTitle = {
4
9
  title: string;
@@ -8,8 +13,13 @@ type hasUrl = {
8
13
  url: string;
9
14
  };
10
15
 
11
- export type PageDescriptor = hasTitle & hasUrl;
16
+ export type PageDescriptor = hasTitle & hasUrl
12
17
 
13
18
  export type PageMetadata = {
14
19
  resources: ExternalResource[];
15
- } & PageDescriptor;
20
+ }
21
+
22
+ export type Page = (PageDescriptor & (PageMetadata | mightHaveError))
23
+
24
+ export const isPage = (page: any): page is Page =>
25
+ "resources" in page && Array.isArray(page.resources);
@@ -1,6 +1,6 @@
1
- import type { PageMetadata } from "../page/PageMetadata";
1
+ import type { Page } from "../page/PageMetadata";
2
2
 
3
3
  export abstract class AbstractResourcePrinter {
4
4
  constructor() { }
5
- abstract print(...pages: PageMetadata[]): void | Promise<void>;
5
+ abstract print(...pages: Page[]): void | Promise<void>;
6
6
  }
@@ -1,9 +1,9 @@
1
- import type { PageMetadata } from "../page/PageMetadata";
1
+ import type { Page } from "../page/PageMetadata";
2
2
  import { AbstractResourcePrinter } from "./AbstractResourcePrinter";
3
3
 
4
4
 
5
5
  export class JSONStylePrinter extends AbstractResourcePrinter {
6
- print(...pages: PageMetadata[]): void | Promise<void> {
6
+ print(...pages: Page[]): void | Promise<void> {
7
7
  const json = JSON.stringify(pages);
8
8
  process.stdout.write(json + "\n")
9
9
  }