pagerts 0.2.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +220 -16
- package/bin/main.js +9 -25
- package/bin/main.js.map +4 -4
- package/package.json +37 -13
- package/bin/package.json +0 -40
- package/bin/src/extractors/AbstractExtractor.js +0 -11
- package/bin/src/extractors/AbstractExtractor.js.map +0 -1
- package/bin/src/extractors/PageExtractor.js +0 -13
- package/bin/src/extractors/PageExtractor.js.map +0 -1
- package/bin/src/extractors/ResourceExtractor.js +0 -32
- package/bin/src/extractors/ResourceExtractor.js.map +0 -1
- package/bin/src/main.js +0 -36
- package/bin/src/main.js.map +0 -1
- package/bin/src/page/Page.js +0 -8
- package/bin/src/page/Page.js.map +0 -1
- package/bin/src/page/PageFetcher.js +0 -26
- package/bin/src/page/PageFetcher.js.map +0 -1
- package/bin/src/printers/AbstractResourcePrinter.js +0 -8
- package/bin/src/printers/AbstractResourcePrinter.js.map +0 -1
- package/bin/src/printers/JSONStylePrinter.js +0 -12
- package/bin/src/printers/JSONStylePrinter.js.map +0 -1
- package/bin/src/printers/LogStylePrinter.js +0 -27
- package/bin/src/printers/LogStylePrinter.js.map +0 -1
- package/bin/src/resource.js +0 -56
- package/bin/src/resource.js.map +0 -1
- package/jest.config.js +0 -198
- package/src/extractors/AbstractExtractor.ts +0 -5
- package/src/extractors/PageExtractor.ts +0 -12
- package/src/extractors/ResourceExtractor.ts +0 -25
- package/src/extractors/TagExtractor.ts +0 -14
- package/src/main.ts +0 -43
- package/src/page/Page.ts +0 -19
- package/src/page/PageFetcher.ts +0 -30
- package/src/printers/AbstractResourcePrinter.ts +0 -6
- package/src/printers/JSONStylePrinter.ts +0 -12
- package/src/printers/LogStylePrinter.ts +0 -28
- package/src/resource.ts +0 -96
- package/tsconfig.json +0 -12
package/bin/package.json
DELETED
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "pagerts",
|
|
3
|
-
"description": "A tool for viewing external relations in a webpage",
|
|
4
|
-
"version": "0.1.9",
|
|
5
|
-
"main": "main.js",
|
|
6
|
-
"bin": {
|
|
7
|
-
"pagerts": "bin/main.js"
|
|
8
|
-
},
|
|
9
|
-
"scripts": {
|
|
10
|
-
"test": "jest",
|
|
11
|
-
"build": "esbuild src/main.ts --external:jsdom --bundle --outdir=bin --minify --sourcemap --platform=node",
|
|
12
|
-
"lint": "tsc",
|
|
13
|
-
"start": "node ./bin/main.js",
|
|
14
|
-
"dev": "npx tsx src/main.ts"
|
|
15
|
-
},
|
|
16
|
-
"keywords": [
|
|
17
|
-
"webpage",
|
|
18
|
-
"hierarchy",
|
|
19
|
-
"management"
|
|
20
|
-
],
|
|
21
|
-
"author": "Kirill kn253 Nevzorov",
|
|
22
|
-
"license": "MIT",
|
|
23
|
-
"bugs": {
|
|
24
|
-
"url": "https://github.com/akinevz0/pagerts/issues"
|
|
25
|
-
},
|
|
26
|
-
"homepage": "https://github.com/akinevz0/pagerts",
|
|
27
|
-
"dependencies": {
|
|
28
|
-
"blessed": "^0.1.81",
|
|
29
|
-
"commander": "^12.1.0",
|
|
30
|
-
"dotenv": "^16.4.5",
|
|
31
|
-
"jsdom": "^26.0.0"
|
|
32
|
-
},
|
|
33
|
-
"devDependencies": {
|
|
34
|
-
"@types/blessed": "^0.1.25",
|
|
35
|
-
"@types/jsdom": "^21.1.7",
|
|
36
|
-
"@types/node": "^22.8.2",
|
|
37
|
-
"esbuild": "^0.25.1",
|
|
38
|
-
"ts-node": "^10.9.2"
|
|
39
|
-
}
|
|
40
|
-
}
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.AbstractExtractor = void 0;
|
|
4
|
-
class AbstractExtractor {
|
|
5
|
-
name;
|
|
6
|
-
constructor(name) {
|
|
7
|
-
this.name = name;
|
|
8
|
-
}
|
|
9
|
-
}
|
|
10
|
-
exports.AbstractExtractor = AbstractExtractor;
|
|
11
|
-
//# sourceMappingURL=AbstractExtractor.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"AbstractExtractor.js","sourceRoot":"","sources":["../../../src/extractors/AbstractExtractor.ts"],"names":[],"mappings":";;;AACA,MAAsB,iBAAiB;IACd;IAArB,YAAqB,IAAW;QAAX,SAAI,GAAJ,IAAI,CAAO;IAAI,CAAC;CAExC;AAHD,8CAGC"}
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.PageExtractor = void 0;
|
|
4
|
-
const AbstractExtractor_1 = require("./AbstractExtractor");
|
|
5
|
-
class PageExtractor extends AbstractExtractor_1.AbstractExtractor {
|
|
6
|
-
constructor() { super("page-extractor"); }
|
|
7
|
-
async extract(value) {
|
|
8
|
-
const { window: { document: { title, location: { href: url } } } } = value;
|
|
9
|
-
return { title, url };
|
|
10
|
-
}
|
|
11
|
-
}
|
|
12
|
-
exports.PageExtractor = PageExtractor;
|
|
13
|
-
//# sourceMappingURL=PageExtractor.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"PageExtractor.js","sourceRoot":"","sources":["../../../src/extractors/PageExtractor.ts"],"names":[],"mappings":";;;AAEA,2DAAwD;AAExD,MAAa,aAAc,SAAQ,qCAA8B;IAC7D,gBAAgB,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC;IAE1C,KAAK,CAAC,OAAO,CAAC,KAAY;QACtB,MAAM,EAAE,MAAM,EAAE,EAAE,QAAQ,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,EAAE,EAAE,EAAE,GAAG,KAAK,CAAA;QAC1E,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,CAAA;IACzB,CAAC;CACJ;AAPD,sCAOC"}
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.ResourceExtractor = void 0;
|
|
4
|
-
const resource_1 = require("../resource");
|
|
5
|
-
const AbstractExtractor_1 = require("./AbstractExtractor");
|
|
6
|
-
class ResourceExtractor extends AbstractExtractor_1.AbstractExtractor {
|
|
7
|
-
tags;
|
|
8
|
-
constructor(tags) {
|
|
9
|
-
super("page-extractor");
|
|
10
|
-
this.tags = tags;
|
|
11
|
-
}
|
|
12
|
-
async extract(value) {
|
|
13
|
-
const { document } = value.window;
|
|
14
|
-
const externalResources = [];
|
|
15
|
-
for (const tag of this.tags) {
|
|
16
|
-
const selector = document.querySelectorAll(tag);
|
|
17
|
-
const elements = Array.from(selector);
|
|
18
|
-
for (const element of elements) {
|
|
19
|
-
const text = (0, resource_1.findResourceText)(element);
|
|
20
|
-
const link = (0, resource_1.findResourceLink)(element);
|
|
21
|
-
if (!text || !link)
|
|
22
|
-
continue;
|
|
23
|
-
if (!link.url.startsWith("http"))
|
|
24
|
-
continue;
|
|
25
|
-
externalResources.push({ text, link });
|
|
26
|
-
}
|
|
27
|
-
}
|
|
28
|
-
return externalResources;
|
|
29
|
-
}
|
|
30
|
-
}
|
|
31
|
-
exports.ResourceExtractor = ResourceExtractor;
|
|
32
|
-
//# sourceMappingURL=ResourceExtractor.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"ResourceExtractor.js","sourceRoot":"","sources":["../../../src/extractors/ResourceExtractor.ts"],"names":[],"mappings":";;;AACA,0CAAiH;AACjH,2DAAwD;AAExD,MAAa,iBAAkB,SAAQ,qCAA4C;IAClD;IAA7B,YAA6B,IAAW;QACpC,KAAK,CAAC,gBAAgB,CAAC,CAAC;QADC,SAAI,GAAJ,IAAI,CAAO;IAExC,CAAC;IACD,KAAK,CAAC,OAAO,CAAC,KAAY;QACtB,MAAM,EAAE,QAAQ,EAAE,GAAG,KAAK,CAAC,MAAM,CAAC;QAClC,MAAM,iBAAiB,GAAuB,EAAE,CAAC;QACjD,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YAC1B,MAAM,QAAQ,GAAG,QAAQ,CAAC,gBAAgB,CAAW,GAAG,CAAC,CAAA;YACzD,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;YACrC,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;gBAC7B,MAAM,IAAI,GAAG,IAAA,2BAAgB,EAAC,OAAO,CAAC,CAAC;gBACvC,MAAM,IAAI,GAAG,IAAA,2BAAgB,EAAC,OAAO,CAAC,CAAC;gBACvC,IAAG,CAAC,IAAI,IAAI,CAAC,IAAI;oBAAE,SAAQ;gBAC3B,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC;oBAAE,SAAQ;gBAC1C,iBAAiB,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAA;YAC1C,CAAC;QACL,CAAC;QACD,OAAO,iBAAiB,CAAC;IAC7B,CAAC;CACJ;AApBD,8CAoBC"}
|
package/bin/src/main.js
DELETED
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
"use strict";
|
|
3
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
-
const commander_1 = require("commander");
|
|
5
|
-
const package_json_1 = require("../package.json");
|
|
6
|
-
const PageExtractor_1 = require("./extractors/PageExtractor");
|
|
7
|
-
const ResourceExtractor_1 = require("./extractors/ResourceExtractor");
|
|
8
|
-
const PageFetcher_1 = require("./page/PageFetcher");
|
|
9
|
-
const JSONStylePrinter_1 = require("./printers/JSONStylePrinter");
|
|
10
|
-
const program = new commander_1.Command();
|
|
11
|
-
const url = (0, commander_1.createArgument)("<url | file...>", "remote https://URL or local file://resource.html to extract from");
|
|
12
|
-
(async () => {
|
|
13
|
-
await program
|
|
14
|
-
.name(package_json_1.name)
|
|
15
|
-
.version(package_json_1.version, "-v, --version")
|
|
16
|
-
.description(package_json_1.description)
|
|
17
|
-
.addArgument(url)
|
|
18
|
-
.action(async (urls) => {
|
|
19
|
-
const printer = new JSONStylePrinter_1.JSONStylePrinter();
|
|
20
|
-
// simple log style printer
|
|
21
|
-
// const printer = new LogStylePrinter();
|
|
22
|
-
const pageFetcher = new PageFetcher_1.PageFetcher();
|
|
23
|
-
const pageExtractor = new PageExtractor_1.PageExtractor();
|
|
24
|
-
const resourceExtractor = new ResourceExtractor_1.ResourceExtractor(["a", "meta", "link", "embed"]);
|
|
25
|
-
const pageResponses = await pageFetcher.fetchAll(urls);
|
|
26
|
-
const pageMetadatas = [];
|
|
27
|
-
for (const { content, url, error } of pageResponses) {
|
|
28
|
-
const resources = error in (content) ? [] : await resourceExtractor.extract(content);
|
|
29
|
-
const descriptor = error in content ? { url, error } : await pageExtractor.extract(content);
|
|
30
|
-
pageMetadatas.push({ ...descriptor, resources });
|
|
31
|
-
}
|
|
32
|
-
await printer.print(...pageMetadatas);
|
|
33
|
-
})
|
|
34
|
-
.parseAsync(process.argv);
|
|
35
|
-
})();
|
|
36
|
-
//# sourceMappingURL=main.js.map
|
package/bin/src/main.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"main.js","sourceRoot":"","sources":["../../src/main.ts"],"names":[],"mappings":";;;AACA,yCAAoD;AAEpD,kDAA6D;AAC7D,8DAA2D;AAC3D,sEAAmE;AACnE,oDAAiD;AAEjD,kEAA+D;AAG/D,MAAM,OAAO,GAAG,IAAI,mBAAO,EAAE,CAAC;AAE9B,MAAM,GAAG,GAAG,IAAA,0BAAc,EAAC,iBAAiB,EAAE,kEAAkE,CAAC,CAAC;AAElH,CAAC,KAAK,IAAI,EAAE;IACV,MAAM,OAAO;SACV,IAAI,CAAC,mBAAI,CAAC;SACV,OAAO,CAAC,sBAAO,EAAE,eAAe,CAAC;SACjC,WAAW,CAAC,0BAAW,CAAC;SACxB,WAAW,CAAC,GAAG,CAAC;SAChB,MAAM,CAAC,KAAK,EAAE,IAAc,EAAE,EAAE;QAC/B,MAAM,OAAO,GAAG,IAAI,mCAAgB,EAAE,CAAC;QACvC,2BAA2B;QAC3B,yCAAyC;QAEzC,MAAM,WAAW,GAAG,IAAI,yBAAW,EAAE,CAAA;QACrC,MAAM,aAAa,GAAG,IAAI,6BAAa,EAAE,CAAA;QACzC,MAAM,iBAAiB,GAAG,IAAI,qCAAiB,CAAC,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,CAAA;QAE/E,MAAM,aAAa,GAAG,MAAM,WAAW,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,aAAa,GAAmB,EAAE,CAAC;QAEzC,KAAK,MAAM,EAAE,OAAO,EAAE,GAAG,EAAE,KAAK,EAAE,IAAI,aAAa,EAAE,CAAC;YACpD,MAAM,SAAS,GAAG,KAAK,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,iBAAiB,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YACrF,MAAM,UAAU,GAAG,KAAK,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,MAAM,aAAa,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YAC5F,aAAa,CAAC,IAAI,CAAC,EAAE,GAAG,UAAU,EAAE,SAAS,EAAE,CAAC,CAAC;QACnD,CAAC;QAED,MAAM,OAAO,CAAC,KAAK,CAAC,GAAG,aAAa,CAAC,CAAC;IACxC,CAAC,CAAC;SACD,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;AAC9B,CAAC,CAAC,EAAE,CAAC"}
|
package/bin/src/page/Page.js
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.isPage = exports.isError = void 0;
|
|
4
|
-
const isError = (page) => 'error' in page;
|
|
5
|
-
exports.isError = isError;
|
|
6
|
-
const isPage = (page) => "resources" in page && Array.isArray(page.resources);
|
|
7
|
-
exports.isPage = isPage;
|
|
8
|
-
//# sourceMappingURL=Page.js.map
|
package/bin/src/page/Page.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"Page.js","sourceRoot":"","sources":["../../../src/page/Page.ts"],"names":[],"mappings":";;;AAgBO,MAAM,OAAO,GAAG,CAAC,IAAkB,EAA6B,EAAE,CAAC,OAAO,IAAI,IAAI,CAAC;AAA7E,QAAA,OAAO,WAAsE;AACnF,MAAM,MAAM,GAAG,CAAC,IAAS,EAAgB,EAAE,CAC9C,WAAW,IAAI,IAAI,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;AAD5C,QAAA,MAAM,UACsC"}
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.PageFetcher = void 0;
|
|
4
|
-
const jsdom_1 = require("jsdom");
|
|
5
|
-
class PageFetcher {
|
|
6
|
-
async fetchPage(url) {
|
|
7
|
-
let dom;
|
|
8
|
-
const virtualConsole = new jsdom_1.VirtualConsole().on('jsdomError', (error) => {
|
|
9
|
-
process.stderr.write(`Error parsing ${url}:${error.message}\n`);
|
|
10
|
-
});
|
|
11
|
-
if (url.startsWith("file://")) {
|
|
12
|
-
dom = jsdom_1.JSDOM.fromFile(url, { virtualConsole });
|
|
13
|
-
}
|
|
14
|
-
else {
|
|
15
|
-
dom = jsdom_1.JSDOM.fromURL(url, { virtualConsole });
|
|
16
|
-
}
|
|
17
|
-
return dom.then(content => ({ url, content }))
|
|
18
|
-
.catch(({ message }) => ({ url, error: `JSDOM failed to parse: ${message}` }));
|
|
19
|
-
}
|
|
20
|
-
async fetchAll(urls) {
|
|
21
|
-
const responses = await Promise.all(urls.map(url => this.fetchPage(url)));
|
|
22
|
-
return responses.filter(response => response.content !== undefined);
|
|
23
|
-
}
|
|
24
|
-
}
|
|
25
|
-
exports.PageFetcher = PageFetcher;
|
|
26
|
-
//# sourceMappingURL=PageFetcher.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"PageFetcher.js","sourceRoot":"","sources":["../../../src/page/PageFetcher.ts"],"names":[],"mappings":";;;AAAA,iCAA8C;AAS9C,MAAa,WAAW;IACZ,KAAK,CAAC,SAAS,CAAC,GAAW;QAC/B,IAAI,GAAmB,CAAC;QACxB,MAAM,cAAc,GAAG,IAAI,sBAAc,EAAE,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC,KAAK,EAAE,EAAE;YACnE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,iBAAiB,GAAG,IAAI,KAAK,CAAC,OAAO,IAAI,CAAC,CAAC;QACpE,CAAC,CAAC,CAAC;QACH,IAAI,GAAG,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YAC5B,GAAG,GAAG,aAAK,CAAC,QAAQ,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,CAAC,CAAC;QAClD,CAAC;aAAM,CAAC;YACJ,GAAG,GAAG,aAAK,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,CAAC,CAAC;QACjD,CAAC;QAED,OAAO,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;aACzC,KAAK,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,0BAA0B,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;IACvF,CAAC;IACD,KAAK,CAAC,QAAQ,CAAC,IAAc;QACzB,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAC1E,OAAO,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,QAAQ,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC;IACxE,CAAC;CAEJ;AApBD,kCAoBC"}
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.AbstractResourcePrinter = void 0;
|
|
4
|
-
class AbstractResourcePrinter {
|
|
5
|
-
constructor() { }
|
|
6
|
-
}
|
|
7
|
-
exports.AbstractResourcePrinter = AbstractResourcePrinter;
|
|
8
|
-
//# sourceMappingURL=AbstractResourcePrinter.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"AbstractResourcePrinter.js","sourceRoot":"","sources":["../../../src/printers/AbstractResourcePrinter.ts"],"names":[],"mappings":";;;AAEA,MAAsB,uBAAuB;IACzC,gBAAiB,CAAC;CAErB;AAHD,0DAGC"}
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.JSONStylePrinter = void 0;
|
|
4
|
-
const AbstractResourcePrinter_1 = require("./AbstractResourcePrinter");
|
|
5
|
-
class JSONStylePrinter extends AbstractResourcePrinter_1.AbstractResourcePrinter {
|
|
6
|
-
print(...pages) {
|
|
7
|
-
const json = JSON.stringify(pages);
|
|
8
|
-
process.stdout.write(json + "\n");
|
|
9
|
-
}
|
|
10
|
-
}
|
|
11
|
-
exports.JSONStylePrinter = JSONStylePrinter;
|
|
12
|
-
//# sourceMappingURL=JSONStylePrinter.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"JSONStylePrinter.js","sourceRoot":"","sources":["../../../src/printers/JSONStylePrinter.ts"],"names":[],"mappings":";;;AACA,uEAAoE;AAGpE,MAAa,gBAAiB,SAAQ,iDAAuB;IACzD,KAAK,CAAC,GAAG,KAAqB;QAC1B,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QACnC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,GAAG,IAAI,CAAC,CAAA;IACrC,CAAC;CAGJ;AAPD,4CAOC"}
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.LogStylePrinter = void 0;
|
|
4
|
-
const Page_1 = require("../page/Page");
|
|
5
|
-
const AbstractResourcePrinter_1 = require("./AbstractResourcePrinter");
|
|
6
|
-
class LogStylePrinter extends AbstractResourcePrinter_1.AbstractResourcePrinter {
|
|
7
|
-
write(str) {
|
|
8
|
-
process.stdout.write(str);
|
|
9
|
-
}
|
|
10
|
-
async print(...pages) {
|
|
11
|
-
for (const page of pages) {
|
|
12
|
-
if (!(0, Page_1.isPage)(page)) {
|
|
13
|
-
this.write(page.error);
|
|
14
|
-
continue;
|
|
15
|
-
}
|
|
16
|
-
const { resources, title, url } = page;
|
|
17
|
-
this.write(`Title: ${title}\n`);
|
|
18
|
-
this.write(`URL: ${url}\n\n`);
|
|
19
|
-
for (const resource of resources) {
|
|
20
|
-
const { link: { url }, text: { value } } = resource;
|
|
21
|
-
this.write(`${value}: ${url}\n`);
|
|
22
|
-
}
|
|
23
|
-
}
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
exports.LogStylePrinter = LogStylePrinter;
|
|
27
|
-
//# sourceMappingURL=LogStylePrinter.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"LogStylePrinter.js","sourceRoot":"","sources":["../../../src/printers/LogStylePrinter.ts"],"names":[],"mappings":";;;AAAA,uCAAoE;AACpE,uEAAoE;AAEpE,MAAa,eAAgB,SAAQ,iDAAuB;IAExD,KAAK,CAAC,GAAW;QACb,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;IAC7B,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,GAAG,KAAqB;QAChC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACvB,IAAI,CAAC,IAAA,aAAM,EAAC,IAAI,CAAC,EAAE,CAAC;gBAChB,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;gBACtB,SAAQ;YACZ,CAAC;YAED,MAAM,EAAC,SAAS,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,IAAI,CAAA;YAErC,IAAI,CAAC,KAAK,CAAC,UAAU,KAAK,IAAI,CAAC,CAAA;YAC/B,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,MAAM,CAAC,CAAA;YAE7B,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;gBAC/B,MAAM,EAAE,IAAI,EAAE,EAAE,GAAG,EAAE,EAAE,IAAI,EAAE,EAAE,KAAK,EAAE,EAAE,GAAG,QAAQ,CAAA;gBACnD,IAAI,CAAC,KAAK,CAAC,GAAG,KAAK,KAAK,GAAG,IAAI,CAAC,CAAA;YACpC,CAAC;QACL,CAAC;IACL,CAAC;CACJ;AAxBD,0CAwBC"}
|
package/bin/src/resource.js
DELETED
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
/**
|
|
3
|
-
* @license MIT
|
|
4
|
-
* We are interested in visualising a page as a collection of tags.
|
|
5
|
-
*
|
|
6
|
-
* We wish to work with tags that can be compactly previewed on a webpage.
|
|
7
|
-
* Here we must declare all of the element types that can be used to represent
|
|
8
|
-
* a resource that can be hyperlinked off a webpage.
|
|
9
|
-
*/
|
|
10
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
11
|
-
exports.isKeyDefined = exports.isResourceKey = exports.RESOURCE_LINK_KEYS = exports.RESOURCE_DISPLAYABLE_KEYS = void 0;
|
|
12
|
-
exports.findResourceText = findResourceText;
|
|
13
|
-
exports.findResourceLink = findResourceLink;
|
|
14
|
-
function findDefinedKey(element, keys) {
|
|
15
|
-
for (const key of keys) {
|
|
16
|
-
if ((0, exports.isKeyDefined)(key, element)) {
|
|
17
|
-
return key;
|
|
18
|
-
}
|
|
19
|
-
}
|
|
20
|
-
}
|
|
21
|
-
exports.RESOURCE_DISPLAYABLE_KEYS = [
|
|
22
|
-
'id',
|
|
23
|
-
'innerText',
|
|
24
|
-
'textContent',
|
|
25
|
-
'class',
|
|
26
|
-
'ariaLabel',
|
|
27
|
-
'ariaDescription',
|
|
28
|
-
'alt',
|
|
29
|
-
'rel'
|
|
30
|
-
];
|
|
31
|
-
exports.RESOURCE_LINK_KEYS = [
|
|
32
|
-
"href",
|
|
33
|
-
"data-src",
|
|
34
|
-
"target",
|
|
35
|
-
"action",
|
|
36
|
-
"src",
|
|
37
|
-
"url"
|
|
38
|
-
];
|
|
39
|
-
function findResourceText(element) {
|
|
40
|
-
for (const key of exports.RESOURCE_DISPLAYABLE_KEYS) {
|
|
41
|
-
const value = element[key];
|
|
42
|
-
if (value && typeof value === 'string' && value.trim() !== '')
|
|
43
|
-
return { key, value };
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
function findResourceLink(element) {
|
|
47
|
-
const key = findDefinedKey(element, [...exports.RESOURCE_LINK_KEYS]);
|
|
48
|
-
const url = element[key];
|
|
49
|
-
if (url && typeof url === 'string' && url.trim() !== '')
|
|
50
|
-
return { key, url };
|
|
51
|
-
}
|
|
52
|
-
const isResourceKey = (key) => key in exports.RESOURCE_LINK_KEYS;
|
|
53
|
-
exports.isResourceKey = isResourceKey;
|
|
54
|
-
const isKeyDefined = (key, element) => key in element && element[key] !== undefined;
|
|
55
|
-
exports.isKeyDefined = isKeyDefined;
|
|
56
|
-
//# sourceMappingURL=resource.js.map
|
package/bin/src/resource.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"resource.js","sourceRoot":"","sources":["../../src/resource.ts"],"names":[],"mappings":";AAAA;;;;;;;GAOG;;;AA8CH,4CAMC;AAED,4CAKC;AAvDD,SAAS,cAAc,CAAC,OAAiB,EAAE,IAAe;IACtD,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACrB,IAAI,IAAA,oBAAY,EAAC,GAAG,EAAE,OAAO,CAAC,EAAE,CAAC;YAC7B,OAAO,GAAG,CAAC;QACf,CAAC;IACL,CAAC;AACL,CAAC;AAEY,QAAA,yBAAyB,GAAG;IACrC,IAAI;IACJ,WAAW;IACX,aAAa;IACb,OAAO;IACP,WAAW;IACX,iBAAiB;IACjB,KAAK;IACL,KAAK;CACC,CAAC;AASE,QAAA,kBAAkB,GAAG;IAC9B,MAAM;IACN,UAAU;IACV,QAAQ;IACR,QAAQ;IACR,KAAK;IACL,KAAK;CACC,CAAC;AASX,SAAgB,gBAAgB,CAAC,OAAiB;IAC9C,KAAK,MAAM,GAAG,IAAI,iCAAyB,EAAE,CAAC;QAC1C,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,CAAA;QAC1B,IAAI,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,IAAI,EAAE,KAAK,EAAE;YACzD,OAAO,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC;IAC9B,CAAC;AACL,CAAC;AAED,SAAgB,gBAAgB,CAAC,OAAiB;IAC9C,MAAM,GAAG,GAAG,cAAc,CAAC,OAAO,EAAE,CAAC,GAAG,0BAAkB,CAAC,CAAC,CAAC;IAC7D,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC;IACzB,IAAI,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,IAAI,EAAE,KAAK,EAAE;QACnD,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;AAC5B,CAAC;AAOM,MAAM,aAAa,GAAG,CAAC,GAAW,EAAkB,EAAE,CAAC,GAAG,IAAI,0BAAkB,CAAC;AAA3E,QAAA,aAAa,iBAA8D;AAEjF,MAAM,YAAY,GAAG,CAA6B,GAAW,EAAE,OAAU,EAAW,EAAE,CACzF,GAAG,IAAI,OAAO,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK,SAAS,CAAC;AADpC,QAAA,YAAY,gBACwB"}
|
package/jest.config.js
DELETED
|
@@ -1,198 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* For a detailed explanation regarding each configuration property, visit:
|
|
3
|
-
* https://jestjs.io/docs/configuration
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
/** @type {import('jest').Config} */
|
|
7
|
-
const config = {
|
|
8
|
-
// All imported modules in your tests should be mocked automatically
|
|
9
|
-
// automock: false,
|
|
10
|
-
|
|
11
|
-
// Stop running tests after `n` failures
|
|
12
|
-
// bail: 0,
|
|
13
|
-
|
|
14
|
-
// The directory where Jest should store its cached dependency information
|
|
15
|
-
// cacheDirectory: "/tmp/jest_rs",
|
|
16
|
-
|
|
17
|
-
// Automatically clear mock calls, instances, contexts and results before every test
|
|
18
|
-
// clearMocks: false,
|
|
19
|
-
|
|
20
|
-
// Indicates whether the coverage information should be collected while executing the test
|
|
21
|
-
collectCoverage: true,
|
|
22
|
-
|
|
23
|
-
// An array of glob patterns indicating a set of files for which coverage information should be collected
|
|
24
|
-
// collectCoverageFrom: undefined,
|
|
25
|
-
|
|
26
|
-
// The directory where Jest should output its coverage files
|
|
27
|
-
coverageDirectory: "coverage",
|
|
28
|
-
|
|
29
|
-
// An array of regexp pattern strings used to skip coverage collection
|
|
30
|
-
// coveragePathIgnorePatterns: [
|
|
31
|
-
// "/node_modules/"
|
|
32
|
-
// ],
|
|
33
|
-
|
|
34
|
-
// Indicates which provider should be used to instrument code for coverage
|
|
35
|
-
coverageProvider: "v8",
|
|
36
|
-
|
|
37
|
-
// A list of reporter names that Jest uses when writing coverage reports
|
|
38
|
-
// coverageReporters: [
|
|
39
|
-
// "json",
|
|
40
|
-
// "text",
|
|
41
|
-
// "lcov",
|
|
42
|
-
// "clover"
|
|
43
|
-
// ],
|
|
44
|
-
|
|
45
|
-
// An object that configures minimum threshold enforcement for coverage results
|
|
46
|
-
// coverageThreshold: undefined,
|
|
47
|
-
|
|
48
|
-
// A path to a custom dependency extractor
|
|
49
|
-
// dependencyExtractor: undefined,
|
|
50
|
-
|
|
51
|
-
// Make calling deprecated APIs throw helpful error messages
|
|
52
|
-
// errorOnDeprecated: false,
|
|
53
|
-
|
|
54
|
-
// The default configuration for fake timers
|
|
55
|
-
// fakeTimers: {
|
|
56
|
-
// "enableGlobally": false
|
|
57
|
-
// },
|
|
58
|
-
|
|
59
|
-
// Force coverage collection from ignored files using an array of glob patterns
|
|
60
|
-
// forceCoverageMatch: [],
|
|
61
|
-
|
|
62
|
-
// A path to a module which exports an async function that is triggered once before all test suites
|
|
63
|
-
// globalSetup: undefined,
|
|
64
|
-
|
|
65
|
-
// A path to a module which exports an async function that is triggered once after all test suites
|
|
66
|
-
// globalTeardown: undefined,
|
|
67
|
-
|
|
68
|
-
// A set of global variables that need to be available in all test environments
|
|
69
|
-
// globals: {},
|
|
70
|
-
|
|
71
|
-
// The maximum amount of workers used to run your tests. Can be specified as % or a number. E.g. maxWorkers: 10% will use 10% of your CPU amount + 1 as the maximum worker number. maxWorkers: 2 will use a maximum of 2 workers.
|
|
72
|
-
// maxWorkers: "50%",
|
|
73
|
-
|
|
74
|
-
// An array of directory names to be searched recursively up from the requiring module's location
|
|
75
|
-
// moduleDirectories: [
|
|
76
|
-
// "node_modules"
|
|
77
|
-
// ],
|
|
78
|
-
|
|
79
|
-
// An array of file extensions your modules use
|
|
80
|
-
// moduleFileExtensions: [
|
|
81
|
-
// "js",
|
|
82
|
-
// "mjs",
|
|
83
|
-
// "cjs",
|
|
84
|
-
// "jsx",
|
|
85
|
-
// "ts",
|
|
86
|
-
// "tsx",
|
|
87
|
-
// "json",
|
|
88
|
-
// "node"
|
|
89
|
-
// ],
|
|
90
|
-
|
|
91
|
-
// A map from regular expressions to module names or to arrays of module names that allow to stub out resources with a single module
|
|
92
|
-
// moduleNameMapper: {},
|
|
93
|
-
|
|
94
|
-
// An array of regexp pattern strings, matched against all module paths before considered 'visible' to the module loader
|
|
95
|
-
// modulePathIgnorePatterns: [],
|
|
96
|
-
|
|
97
|
-
// Activates notifications for test results
|
|
98
|
-
// notify: false,
|
|
99
|
-
|
|
100
|
-
// An enum that specifies notification mode. Requires { notify: true }
|
|
101
|
-
// notifyMode: "failure-change",
|
|
102
|
-
|
|
103
|
-
// A preset that is used as a base for Jest's configuration
|
|
104
|
-
// preset: undefined,
|
|
105
|
-
|
|
106
|
-
// Run tests from one or more projects
|
|
107
|
-
// projects: undefined,
|
|
108
|
-
|
|
109
|
-
// Use this configuration option to add custom reporters to Jest
|
|
110
|
-
// reporters: undefined,
|
|
111
|
-
|
|
112
|
-
// Automatically reset mock state before every test
|
|
113
|
-
// resetMocks: false,
|
|
114
|
-
|
|
115
|
-
// Reset the module registry before running each individual test
|
|
116
|
-
// resetModules: false,
|
|
117
|
-
|
|
118
|
-
// A path to a custom resolver
|
|
119
|
-
// resolver: undefined,
|
|
120
|
-
|
|
121
|
-
// Automatically restore mock state and implementation before every test
|
|
122
|
-
// restoreMocks: false,
|
|
123
|
-
|
|
124
|
-
// The root directory that Jest should scan for tests and modules within
|
|
125
|
-
// rootDir: undefined,
|
|
126
|
-
|
|
127
|
-
// A list of paths to directories that Jest should use to search for files in
|
|
128
|
-
// roots: [
|
|
129
|
-
// "<rootDir>"
|
|
130
|
-
// ],
|
|
131
|
-
|
|
132
|
-
// Allows you to use a custom runner instead of Jest's default test runner
|
|
133
|
-
// runner: "jest-runner",
|
|
134
|
-
|
|
135
|
-
// The paths to modules that run some code to configure or set up the testing environment before each test
|
|
136
|
-
// setupFiles: [],
|
|
137
|
-
|
|
138
|
-
// A list of paths to modules that run some code to configure or set up the testing framework before each test
|
|
139
|
-
// setupFilesAfterEnv: [],
|
|
140
|
-
|
|
141
|
-
// The number of seconds after which a test is considered as slow and reported as such in the results.
|
|
142
|
-
// slowTestThreshold: 5,
|
|
143
|
-
|
|
144
|
-
// A list of paths to snapshot serializer modules Jest should use for snapshot testing
|
|
145
|
-
// snapshotSerializers: [],
|
|
146
|
-
|
|
147
|
-
// The test environment that will be used for testing
|
|
148
|
-
// testEnvironment: "jest-environment-node",
|
|
149
|
-
|
|
150
|
-
// Options that will be passed to the testEnvironment
|
|
151
|
-
// testEnvironmentOptions: {},
|
|
152
|
-
|
|
153
|
-
// Adds a location field to test results
|
|
154
|
-
// testLocationInResults: false,
|
|
155
|
-
|
|
156
|
-
// The glob patterns Jest uses to detect test files
|
|
157
|
-
// testMatch: [
|
|
158
|
-
// "**/__tests__/**/*.[jt]s?(x)",
|
|
159
|
-
// "**/?(*.)+(spec|test).[tj]s?(x)"
|
|
160
|
-
// ],
|
|
161
|
-
|
|
162
|
-
// An array of regexp pattern strings that are matched against all test paths, matched tests are skipped
|
|
163
|
-
// testPathIgnorePatterns: [
|
|
164
|
-
// "/node_modules/"
|
|
165
|
-
// ],
|
|
166
|
-
|
|
167
|
-
// The regexp pattern or array of patterns that Jest uses to detect test files
|
|
168
|
-
// testRegex: [],
|
|
169
|
-
|
|
170
|
-
// This option allows the use of a custom results processor
|
|
171
|
-
// testResultsProcessor: undefined,
|
|
172
|
-
|
|
173
|
-
// This option allows use of a custom test runner
|
|
174
|
-
// testRunner: "jest-circus/runner",
|
|
175
|
-
|
|
176
|
-
// A map from regular expressions to paths to transformers
|
|
177
|
-
// transform: undefined,
|
|
178
|
-
|
|
179
|
-
// An array of regexp pattern strings that are matched against all source file paths, matched files will skip transformation
|
|
180
|
-
// transformIgnorePatterns: [
|
|
181
|
-
// "/node_modules/",
|
|
182
|
-
// "\\.pnp\\.[^\\/]+$"
|
|
183
|
-
// ],
|
|
184
|
-
|
|
185
|
-
// An array of regexp pattern strings that are matched against all modules before the module loader will automatically return a mock for them
|
|
186
|
-
// unmockedModulePathPatterns: undefined,
|
|
187
|
-
|
|
188
|
-
// Indicates whether each individual test should be reported during the run
|
|
189
|
-
// verbose: undefined,
|
|
190
|
-
|
|
191
|
-
// An array of regexp patterns that are matched against all source file paths before re-running tests in watch mode
|
|
192
|
-
// watchPathIgnorePatterns: [],
|
|
193
|
-
|
|
194
|
-
// Whether to use watchman for file crawling
|
|
195
|
-
// watchman: true,
|
|
196
|
-
};
|
|
197
|
-
|
|
198
|
-
module.exports = config;
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
import { isError, type Page } from '../page/Page';
|
|
2
|
-
import { JSDOM } from 'jsdom';
|
|
3
|
-
import { AbstractExtractor } from './AbstractExtractor';
|
|
4
|
-
|
|
5
|
-
export class PageExtractor extends AbstractExtractor<JSDOM, Page> {
|
|
6
|
-
constructor() { super("page-extractor"); }
|
|
7
|
-
|
|
8
|
-
async extract(value: JSDOM): Promise<Page> {
|
|
9
|
-
const { window: { document: { title, location: { href: url } } } } = value
|
|
10
|
-
return { title, url }
|
|
11
|
-
}
|
|
12
|
-
}
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
import type { JSDOM } from "jsdom";
|
|
2
|
-
import { findResourceLink, findResourceText, type ExternalResource, type Resource, type Tag } from "../resource";
|
|
3
|
-
import { AbstractExtractor } from './AbstractExtractor';
|
|
4
|
-
|
|
5
|
-
export class ResourceExtractor extends AbstractExtractor<JSDOM, ExternalResource[]> {
|
|
6
|
-
constructor(private readonly tags: Tag[]) {
|
|
7
|
-
super("page-extractor");
|
|
8
|
-
}
|
|
9
|
-
async extract(value: JSDOM): Promise<ExternalResource[]> {
|
|
10
|
-
const { document } = value.window;
|
|
11
|
-
const externalResources: ExternalResource[] = [];
|
|
12
|
-
for (const tag of this.tags) {
|
|
13
|
-
const selector = document.querySelectorAll<Resource>(tag)
|
|
14
|
-
const elements = Array.from(selector)
|
|
15
|
-
for (const element of elements) {
|
|
16
|
-
const text = findResourceText(element);
|
|
17
|
-
const link = findResourceLink(element);
|
|
18
|
-
if(!text || !link) continue
|
|
19
|
-
if (!link.url.startsWith("http")) continue
|
|
20
|
-
externalResources.push({ text, link })
|
|
21
|
-
}
|
|
22
|
-
}
|
|
23
|
-
return externalResources;
|
|
24
|
-
}
|
|
25
|
-
}
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
import { JSDOM } from 'jsdom';
|
|
2
|
-
import type { Resource, Tag } from '../resource';
|
|
3
|
-
import { AbstractExtractor } from './AbstractExtractor';
|
|
4
|
-
|
|
5
|
-
export class TagExtractor<T extends Tag> extends AbstractExtractor<JSDOM, Resource[]> {
|
|
6
|
-
extract(value: JSDOM): Promise<Resource[]> {
|
|
7
|
-
const linkNodes = value.window.document.querySelectorAll<Resource>(this.tagName);
|
|
8
|
-
return Promise.resolve(Array.from(linkNodes));
|
|
9
|
-
}
|
|
10
|
-
constructor(private readonly tagName: T) {
|
|
11
|
-
super(`extract <${tagName}>`)
|
|
12
|
-
};
|
|
13
|
-
|
|
14
|
-
}
|
package/src/main.ts
DELETED
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
import { Command, createArgument } from "commander";
|
|
3
|
-
|
|
4
|
-
import { description, name, version } from '../package.json';
|
|
5
|
-
import { PageExtractor } from "./extractors/PageExtractor";
|
|
6
|
-
import { ResourceExtractor } from "./extractors/ResourceExtractor";
|
|
7
|
-
import { PageFetcher } from "./page/PageFetcher";
|
|
8
|
-
import type { Page, PageMetadata } from "./page/Page";
|
|
9
|
-
import { JSONStylePrinter } from "./printers/JSONStylePrinter";
|
|
10
|
-
import { LogStylePrinter } from "./printers/LogStylePrinter";
|
|
11
|
-
|
|
12
|
-
const program = new Command();
|
|
13
|
-
|
|
14
|
-
const url = createArgument("<url | file...>", "remote https://URL or local file://resource.html to extract from");
|
|
15
|
-
|
|
16
|
-
(async () => {
|
|
17
|
-
await program
|
|
18
|
-
.name(name)
|
|
19
|
-
.version(version, "-v, --version")
|
|
20
|
-
.description(description)
|
|
21
|
-
.addArgument(url)
|
|
22
|
-
.action(async (urls: string[]) => {
|
|
23
|
-
const printer = new JSONStylePrinter();
|
|
24
|
-
// simple log style printer
|
|
25
|
-
// const printer = new LogStylePrinter();
|
|
26
|
-
|
|
27
|
-
const pageFetcher = new PageFetcher()
|
|
28
|
-
const pageExtractor = new PageExtractor()
|
|
29
|
-
const resourceExtractor = new ResourceExtractor(["a", "meta", "link", "embed"])
|
|
30
|
-
|
|
31
|
-
const pageResponses = await pageFetcher.fetchAll(urls);
|
|
32
|
-
const pageMetadatas: PageMetadata[] = [];
|
|
33
|
-
|
|
34
|
-
for (const { content, url, error } of pageResponses) {
|
|
35
|
-
const resources = error in (content) ? [] : await resourceExtractor.extract(content);
|
|
36
|
-
const descriptor = error in content ? { url, error } : await pageExtractor.extract(content);
|
|
37
|
-
pageMetadatas.push({ ...descriptor, resources });
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
await printer.print(...pageMetadatas);
|
|
41
|
-
})
|
|
42
|
-
.parseAsync(process.argv);
|
|
43
|
-
})();
|
package/src/page/Page.ts
DELETED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
import type { ExternalResource } from "../resource";
|
|
2
|
-
|
|
3
|
-
type hasTitle = {
|
|
4
|
-
title: string;
|
|
5
|
-
};
|
|
6
|
-
|
|
7
|
-
type hasUrl = {
|
|
8
|
-
url: string;
|
|
9
|
-
};
|
|
10
|
-
|
|
11
|
-
type hasResources = {
|
|
12
|
-
resources: ExternalResource[];
|
|
13
|
-
};
|
|
14
|
-
|
|
15
|
-
export type Page = hasTitle & hasUrl
|
|
16
|
-
export type PageMetadata = (Page & hasResources) | { error: string }
|
|
17
|
-
export const isError = (page: PageMetadata): page is { error: string } => 'error' in page;
|
|
18
|
-
export const isPage = (page: any): page is Page =>
|
|
19
|
-
"resources" in page && Array.isArray(page.resources);
|