pagerts 0.2.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +220 -16
  2. package/bin/main.js +9 -25
  3. package/bin/main.js.map +4 -4
  4. package/package.json +37 -13
  5. package/bin/package.json +0 -40
  6. package/bin/src/extractors/AbstractExtractor.js +0 -11
  7. package/bin/src/extractors/AbstractExtractor.js.map +0 -1
  8. package/bin/src/extractors/PageExtractor.js +0 -13
  9. package/bin/src/extractors/PageExtractor.js.map +0 -1
  10. package/bin/src/extractors/ResourceExtractor.js +0 -32
  11. package/bin/src/extractors/ResourceExtractor.js.map +0 -1
  12. package/bin/src/main.js +0 -36
  13. package/bin/src/main.js.map +0 -1
  14. package/bin/src/page/Page.js +0 -8
  15. package/bin/src/page/Page.js.map +0 -1
  16. package/bin/src/page/PageFetcher.js +0 -26
  17. package/bin/src/page/PageFetcher.js.map +0 -1
  18. package/bin/src/printers/AbstractResourcePrinter.js +0 -8
  19. package/bin/src/printers/AbstractResourcePrinter.js.map +0 -1
  20. package/bin/src/printers/JSONStylePrinter.js +0 -12
  21. package/bin/src/printers/JSONStylePrinter.js.map +0 -1
  22. package/bin/src/printers/LogStylePrinter.js +0 -27
  23. package/bin/src/printers/LogStylePrinter.js.map +0 -1
  24. package/bin/src/resource.js +0 -56
  25. package/bin/src/resource.js.map +0 -1
  26. package/jest.config.js +0 -198
  27. package/src/extractors/AbstractExtractor.ts +0 -5
  28. package/src/extractors/PageExtractor.ts +0 -12
  29. package/src/extractors/ResourceExtractor.ts +0 -25
  30. package/src/extractors/TagExtractor.ts +0 -14
  31. package/src/main.ts +0 -43
  32. package/src/page/Page.ts +0 -19
  33. package/src/page/PageFetcher.ts +0 -30
  34. package/src/printers/AbstractResourcePrinter.ts +0 -6
  35. package/src/printers/JSONStylePrinter.ts +0 -12
  36. package/src/printers/LogStylePrinter.ts +0 -28
  37. package/src/resource.ts +0 -96
  38. package/tsconfig.json +0 -12
package/bin/package.json DELETED
@@ -1,40 +0,0 @@
1
- {
2
- "name": "pagerts",
3
- "description": "A tool for viewing external relations in a webpage",
4
- "version": "0.1.9",
5
- "main": "main.js",
6
- "bin": {
7
- "pagerts": "bin/main.js"
8
- },
9
- "scripts": {
10
- "test": "jest",
11
- "build": "esbuild src/main.ts --external:jsdom --bundle --outdir=bin --minify --sourcemap --platform=node",
12
- "lint": "tsc",
13
- "start": "node ./bin/main.js",
14
- "dev": "npx tsx src/main.ts"
15
- },
16
- "keywords": [
17
- "webpage",
18
- "hierarchy",
19
- "management"
20
- ],
21
- "author": "Kirill kn253 Nevzorov",
22
- "license": "MIT",
23
- "bugs": {
24
- "url": "https://github.com/akinevz0/pagerts/issues"
25
- },
26
- "homepage": "https://github.com/akinevz0/pagerts",
27
- "dependencies": {
28
- "blessed": "^0.1.81",
29
- "commander": "^12.1.0",
30
- "dotenv": "^16.4.5",
31
- "jsdom": "^26.0.0"
32
- },
33
- "devDependencies": {
34
- "@types/blessed": "^0.1.25",
35
- "@types/jsdom": "^21.1.7",
36
- "@types/node": "^22.8.2",
37
- "esbuild": "^0.25.1",
38
- "ts-node": "^10.9.2"
39
- }
40
- }
@@ -1,11 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.AbstractExtractor = void 0;
4
- class AbstractExtractor {
5
- name;
6
- constructor(name) {
7
- this.name = name;
8
- }
9
- }
10
- exports.AbstractExtractor = AbstractExtractor;
11
- //# sourceMappingURL=AbstractExtractor.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"AbstractExtractor.js","sourceRoot":"","sources":["../../../src/extractors/AbstractExtractor.ts"],"names":[],"mappings":";;;AACA,MAAsB,iBAAiB;IACd;IAArB,YAAqB,IAAW;QAAX,SAAI,GAAJ,IAAI,CAAO;IAAI,CAAC;CAExC;AAHD,8CAGC"}
@@ -1,13 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.PageExtractor = void 0;
4
- const AbstractExtractor_1 = require("./AbstractExtractor");
5
- class PageExtractor extends AbstractExtractor_1.AbstractExtractor {
6
- constructor() { super("page-extractor"); }
7
- async extract(value) {
8
- const { window: { document: { title, location: { href: url } } } } = value;
9
- return { title, url };
10
- }
11
- }
12
- exports.PageExtractor = PageExtractor;
13
- //# sourceMappingURL=PageExtractor.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"PageExtractor.js","sourceRoot":"","sources":["../../../src/extractors/PageExtractor.ts"],"names":[],"mappings":";;;AAEA,2DAAwD;AAExD,MAAa,aAAc,SAAQ,qCAA8B;IAC7D,gBAAgB,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC;IAE1C,KAAK,CAAC,OAAO,CAAC,KAAY;QACtB,MAAM,EAAE,MAAM,EAAE,EAAE,QAAQ,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,EAAE,EAAE,EAAE,GAAG,KAAK,CAAA;QAC1E,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,CAAA;IACzB,CAAC;CACJ;AAPD,sCAOC"}
@@ -1,32 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.ResourceExtractor = void 0;
4
- const resource_1 = require("../resource");
5
- const AbstractExtractor_1 = require("./AbstractExtractor");
6
- class ResourceExtractor extends AbstractExtractor_1.AbstractExtractor {
7
- tags;
8
- constructor(tags) {
9
- super("page-extractor");
10
- this.tags = tags;
11
- }
12
- async extract(value) {
13
- const { document } = value.window;
14
- const externalResources = [];
15
- for (const tag of this.tags) {
16
- const selector = document.querySelectorAll(tag);
17
- const elements = Array.from(selector);
18
- for (const element of elements) {
19
- const text = (0, resource_1.findResourceText)(element);
20
- const link = (0, resource_1.findResourceLink)(element);
21
- if (!text || !link)
22
- continue;
23
- if (!link.url.startsWith("http"))
24
- continue;
25
- externalResources.push({ text, link });
26
- }
27
- }
28
- return externalResources;
29
- }
30
- }
31
- exports.ResourceExtractor = ResourceExtractor;
32
- //# sourceMappingURL=ResourceExtractor.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"ResourceExtractor.js","sourceRoot":"","sources":["../../../src/extractors/ResourceExtractor.ts"],"names":[],"mappings":";;;AACA,0CAAiH;AACjH,2DAAwD;AAExD,MAAa,iBAAkB,SAAQ,qCAA4C;IAClD;IAA7B,YAA6B,IAAW;QACpC,KAAK,CAAC,gBAAgB,CAAC,CAAC;QADC,SAAI,GAAJ,IAAI,CAAO;IAExC,CAAC;IACD,KAAK,CAAC,OAAO,CAAC,KAAY;QACtB,MAAM,EAAE,QAAQ,EAAE,GAAG,KAAK,CAAC,MAAM,CAAC;QAClC,MAAM,iBAAiB,GAAuB,EAAE,CAAC;QACjD,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YAC1B,MAAM,QAAQ,GAAG,QAAQ,CAAC,gBAAgB,CAAW,GAAG,CAAC,CAAA;YACzD,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;YACrC,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;gBAC7B,MAAM,IAAI,GAAG,IAAA,2BAAgB,EAAC,OAAO,CAAC,CAAC;gBACvC,MAAM,IAAI,GAAG,IAAA,2BAAgB,EAAC,OAAO,CAAC,CAAC;gBACvC,IAAG,CAAC,IAAI,IAAI,CAAC,IAAI;oBAAE,SAAQ;gBAC3B,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC;oBAAE,SAAQ;gBAC1C,iBAAiB,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAA;YAC1C,CAAC;QACL,CAAC;QACD,OAAO,iBAAiB,CAAC;IAC7B,CAAC;CACJ;AApBD,8CAoBC"}
package/bin/src/main.js DELETED
@@ -1,36 +0,0 @@
1
- #!/usr/bin/env node
2
- "use strict";
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- const commander_1 = require("commander");
5
- const package_json_1 = require("../package.json");
6
- const PageExtractor_1 = require("./extractors/PageExtractor");
7
- const ResourceExtractor_1 = require("./extractors/ResourceExtractor");
8
- const PageFetcher_1 = require("./page/PageFetcher");
9
- const JSONStylePrinter_1 = require("./printers/JSONStylePrinter");
10
- const program = new commander_1.Command();
11
- const url = (0, commander_1.createArgument)("<url | file...>", "remote https://URL or local file://resource.html to extract from");
12
- (async () => {
13
- await program
14
- .name(package_json_1.name)
15
- .version(package_json_1.version, "-v, --version")
16
- .description(package_json_1.description)
17
- .addArgument(url)
18
- .action(async (urls) => {
19
- const printer = new JSONStylePrinter_1.JSONStylePrinter();
20
- // simple log style printer
21
- // const printer = new LogStylePrinter();
22
- const pageFetcher = new PageFetcher_1.PageFetcher();
23
- const pageExtractor = new PageExtractor_1.PageExtractor();
24
- const resourceExtractor = new ResourceExtractor_1.ResourceExtractor(["a", "meta", "link", "embed"]);
25
- const pageResponses = await pageFetcher.fetchAll(urls);
26
- const pageMetadatas = [];
27
- for (const { content, url, error } of pageResponses) {
28
- const resources = error in (content) ? [] : await resourceExtractor.extract(content);
29
- const descriptor = error in content ? { url, error } : await pageExtractor.extract(content);
30
- pageMetadatas.push({ ...descriptor, resources });
31
- }
32
- await printer.print(...pageMetadatas);
33
- })
34
- .parseAsync(process.argv);
35
- })();
36
- //# sourceMappingURL=main.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"main.js","sourceRoot":"","sources":["../../src/main.ts"],"names":[],"mappings":";;;AACA,yCAAoD;AAEpD,kDAA6D;AAC7D,8DAA2D;AAC3D,sEAAmE;AACnE,oDAAiD;AAEjD,kEAA+D;AAG/D,MAAM,OAAO,GAAG,IAAI,mBAAO,EAAE,CAAC;AAE9B,MAAM,GAAG,GAAG,IAAA,0BAAc,EAAC,iBAAiB,EAAE,kEAAkE,CAAC,CAAC;AAElH,CAAC,KAAK,IAAI,EAAE;IACV,MAAM,OAAO;SACV,IAAI,CAAC,mBAAI,CAAC;SACV,OAAO,CAAC,sBAAO,EAAE,eAAe,CAAC;SACjC,WAAW,CAAC,0BAAW,CAAC;SACxB,WAAW,CAAC,GAAG,CAAC;SAChB,MAAM,CAAC,KAAK,EAAE,IAAc,EAAE,EAAE;QAC/B,MAAM,OAAO,GAAG,IAAI,mCAAgB,EAAE,CAAC;QACvC,2BAA2B;QAC3B,yCAAyC;QAEzC,MAAM,WAAW,GAAG,IAAI,yBAAW,EAAE,CAAA;QACrC,MAAM,aAAa,GAAG,IAAI,6BAAa,EAAE,CAAA;QACzC,MAAM,iBAAiB,GAAG,IAAI,qCAAiB,CAAC,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,CAAA;QAE/E,MAAM,aAAa,GAAG,MAAM,WAAW,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QACvD,MAAM,aAAa,GAAmB,EAAE,CAAC;QAEzC,KAAK,MAAM,EAAE,OAAO,EAAE,GAAG,EAAE,KAAK,EAAE,IAAI,aAAa,EAAE,CAAC;YACpD,MAAM,SAAS,GAAG,KAAK,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,iBAAiB,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YACrF,MAAM,UAAU,GAAG,KAAK,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,MAAM,aAAa,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YAC5F,aAAa,CAAC,IAAI,CAAC,EAAE,GAAG,UAAU,EAAE,SAAS,EAAE,CAAC,CAAC;QACnD,CAAC;QAED,MAAM,OAAO,CAAC,KAAK,CAAC,GAAG,aAAa,CAAC,CAAC;IACxC,CAAC,CAAC;SACD,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;AAC9B,CAAC,CAAC,EAAE,CAAC"}
@@ -1,8 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.isPage = exports.isError = void 0;
4
- const isError = (page) => 'error' in page;
5
- exports.isError = isError;
6
- const isPage = (page) => "resources" in page && Array.isArray(page.resources);
7
- exports.isPage = isPage;
8
- //# sourceMappingURL=Page.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"Page.js","sourceRoot":"","sources":["../../../src/page/Page.ts"],"names":[],"mappings":";;;AAgBO,MAAM,OAAO,GAAG,CAAC,IAAkB,EAA6B,EAAE,CAAC,OAAO,IAAI,IAAI,CAAC;AAA7E,QAAA,OAAO,WAAsE;AACnF,MAAM,MAAM,GAAG,CAAC,IAAS,EAAgB,EAAE,CAC9C,WAAW,IAAI,IAAI,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;AAD5C,QAAA,MAAM,UACsC"}
@@ -1,26 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.PageFetcher = void 0;
4
- const jsdom_1 = require("jsdom");
5
- class PageFetcher {
6
- async fetchPage(url) {
7
- let dom;
8
- const virtualConsole = new jsdom_1.VirtualConsole().on('jsdomError', (error) => {
9
- process.stderr.write(`Error parsing ${url}:${error.message}\n`);
10
- });
11
- if (url.startsWith("file://")) {
12
- dom = jsdom_1.JSDOM.fromFile(url, { virtualConsole });
13
- }
14
- else {
15
- dom = jsdom_1.JSDOM.fromURL(url, { virtualConsole });
16
- }
17
- return dom.then(content => ({ url, content }))
18
- .catch(({ message }) => ({ url, error: `JSDOM failed to parse: ${message}` }));
19
- }
20
- async fetchAll(urls) {
21
- const responses = await Promise.all(urls.map(url => this.fetchPage(url)));
22
- return responses.filter(response => response.content !== undefined);
23
- }
24
- }
25
- exports.PageFetcher = PageFetcher;
26
- //# sourceMappingURL=PageFetcher.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"PageFetcher.js","sourceRoot":"","sources":["../../../src/page/PageFetcher.ts"],"names":[],"mappings":";;;AAAA,iCAA8C;AAS9C,MAAa,WAAW;IACZ,KAAK,CAAC,SAAS,CAAC,GAAW;QAC/B,IAAI,GAAmB,CAAC;QACxB,MAAM,cAAc,GAAG,IAAI,sBAAc,EAAE,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC,KAAK,EAAE,EAAE;YACnE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,iBAAiB,GAAG,IAAI,KAAK,CAAC,OAAO,IAAI,CAAC,CAAC;QACpE,CAAC,CAAC,CAAC;QACH,IAAI,GAAG,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YAC5B,GAAG,GAAG,aAAK,CAAC,QAAQ,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,CAAC,CAAC;QAClD,CAAC;aAAM,CAAC;YACJ,GAAG,GAAG,aAAK,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,CAAC,CAAC;QACjD,CAAC;QAED,OAAO,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;aACzC,KAAK,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,0BAA0B,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;IACvF,CAAC;IACD,KAAK,CAAC,QAAQ,CAAC,IAAc;QACzB,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAC1E,OAAO,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,QAAQ,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC;IACxE,CAAC;CAEJ;AApBD,kCAoBC"}
@@ -1,8 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.AbstractResourcePrinter = void 0;
4
- class AbstractResourcePrinter {
5
- constructor() { }
6
- }
7
- exports.AbstractResourcePrinter = AbstractResourcePrinter;
8
- //# sourceMappingURL=AbstractResourcePrinter.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"AbstractResourcePrinter.js","sourceRoot":"","sources":["../../../src/printers/AbstractResourcePrinter.ts"],"names":[],"mappings":";;;AAEA,MAAsB,uBAAuB;IACzC,gBAAiB,CAAC;CAErB;AAHD,0DAGC"}
@@ -1,12 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.JSONStylePrinter = void 0;
4
- const AbstractResourcePrinter_1 = require("./AbstractResourcePrinter");
5
- class JSONStylePrinter extends AbstractResourcePrinter_1.AbstractResourcePrinter {
6
- print(...pages) {
7
- const json = JSON.stringify(pages);
8
- process.stdout.write(json + "\n");
9
- }
10
- }
11
- exports.JSONStylePrinter = JSONStylePrinter;
12
- //# sourceMappingURL=JSONStylePrinter.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"JSONStylePrinter.js","sourceRoot":"","sources":["../../../src/printers/JSONStylePrinter.ts"],"names":[],"mappings":";;;AACA,uEAAoE;AAGpE,MAAa,gBAAiB,SAAQ,iDAAuB;IACzD,KAAK,CAAC,GAAG,KAAqB;QAC1B,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QACnC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,GAAG,IAAI,CAAC,CAAA;IACrC,CAAC;CAGJ;AAPD,4CAOC"}
@@ -1,27 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.LogStylePrinter = void 0;
4
- const Page_1 = require("../page/Page");
5
- const AbstractResourcePrinter_1 = require("./AbstractResourcePrinter");
6
- class LogStylePrinter extends AbstractResourcePrinter_1.AbstractResourcePrinter {
7
- write(str) {
8
- process.stdout.write(str);
9
- }
10
- async print(...pages) {
11
- for (const page of pages) {
12
- if (!(0, Page_1.isPage)(page)) {
13
- this.write(page.error);
14
- continue;
15
- }
16
- const { resources, title, url } = page;
17
- this.write(`Title: ${title}\n`);
18
- this.write(`URL: ${url}\n\n`);
19
- for (const resource of resources) {
20
- const { link: { url }, text: { value } } = resource;
21
- this.write(`${value}: ${url}\n`);
22
- }
23
- }
24
- }
25
- }
26
- exports.LogStylePrinter = LogStylePrinter;
27
- //# sourceMappingURL=LogStylePrinter.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"LogStylePrinter.js","sourceRoot":"","sources":["../../../src/printers/LogStylePrinter.ts"],"names":[],"mappings":";;;AAAA,uCAAoE;AACpE,uEAAoE;AAEpE,MAAa,eAAgB,SAAQ,iDAAuB;IAExD,KAAK,CAAC,GAAW;QACb,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;IAC7B,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,GAAG,KAAqB;QAChC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACvB,IAAI,CAAC,IAAA,aAAM,EAAC,IAAI,CAAC,EAAE,CAAC;gBAChB,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;gBACtB,SAAQ;YACZ,CAAC;YAED,MAAM,EAAC,SAAS,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,IAAI,CAAA;YAErC,IAAI,CAAC,KAAK,CAAC,UAAU,KAAK,IAAI,CAAC,CAAA;YAC/B,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,MAAM,CAAC,CAAA;YAE7B,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;gBAC/B,MAAM,EAAE,IAAI,EAAE,EAAE,GAAG,EAAE,EAAE,IAAI,EAAE,EAAE,KAAK,EAAE,EAAE,GAAG,QAAQ,CAAA;gBACnD,IAAI,CAAC,KAAK,CAAC,GAAG,KAAK,KAAK,GAAG,IAAI,CAAC,CAAA;YACpC,CAAC;QACL,CAAC;IACL,CAAC;CACJ;AAxBD,0CAwBC"}
@@ -1,56 +0,0 @@
1
- "use strict";
2
- /**
3
- * @license MIT
4
- * We are interested in visualising a page as a collection of tags.
5
- *
6
- * We wish to work with tags that can be compactly previewed on a webpage.
7
- * Here we must declare all of the element types that can be used to represent
8
- * a resource that can be hyperlinked off a webpage.
9
- */
10
- Object.defineProperty(exports, "__esModule", { value: true });
11
- exports.isKeyDefined = exports.isResourceKey = exports.RESOURCE_LINK_KEYS = exports.RESOURCE_DISPLAYABLE_KEYS = void 0;
12
- exports.findResourceText = findResourceText;
13
- exports.findResourceLink = findResourceLink;
14
- function findDefinedKey(element, keys) {
15
- for (const key of keys) {
16
- if ((0, exports.isKeyDefined)(key, element)) {
17
- return key;
18
- }
19
- }
20
- }
21
- exports.RESOURCE_DISPLAYABLE_KEYS = [
22
- 'id',
23
- 'innerText',
24
- 'textContent',
25
- 'class',
26
- 'ariaLabel',
27
- 'ariaDescription',
28
- 'alt',
29
- 'rel'
30
- ];
31
- exports.RESOURCE_LINK_KEYS = [
32
- "href",
33
- "data-src",
34
- "target",
35
- "action",
36
- "src",
37
- "url"
38
- ];
39
- function findResourceText(element) {
40
- for (const key of exports.RESOURCE_DISPLAYABLE_KEYS) {
41
- const value = element[key];
42
- if (value && typeof value === 'string' && value.trim() !== '')
43
- return { key, value };
44
- }
45
- }
46
- function findResourceLink(element) {
47
- const key = findDefinedKey(element, [...exports.RESOURCE_LINK_KEYS]);
48
- const url = element[key];
49
- if (url && typeof url === 'string' && url.trim() !== '')
50
- return { key, url };
51
- }
52
- const isResourceKey = (key) => key in exports.RESOURCE_LINK_KEYS;
53
- exports.isResourceKey = isResourceKey;
54
- const isKeyDefined = (key, element) => key in element && element[key] !== undefined;
55
- exports.isKeyDefined = isKeyDefined;
56
- //# sourceMappingURL=resource.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"resource.js","sourceRoot":"","sources":["../../src/resource.ts"],"names":[],"mappings":";AAAA;;;;;;;GAOG;;;AA8CH,4CAMC;AAED,4CAKC;AAvDD,SAAS,cAAc,CAAC,OAAiB,EAAE,IAAe;IACtD,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACrB,IAAI,IAAA,oBAAY,EAAC,GAAG,EAAE,OAAO,CAAC,EAAE,CAAC;YAC7B,OAAO,GAAG,CAAC;QACf,CAAC;IACL,CAAC;AACL,CAAC;AAEY,QAAA,yBAAyB,GAAG;IACrC,IAAI;IACJ,WAAW;IACX,aAAa;IACb,OAAO;IACP,WAAW;IACX,iBAAiB;IACjB,KAAK;IACL,KAAK;CACC,CAAC;AASE,QAAA,kBAAkB,GAAG;IAC9B,MAAM;IACN,UAAU;IACV,QAAQ;IACR,QAAQ;IACR,KAAK;IACL,KAAK;CACC,CAAC;AASX,SAAgB,gBAAgB,CAAC,OAAiB;IAC9C,KAAK,MAAM,GAAG,IAAI,iCAAyB,EAAE,CAAC;QAC1C,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,CAAA;QAC1B,IAAI,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,IAAI,EAAE,KAAK,EAAE;YACzD,OAAO,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC;IAC9B,CAAC;AACL,CAAC;AAED,SAAgB,gBAAgB,CAAC,OAAiB;IAC9C,MAAM,GAAG,GAAG,cAAc,CAAC,OAAO,EAAE,CAAC,GAAG,0BAAkB,CAAC,CAAC,CAAC;IAC7D,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC;IACzB,IAAI,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,IAAI,EAAE,KAAK,EAAE;QACnD,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;AAC5B,CAAC;AAOM,MAAM,aAAa,GAAG,CAAC,GAAW,EAAkB,EAAE,CAAC,GAAG,IAAI,0BAAkB,CAAC;AAA3E,QAAA,aAAa,iBAA8D;AAEjF,MAAM,YAAY,GAAG,CAA6B,GAAW,EAAE,OAAU,EAAW,EAAE,CACzF,GAAG,IAAI,OAAO,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK,SAAS,CAAC;AADpC,QAAA,YAAY,gBACwB"}
package/jest.config.js DELETED
@@ -1,198 +0,0 @@
1
- /**
2
- * For a detailed explanation regarding each configuration property, visit:
3
- * https://jestjs.io/docs/configuration
4
- */
5
-
6
- /** @type {import('jest').Config} */
7
- const config = {
8
- // All imported modules in your tests should be mocked automatically
9
- // automock: false,
10
-
11
- // Stop running tests after `n` failures
12
- // bail: 0,
13
-
14
- // The directory where Jest should store its cached dependency information
15
- // cacheDirectory: "/tmp/jest_rs",
16
-
17
- // Automatically clear mock calls, instances, contexts and results before every test
18
- // clearMocks: false,
19
-
20
- // Indicates whether the coverage information should be collected while executing the test
21
- collectCoverage: true,
22
-
23
- // An array of glob patterns indicating a set of files for which coverage information should be collected
24
- // collectCoverageFrom: undefined,
25
-
26
- // The directory where Jest should output its coverage files
27
- coverageDirectory: "coverage",
28
-
29
- // An array of regexp pattern strings used to skip coverage collection
30
- // coveragePathIgnorePatterns: [
31
- // "/node_modules/"
32
- // ],
33
-
34
- // Indicates which provider should be used to instrument code for coverage
35
- coverageProvider: "v8",
36
-
37
- // A list of reporter names that Jest uses when writing coverage reports
38
- // coverageReporters: [
39
- // "json",
40
- // "text",
41
- // "lcov",
42
- // "clover"
43
- // ],
44
-
45
- // An object that configures minimum threshold enforcement for coverage results
46
- // coverageThreshold: undefined,
47
-
48
- // A path to a custom dependency extractor
49
- // dependencyExtractor: undefined,
50
-
51
- // Make calling deprecated APIs throw helpful error messages
52
- // errorOnDeprecated: false,
53
-
54
- // The default configuration for fake timers
55
- // fakeTimers: {
56
- // "enableGlobally": false
57
- // },
58
-
59
- // Force coverage collection from ignored files using an array of glob patterns
60
- // forceCoverageMatch: [],
61
-
62
- // A path to a module which exports an async function that is triggered once before all test suites
63
- // globalSetup: undefined,
64
-
65
- // A path to a module which exports an async function that is triggered once after all test suites
66
- // globalTeardown: undefined,
67
-
68
- // A set of global variables that need to be available in all test environments
69
- // globals: {},
70
-
71
- // The maximum amount of workers used to run your tests. Can be specified as % or a number. E.g. maxWorkers: 10% will use 10% of your CPU amount + 1 as the maximum worker number. maxWorkers: 2 will use a maximum of 2 workers.
72
- // maxWorkers: "50%",
73
-
74
- // An array of directory names to be searched recursively up from the requiring module's location
75
- // moduleDirectories: [
76
- // "node_modules"
77
- // ],
78
-
79
- // An array of file extensions your modules use
80
- // moduleFileExtensions: [
81
- // "js",
82
- // "mjs",
83
- // "cjs",
84
- // "jsx",
85
- // "ts",
86
- // "tsx",
87
- // "json",
88
- // "node"
89
- // ],
90
-
91
- // A map from regular expressions to module names or to arrays of module names that allow to stub out resources with a single module
92
- // moduleNameMapper: {},
93
-
94
- // An array of regexp pattern strings, matched against all module paths before considered 'visible' to the module loader
95
- // modulePathIgnorePatterns: [],
96
-
97
- // Activates notifications for test results
98
- // notify: false,
99
-
100
- // An enum that specifies notification mode. Requires { notify: true }
101
- // notifyMode: "failure-change",
102
-
103
- // A preset that is used as a base for Jest's configuration
104
- // preset: undefined,
105
-
106
- // Run tests from one or more projects
107
- // projects: undefined,
108
-
109
- // Use this configuration option to add custom reporters to Jest
110
- // reporters: undefined,
111
-
112
- // Automatically reset mock state before every test
113
- // resetMocks: false,
114
-
115
- // Reset the module registry before running each individual test
116
- // resetModules: false,
117
-
118
- // A path to a custom resolver
119
- // resolver: undefined,
120
-
121
- // Automatically restore mock state and implementation before every test
122
- // restoreMocks: false,
123
-
124
- // The root directory that Jest should scan for tests and modules within
125
- // rootDir: undefined,
126
-
127
- // A list of paths to directories that Jest should use to search for files in
128
- // roots: [
129
- // "<rootDir>"
130
- // ],
131
-
132
- // Allows you to use a custom runner instead of Jest's default test runner
133
- // runner: "jest-runner",
134
-
135
- // The paths to modules that run some code to configure or set up the testing environment before each test
136
- // setupFiles: [],
137
-
138
- // A list of paths to modules that run some code to configure or set up the testing framework before each test
139
- // setupFilesAfterEnv: [],
140
-
141
- // The number of seconds after which a test is considered as slow and reported as such in the results.
142
- // slowTestThreshold: 5,
143
-
144
- // A list of paths to snapshot serializer modules Jest should use for snapshot testing
145
- // snapshotSerializers: [],
146
-
147
- // The test environment that will be used for testing
148
- // testEnvironment: "jest-environment-node",
149
-
150
- // Options that will be passed to the testEnvironment
151
- // testEnvironmentOptions: {},
152
-
153
- // Adds a location field to test results
154
- // testLocationInResults: false,
155
-
156
- // The glob patterns Jest uses to detect test files
157
- // testMatch: [
158
- // "**/__tests__/**/*.[jt]s?(x)",
159
- // "**/?(*.)+(spec|test).[tj]s?(x)"
160
- // ],
161
-
162
- // An array of regexp pattern strings that are matched against all test paths, matched tests are skipped
163
- // testPathIgnorePatterns: [
164
- // "/node_modules/"
165
- // ],
166
-
167
- // The regexp pattern or array of patterns that Jest uses to detect test files
168
- // testRegex: [],
169
-
170
- // This option allows the use of a custom results processor
171
- // testResultsProcessor: undefined,
172
-
173
- // This option allows use of a custom test runner
174
- // testRunner: "jest-circus/runner",
175
-
176
- // A map from regular expressions to paths to transformers
177
- // transform: undefined,
178
-
179
- // An array of regexp pattern strings that are matched against all source file paths, matched files will skip transformation
180
- // transformIgnorePatterns: [
181
- // "/node_modules/",
182
- // "\\.pnp\\.[^\\/]+$"
183
- // ],
184
-
185
- // An array of regexp pattern strings that are matched against all modules before the module loader will automatically return a mock for them
186
- // unmockedModulePathPatterns: undefined,
187
-
188
- // Indicates whether each individual test should be reported during the run
189
- // verbose: undefined,
190
-
191
- // An array of regexp patterns that are matched against all source file paths before re-running tests in watch mode
192
- // watchPathIgnorePatterns: [],
193
-
194
- // Whether to use watchman for file crawling
195
- // watchman: true,
196
- };
197
-
198
- module.exports = config;
@@ -1,5 +0,0 @@
1
-
2
- export abstract class AbstractExtractor<V, R> {
3
- constructor(readonly name:string) { }
4
- abstract extract(value: V): Promise<R>;
5
- }
@@ -1,12 +0,0 @@
1
- import { isError, type Page } from '../page/Page';
2
- import { JSDOM } from 'jsdom';
3
- import { AbstractExtractor } from './AbstractExtractor';
4
-
5
- export class PageExtractor extends AbstractExtractor<JSDOM, Page> {
6
- constructor() { super("page-extractor"); }
7
-
8
- async extract(value: JSDOM): Promise<Page> {
9
- const { window: { document: { title, location: { href: url } } } } = value
10
- return { title, url }
11
- }
12
- }
@@ -1,25 +0,0 @@
1
- import type { JSDOM } from "jsdom";
2
- import { findResourceLink, findResourceText, type ExternalResource, type Resource, type Tag } from "../resource";
3
- import { AbstractExtractor } from './AbstractExtractor';
4
-
5
- export class ResourceExtractor extends AbstractExtractor<JSDOM, ExternalResource[]> {
6
- constructor(private readonly tags: Tag[]) {
7
- super("page-extractor");
8
- }
9
- async extract(value: JSDOM): Promise<ExternalResource[]> {
10
- const { document } = value.window;
11
- const externalResources: ExternalResource[] = [];
12
- for (const tag of this.tags) {
13
- const selector = document.querySelectorAll<Resource>(tag)
14
- const elements = Array.from(selector)
15
- for (const element of elements) {
16
- const text = findResourceText(element);
17
- const link = findResourceLink(element);
18
- if(!text || !link) continue
19
- if (!link.url.startsWith("http")) continue
20
- externalResources.push({ text, link })
21
- }
22
- }
23
- return externalResources;
24
- }
25
- }
@@ -1,14 +0,0 @@
1
- import { JSDOM } from 'jsdom';
2
- import type { Resource, Tag } from '../resource';
3
- import { AbstractExtractor } from './AbstractExtractor';
4
-
5
- export class TagExtractor<T extends Tag> extends AbstractExtractor<JSDOM, Resource[]> {
6
- extract(value: JSDOM): Promise<Resource[]> {
7
- const linkNodes = value.window.document.querySelectorAll<Resource>(this.tagName);
8
- return Promise.resolve(Array.from(linkNodes));
9
- }
10
- constructor(private readonly tagName: T) {
11
- super(`extract <${tagName}>`)
12
- };
13
-
14
- }
package/src/main.ts DELETED
@@ -1,43 +0,0 @@
1
- #!/usr/bin/env node
2
- import { Command, createArgument } from "commander";
3
-
4
- import { description, name, version } from '../package.json';
5
- import { PageExtractor } from "./extractors/PageExtractor";
6
- import { ResourceExtractor } from "./extractors/ResourceExtractor";
7
- import { PageFetcher } from "./page/PageFetcher";
8
- import type { Page, PageMetadata } from "./page/Page";
9
- import { JSONStylePrinter } from "./printers/JSONStylePrinter";
10
- import { LogStylePrinter } from "./printers/LogStylePrinter";
11
-
12
- const program = new Command();
13
-
14
- const url = createArgument("<url | file...>", "remote https://URL or local file://resource.html to extract from");
15
-
16
- (async () => {
17
- await program
18
- .name(name)
19
- .version(version, "-v, --version")
20
- .description(description)
21
- .addArgument(url)
22
- .action(async (urls: string[]) => {
23
- const printer = new JSONStylePrinter();
24
- // simple log style printer
25
- // const printer = new LogStylePrinter();
26
-
27
- const pageFetcher = new PageFetcher()
28
- const pageExtractor = new PageExtractor()
29
- const resourceExtractor = new ResourceExtractor(["a", "meta", "link", "embed"])
30
-
31
- const pageResponses = await pageFetcher.fetchAll(urls);
32
- const pageMetadatas: PageMetadata[] = [];
33
-
34
- for (const { content, url, error } of pageResponses) {
35
- const resources = error in (content) ? [] : await resourceExtractor.extract(content);
36
- const descriptor = error in content ? { url, error } : await pageExtractor.extract(content);
37
- pageMetadatas.push({ ...descriptor, resources });
38
- }
39
-
40
- await printer.print(...pageMetadatas);
41
- })
42
- .parseAsync(process.argv);
43
- })();
package/src/page/Page.ts DELETED
@@ -1,19 +0,0 @@
1
- import type { ExternalResource } from "../resource";
2
-
3
- type hasTitle = {
4
- title: string;
5
- };
6
-
7
- type hasUrl = {
8
- url: string;
9
- };
10
-
11
- type hasResources = {
12
- resources: ExternalResource[];
13
- };
14
-
15
- export type Page = hasTitle & hasUrl
16
- export type PageMetadata = (Page & hasResources) | { error: string }
17
- export const isError = (page: PageMetadata): page is { error: string } => 'error' in page;
18
- export const isPage = (page: any): page is Page =>
19
- "resources" in page && Array.isArray(page.resources);