@xapp/arachne-cli 0.9.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/analyze.d.ts +11 -0
- package/lib/analyze.js +81 -0
- package/lib/analyze.js.map +1 -0
- package/lib/crawl.d.ts +30 -0
- package/lib/crawl.js +196 -0
- package/lib/crawl.js.map +1 -0
- package/lib/index.js +32 -227
- package/lib/index.js.map +1 -1
- package/lib/utils/index.d.ts +2 -0
- package/lib/utils/index.js +5 -0
- package/lib/utils/index.js.map +1 -0
- package/package.json +13 -8
package/lib/analyze.d.ts
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/*! Copyright (c) 2023, XAPP AI */
|
|
2
|
+
import "isomorphic-fetch";
|
|
3
|
+
/**
 * Parsed command line arguments accepted by the `analyze` command.
 *
 * Every flag appears twice: once under its long name and once under its
 * single-letter alias.
 */
export interface AnalyzeArgs {
    /** The URL of the website to analyze. */
    url: string;
    /** Short alias for {@link AnalyzeArgs.url}. */
    u?: string;

    /** Headless flag. */
    headless?: boolean;
    /** Short alias for {@link AnalyzeArgs.headless}. */
    h?: boolean;

    /** Verbose flag. */
    verbose?: boolean;
    /** Short alias for {@link AnalyzeArgs.verbose}. */
    v?: boolean;
}
|
|
11
|
+
/**
 * Runs the `analyze` command for the website given in `argv.url`.
 *
 * @param argv - Parsed command line arguments for the command.
 * @returns A promise that settles once the command has finished.
 */
export declare function analyze(argv: AnalyzeArgs): Promise<void>;
|
package/lib/analyze.js
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/*! Copyright (c) 2023, XAPP AI */
|
|
2
|
+
/* eslint-disable no-console */
|
|
3
|
+
import "isomorphic-fetch";
|
|
4
|
+
import { ArachneWebService, hexToRGB } from "@xapp/arachne-web-service";
|
|
5
|
+
import terminalImage from "terminal-image";
|
|
6
|
+
import terminalLink from 'terminal-link';
|
|
7
|
+
/**
 * Analyzes a website and prints a human readable summary to the console.
 *
 * The site is loaded through `ArachneWebService.getWebsiteData`; afterwards the
 * business details, the site colors (each with a small color swatch), the logo
 * and the detected technologies are logged.
 *
 * `OPENAI_API_KEY` is read from the environment and forwarded to the service.
 * When it is missing a notice is logged and the analysis continues without it.
 *
 * @param argv - Parsed CLI arguments. `url` is the site to analyze; a truthy
 *   `v` turns on verbose output and sets `STENTOR_LOG_LEVEL` to "debug".
 * @returns A promise that resolves once the summary has been printed.
 */
export async function analyze(argv) {
    const pageWait = 8000;
    const url = argv.url;
    console.log(`Analyzing ${url}`);
    console.log(`This will take approximately ${pageWait} ms`);

    const openAIKey = process.env.OPENAI_API_KEY;
    if (!openAIKey) {
        console.log(`No OPENAI_API_KEY environment variable found, limiting what we can do.`);
    }

    const verbose = argv.v;
    if (verbose) {
        process.env.STENTOR_LOG_LEVEL = "debug";
        console.log(`VERBOSE ON!`);
    }

    const service = new ArachneWebService();
    const data = await service.getWebsiteData(url, {
        screenshotsPath: "./temp/",
        markdownPath: "./temp/",
        pageWait,
        pageHandlerTimeout: 120000,
        openAIKey,
        // NOTE(review): argv.h / argv.headless is not consulted here, headless is
        // always passed as false - confirm this is intended.
        headless: false
    });

    if (verbose) {
        console.log(JSON.stringify(data, undefined, 2));
    }

    const business = data.business;
    console.log(`Business:`);
    console.log(`\tname: ${business.name}`);
    console.log(`\tcategory: ${business.category}`);
    console.log(``);

    const site = data.site;

    // One-character swatch: 24-bit ANSI background color escape, a space, then a reset.
    // Assumes hexToRGB returns the color in the form the escape sequence expects - TODO confirm.
    const swatch = (hex) => '\x1b[48;2;' + hexToRGB(hex) + 'm \x1b[0m';

    const primaryPreview = swatch(site.primaryColor);
    const secondaryPreview = swatch(site.secondaryColor);
    const vibrantPreview = swatch(site.vibrantColor);
    // Previously built from site.vibrantColor, which rendered the wrong swatch.
    const darkVibrantPreview = swatch(site.darkVibrantColor);

    console.log('Site:');
    console.log(`\tdescription: ${site.description}`);
    // Previously printed site.darkVibrantColor next to the vibrantColor label.
    console.log(`\tvibrantColor: ${site.vibrantColor} ${vibrantPreview}`);
    console.log(`\tdarkVibrantColor: ${site.darkVibrantColor} ${darkVibrantPreview}`);
    console.log(`\tprimaryColor: ${site.primaryColor} ${primaryPreview}`);
    console.log(`\tsecondaryColor: ${site.secondaryColor} ${secondaryPreview}`);

    if (site.logo) {
        const link = terminalLink('logo', site.logo, { fallback: (text, url) => `${text} (${url})` });
        console.log(`\t${link}: ${site.logo}`);
        try {
            const response = await fetch(site.logo);
            // Ensure the request was successful
            if (!response.ok) {
                throw new Error(`Failed to fetch image: ${response.statusText}`);
            }
            const arrayBuffer = await response.arrayBuffer();
            const imageBuffer = Buffer.from(arrayBuffer);
            const image = await terminalImage.buffer(imageBuffer, { preserveAspectRatio: true, width: 20, height: 20 });
            console.log(image);
        }
        catch (e) {
            // Showing the logo is best effort; anything thrown (not only Error instances) is reported.
            const reason = e instanceof Error ? e.message : String(e);
            console.log(`Unable to load image ${site.logo} ${reason}`);
        }
    }
    console.log(``);

    console.log(`Technologies:`);
    const installed = data.detectedTechnologies.filter((technology) => technology.installed);
    if (installed.length === 0) {
        console.log(`\tnone detected`);
    }
    else {
        // join() avoids the trailing ", " the previous reduce() left behind.
        const installedList = installed.map((current) => current.technology).join(", ");
        console.log(`\t${installedList}`);
    }
}
|
|
81
|
+
//# sourceMappingURL=analyze.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"analyze.js","sourceRoot":"","sources":["../src/analyze.ts"],"names":[],"mappings":"AAAA,kCAAkC;AAClC,+BAA+B;AAC/B,OAAO,kBAAkB,CAAC;AAC1B,OAAO,EAAE,iBAAiB,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AACxE,OAAO,aAAa,MAAM,gBAAgB,CAAC;AAC3C,OAAO,YAAY,MAAM,eAAe,CAAC;AAazC,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,IAAiB;IAG3C,MAAM,QAAQ,GAAG,IAAI,CAAC;IAEtB,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC;IACrB,OAAO,CAAC,GAAG,CAAC,aAAa,GAAG,EAAE,CAAC,CAAC;IAChC,OAAO,CAAC,GAAG,CAAC,gCAAgC,QAAQ,KAAK,CAAC,CAAC;IAE3D,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;IAE7C,IAAI,CAAC,SAAS,EAAE;QACZ,OAAO,CAAC,GAAG,CAAC,wEAAwE,CAAC,CAAA;KACxF;IAED,MAAM,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC;IACvB,IAAI,OAAO,EAAE;QACT,OAAO,CAAC,GAAG,CAAC,iBAAiB,GAAG,OAAO,CAAC;QACxC,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;KAC9B;IAED,MAAM,OAAO,GAAG,IAAI,iBAAiB,EAAE,CAAC;IAExC,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,GAAG,EAAE;QAC3C,eAAe,EAAE,SAAS;QAC1B,YAAY,EAAE,SAAS;QACvB,QAAQ;QACR,kBAAkB,EAAE,MAAM;QAC1B,SAAS;QACT,QAAQ,EAAE,KAAK;KAClB,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC;IAE/B,IAAI,OAAO,EAAE;QACT,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC;KACnD;IAED,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;IACzB,OAAO,CAAC,GAAG,CAAC,WAAW,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;IACxC,OAAO,CAAC,GAAG,CAAC,eAAe,QAAQ,CAAC,QAAQ,EAAE,CAAC,CAAC;IAChD,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAEhB,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;IAEvB,cAAc;IACd,MAAM,cAAc,GAAG,YAAY,GAAG,QAAQ,CAAC,IAAI,CAAC,YAAY,CAAC,GAAG,gBAAgB,CAAC;IACrF,MAAM,gBAAgB,GAAG,YAAY,GAAG,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC,GAAG,gBAAgB,CAAC;IACzF,MAAM,cAAc,GAAG,YAAY,GAAG,QAAQ,CAAC,IAAI,CAAC,YAAY,CAAC,GAAG,gBAAgB,CAAA;IACpF,MAAM,kBAAkB,GAAG,YAAY,GAAG,QAAQ,CAAC,IAAI,CAAC,YAAY,CAAC,GAAG,gBAAgB,CAAA;IAExF,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IACrB,OAAO,CAAC,GAAG,CAAC,kBAAkB,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;IAClD,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,CAAC,gBAAgB,IAAI,cAAc,EAAE,CAAC,CAAC;IAC1E,OAAO,CAAC,GAAG,CAAC,uBAAuB,IAAI,CAAC,gBAAgB,IAAI,kBAAkB,EAAE,CAAC,CAAC;IAClF,OAAO,CAAC,GAAG,CAAC,mBAAmB,
IAAI,CAAC,YAAY,IAAI,cAAc,EAAE,CAAC,CAAC;IACtE,OAAO,CAAC,GAAG,CAAC,qBAAqB,IAAI,CAAC,cAAc,IAAI,gBAAgB,EAAE,CAAC,CAAC;IAE5E,IAAI,IAAI,CAAC,IAAI,EAAE;QACX,MAAM,IAAI,GAAG,YAAY,CAAC,MAAM,EAAE,IAAI,CAAC,IAAI,EAAE,EAAE,QAAQ,EAAE,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,IAAI,KAAK,GAAG,GAAG,EAAE,CAAC,CAAC;QAC9F,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,KAAK,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;QACvC,IAAI;YACA,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAExC,oCAAoC;YACpC,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE;gBACd,MAAM,IAAI,KAAK,CAAC,0BAA0B,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;aACpE;YAED,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;YACjD,MAAM,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAC7C,MAAM,KAAK,GAAG,MAAM,aAAa,CAAC,MAAM,CAAC,WAAW,EAAE,EAAE,mBAAmB,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,CAAC;YAE5G,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;SACtB;QAAC,OAAO,CAAM,EAAE;YACb,OAAO,CAAC,GAAG,CAAC,wBAAwB,IAAI,CAAC,IAAI,IAAI,CAAC,CAAC,OAAO,EAAE,CAAC,CAAA;SAChE;KACJ;IACD,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAEhB,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;IAC7B,MAAM,SAAS,GAAG,IAAI,CAAC,oBAAoB,CAAC,MAAM,CAAC,CAAC,UAAU,EAAE,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;IAEzF,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE;QACxB,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAC;KAClC;SAAM;QACH,MAAM,aAAa,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE;YACzD,OAAO,GAAG,QAAQ,GAAG,OAAO,CAAC,UAAU,IAAI,CAAC;QAChD,CAAC,EAAE,EAAE,CAAC,CAAC;QACP,OAAO,CAAC,GAAG,CAAC,KAAK,aAAa,EAAE,CAAC,CAAC;KACrC;AACL,CAAC"}
|
package/lib/crawl.d.ts
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
 * Parsed command line arguments accepted by the `crawl` command.
 *
 * Each option is available under its single-letter flag and under one or more
 * long aliases.
 */
export interface CrawlArgs {
    /** The URL to crawl. */
    url: string;
    /** Short alias for {@link CrawlArgs.url}. */
    u?: string;

    /** Generate FAQ files from the question/answer data found while crawling. */
    a?: boolean;
    /** Long alias for {@link CrawlArgs.a}. */
    faq?: boolean;

    /** The directory to save the HTML source to. */
    d?: string;
    /** Long alias for {@link CrawlArgs.d}. */
    dir?: string;

    /** Executable path of Chrome. */
    e?: string;
    /** Long alias for {@link CrawlArgs.e}. */
    executablePath?: string;

    /** Headless flag. */
    h?: boolean;
    /** Long alias for {@link CrawlArgs.h}. */
    headless?: boolean;

    /** The max number of total crawls. */
    m?: number;
    /** Long alias for {@link CrawlArgs.m}. */
    max?: number;

    /** The timeout for each individual page load. */
    o?: number;
    /** Long alias for {@link CrawlArgs.o}. */
    timeout?: number;

    /** Settle time, in milliseconds, to wait for the page to finish loading. */
    b?: number;
    /** Long alias for {@link CrawlArgs.b}. */
    break?: number;
    /** Long alias for {@link CrawlArgs.b}. */
    settle?: number;

    /** The max number of pages (tabs) running simultaneously. */
    p?: number;
    /** Long alias for {@link CrawlArgs.p}. */
    pages?: number;

    /** Turns on stealth mode. */
    s?: boolean;
    /** Long alias for {@link CrawlArgs.s}. */
    stealth?: boolean;

    /** Output conversion, 'text' or 'markdown'; requires a directory as well. */
    t?: string;
    /** Long alias for {@link CrawlArgs.t}. */
    text?: string;

    /** Path to a text file where each line is a whitelisted pattern. */
    w?: string;
    /** Long alias for {@link CrawlArgs.w}. */
    whitelist?: string;

    /** Generate a JSON metadata file next to each saved page. */
    x?: boolean;
    /** Long alias for {@link CrawlArgs.x}. */
    metadata?: boolean;
}
|
|
30
|
+
/**
 * Runs the `crawl` command starting at the URL given in `argv.url`.
 *
 * @param argv - Parsed command line arguments for the command.
 * @returns A promise that settles once the command has finished.
 */
export declare function crawl(argv: CrawlArgs): Promise<void>;
|
package/lib/crawl.js
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
/*! Copyright (c) 2023, XAPP AI */
|
|
2
|
+
/* eslint-disable no-console */
|
|
3
|
+
import { Arachne, ArachneURLPattern, autoScroll, enqueueLinks, MemoryRequestQueue, } from "@xapp/arachne";
|
|
4
|
+
import { ArachneJSONLD, ArachneHTMLtoText, isAnswer } from "@xapp/arachne-parsers";
|
|
5
|
+
import { URL } from "url";
|
|
6
|
+
import { existsSync, mkdirSync, writeFileSync, readFileSync } from "fs";
|
|
7
|
+
import { resolve } from "path";
|
|
8
|
+
/**
 * Escapes a value for use inside a double-quoted CSV field by doubling every
 * embedded double quote.
 */
function escapeCSVField(value) {
    return String(value).replace(/"/g, '""');
}

/**
 * Crawls a website starting at `argv.url`.
 *
 * Optionally saves each page's rendered HTML (plus Kendra style metadata and a
 * text/markdown conversion) below the directory given with `-d`, collects the
 * question/answer pairs found in the pages' JSON-LD and, when `--faq` is set,
 * writes them out as `question-answers.json` and `question-answers.csv`.
 *
 * @param argv - Parsed CLI arguments, see the option list of the `crawl` command.
 * @returns A promise that resolves once the crawl has finished and all files are written.
 * @throws Error when the save directory does not exist, or when the settle
 *   time (`-b`) is greater than the page handler timeout (`-o`).
 */
export async function crawl(argv) {
    const inputURL = argv.url;
    const url = new URL(inputURL);
    const cleanHost = url.host.replace(/\./g, "_");
    const cleanPath = url.pathname.replace(/\//g, "_");

    let patterns;
    const whiteListPath = argv.w;
    if (whiteListPath) {
        console.log(whiteListPath);
        const resolvedPath = resolve(whiteListPath);
        const contents = readFileSync(resolvedPath, "utf-8");
        // Accept both LF and CRLF files and drop blank lines so that neither a
        // trailing "\r" nor an empty string ends up as a crawl pattern.
        patterns = contents
            .split(/\r?\n/)
            .map((line) => line.trim())
            .filter((line) => line.length > 0);
    }
    else {
        patterns = inputURL.endsWith("/") ? [`${inputURL}[.*]`] : [`${inputURL}/[.*]`];
    }

    // NOTE(review): when -d is given more than once the entry at index 1 is used - confirm this is intended.
    const directory = Array.isArray(argv.d) ? argv.d[1] : argv.d;
    const path = directory ? resolve(directory) : undefined;
    if (path) {
        // Make sure it exists
        if (!existsSync(path)) {
            throw new Error(`Path ${directory} does not exist. Please provide an existing path to save the HTML`);
        }
    }

    if (typeof argv.b === "number") {
        // make sure it is less than the timeout
        const timeout = argv.o || 4000;
        if (argv.b > timeout) {
            throw new Error(`Settling time is greater than page handler timeout, please adjust your inputs. -o ${argv.b + 4000} `);
        }
    }

    let saveDir;
    if (path) {
        // The timestamp suffix gives every run its own export directory.
        const exportDirName = `${cleanHost}${cleanPath}-${new Date().getTime()}`;
        saveDir = resolve(path, exportDirName);
        mkdirSync(saveDir);
    }

    const queue = new MemoryRequestQueue();
    queue.addRequest({ url: inputURL });
    const crawlPatterns = patterns.map((pattern) => new ArachneURLPattern(pattern));

    // Keyed by the cleaned answer text so identical answers share one entry.
    const answers = new Map();

    console.log(`Starting crawl at ${inputURL}`);
    const crawler = Arachne.crawler({
        maxConcurrentPages: argv.p,
        maxCrawlAttempts: argv.m,
        pageHandlerTimeout: argv.o,
        stealth: argv.s,
        launchOptions: { headless: argv.h, timeout: 5000, executablePath: argv.e },
        queue,
        pageHandler: async (page, request, response) => {
            const title = await page.title();
            console.log(`Loaded ${request.url} : ${title}`);

            if (typeof argv.b === "number") {
                const pageWaitTime = argv.b;
                console.log(`Waiting for ${pageWaitTime} ms for page to completely load....`);
                await autoScroll(page, pageWaitTime);
                console.log('...and done waiting.');
            }

            // Get the raw HTML
            // It is important to use page.content() versus response.text() since
            // page.content() gets you the HTML after it has been modified by the JavaScript
            // on the page while response.text() only gives you the HTML from the original GET
            const text = await page.content();

            // Pull out the raw html
            if (saveDir) {
                const requestURL = new URL(request.url);
                // Named differently from the outer cleanHost to avoid shadowing it.
                const cleanRequestHost = requestURL.host.replace(/(\.|\/)/g, "_").replace(/_+$/g, "").replace(/^_+/g, "");
                const cleanRequestPath = requestURL.pathname.replace(/\//g, "_").replace(".html", "").replace(/_+$/g, "").replace(/^_+/g, "");
                const cleanFileName = `${cleanRequestHost}_${cleanRequestPath}`.replace(/_+$/g, "");
                const htmlName = `${cleanFileName}.html`;
                console.log(`File Name: ${htmlName}`);
                const filePath = resolve(saveDir, htmlName);
                console.log(`Saving to ${filePath}`);
                writeFileSync(filePath, text);

                if (argv.x) {
                    const date = new Date().toISOString();
                    // Doc title
                    const metadata = {
                        Attributes: {
                            _created_at: date,
                            _source_uri: request.url,
                            _last_updated_at: date
                        },
                        Title: title,
                        ContentType: "HTML"
                    };
                    const metadataFileName = `${htmlName}.metadata.json`;
                    const metadataFilePath = resolve(saveDir, metadataFileName);
                    console.log(`Saving metadata to ${metadataFilePath}`);
                    const metadataText = JSON.stringify(metadata, undefined, 2);
                    writeFileSync(metadataFilePath, metadataText);
                }

                if (typeof argv.t === "string") {
                    // Any value other than "markdown" produces plain text.
                    const toMD = argv.t === "markdown";
                    // Write some MARKDOWN!
                    const convertedHTML = new ArachneHTMLtoText().convert(text, toMD);
                    const extension = toMD ? `md` : `txt`;
                    const markdownName = `${cleanFileName}.${extension}`;
                    const markdownFilePath = resolve(saveDir, markdownName);
                    console.log(`Saving markdown to ${markdownFilePath}`);
                    writeFileSync(markdownFilePath, convertedHTML);
                }
            }

            // Let's see if we can find some structured data.
            const questions = new ArachneJSONLD(text).getQuestions();
            if (questions.length > 0) {
                console.log(`${questions.length} questions found on page.`);
                // Clean the answer
                questions.forEach((question) => {
                    // name has the question text
                    // acceptedAnswer has the answer text
                    const questionText = typeof question.name === "string" ? question.name : "";
                    const acceptedAnswer = question.acceptedAnswer;
                    if (acceptedAnswer && isAnswer(acceptedAnswer)) {
                        const answerText = typeof acceptedAnswer.text === "string" ? acceptedAnswer.text : "";
                        if (answerText.length > 0) {
                            // Clean it
                            const cleanAnswer = new ArachneHTMLtoText().convert(answerText);
                            let existingAnswer = answers.get(cleanAnswer);
                            if (existingAnswer) {
                                existingAnswer.questions.push(questionText);
                            }
                            else {
                                existingAnswer = {
                                    answer: cleanAnswer,
                                    questions: [questionText],
                                    url: request.url
                                };
                            }
                            // Add it back!
                            answers.set(cleanAnswer, existingAnswer);
                        }
                    }
                });
            }

            await enqueueLinks(page, queue, crawlPatterns);
            const info = await queue.getInfo();
            console.log(`Completed ${info.completeRequests} of ${info.totalRequests}, ${info.inProgressRequests} are in progress with ${info.pendingRequests} pending and ${info.failedRequests} failed.`);
        }
    });

    try {
        await crawler.run();
    }
    catch (e) {
        console.log('Unable to start the crawler.');
        console.error(e);
    }

    console.log('Crawler complete.');
    const info = await queue.getInfo();
    console.log(`Final count, completed ${info.completeRequests} of ${info.totalRequests}, ${info.failedRequests} failed.`);

    const failed = queue.getFailed();
    if (failed.length > 0) {
        console.log('Failures....');
        failed.forEach((failedRequest) => {
            console.log(`\t${failedRequest.url} failed after ${failedRequest.requestAttempts} attempts with final error: ${failedRequest.errorMessages ? failedRequest.errorMessages[failedRequest.errorMessages.length - 1] : "unknown"}`);
        });
    }

    console.log(`Found ${answers.size} answers.`);
    if (argv.faq && saveDir) {
        // This follows the JSON format https://docs.aws.amazon.com/kendra/latest/dg/in-creating-faq.html#faq-custom-json
        const qaJSON = {
            "SchemaVersion": 1,
            "FaqDocuments": []
        };
        let qaCSV = `"Question","Answer","URL"`;
        answers.forEach((answer) => {
            // For the questions on the answer, add a line
            answer.questions.forEach((question) => {
                qaJSON.FaqDocuments.push({
                    Question: question,
                    Answer: answer.answer,
                    Attributes: {
                        _source_uri: answer.url
                    }
                });
                // Embedded double quotes must be doubled, otherwise they end the
                // quoted field early and corrupt the row.
                qaCSV += `\r\n"${escapeCSVField(question)}","${escapeCSVField(answer.answer)}","${escapeCSVField(answer.url)}"`;
            });
        });

        const questionAnswerName = `question-answers.json`;
        const qaFilePath = resolve(saveDir, questionAnswerName);
        console.log(`Saving FAQ JSON to ${qaFilePath}`);
        writeFileSync(qaFilePath, JSON.stringify(qaJSON, undefined, 2));

        const csvQuestionName = `question-answers.csv`;
        const csvQaFilePath = resolve(saveDir, csvQuestionName);
        console.log(`Saving FAQ CSV to ${csvQaFilePath}`);
        writeFileSync(csvQaFilePath, qaCSV);
    }
}
|
|
196
|
+
//# sourceMappingURL=crawl.js.map
|
package/lib/crawl.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"crawl.js","sourceRoot":"","sources":["../src/crawl.ts"],"names":[],"mappings":"AAAA,kCAAkC;AAClC,+BAA+B;AAC/B,OAAO,EACH,OAAO,EAIP,iBAAiB,EACjB,UAAU,EACV,YAAY,EACZ,kBAAkB,GACrB,MAAM,eAAe,CAAC;AAEvB,OAAO,EACH,aAAa,EACb,iBAAiB,EACjB,QAAQ,EACX,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAAE,GAAG,EAAE,MAAM,KAAK,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AACxE,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAqE/B,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,IAAe;IAEvC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAa,CAAC;IACpC,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;IAC9B,MAAM,SAAS,GAAG,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IAC/C,MAAM,SAAS,GAAG,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IAEnD,IAAI,QAAkB,CAAC;IAEvB,MAAM,aAAa,GAAuB,IAAI,CAAC,CAAC,CAAC;IACjD,IAAI,aAAa,EAAE;QACf,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;QAE3B,MAAM,YAAY,GAAG,OAAO,CAAC,aAAa,CAAC,CAAC;QAC5C,MAAM,QAAQ,GAAG,YAAY,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;QACrD,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;KAEnC;SAAM;QACH,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,OAAO,CAAC,CAAC;KAClF;IAED,MAAM,SAAS,GAAuB,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IAEjF,MAAM,IAAI,GAAuB,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAE5E,IAAI,IAAI,EAAE;QACN,sBAAsB;QACtB,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE;YACnB,MAAM,IAAI,KAAK,CAAC,QAAQ,SAAS,oEAAoE,CAAC,CAAC;SAC1G;KACJ;IAED,IAAI,OAAO,IAAI,CAAC,CAAC,KAAK,QAAQ,EAAE;QAC5B,wCAAwC;QACxC,MAAM,OAAO,GAAG,IAAI,CAAC,CAAC,IAAI,IAAI,CAAC;QAC/B,IAAI,IAAI,CAAC,CAAC,GAAG,OAAO,EAAE;YAClB,MAAM,IAAI,KAAK,CAAC,qFAAqF,IAAI,CAAC,CAAC,GAAG,IAAI,IAAI,CAAC,CAAA;SAC1H;KACJ;IAED,IAAI,OAA2B,CAAC;IAEhC,IAAI,IAAI,EAAE;QACN,MAAM,aAAa,GAAG,GAAG,SAAS,GAAG,SAAS,IAAI,IAAI,IAAI,EAAE,CAAC,OAAO,EAAE,EAAE,CAAC;QACzE,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;QACvC,SAAS,CAAC,OAAO,CAAC,CAAC;KACtB;IAGD,MAAM,KAAK,GAAG,IAAI,kBAAkB,EAAE,CAAC;I
ACvC,KAAK,CAAC,UAAU,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,CAAC;IAEpC,MAAM,aAAa,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,IAAI,iBAAiB,CAAC,OAAO,CAAC,CAAC,CAAC;IAQhF,MAAM,OAAO,GAAwB,IAAI,GAAG,EAAE,CAAC;IAE/C,OAAO,CAAC,GAAG,CAAC,qBAAqB,QAAQ,EAAE,CAAC,CAAC;IAE7C,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC;QAC5B,kBAAkB,EAAE,IAAI,CAAC,CAAC;QAC1B,gBAAgB,EAAE,IAAI,CAAC,CAAC;QACxB,kBAAkB,EAAE,IAAI,CAAC,CAAC;QAC1B,OAAO,EAAE,IAAI,CAAC,CAAC;QACf,aAAa,EAAE,EAAE,QAAQ,EAAE,IAAI,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,CAAC,CAAC,EAAE;QAC1E,KAAK;QACL,WAAW,EAAE,KAAK,EAAE,IAAiB,EAAE,OAAuB,EAAE,QAAyB,EAAE,EAAE;YAEzF,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;YACjC,OAAO,CAAC,GAAG,CAAC,UAAU,OAAO,CAAC,GAAG,MAAM,KAAK,EAAE,CAAC,CAAC;YAEhD,IAAI,OAAO,IAAI,CAAC,CAAC,KAAK,QAAQ,EAAE;gBAC5B,MAAM,YAAY,GAAG,IAAI,CAAC,CAAC,CAAC;gBAC5B,OAAO,CAAC,GAAG,CAAC,eAAe,YAAY,qCAAqC,CAAC,CAAC;gBAC9E,MAAM,UAAU,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;gBACrC,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAC;aACvC;YAED,mBAAmB;YACnB,qEAAqE;YACrE,gFAAgF;YAChF,kFAAkF;YAClF,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;YAElC,wBAAwB;YACxB,IAAI,OAAO,EAAE;gBACT,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;gBAExC,MAAM,SAAS,GAAG,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;gBACnG,MAAM,gBAAgB,GAAG,UAAU,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;gBAC9H,MAAM,aAAa,GAAG,GAAG,SAAS,IAAI,gBAAgB,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;gBAC7E,MAAM,QAAQ,GAAG,GAAG,aAAa,OAAO,CAAC;gBACzC,OAAO,CAAC,GAAG,CAAC,cAAc,QAAQ,EAAE,CAAC,CAAC;gBACtC,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;gBAE5C,OAAO,CAAC,GAAG,CAAC,aAAa,QAAQ,EAAE,CAAC,CAAC;gBACrC,aAAa,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;gBAE9B,IAAI,IAAI,CAAC,CAAC,EAAE;oBAER,MAAM,IAAI,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;oBACtC,YAAY;oBACZ,MAAM,QAAQ,GAAuB;wBACjC,UAAU,EAAE;4BACR,WAAW,EAAE,IAAI;4BACjB,WAAW,EA
AE,OAAO,CAAC,GAAG;4BACxB,gBAAgB,EAAE,IAAI;yBACzB;wBACD,KAAK,EAAE,KAAK;wBACZ,WAAW,EAAE,MAAM;qBACtB,CAAA;oBAED,MAAM,gBAAgB,GAAG,GAAG,QAAQ,gBAAgB,CAAC;oBACrD,MAAM,gBAAgB,GAAG,OAAO,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC;oBAE5D,OAAO,CAAC,GAAG,CAAC,sBAAsB,gBAAgB,EAAE,CAAC,CAAC;oBACtD,MAAM,YAAY,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC;oBAC5D,aAAa,CAAC,gBAAgB,EAAE,YAAY,CAAC,CAAC;iBACjD;gBAED,IAAI,OAAO,IAAI,CAAC,CAAC,KAAK,QAAQ,EAAE;oBAC5B,MAAM,IAAI,GAAY,IAAI,CAAC,CAAC,KAAK,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAA;oBAC1D,uBAAuB;oBACvB,MAAM,aAAa,GAAG,IAAI,iBAAiB,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;oBAClE,MAAM,SAAS,GAAW,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC;oBAC9C,MAAM,YAAY,GAAG,GAAG,aAAa,IAAI,SAAS,EAAE,CAAC;oBACrD,MAAM,gBAAgB,GAAG,OAAO,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;oBACxD,OAAO,CAAC,GAAG,CAAC,sBAAsB,gBAAgB,EAAE,CAAC,CAAC;oBACtD,aAAa,CAAC,gBAAgB,EAAE,aAAa,CAAC,CAAC;iBAClD;aACJ;YAED,iDAAiD;YAEjD,MAAM,SAAS,GAAG,IAAI,aAAa,CAAC,IAAI,CAAC,CAAC,YAAY,EAAE,CAAC;YACzD,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE;gBACtB,OAAO,CAAC,GAAG,CAAC,GAAG,SAAS,CAAC,MAAM,2BAA2B,CAAC,CAAC;gBAC5D,mBAAmB;gBACnB,SAAS,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,EAAE;oBAC3B,6BAA6B;oBAC7B,qCAAqC;oBACrC,MAAM,YAAY,GAAW,OAAO,QAAQ,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;oBACpF,MAAM,cAAc,GAAG,QAAQ,CAAC,cAAc,CAAC;oBAC/C,IAAI,cAAc,IAAI,QAAQ,CAAC,cAAc,CAAC,EAAE;wBAC5C,MAAM,UAAU,GAAW,OAAO,cAAc,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;wBAC9F,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE;4BACvB,WAAW;4BACX,MAAM,WAAW,GAAG,IAAI,iBAAiB,EAAE,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;4BAEhE,IAAI,cAAc,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;4BAC9C,IAAI,cAAc,EAAE;gCAChB,cAAc,CAAC,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;6BAC/C;iCAAM;gCACH,cAAc,GAAG;oCACb,MAAM,EAAE,WAAW;oCACnB,SAAS,EAAE,CAAC,YAAY,CAAC;oCACzB,GAAG,EAAE,OAAO,CAAC,GAAG;iCACnB,CAAA;6BACJ;4BACD,eAAe;4BACf,OAAO,CAAC,GAAG,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;yBAC5C;qBACJ;gBACL,CAAC,CAAC,CAAC;aACN;YACD,MAAM,YAAY,CAAC,
IAAI,EAAE,KAAK,EAAE,aAAa,CAAC,CAAC;YAC/C,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,OAAO,EAAE,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,aAAa,IAAI,CAAC,gBAAgB,OAAO,IAAI,CAAC,aAAa,KAAK,IAAI,CAAC,kBAAkB,yBAAyB,IAAI,CAAC,eAAe,gBAAgB,IAAI,CAAC,cAAc,UAAU,CAAC,CAAC;QACnM,CAAC;KACJ,CAAC,CAAC;IAEH,IAAI;QACA,MAAM,OAAO,CAAC,GAAG,EAAE,CAAC;KACvB;IAAC,OAAO,CAAC,EAAE;QACR,OAAO,CAAC,GAAG,CAAC,8BAA8B,CAAC,CAAC;QAC5C,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;KACpB;IAED,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACjC,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,OAAO,EAAE,CAAC;IACnC,OAAO,CAAC,GAAG,CAAC,0BAA0B,IAAI,CAAC,gBAAgB,OAAO,IAAI,CAAC,aAAa,KAAK,IAAI,CAAC,cAAc,UAAU,CAAC,CAAC;IAExH,MAAM,MAAM,GAAG,KAAK,CAAC,SAAS,EAAE,CAAC;IACjC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE;QACnB,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;QAC5B,MAAM,CAAC,OAAO,CAAC,CAAC,aAAa,EAAE,EAAE;YAC7B,OAAO,CAAC,GAAG,CAAC,KAAK,aAAa,CAAC,GAAG,iBAAiB,aAAa,CAAC,eAAe,+BAA+B,aAAa,CAAC,aAAa,CAAC,CAAC,CAAC,aAAa,CAAC,aAAa,CAAC,aAAa,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAA;QACnO,CAAC,CAAC,CAAA;KACL;IAED,OAAO,CAAC,GAAG,CAAC,SAAS,OAAO,CAAC,IAAI,WAAW,CAAC,CAAC;IAC9C,IAAI,IAAI,CAAC,GAAG,IAAI,OAAO,EAAE;QAErB,iHAAiH;QACjH,MAAM,MAAM,GAAkB;YAC1B,eAAe,EAAE,CAAC;YAClB,cAAc,EAAE,EACf;SACJ,CAAC;QAEF,IAAI,KAAK,GAAW,2BAA2B,CAAC;QAEhD,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,EAAE;YACvB,8CAA8C;YAC9C,MAAM,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,EAAE;gBAClC,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC;oBACrB,QAAQ,EAAE,QAAQ;oBAClB,MAAM,EAAE,MAAM,CAAC,MAAM;oBACrB,UAAU,EAAE;wBACR,WAAW,EAAE,MAAM,CAAC,GAAG;qBAC1B;iBACJ,CAAC,CAAC;gBAEH,KAAK,IAAI,QAAQ,QAAQ,MAAM,MAAM,CAAC,MAAM,MAAM,MAAM,CAAC,GAAG,GAAG,CAAC;YACpE,CAAC,CAAC,CAAC;QACP,CAAC,CAAC,CAAC;QAEH,MAAM,kBAAkB,GAAG,uBAAuB,CAAC;QACnD,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,EAAE,kBAAkB,CAAC,CAAC;QAExD,OAAO,CAAC,GAAG,CAAC,sBAAsB,UAAU,EAAE,CAAC,CAAC;QAChD,aAAa,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC;QAEhE,MAAM,eAAe,GAAG,sBAAsB,CAAC;QAC/C,MAAM,aAAa,GAAG,OAAO,CAAC,OAAO,EAAE,eAAe,CAAC,CAAC;QACxD,OAAO,CAAC,GAAG,CAAC,qBAAqB,aAAa,EAAE,CAAC
,CAAC;QAClD,aAAa,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC;KACvC;AACL,CAAC"}
|
package/lib/index.js
CHANGED
|
@@ -1,233 +1,38 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
"use strict";
|
|
3
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
4
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
5
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
6
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
7
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
8
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
9
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
10
|
-
});
|
|
11
|
-
};
|
|
12
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
13
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
14
|
-
};
|
|
15
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
2
|
/*! Copyright (c) 2020, XAPP AI */
|
|
17
3
|
/* eslint-disable no-console */
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
const
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
4
|
+
import yargs from 'yargs/yargs';
|
|
5
|
+
import { hideBin } from 'yargs/helpers';
|
|
6
|
+
import { crawl } from './crawl.js';
|
|
7
|
+
import { analyze } from './analyze.js';
|
|
8
|
+
// Shared yargs instance built from the process arguments; commands are registered on it below.
const yargsInstance = yargs(hideBin(process.argv));

// `crawl <url>`: registers the positional URL plus all crawl options and hands
// the parsed arguments to crawl().
yargsInstance.command(
    'crawl <url>',
    'Crawl the provided URL',
    (builder) => {
        const crawlOptions = {
            a: {
                type: 'boolean',
                alias: ["faq"],
                default: false,
                describe: "Generate a JSON file with FAQs, AWS Kendra format, found in JSON+LD while crawling."
            },
            d: {
                type: 'string',
                alias: ["dir"],
                describe: "The directory to save the HTML source to"
            },
            e: {
                type: 'string',
                alias: ["executablePath"],
                describe: "Executable path Chrome"
            },
            h: {
                type: 'boolean',
                alias: ["headless"],
                default: false
            },
            m: {
                type: 'number',
                alias: ["max"],
                default: 1000,
                describe: "The max number of total crawls."
            },
            o: {
                type: 'number',
                alias: ["timeout"],
                default: 4000,
                describe: "The timeout for each individual page load."
            },
            b: {
                type: 'number',
                alias: ["break", "settle"],
                describe: "Settle time, in MS, to wait for the page to finish loading.  By default it only waits for page load."
            },
            p: {
                type: 'number',
                alias: ["pages"],
                default: 10,
                describe: "The max number of pages (tabs) running simultaneously."
            },
            s: {
                type: 'boolean',
                alias: ["stealth"],
                describe: "Turns on stealth mode"
            },
            t: {
                type: 'string',
                alias: ["text"],
                default: undefined,
                describe: "Possible values are 'text' or 'markdown', requires a directory to be provided as well.  Will save the output to the directory."
            },
            w: {
                type: 'string',
                alias: ["whitelist"],
                describe: "A text file where each line is a whitelisted pattern"
            },
            x: {
                type: 'boolean',
                alias: ["metadata"],
                describe: "Generate a JSON metadata file for Amazon Kendra"
            }
        };
        builder.positional("url", {
            type: 'string',
            alias: ["u"],
            demandOption: true,
            describe: "The URL to crawl"
        });
        builder.options(crawlOptions);
    },
    async (argv) => {
        await crawl(argv);
    }
);
|
|
28
|
+
yargsInstance.command('analyze <url>', 'Performs an analysis on the provided URL', (yargs) => {
|
|
27
29
|
yargs.positional("url", { type: 'string', alias: ["u"], demandOption: true, describe: "The URL to crawl" });
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
d: { type: 'string', alias: ["dir"], describe: "The directory to save the HTML source to" },
|
|
32
|
-
e: { type: 'string', alias: ["executablePath"], describe: "Executable path Chrome" },
|
|
33
|
-
h: { type: 'boolean', alias: ["headless"], default: false },
|
|
34
|
-
m: { type: 'number', alias: ["max"], default: 1000, describe: "The max number of total crawls." },
|
|
35
|
-
o: { type: 'number', alias: ["timeout"], default: 4000, describe: "The timeout for each individual page load." },
|
|
36
|
-
b: { type: 'number', alias: ["break", "settle"], describe: "Settle time, in MS, to wait for the page to finish loading. By default it only waits for page load." },
|
|
37
|
-
p: { type: 'number', alias: ["pages"], default: 10, describe: "The max number of pages (tabs) running simultaneously." },
|
|
38
|
-
s: { type: 'boolean', alias: ["stealth"], describe: "Turns on stealth mode" },
|
|
39
|
-
t: { type: 'string', alias: ["text"], default: undefined, describe: "Possible values are 'text' or 'markdown', requires a directory to be provided as well. Will save the output to the directory." },
|
|
40
|
-
w: { type: 'string', alias: ["whitelist"], describe: "A text file where each line is a whitelisted pattern" },
|
|
41
|
-
x: { type: 'boolean', alias: ["metadata"], describe: "Generate a JSON metadata file for Amazon Kendra" }
|
|
42
|
-
})
|
|
43
|
-
.nargs('u', 1)
|
|
44
|
-
.parseSync();
|
|
45
|
-
const inputURL = argv.url;
|
|
46
|
-
const url = new url_1.URL(inputURL);
|
|
47
|
-
const cleanHost = url.host.replace(/\./g, "_");
|
|
48
|
-
const cleanPath = url.pathname.replace(/\//g, "_");
|
|
49
|
-
let patterns;
|
|
50
|
-
const whiteListPath = argv.w;
|
|
51
|
-
if (whiteListPath) {
|
|
52
|
-
console.log(whiteListPath);
|
|
53
|
-
const resolvedPath = (0, path_1.resolve)(whiteListPath);
|
|
54
|
-
const contents = (0, fs_1.readFileSync)(resolvedPath, "utf-8");
|
|
55
|
-
patterns = contents.split("\n");
|
|
56
|
-
}
|
|
57
|
-
else {
|
|
58
|
-
patterns = inputURL.endsWith("/") ? [`${inputURL}[.*]`] : [`${inputURL}/[.*]`];
|
|
59
|
-
}
|
|
60
|
-
const directory = Array.isArray(argv.d) ? argv.d[1] : argv.d;
|
|
61
|
-
const path = directory ? (0, path_1.resolve)(directory) : undefined;
|
|
62
|
-
if (path) {
|
|
63
|
-
// Make sure it exists
|
|
64
|
-
if (!(0, fs_1.existsSync)(path)) {
|
|
65
|
-
throw new Error(`Path ${directory} does not exist. Please provide an existing path to save the HTML`);
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
if (typeof argv.b === "number") {
|
|
69
|
-
// make sure it is less than the timeout
|
|
70
|
-
const timeout = argv.o || 4000;
|
|
71
|
-
if (argv.b > timeout) {
|
|
72
|
-
throw new Error(`Settling time is greater than page handler timeout, please adjust your inputs. -o ${argv.b + 4000} `);
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
let saveDir;
|
|
76
|
-
if (path) {
|
|
77
|
-
const exportDirName = `${cleanHost}${cleanPath}-${new Date().getTime()}`;
|
|
78
|
-
saveDir = (0, path_1.resolve)(path, exportDirName);
|
|
79
|
-
(0, fs_1.mkdirSync)(saveDir);
|
|
80
|
-
}
|
|
81
|
-
(() => __awaiter(void 0, void 0, void 0, function* () {
|
|
82
|
-
const queue = new arachne_1.MemoryRequestQueue();
|
|
83
|
-
queue.addRequest({ url: inputURL });
|
|
84
|
-
const crawlPatterns = patterns.map((pattern) => new arachne_1.ArachneURLPattern(pattern));
|
|
85
|
-
const answers = new Map();
|
|
86
|
-
console.log(`Starting crawl at ${inputURL}`);
|
|
87
|
-
const crawler = arachne_1.Arachne.crawler({
|
|
88
|
-
maxConcurrentPages: argv.p,
|
|
89
|
-
maxCrawlAttempts: argv.m,
|
|
90
|
-
pageHandlerTimeout: argv.o,
|
|
91
|
-
stealth: argv.s,
|
|
92
|
-
launchOptions: { headless: argv.h, timeout: 5000, executablePath: argv.e },
|
|
93
|
-
queue,
|
|
94
|
-
pageHandler: (page, request, response) => __awaiter(void 0, void 0, void 0, function* () {
|
|
95
|
-
const title = yield page.title();
|
|
96
|
-
console.log(`Loaded ${request.url} : ${title}`);
|
|
97
|
-
if (typeof argv.b === "number") {
|
|
98
|
-
const pageWaitTime = argv.b;
|
|
99
|
-
console.log(`Waiting for ${pageWaitTime} ms for page to completely load....`);
|
|
100
|
-
yield (0, arachne_1.autoScroll)(page, pageWaitTime);
|
|
101
|
-
console.log('...and done waiting.');
|
|
102
|
-
}
|
|
103
|
-
// Get the raw HTML
|
|
104
|
-
// It is important to use page.content() versus response.text() since
|
|
105
|
-
// page.content() gets you the HTML after it has been modified by the JavaScript
|
|
106
|
-
// on the page while response.text() only gives you the HTML from the original GET
|
|
107
|
-
const text = yield page.content();
|
|
108
|
-
// Pull out the raw html
|
|
109
|
-
if (saveDir) {
|
|
110
|
-
const requestURL = new url_1.URL(request.url);
|
|
111
|
-
const cleanHost = requestURL.host.replace(/(\.|\/)/g, "_").replace(/_+$/g, "").replace(/^_+/g, "");
|
|
112
|
-
const cleanRequestPath = requestURL.pathname.replace(/\//g, "_").replace(".html", "").replace(/_+$/g, "").replace(/^_+/g, "");
|
|
113
|
-
const cleanFileName = `${cleanHost}_${cleanRequestPath}`.replace(/_+$/g, "");
|
|
114
|
-
const htmlName = `${cleanFileName}.html`;
|
|
115
|
-
console.log(`File Name: ${htmlName}`);
|
|
116
|
-
const filePath = (0, path_1.resolve)(saveDir, htmlName);
|
|
117
|
-
console.log(`Saving to ${filePath}`);
|
|
118
|
-
(0, fs_1.writeFileSync)(filePath, text);
|
|
119
|
-
if (argv.x) {
|
|
120
|
-
const date = new Date().toISOString();
|
|
121
|
-
// Doc title
|
|
122
|
-
const metadata = {
|
|
123
|
-
Attributes: {
|
|
124
|
-
_created_at: date,
|
|
125
|
-
_source_uri: request.url,
|
|
126
|
-
_last_updated_at: date
|
|
127
|
-
},
|
|
128
|
-
Title: title,
|
|
129
|
-
ContentType: "HTML"
|
|
130
|
-
};
|
|
131
|
-
const metadataFileName = `${htmlName}.metadata.json`;
|
|
132
|
-
const metadataFilePath = (0, path_1.resolve)(saveDir, metadataFileName);
|
|
133
|
-
console.log(`Saving metadata to ${metadataFilePath}`);
|
|
134
|
-
const metadataText = JSON.stringify(metadata, undefined, 2);
|
|
135
|
-
(0, fs_1.writeFileSync)(metadataFilePath, metadataText);
|
|
136
|
-
}
|
|
137
|
-
if (typeof argv.t === "string") {
|
|
138
|
-
const toMD = argv.t === "markdown" ? true : false;
|
|
139
|
-
// Write some MARKDOWN!
|
|
140
|
-
const convertedHTML = new arachne_parsers_1.ArachneHTMLtoText().convert(text, toMD);
|
|
141
|
-
const extension = toMD ? `md` : `txt`;
|
|
142
|
-
const markdownName = `${cleanFileName}.${extension}`;
|
|
143
|
-
const markdownFilePath = (0, path_1.resolve)(saveDir, markdownName);
|
|
144
|
-
console.log(`Saving markdown to ${markdownFilePath}`);
|
|
145
|
-
(0, fs_1.writeFileSync)(markdownFilePath, convertedHTML);
|
|
146
|
-
}
|
|
147
|
-
}
|
|
148
|
-
// Let's see if we can find some structured data.
|
|
149
|
-
const questions = new arachne_parsers_1.ArachneJSONLD(text).getQuestions();
|
|
150
|
-
if (questions.length > 0) {
|
|
151
|
-
console.log(`${questions.length} questions found on page.`);
|
|
152
|
-
// Clean the answer
|
|
153
|
-
questions.forEach((question) => {
|
|
154
|
-
// name has the question text
|
|
155
|
-
// acceptedAnswer has the answer text
|
|
156
|
-
const questionText = typeof question.name === "string" ? question.name : "";
|
|
157
|
-
const acceptedAnswer = question.acceptedAnswer;
|
|
158
|
-
if (acceptedAnswer && (0, arachne_parsers_1.isAnswer)(acceptedAnswer)) {
|
|
159
|
-
const answerText = typeof acceptedAnswer.text === "string" ? acceptedAnswer.text : "";
|
|
160
|
-
if (answerText.length > 0) {
|
|
161
|
-
// Clean it
|
|
162
|
-
const cleanAnswer = new arachne_parsers_1.ArachneHTMLtoText().convert(answerText);
|
|
163
|
-
let existingAnswer = answers.get(cleanAnswer);
|
|
164
|
-
if (existingAnswer) {
|
|
165
|
-
existingAnswer.questions.push(questionText);
|
|
166
|
-
}
|
|
167
|
-
else {
|
|
168
|
-
existingAnswer = {
|
|
169
|
-
answer: cleanAnswer,
|
|
170
|
-
questions: [questionText],
|
|
171
|
-
url: request.url
|
|
172
|
-
};
|
|
173
|
-
}
|
|
174
|
-
// Add it back!
|
|
175
|
-
answers.set(cleanAnswer, existingAnswer);
|
|
176
|
-
}
|
|
177
|
-
}
|
|
178
|
-
});
|
|
179
|
-
}
|
|
180
|
-
yield (0, arachne_1.enqueueLinks)(page, queue, crawlPatterns);
|
|
181
|
-
const info = yield queue.getInfo();
|
|
182
|
-
console.log(`Completed ${info.completeRequests} of ${info.totalRequests}, ${info.inProgressRequests} are in progress with ${info.pendingRequests} pending and ${info.failedRequests} failed.`);
|
|
183
|
-
})
|
|
30
|
+
yargs.options({
|
|
31
|
+
h: { type: 'boolean', alias: ["headless"], default: false },
|
|
32
|
+
v: { type: 'boolean', alias: ["verbose"], default: false }
|
|
184
33
|
});
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
console.log('Unable to start the crawler.');
|
|
190
|
-
console.error(e);
|
|
191
|
-
}
|
|
192
|
-
console.log('Crawler complete.');
|
|
193
|
-
const info = yield queue.getInfo();
|
|
194
|
-
console.log(`Final count, completed ${info.completeRequests} of ${info.totalRequests}, ${info.failedRequests} failed.`);
|
|
195
|
-
const failed = queue.getFailed();
|
|
196
|
-
if (failed.length > 0) {
|
|
197
|
-
console.log('Failures....');
|
|
198
|
-
failed.forEach((failedRequest) => {
|
|
199
|
-
console.log(`\t${failedRequest.url} failed after ${failedRequest.requestAttempts} attempts with final error: ${failedRequest.errorMessages ? failedRequest.errorMessages[failedRequest.errorMessages.length - 1] : "unknown"}`);
|
|
200
|
-
});
|
|
201
|
-
}
|
|
202
|
-
console.log(`Found ${answers.size} answers.`);
|
|
203
|
-
if (argv.faq && saveDir) {
|
|
204
|
-
// This follows the JSON format https://docs.aws.amazon.com/kendra/latest/dg/in-creating-faq.html#faq-custom-json
|
|
205
|
-
const qaJSON = {
|
|
206
|
-
"SchemaVersion": 1,
|
|
207
|
-
"FaqDocuments": []
|
|
208
|
-
};
|
|
209
|
-
let qaCSV = `"Question","Answer","URL"`;
|
|
210
|
-
answers.forEach((answer) => {
|
|
211
|
-
// For the questions on the answer, add a line
|
|
212
|
-
answer.questions.forEach((question) => {
|
|
213
|
-
qaJSON.FaqDocuments.push({
|
|
214
|
-
Question: question,
|
|
215
|
-
Answer: answer.answer,
|
|
216
|
-
Attributes: {
|
|
217
|
-
_source_uri: answer.url
|
|
218
|
-
}
|
|
219
|
-
});
|
|
220
|
-
qaCSV += `\r\n"${question}","${answer.answer}","${answer.url}"`;
|
|
221
|
-
});
|
|
222
|
-
});
|
|
223
|
-
const questionAnswerName = `question-answers.json`;
|
|
224
|
-
const qaFilePath = (0, path_1.resolve)(saveDir, questionAnswerName);
|
|
225
|
-
console.log(`Saving FAQ JSON to ${qaFilePath}`);
|
|
226
|
-
(0, fs_1.writeFileSync)(qaFilePath, JSON.stringify(qaJSON, undefined, 2));
|
|
227
|
-
const csvQuestionName = `question-answers.csv`;
|
|
228
|
-
const csvQaFilePath = (0, path_1.resolve)(saveDir, csvQuestionName);
|
|
229
|
-
console.log(`Saving FAQ CSV to ${csvQaFilePath}`);
|
|
230
|
-
(0, fs_1.writeFileSync)(csvQaFilePath, qaCSV);
|
|
231
|
-
}
|
|
232
|
-
}))();
|
|
34
|
+
}, async (argv) => {
|
|
35
|
+
await analyze(argv);
|
|
36
|
+
});
|
|
37
|
+
yargsInstance.parse();
|
|
233
38
|
//# sourceMappingURL=index.js.map
|
package/lib/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AACA,kCAAkC;AAClC,+BAA+B;AAC/B,OAAO,KAAK,MAAM,aAAa,CAAC;AAChC,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAA;AACvC,OAAO,EAAE,KAAK,EAAa,MAAM,YAAY,CAAC;AAC9C,OAAO,EAAE,OAAO,EAAe,MAAM,cAAc,CAAC;AAEpD,MAAM,aAAa,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;AAEnD,aAAa,CAAC,OAAO,CACjB,aAAa,EACb,wBAAwB,EACxB,CAAC,KAAK,EAAE,EAAE;IACN,KAAK,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,GAAG,CAAC,EAAE,YAAY,EAAE,IAAI,EAAE,QAAQ,EAAE,kBAAkB,EAAE,CAAC,CAAC;IAC5G,KAAK,CAAC,OAAO,CAAC;QACV,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,qFAAqF,EAAE;QACvJ,CAAC,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,KAAK,CAAC,EAAE,QAAQ,EAAE,0CAA0C,EAAE;QAC3F,CAAC,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,gBAAgB,CAAC,EAAE,QAAQ,EAAE,wBAAwB,EAAE;QACpF,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,UAAU,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE;QAC3D,CAAC,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,iCAAiC,EAAE;QACjG,CAAC,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,SAAS,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,4CAA4C,EAAE;QAChH,CAAC,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,OAAO,EAAE,QAAQ,CAAC,EAAE,QAAQ,EAAE,sGAAsG,EAAE;QACnK,CAAC,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,OAAO,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,QAAQ,EAAE,wDAAwD,EAAE;QACxH,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,SAAS,CAAC,EAAE,QAAQ,EAAE,uBAAuB,EAAE;QAC7E,CAAC,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,MAAM,CAAC,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,gIAAgI,EAAE;QACtM,CAAC,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,WAAW,CAAC,EAAE,QAAQ,EAAE,sDAAsD,EAAE;QAC7G,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,iDAAiD,EAAE;KAC3G,CAAC,CAAA;AACN,CAAC,EACD,KAAK,EAAE,IAAI,EAAE,EAAE;IACX,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC;AACtB,CAAC,CACJ,CAAC;AAEF,aAAa,CAAC,OAAO,CACjB,eAAe,EACf,0CAA0C,EAC1C,CAAC,KAAK,EAAE,EAAE;IACN,
KAAK,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,GAAG,CAAC,EAAE,YAAY,EAAE,IAAI,EAAE,QAAQ,EAAE,kBAAkB,EAAE,CAAC,CAAC;IAC5G,KAAK,CAAC,OAAO,CAAC;QACV,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,UAAU,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE;QAC3D,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,SAAS,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE;KAC7D,CAAC,CAAA;AACN,CAAC,EACD,KAAK,EAAE,IAAI,EAAE,EAAE;IACX,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;AACxB,CAAC,CACJ,CAAC;AAEF,aAAa,CAAC,KAAK,EAAE,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AAAA,kCAAkC;AAElC,MAAM,UAAU,mBAAmB,CAAC,GAAW,EAAE,QAAgB;IAC7D,OAAO,WAAW,GAAG,KAAK,QAAQ,YAAY,CAAC;AACnD,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@xapp/arachne-cli",
|
|
3
3
|
"preferGlobal": true,
|
|
4
|
-
"version": "0.
|
|
4
|
+
"version": "0.10.0",
|
|
5
5
|
"types": "lib/index",
|
|
6
6
|
"main": "lib/index",
|
|
7
|
+
"type": "module",
|
|
7
8
|
"files": [
|
|
8
9
|
"lib"
|
|
9
10
|
],
|
|
@@ -19,10 +20,10 @@
|
|
|
19
20
|
},
|
|
20
21
|
"devDependencies": {
|
|
21
22
|
"@microsoft/api-extractor": "7.36.4",
|
|
22
|
-
"@types/chai": "4.3.
|
|
23
|
+
"@types/chai": "4.3.6",
|
|
23
24
|
"@types/generic-pool": "3.1.11",
|
|
24
25
|
"@types/mocha": "10.0.1",
|
|
25
|
-
"@types/node": "18.17.
|
|
26
|
+
"@types/node": "18.17.15",
|
|
26
27
|
"@types/sinon": "10.0.16",
|
|
27
28
|
"@types/sinon-chai": "3.2.9",
|
|
28
29
|
"@types/yargs": "17.0.24",
|
|
@@ -33,12 +34,16 @@
|
|
|
33
34
|
"sinon-chai": "3.7.0",
|
|
34
35
|
"ts-node": "10.9.1",
|
|
35
36
|
"tslib": "2.6.2",
|
|
36
|
-
"typescript": "5.
|
|
37
|
+
"typescript": "5.2.2"
|
|
37
38
|
},
|
|
38
39
|
"dependencies": {
|
|
39
|
-
"@xapp/arachne": "0.
|
|
40
|
-
"@xapp/arachne-parsers": "0.
|
|
41
|
-
"
|
|
40
|
+
"@xapp/arachne": "0.10.0",
|
|
41
|
+
"@xapp/arachne-parsers": "0.10.0",
|
|
42
|
+
"@xapp/arachne-web-service": "0.10.0",
|
|
43
|
+
"isomorphic-fetch": "3.0.0",
|
|
44
|
+
"puppeteer": "21.4.1",
|
|
45
|
+
"terminal-image": "2.0.0",
|
|
46
|
+
"terminal-link": "3.0.0",
|
|
42
47
|
"yargs": "17.7.2"
|
|
43
48
|
},
|
|
44
49
|
"scripts": {
|
|
@@ -46,5 +51,5 @@
|
|
|
46
51
|
"build": "tsc -d true -p .",
|
|
47
52
|
"clean": "rm -rf ./lib/*"
|
|
48
53
|
},
|
|
49
|
-
"gitHead": "
|
|
54
|
+
"gitHead": "fd78541a771decde474ba3de38918c6731d16906"
|
|
50
55
|
}
|