@mintlify/scraping 4.0.28 → 4.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +4 -0
- package/bin/cli.js.map +1 -1
- package/bin/nav/listItems.js +6 -4
- package/bin/nav/listItems.js.map +1 -1
- package/bin/nav/retrieve.js +2 -1
- package/bin/nav/retrieve.js.map +1 -1
- package/bin/scrapingPipeline/site.js +11 -3
- package/bin/scrapingPipeline/site.js.map +1 -1
- package/bin/tsconfig.build.tsbuildinfo +1 -1
- package/bin/utils/images.js +3 -0
- package/bin/utils/images.js.map +1 -1
- package/bin/utils/network.js +46 -13
- package/bin/utils/network.js.map +1 -1
- package/package.json +5 -5
- package/src/cli.ts +4 -0
- package/src/nav/listItems.ts +6 -5
- package/src/nav/retrieve.ts +2 -1
- package/src/scrapingPipeline/site.ts +11 -3
- package/src/utils/images.ts +3 -0
- package/src/utils/network.ts +54 -16
package/bin/utils/images.js
CHANGED
|
@@ -7,6 +7,9 @@ import { write } from './file.js';
|
|
|
7
7
|
import { log } from './log.js';
|
|
8
8
|
import { fetchImage } from './network.js';
|
|
9
9
|
export async function downloadImage(src, rootPath) {
|
|
10
|
+
if (src.startsWith('data:image/')) {
|
|
11
|
+
return { success: true, data: [src, src] };
|
|
12
|
+
}
|
|
10
13
|
try {
|
|
11
14
|
let filename = await writeImageToFile(src, rootPath);
|
|
12
15
|
filename = filename.replace(process.cwd(), '');
|
package/bin/utils/images.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"images.js","sourceRoot":"","sources":["../../src/utils/images.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAE1C,OAAO,EAAE,0BAA0B,EAAE,MAAM,iBAAiB,CAAC;AAE7D,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAClD,OAAO,EAAE,KAAK,EAAE,MAAM,WAAW,CAAC;AAClC,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAC/B,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAE1C,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,GAAW,EACX,QAAgB;IAEhB,IAAI,CAAC;QACH,IAAI,QAAQ,GAAG,MAAM,gBAAgB,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;QACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,EAAE,CAAC,CAAC;QAE/C,MAAM,iBAAiB,GAAG,QAAQ,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,CAAC,MAAM,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;QACxF,GAAG,CAAC,GAAG,iBAAiB,iCAAiC,EAAE,SAAS,CAAC,CAAC;QAEtE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,GAAG,EAAE,QAAQ,CAAC,EAAE,CAAC;IAClD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,OAAO,EAAE,CAAC;QACpD,CAAC;aAAM,CAAC;YACN,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,OAAO,EAAE,GAAG,GAAG,qDAAqD;aACrE,CAAC;QACJ,CAAC;IACH,CAAC;AACH,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,GAAW,EAAE,QAAgB;IAC3D,MAAM,QAAQ,GAAG,0BAA0B,CAAC,GAAG,CAAC,CAAC;IACjD,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;IAC3C,MAAM,iBAAiB,GAAG,QAAQ,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,CAAC,MAAM,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;IAExF,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CAAC,GAAG,iBAAiB,iCAAiC,CAAC,CAAC;IACzE,CAAC;IAED,IAAI,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC1B,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,IAAI,CAAC;QACH,SAAS,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACrD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,GAAG,SAAS,+BAA+B,CAAC,CAAC;IAC/D,CAAC;IAED,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,CAAC;QACxC,KAAK,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;QAC5B,OAAO,SAAS,CAAC;IACnB,CAAC
;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,GAAG,iBAAiB,yCAAyC,YAAY,EAAE,CAAC,CAAC;IAC/F,CAAC;AACH,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,GAAW;IACzC,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QAC5B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,GAAG,GAAG,gBAAgB,CAAC,GAAG,CAAC,CAAC;IAClC,IAAI,GAAG,IAAI,CAAC,0BAA0B,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QACrD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,yBAAyB,CAAC,GAAW,EAAE,GAAW;IAChE,MAAM,mBAAmB,GAAG,GAAG,CAAC,OAAO,CAAC,IAAI,GAAG,EAAE,CAAC,GAAG,IAAI,GAAG,EAAE,CAAC,MAAM,CAAC;IACtE,OAAO,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,mBAAmB,CAAC,CAAC;AAC3C,CAAC;AAED,MAAM,UAAU,0BAA0B,CAAC,GAAW;IACpD,IAAI,QAAQ,GAAG,EAAE,CAAC;IAClB,IAAI,GAAG,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;QAClC,KAAK,MAAM,GAAG,IAAI,0BAA0B,EAAE,CAAC;YAC7C,IAAI,GAAG,CAAC,QAAQ,CAAC,IAAI,GAAG,EAAE,CAAC,EAAE,CAAC;gBAC5B,QAAQ,GAAG,yBAAyB,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;YACjD,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;YAC3B,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;QAC9B,CAAC;QACD,QAAQ;YACN,kBAAkB,CAChB,GAAG;iBACA,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE;iBACd,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE;iBACd,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC,CAC7B,CAAC,OAAO,CAAC,sCAAsC,EAAE,GAAG,CAAC,IAAI,OAAO,CAAC;QACpE,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,OAAO,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AACpD,CAAC"}
|
|
1
|
+
{"version":3,"file":"images.js","sourceRoot":"","sources":["../../src/utils/images.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAE1C,OAAO,EAAE,0BAA0B,EAAE,MAAM,iBAAiB,CAAC;AAE7D,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAClD,OAAO,EAAE,KAAK,EAAE,MAAM,WAAW,CAAC;AAClC,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAC/B,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAE1C,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,GAAW,EACX,QAAgB;IAEhB,IAAI,GAAG,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;IAC7C,CAAC;IACD,IAAI,CAAC;QACH,IAAI,QAAQ,GAAG,MAAM,gBAAgB,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;QACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,EAAE,CAAC,CAAC;QAE/C,MAAM,iBAAiB,GAAG,QAAQ,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,CAAC,MAAM,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;QACxF,GAAG,CAAC,GAAG,iBAAiB,iCAAiC,EAAE,SAAS,CAAC,CAAC;QAEtE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,GAAG,EAAE,QAAQ,CAAC,EAAE,CAAC;IAClD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,OAAO,EAAE,CAAC;QACpD,CAAC;aAAM,CAAC;YACN,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,OAAO,EAAE,GAAG,GAAG,qDAAqD;aACrE,CAAC;QACJ,CAAC;IACH,CAAC;AACH,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,GAAW,EAAE,QAAgB;IAC3D,MAAM,QAAQ,GAAG,0BAA0B,CAAC,GAAG,CAAC,CAAC;IACjD,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;IAC3C,MAAM,iBAAiB,GAAG,QAAQ,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,CAAC,MAAM,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;IAExF,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CAAC,GAAG,iBAAiB,iCAAiC,CAAC,CAAC;IACzE,CAAC;IAED,IAAI,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC1B,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,IAAI,CAAC;QACH,SAAS,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACrD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,GAAG,SAAS,+BAA+B,CAAC,CAAC;IAC/D,
CAAC;IAED,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,CAAC;QACxC,KAAK,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;QAC5B,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,GAAG,iBAAiB,yCAAyC,YAAY,EAAE,CAAC,CAAC;IAC/F,CAAC;AACH,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,GAAW;IACzC,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QAC5B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,GAAG,GAAG,gBAAgB,CAAC,GAAG,CAAC,CAAC;IAClC,IAAI,GAAG,IAAI,CAAC,0BAA0B,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QACrD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,yBAAyB,CAAC,GAAW,EAAE,GAAW;IAChE,MAAM,mBAAmB,GAAG,GAAG,CAAC,OAAO,CAAC,IAAI,GAAG,EAAE,CAAC,GAAG,IAAI,GAAG,EAAE,CAAC,MAAM,CAAC;IACtE,OAAO,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,mBAAmB,CAAC,CAAC;AAC3C,CAAC;AAED,MAAM,UAAU,0BAA0B,CAAC,GAAW;IACpD,IAAI,QAAQ,GAAG,EAAE,CAAC;IAClB,IAAI,GAAG,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;QAClC,KAAK,MAAM,GAAG,IAAI,0BAA0B,EAAE,CAAC;YAC7C,IAAI,GAAG,CAAC,QAAQ,CAAC,IAAI,GAAG,EAAE,CAAC,EAAE,CAAC;gBAC5B,QAAQ,GAAG,yBAAyB,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;YACjD,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;YAC3B,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;QAC9B,CAAC;QACD,QAAQ;YACN,kBAAkB,CAChB,GAAG;iBACA,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE;iBACd,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE;iBACd,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC,CAC7B,CAAC,OAAO,CAAC,sCAAsC,EAAE,GAAG,CAAC,IAAI,OAAO,CAAC;QACpE,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,OAAO,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AACpD,CAAC"}
|
package/bin/utils/network.js
CHANGED
|
@@ -1,6 +1,21 @@
|
|
|
1
1
|
import { launch } from 'puppeteer';
|
|
2
|
+
import { framework } from './detectFramework.js';
|
|
2
3
|
import { getErrorMessage } from './errors.js';
|
|
3
4
|
import { log } from './log.js';
|
|
5
|
+
const userAgents = [
|
|
6
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
|
|
7
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
|
|
8
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
|
|
9
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
|
|
10
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
|
|
11
|
+
];
|
|
12
|
+
const headers = {
|
|
13
|
+
'Accept-Language': 'en-US,en;q=0.9',
|
|
14
|
+
Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
|
|
15
|
+
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
|
|
16
|
+
'Accept-Encoding': 'gzip, deflate, br, zstd',
|
|
17
|
+
Connection: 'keep-alive',
|
|
18
|
+
};
|
|
4
19
|
async function exponentialBackoff(operation, retries = 3, delay = 1000, factor = 2) {
|
|
5
20
|
try {
|
|
6
21
|
return await operation();
|
|
@@ -31,26 +46,44 @@ export async function startPuppeteer() {
|
|
|
31
46
|
export async function getHtmlWithPuppeteer(browser, url) {
|
|
32
47
|
try {
|
|
33
48
|
const page = await browser.newPage();
|
|
34
|
-
await page.setExtraHTTPHeaders({
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
49
|
+
await page.setViewport({
|
|
50
|
+
width: 3072,
|
|
51
|
+
height: 2048,
|
|
52
|
+
deviceScaleFactor: 2,
|
|
53
|
+
isMobile: false,
|
|
54
|
+
hasTouch: false,
|
|
55
|
+
isLandscape: true,
|
|
40
56
|
});
|
|
41
|
-
|
|
42
|
-
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
|
|
43
|
-
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
|
|
44
|
-
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
|
|
45
|
-
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
|
|
46
|
-
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
|
|
47
|
-
];
|
|
57
|
+
await page.setExtraHTTPHeaders(headers);
|
|
48
58
|
await page.setUserAgent(userAgents[Math.floor(Math.random() * userAgents.length)] || userAgents[0]);
|
|
49
59
|
await page.setJavaScriptEnabled(true);
|
|
50
60
|
await page.goto(url.toString(), {
|
|
51
61
|
waitUntil: 'networkidle2',
|
|
52
62
|
timeout: 30000,
|
|
53
63
|
});
|
|
64
|
+
if (framework.vendor === 'docusaurus') {
|
|
65
|
+
await page.evaluate(() => {
|
|
66
|
+
const clickMenuItems = (parentElement = document) => {
|
|
67
|
+
const menuItems = parentElement.getElementsByClassName('menu__link--sublist');
|
|
68
|
+
for (const item of menuItems) {
|
|
69
|
+
const clickEvent = new MouseEvent('click', {
|
|
70
|
+
bubbles: true,
|
|
71
|
+
cancelable: true,
|
|
72
|
+
view: window,
|
|
73
|
+
});
|
|
74
|
+
item.dispatchEvent(clickEvent);
|
|
75
|
+
const parentLi = item.parentElement;
|
|
76
|
+
if (parentLi) {
|
|
77
|
+
const nestedUl = parentLi.querySelector('ul');
|
|
78
|
+
if (nestedUl) {
|
|
79
|
+
clickMenuItems(nestedUl);
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
};
|
|
84
|
+
clickMenuItems();
|
|
85
|
+
});
|
|
86
|
+
}
|
|
54
87
|
const content = await exponentialBackoff(() => page.content());
|
|
55
88
|
await page.close();
|
|
56
89
|
return content;
|
package/bin/utils/network.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"network.js","sourceRoot":"","sources":["../../src/utils/network.ts"],"names":[],"mappings":"AAAA,OAAO,EAAW,MAAM,EAAE,MAAM,WAAW,CAAC;AAE5C,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAE/B,KAAK,UAAU,kBAAkB,CAC/B,SAA2B,EAC3B,UAAkB,CAAC,EACnB,QAAgB,IAAI,EACpB,SAAiB,CAAC;IAElB,IAAI,CAAC;QACH,OAAO,MAAM,SAAS,EAAE,CAAC;IAC3B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;YAChB,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;YAC3D,OAAO,kBAAkB,CAAC,SAAS,EAAE,OAAO,GAAG,CAAC,EAAE,KAAK,GAAG,MAAM,EAAE,MAAM,CAAC,CAAC;QAC5E,CAAC;aAAM,CAAC;YACN,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc;IAClC,IAAI,CAAC;QACH,OAAO,MAAM,MAAM,CAAC;YAClB,QAAQ,EAAE,IAAI;YACd,iBAAiB,EAAE,IAAI;SACxB,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,GAAG,CAAC,wCAAwC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC/D,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAgB,EAChB,GAAiB;IAEjB,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,MAAM,IAAI,CAAC,
|
|
1
|
+
{"version":3,"file":"network.js","sourceRoot":"","sources":["../../src/utils/network.ts"],"names":[],"mappings":"AAAA,OAAO,EAAW,MAAM,EAAE,MAAM,WAAW,CAAC;AAE5C,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAE/B,MAAM,UAAU,GAAG;IACjB,uHAAuH;IACvH,uHAAuH;IACvH,uHAAuH;IACvH,uHAAuH;IACvH,uHAAuH;CAC/G,CAAC;AAEX,MAAM,OAAO,GAAG;IACd,iBAAiB,EAAE,gBAAgB;IACnC,MAAM,EACJ,yIAAyI;IAC3I,YAAY,EACV,uHAAuH;IACzH,iBAAiB,EAAE,yBAAyB;IAC5C,UAAU,EAAE,YAAY;CAChB,CAAC;AAEX,KAAK,UAAU,kBAAkB,CAC/B,SAA2B,EAC3B,UAAkB,CAAC,EACnB,QAAgB,IAAI,EACpB,SAAiB,CAAC;IAElB,IAAI,CAAC;QACH,OAAO,MAAM,SAAS,EAAE,CAAC;IAC3B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;YAChB,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;YAC3D,OAAO,kBAAkB,CAAC,SAAS,EAAE,OAAO,GAAG,CAAC,EAAE,KAAK,GAAG,MAAM,EAAE,MAAM,CAAC,CAAC;QAC5E,CAAC;aAAM,CAAC;YACN,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc;IAClC,IAAI,CAAC;QACH,OAAO,MAAM,MAAM,CAAC;YAClB,QAAQ,EAAE,IAAI;YACd,iBAAiB,EAAE,IAAI;SACxB,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,GAAG,CAAC,wCAAwC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC/D,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAgB,EAChB,GAAiB;IAEjB,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,MAAM,IAAI,CAAC,WAAW,CAAC;YACrB,KAAK,EAAE,IAAI;YACX,MAAM,EAAE,IAAI;YACZ,iBAAiB,EAAE,CAAC;YACpB,QAAQ,EAAE,KAAK;YACf,QAAQ,EAAE,KAAK;YACf,WAAW,EAAE,IAAI;SAClB,CAAC,CAAC;QACH,MAAM,IAAI,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC;QACxC,MAAM,IAAI,CAAC,YAAY,CACrB,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,IAAI,UAAU,CAAC,CAAC,CAAC,CAC3E,CAAC;QACF,MAAM,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAAC;QAEtC,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE;YAC9B,SAAS,EAAE,cAAc;YACzB,OAAO,EAAE,KAAK;SACf,CAAC,CAAC;QAEH,IAAI,SAAS,CAAC,MAAM,KAAK,YAAY,EAAE,CAAC;YACtC,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;gBACvB,M
AAM,cAAc,GAAG,CAAC,gBAAoC,QAAQ,EAAE,EAAE;oBACtE,MAAM,SAAS,GAAG,aAAa,CAAC,sBAAsB,CAAC,qBAAqB,CAAC,CAAC;oBAE9E,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;wBAC7B,MAAM,UAAU,GAAG,IAAI,UAAU,CAAC,OAAO,EAAE;4BACzC,OAAO,EAAE,IAAI;4BACb,UAAU,EAAE,IAAI;4BAChB,IAAI,EAAE,MAAM;yBACb,CAAC,CAAC;wBACH,IAAI,CAAC,aAAa,CAAC,UAAU,CAAC,CAAC;wBAE/B,MAAM,QAAQ,GAAG,IAAI,CAAC,aAAa,CAAC;wBACpC,IAAI,QAAQ,EAAE,CAAC;4BACb,MAAM,QAAQ,GAAG,QAAQ,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;4BAC9C,IAAI,QAAQ,EAAE,CAAC;gCACb,cAAc,CAAC,QAAQ,CAAC,CAAC;4BAC3B,CAAC;wBACH,CAAC;oBACH,CAAC;gBACH,CAAC,CAAC;gBAEF,cAAc,EAAE,CAAC;YACnB,CAAC,CAAC,CAAC;QACL,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;QAC/D,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;QACnB,OAAO,OAAO,CAAC;IACjB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,yCAAyC,YAAY,EAAE,CAAC,CAAC;IAC3E,CAAC;AACH,CAAC;AAED,KAAK,UAAU,iBAAiB,CAAC,GAAiB;IAChD,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;QAC7B,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;QACrD,CAAC;QACD,OAAO,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;IAC1B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,mCAAmC,YAAY,EAAE,CAAC,CAAC;IACrE,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,GAAiB,EACjB,UAA+B,SAAS;IAExC,IAAI,CAAC;QACH,IAAI,GAAG,GAAuB,SAAS,CAAC;QACxC,IAAI,OAAO,EAAE,CAAC;YACZ,GAAG,GAAG,MAAM,oBAAoB,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QACjD,CAAC;aAAM,CAAC;YACN,GAAG,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,CAAC,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC;QAC/D,CAAC;QACD,IAAI,GAAG;YAAE,OAAO,GAAG,CAAC;QACpB,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;IAC/C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,CAAC,QAAQ,EAAE,GAAG,YAAY,EAAE,CAAC,CAAC;IAChF,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,GAAW;IAC1C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,C
AAC,CAAC,CAAC;QACvD,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;QACrD,CAAC;QAED,MAAM,WAAW,GAAG,MAAM,GAAG,CAAC,WAAW,EAAE,CAAC;QAC5C,MAAM,SAAS,GAAG,IAAI,UAAU,CAAC,WAAW,CAAC,CAAC;QAE9C,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,0CAA0C,YAAY,EAAE,CAAC,CAAC;IAClF,CAAC;AACH,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mintlify/scraping",
|
|
3
|
-
"version": "4.0.28",
|
|
3
|
+
"version": "4.0.30",
|
|
4
4
|
"description": "Scrape documentation frameworks to Mintlify docs",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=18.0.0"
|
|
@@ -38,7 +38,7 @@
|
|
|
38
38
|
"format:check": "prettier . --check"
|
|
39
39
|
},
|
|
40
40
|
"dependencies": {
|
|
41
|
-
"@mintlify/common": "1.0.
|
|
41
|
+
"@mintlify/common": "1.0.188",
|
|
42
42
|
"@mintlify/openapi-parser": "^0.0.7",
|
|
43
43
|
"fs-extra": "^11.1.1",
|
|
44
44
|
"hast": "^1.0.0",
|
|
@@ -60,10 +60,10 @@
|
|
|
60
60
|
},
|
|
61
61
|
"devDependencies": {
|
|
62
62
|
"@mintlify/eslint-config-typescript": "1.0.13",
|
|
63
|
-
"@mintlify/models": "0.0.
|
|
63
|
+
"@mintlify/models": "0.0.147",
|
|
64
64
|
"@mintlify/prettier-config": "1.0.4",
|
|
65
65
|
"@mintlify/ts-config": "2.0.2",
|
|
66
|
-
"@mintlify/validation": "0.1.
|
|
66
|
+
"@mintlify/validation": "0.1.219",
|
|
67
67
|
"@trivago/prettier-plugin-sort-imports": "^4.2.1",
|
|
68
68
|
"@tsconfig/recommended": "1.x",
|
|
69
69
|
"@types/node": "^18.7.13",
|
|
@@ -78,5 +78,5 @@
|
|
|
78
78
|
"typescript": "^5.5.3",
|
|
79
79
|
"vitest": "^2.0.4"
|
|
80
80
|
},
|
|
81
|
-
"gitHead": "
|
|
81
|
+
"gitHead": "03182511296c892727bca1ccb672bbd8deebe4c5"
|
|
82
82
|
}
|
package/src/cli.ts
CHANGED
|
@@ -103,9 +103,11 @@ async function page(url: string) {
|
|
|
103
103
|
} else {
|
|
104
104
|
log(result.message);
|
|
105
105
|
}
|
|
106
|
+
process.exit(0);
|
|
106
107
|
} catch (error) {
|
|
107
108
|
const errorMessage = getErrorMessage(error);
|
|
108
109
|
log(errorMessage);
|
|
110
|
+
process.exit(1);
|
|
109
111
|
}
|
|
110
112
|
}
|
|
111
113
|
|
|
@@ -122,8 +124,10 @@ async function site(url: string) {
|
|
|
122
124
|
} else {
|
|
123
125
|
log(result.message);
|
|
124
126
|
}
|
|
127
|
+
process.exit(0);
|
|
125
128
|
} catch (error) {
|
|
126
129
|
const errorMessage = getErrorMessage(error);
|
|
127
130
|
log(errorMessage);
|
|
131
|
+
process.exit(1);
|
|
128
132
|
}
|
|
129
133
|
}
|
package/src/nav/listItems.ts
CHANGED
|
@@ -50,18 +50,19 @@ export function processListItem(
|
|
|
50
50
|
|
|
51
51
|
const sectionHeader = findFirstChild(node, opts.sectionTagName);
|
|
52
52
|
const childList = findFirstChild(node, opts.childListTagName);
|
|
53
|
+
if (!childList) {
|
|
54
|
+
return linkHref;
|
|
55
|
+
}
|
|
56
|
+
|
|
53
57
|
let title = opts.title;
|
|
54
58
|
if (!title) {
|
|
55
|
-
title = getText(link) || getText(sectionHeader) || '';
|
|
56
59
|
if (framework.vendor === 'readme') {
|
|
57
60
|
title = getText(sectionHeader) || getText(link) || '';
|
|
61
|
+
} else {
|
|
62
|
+
title = getText(link) || getText(sectionHeader) || '';
|
|
58
63
|
}
|
|
59
64
|
}
|
|
60
65
|
|
|
61
|
-
if (!childList) {
|
|
62
|
-
return linkHref;
|
|
63
|
-
}
|
|
64
|
-
|
|
65
66
|
let childEntries = retrieveNavItems(childList);
|
|
66
67
|
const newLink = childEntries.find(
|
|
67
68
|
(child) => typeof child === 'string' && child.startsWith(linkHref)
|
package/src/nav/retrieve.ts
CHANGED
|
@@ -49,8 +49,9 @@ export function retrieveNavItems(rootNode: Element): Array<NavigationEntry> {
|
|
|
49
49
|
node.children[0].tagName === 'div' &&
|
|
50
50
|
node.children[0].children.filter((child) => child.type === 'text').length ===
|
|
51
51
|
node.children[0].children.length
|
|
52
|
-
)
|
|
52
|
+
) {
|
|
53
53
|
title = findTitle(node.children[0], { delete: false });
|
|
54
|
+
}
|
|
54
55
|
|
|
55
56
|
if (
|
|
56
57
|
framework.vendor === 'readme' &&
|
|
package/src/scrapingPipeline/site.ts
CHANGED
|
@@ -9,7 +9,7 @@ import { retrieveRootNavElement } from '../nav/root.js';
|
|
|
9
9
|
import type { Result } from '../types/result.js';
|
|
10
10
|
import { detectFramework, framework } from '../utils/detectFramework.js';
|
|
11
11
|
import { logErrorResults } from '../utils/errors.js';
|
|
12
|
-
import { startPuppeteer } from '../utils/network.js';
|
|
12
|
+
import { fetchPageHtml, startPuppeteer } from '../utils/network.js';
|
|
13
13
|
import { INDEX_NAMES, iterateThroughReservedNames } from '../utils/reservedNames.js';
|
|
14
14
|
import { removeTrailingSlash, removeLeadingSlash } from '../utils/strings.js';
|
|
15
15
|
import { downloadColors } from './color.js';
|
|
@@ -32,6 +32,13 @@ export async function scrapeSite(
|
|
|
32
32
|
|
|
33
33
|
detectFramework(hast);
|
|
34
34
|
|
|
35
|
+
if (framework.vendor === 'docusaurus') {
|
|
36
|
+
const browser = await startPuppeteer();
|
|
37
|
+
html = await fetchPageHtml(url, browser);
|
|
38
|
+
hast = htmlToHast(html);
|
|
39
|
+
if (browser) await browser.close();
|
|
40
|
+
}
|
|
41
|
+
|
|
35
42
|
const sidebar = retrieveRootNavElement(hast);
|
|
36
43
|
if (!sidebar) return { success: false, message: `${url.toString()}: ${NAV_FAILURE_MSG}` };
|
|
37
44
|
|
|
@@ -118,8 +125,9 @@ export async function scrapeSite(
|
|
|
118
125
|
|
|
119
126
|
navItems.forEach((navItem, index) => {
|
|
120
127
|
if (typeof navItem !== 'string') return;
|
|
121
|
-
const
|
|
122
|
-
|
|
128
|
+
const lastItemInPath = navItem.split('/').pop() || navItem;
|
|
129
|
+
const name = lastItemInPath
|
|
130
|
+
.split(/[-_]/)
|
|
123
131
|
.map((str) => (str[0] ? `${str[0].toUpperCase()}${str.substring(1)}` : str))
|
|
124
132
|
.join(' ');
|
|
125
133
|
|
package/src/utils/images.ts
CHANGED
|
@@ -13,6 +13,9 @@ export async function downloadImage(
|
|
|
13
13
|
src: string,
|
|
14
14
|
rootPath: string
|
|
15
15
|
): Promise<Result<[string, string]>> {
|
|
16
|
+
if (src.startsWith('data:image/')) {
|
|
17
|
+
return { success: true, data: [src, src] };
|
|
18
|
+
}
|
|
16
19
|
try {
|
|
17
20
|
let filename = await writeImageToFile(src, rootPath);
|
|
18
21
|
filename = filename.replace(process.cwd(), '');
|
package/src/utils/network.ts
CHANGED
|
@@ -1,8 +1,27 @@
|
|
|
1
1
|
import { Browser, launch } from 'puppeteer';
|
|
2
2
|
|
|
3
|
+
import { framework } from './detectFramework.js';
|
|
3
4
|
import { getErrorMessage } from './errors.js';
|
|
4
5
|
import { log } from './log.js';
|
|
5
6
|
|
|
7
|
+
const userAgents = [
|
|
8
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
|
|
9
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
|
|
10
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
|
|
11
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
|
|
12
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
|
|
13
|
+
] as const;
|
|
14
|
+
|
|
15
|
+
const headers = {
|
|
16
|
+
'Accept-Language': 'en-US,en;q=0.9',
|
|
17
|
+
Accept:
|
|
18
|
+
'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
|
|
19
|
+
'User-Agent':
|
|
20
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
|
|
21
|
+
'Accept-Encoding': 'gzip, deflate, br, zstd',
|
|
22
|
+
Connection: 'keep-alive',
|
|
23
|
+
} as const;
|
|
24
|
+
|
|
6
25
|
async function exponentialBackoff<T>(
|
|
7
26
|
operation: () => Promise<T>,
|
|
8
27
|
retries: number = 3,
|
|
@@ -41,23 +60,15 @@ export async function getHtmlWithPuppeteer(
|
|
|
41
60
|
try {
|
|
42
61
|
const page = await browser.newPage();
|
|
43
62
|
|
|
44
|
-
await page.setExtraHTTPHeaders({
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
Connection: 'keep-alive',
|
|
63
|
+
await page.setViewport({
|
|
64
|
+
width: 3072,
|
|
65
|
+
height: 2048,
|
|
66
|
+
deviceScaleFactor: 2,
|
|
67
|
+
isMobile: false,
|
|
68
|
+
hasTouch: false,
|
|
69
|
+
isLandscape: true,
|
|
52
70
|
});
|
|
53
|
-
|
|
54
|
-
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
|
|
55
|
-
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
|
|
56
|
-
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
|
|
57
|
-
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
|
|
58
|
-
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
|
|
59
|
-
] as const;
|
|
60
|
-
|
|
71
|
+
await page.setExtraHTTPHeaders(headers);
|
|
61
72
|
await page.setUserAgent(
|
|
62
73
|
userAgents[Math.floor(Math.random() * userAgents.length)] || userAgents[0]
|
|
63
74
|
);
|
|
@@ -68,6 +79,33 @@ export async function getHtmlWithPuppeteer(
|
|
|
68
79
|
timeout: 30000,
|
|
69
80
|
});
|
|
70
81
|
|
|
82
|
+
if (framework.vendor === 'docusaurus') {
|
|
83
|
+
await page.evaluate(() => {
|
|
84
|
+
const clickMenuItems = (parentElement: Element | Document = document) => {
|
|
85
|
+
const menuItems = parentElement.getElementsByClassName('menu__link--sublist');
|
|
86
|
+
|
|
87
|
+
for (const item of menuItems) {
|
|
88
|
+
const clickEvent = new MouseEvent('click', {
|
|
89
|
+
bubbles: true,
|
|
90
|
+
cancelable: true,
|
|
91
|
+
view: window,
|
|
92
|
+
});
|
|
93
|
+
item.dispatchEvent(clickEvent);
|
|
94
|
+
|
|
95
|
+
const parentLi = item.parentElement;
|
|
96
|
+
if (parentLi) {
|
|
97
|
+
const nestedUl = parentLi.querySelector('ul');
|
|
98
|
+
if (nestedUl) {
|
|
99
|
+
clickMenuItems(nestedUl);
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
};
|
|
104
|
+
|
|
105
|
+
clickMenuItems();
|
|
106
|
+
});
|
|
107
|
+
}
|
|
108
|
+
|
|
71
109
|
const content = await exponentialBackoff(() => page.content());
|
|
72
110
|
await page.close();
|
|
73
111
|
return content;
|