@mintlify/scraping 4.0.28 → 4.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,9 @@ import { write } from './file.js';
7
7
  import { log } from './log.js';
8
8
  import { fetchImage } from './network.js';
9
9
  export async function downloadImage(src, rootPath) {
10
+ if (src.startsWith('data:image/')) {
11
+ return { success: true, data: [src, src] };
12
+ }
10
13
  try {
11
14
  let filename = await writeImageToFile(src, rootPath);
12
15
  filename = filename.replace(process.cwd(), '');
@@ -1 +1 @@
1
- {"version":3,"file":"images.js","sourceRoot":"","sources":["../../src/utils/images.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAE1C,OAAO,EAAE,0BAA0B,EAAE,MAAM,iBAAiB,CAAC;AAE7D,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAClD,OAAO,EAAE,KAAK,EAAE,MAAM,WAAW,CAAC;AAClC,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAC/B,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAE1C,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,GAAW,EACX,QAAgB;IAEhB,IAAI,CAAC;QACH,IAAI,QAAQ,GAAG,MAAM,gBAAgB,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;QACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,EAAE,CAAC,CAAC;QAE/C,MAAM,iBAAiB,GAAG,QAAQ,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,CAAC,MAAM,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;QACxF,GAAG,CAAC,GAAG,iBAAiB,iCAAiC,EAAE,SAAS,CAAC,CAAC;QAEtE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,GAAG,EAAE,QAAQ,CAAC,EAAE,CAAC;IAClD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,OAAO,EAAE,CAAC;QACpD,CAAC;aAAM,CAAC;YACN,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,OAAO,EAAE,GAAG,GAAG,qDAAqD;aACrE,CAAC;QACJ,CAAC;IACH,CAAC;AACH,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,GAAW,EAAE,QAAgB;IAC3D,MAAM,QAAQ,GAAG,0BAA0B,CAAC,GAAG,CAAC,CAAC;IACjD,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;IAC3C,MAAM,iBAAiB,GAAG,QAAQ,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,CAAC,MAAM,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;IAExF,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CAAC,GAAG,iBAAiB,iCAAiC,CAAC,CAAC;IACzE,CAAC;IAED,IAAI,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC1B,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,IAAI,CAAC;QACH,SAAS,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACrD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,GAAG,SAAS,+BAA+B,CAAC,CAAC;IAC/D,CAAC;IAED,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,CAAC;QACxC,KAAK,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;QAC5B,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,GAAG,iBAAiB,yCAAyC,YAAY,EAAE,CAAC,CAAC;IAC/F,CAAC;AACH,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,GAAW;IACzC,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QAC5B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,GAAG,GAAG,gBAAgB,CAAC,GAAG,CAAC,CAAC;IAClC,IAAI,GAAG,IAAI,CAAC,0BAA0B,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QACrD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,yBAAyB,CAAC,GAAW,EAAE,GAAW;IAChE,MAAM,mBAAmB,GAAG,GAAG,CAAC,OAAO,CAAC,IAAI,GAAG,EAAE,CAAC,GAAG,IAAI,GAAG,EAAE,CAAC,MAAM,CAAC;IACtE,OAAO,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,mBAAmB,CAAC,CAAC;AAC3C,CAAC;AAED,MAAM,UAAU,0BAA0B,CAAC,GAAW;IACpD,IAAI,QAAQ,GAAG,EAAE,CAAC;IAClB,IAAI,GAAG,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;QAClC,KAAK,MAAM,GAAG,IAAI,0BAA0B,EAAE,CAAC;YAC7C,IAAI,GAAG,CAAC,QAAQ,CAAC,IAAI,GAAG,EAAE,CAAC,EAAE,CAAC;gBAC5B,QAAQ,GAAG,yBAAyB,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;YACjD,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;YAC3B,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;QAC9B,CAAC;QACD,QAAQ;YACN,kBAAkB,CAChB,GAAG;iBACA,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE;iBACd,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE;iBACd,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC,CAC7B,CAAC,OAAO,CAAC,sCAAsC,EAAE,GAAG,CAAC,IAAI,OAAO,CAAC;QACpE,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,OAAO,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AACpD,CAAC"}
1
+ {"version":3,"file":"images.js","sourceRoot":"","sources":["../../src/utils/images.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAE1C,OAAO,EAAE,0BAA0B,EAAE,MAAM,iBAAiB,CAAC;AAE7D,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAClD,OAAO,EAAE,KAAK,EAAE,MAAM,WAAW,CAAC;AAClC,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAC/B,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAE1C,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,GAAW,EACX,QAAgB;IAEhB,IAAI,GAAG,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;IAC7C,CAAC;IACD,IAAI,CAAC;QACH,IAAI,QAAQ,GAAG,MAAM,gBAAgB,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;QACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,EAAE,CAAC,CAAC;QAE/C,MAAM,iBAAiB,GAAG,QAAQ,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,CAAC,MAAM,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;QACxF,GAAG,CAAC,GAAG,iBAAiB,iCAAiC,EAAE,SAAS,CAAC,CAAC;QAEtE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,GAAG,EAAE,QAAQ,CAAC,EAAE,CAAC;IAClD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,OAAO,EAAE,CAAC;QACpD,CAAC;aAAM,CAAC;YACN,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,OAAO,EAAE,GAAG,GAAG,qDAAqD;aACrE,CAAC;QACJ,CAAC;IACH,CAAC;AACH,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,GAAW,EAAE,QAAgB;IAC3D,MAAM,QAAQ,GAAG,0BAA0B,CAAC,GAAG,CAAC,CAAC;IACjD,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;IAC3C,MAAM,iBAAiB,GAAG,QAAQ,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,CAAC,MAAM,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;IAExF,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CAAC,GAAG,iBAAiB,iCAAiC,CAAC,CAAC;IACzE,CAAC;IAED,IAAI,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC1B,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,IAAI,CAAC;QACH,SAAS,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACrD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,GAAG,SAAS,+BAA+B,CAAC,CAAC;IAC/D,CAAC;IAED,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,CAAC;QACxC,KAAK,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;QAC5B,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,GAAG,iBAAiB,yCAAyC,YAAY,EAAE,CAAC,CAAC;IAC/F,CAAC;AACH,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,GAAW;IACzC,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QAC5B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,GAAG,GAAG,gBAAgB,CAAC,GAAG,CAAC,CAAC;IAClC,IAAI,GAAG,IAAI,CAAC,0BAA0B,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QACrD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,yBAAyB,CAAC,GAAW,EAAE,GAAW;IAChE,MAAM,mBAAmB,GAAG,GAAG,CAAC,OAAO,CAAC,IAAI,GAAG,EAAE,CAAC,GAAG,IAAI,GAAG,EAAE,CAAC,MAAM,CAAC;IACtE,OAAO,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,mBAAmB,CAAC,CAAC;AAC3C,CAAC;AAED,MAAM,UAAU,0BAA0B,CAAC,GAAW;IACpD,IAAI,QAAQ,GAAG,EAAE,CAAC;IAClB,IAAI,GAAG,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;QAClC,KAAK,MAAM,GAAG,IAAI,0BAA0B,EAAE,CAAC;YAC7C,IAAI,GAAG,CAAC,QAAQ,CAAC,IAAI,GAAG,EAAE,CAAC,EAAE,CAAC;gBAC5B,QAAQ,GAAG,yBAAyB,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;YACjD,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;YAC3B,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;QAC9B,CAAC;QACD,QAAQ;YACN,kBAAkB,CAChB,GAAG;iBACA,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE;iBACd,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE;iBACd,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC,CAC7B,CAAC,OAAO,CAAC,sCAAsC,EAAE,GAAG,CAAC,IAAI,OAAO,CAAC;QACpE,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,OAAO,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AACpD,CAAC"}
@@ -1,6 +1,21 @@
1
1
  import { launch } from 'puppeteer';
2
+ import { framework } from './detectFramework.js';
2
3
  import { getErrorMessage } from './errors.js';
3
4
  import { log } from './log.js';
5
+ const userAgents = [
6
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
7
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
8
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
9
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
10
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
11
+ ];
12
+ const headers = {
13
+ 'Accept-Language': 'en-US,en;q=0.9',
14
+ Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
15
+ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
16
+ 'Accept-Encoding': 'gzip, deflate, br, zstd',
17
+ Connection: 'keep-alive',
18
+ };
4
19
  async function exponentialBackoff(operation, retries = 3, delay = 1000, factor = 2) {
5
20
  try {
6
21
  return await operation();
@@ -31,26 +46,44 @@ export async function startPuppeteer() {
31
46
  export async function getHtmlWithPuppeteer(browser, url) {
32
47
  try {
33
48
  const page = await browser.newPage();
34
- await page.setExtraHTTPHeaders({
35
- 'Accept-Language': 'en-US,en;q=0.9',
36
- Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
37
- 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
38
- 'Accept-Encoding': 'gzip, deflate, br, zstd',
39
- Connection: 'keep-alive',
49
+ await page.setViewport({
50
+ width: 3072,
51
+ height: 2048,
52
+ deviceScaleFactor: 2,
53
+ isMobile: false,
54
+ hasTouch: false,
55
+ isLandscape: true,
40
56
  });
41
- const userAgents = [
42
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
43
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
44
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
45
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
46
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
47
- ];
57
+ await page.setExtraHTTPHeaders(headers);
48
58
  await page.setUserAgent(userAgents[Math.floor(Math.random() * userAgents.length)] || userAgents[0]);
49
59
  await page.setJavaScriptEnabled(true);
50
60
  await page.goto(url.toString(), {
51
61
  waitUntil: 'networkidle2',
52
62
  timeout: 30000,
53
63
  });
64
+ if (framework.vendor === 'docusaurus') {
65
+ await page.evaluate(() => {
66
+ const clickMenuItems = (parentElement = document) => {
67
+ const menuItems = parentElement.getElementsByClassName('menu__link--sublist');
68
+ for (const item of menuItems) {
69
+ const clickEvent = new MouseEvent('click', {
70
+ bubbles: true,
71
+ cancelable: true,
72
+ view: window,
73
+ });
74
+ item.dispatchEvent(clickEvent);
75
+ const parentLi = item.parentElement;
76
+ if (parentLi) {
77
+ const nestedUl = parentLi.querySelector('ul');
78
+ if (nestedUl) {
79
+ clickMenuItems(nestedUl);
80
+ }
81
+ }
82
+ }
83
+ };
84
+ clickMenuItems();
85
+ });
86
+ }
54
87
  const content = await exponentialBackoff(() => page.content());
55
88
  await page.close();
56
89
  return content;
@@ -1 +1 @@
1
- {"version":3,"file":"network.js","sourceRoot":"","sources":["../../src/utils/network.ts"],"names":[],"mappings":"AAAA,OAAO,EAAW,MAAM,EAAE,MAAM,WAAW,CAAC;AAE5C,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAE/B,KAAK,UAAU,kBAAkB,CAC/B,SAA2B,EAC3B,UAAkB,CAAC,EACnB,QAAgB,IAAI,EACpB,SAAiB,CAAC;IAElB,IAAI,CAAC;QACH,OAAO,MAAM,SAAS,EAAE,CAAC;IAC3B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;YAChB,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;YAC3D,OAAO,kBAAkB,CAAC,SAAS,EAAE,OAAO,GAAG,CAAC,EAAE,KAAK,GAAG,MAAM,EAAE,MAAM,CAAC,CAAC;QAC5E,CAAC;aAAM,CAAC;YACN,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc;IAClC,IAAI,CAAC;QACH,OAAO,MAAM,MAAM,CAAC;YAClB,QAAQ,EAAE,IAAI;YACd,iBAAiB,EAAE,IAAI;SACxB,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,GAAG,CAAC,wCAAwC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC/D,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAgB,EAChB,GAAiB;IAEjB,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,MAAM,IAAI,CAAC,mBAAmB,CAAC;YAC7B,iBAAiB,EAAE,gBAAgB;YACnC,MAAM,EACJ,yIAAyI;YAC3I,YAAY,EACV,uHAAuH;YACzH,iBAAiB,EAAE,yBAAyB;YAC5C,UAAU,EAAE,YAAY;SACzB,CAAC,CAAC;QACH,MAAM,UAAU,GAAG;YACjB,uHAAuH;YACvH,uHAAuH;YACvH,uHAAuH;YACvH,uHAAuH;YACvH,uHAAuH;SAC/G,CAAC;QAEX,MAAM,IAAI,CAAC,YAAY,CACrB,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,IAAI,UAAU,CAAC,CAAC,CAAC,CAC3E,CAAC;QACF,MAAM,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAAC;QAEtC,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE;YAC9B,SAAS,EAAE,cAAc;YACzB,OAAO,EAAE,KAAK;SACf,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;QAC/D,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;QACnB,OAAO,OAAO,CAAC;IACjB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,yCAAyC,YAAY,EAAE,CAAC,CAAC;IAC3E,CAAC;AACH,CAAC;AAED,KAAK,UAAU,iBAAiB,CAAC,GAAiB;IAChD,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;QAC7B,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;QACrD,CAAC;QACD,OAAO,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;IAC1B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,mCAAmC,YAAY,EAAE,CAAC,CAAC;IACrE,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,GAAiB,EACjB,UAA+B,SAAS;IAExC,IAAI,CAAC;QACH,IAAI,GAAG,GAAuB,SAAS,CAAC;QACxC,IAAI,OAAO,EAAE,CAAC;YACZ,GAAG,GAAG,MAAM,oBAAoB,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QACjD,CAAC;aAAM,CAAC;YACN,GAAG,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,CAAC,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC;QAC/D,CAAC;QACD,IAAI,GAAG;YAAE,OAAO,GAAG,CAAC;QACpB,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;IAC/C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,CAAC,QAAQ,EAAE,GAAG,YAAY,EAAE,CAAC,CAAC;IAChF,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,GAAW;IAC1C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC;QACvD,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;QACrD,CAAC;QAED,MAAM,WAAW,GAAG,MAAM,GAAG,CAAC,WAAW,EAAE,CAAC;QAC5C,MAAM,SAAS,GAAG,IAAI,UAAU,CAAC,WAAW,CAAC,CAAC;QAE9C,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,0CAA0C,YAAY,EAAE,CAAC,CAAC;IAClF,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"network.js","sourceRoot":"","sources":["../../src/utils/network.ts"],"names":[],"mappings":"AAAA,OAAO,EAAW,MAAM,EAAE,MAAM,WAAW,CAAC;AAE5C,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAE/B,MAAM,UAAU,GAAG;IACjB,uHAAuH;IACvH,uHAAuH;IACvH,uHAAuH;IACvH,uHAAuH;IACvH,uHAAuH;CAC/G,CAAC;AAEX,MAAM,OAAO,GAAG;IACd,iBAAiB,EAAE,gBAAgB;IACnC,MAAM,EACJ,yIAAyI;IAC3I,YAAY,EACV,uHAAuH;IACzH,iBAAiB,EAAE,yBAAyB;IAC5C,UAAU,EAAE,YAAY;CAChB,CAAC;AAEX,KAAK,UAAU,kBAAkB,CAC/B,SAA2B,EAC3B,UAAkB,CAAC,EACnB,QAAgB,IAAI,EACpB,SAAiB,CAAC;IAElB,IAAI,CAAC;QACH,OAAO,MAAM,SAAS,EAAE,CAAC;IAC3B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;YAChB,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;YAC3D,OAAO,kBAAkB,CAAC,SAAS,EAAE,OAAO,GAAG,CAAC,EAAE,KAAK,GAAG,MAAM,EAAE,MAAM,CAAC,CAAC;QAC5E,CAAC;aAAM,CAAC;YACN,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc;IAClC,IAAI,CAAC;QACH,OAAO,MAAM,MAAM,CAAC;YAClB,QAAQ,EAAE,IAAI;YACd,iBAAiB,EAAE,IAAI;SACxB,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,GAAG,CAAC,wCAAwC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC/D,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAgB,EAChB,GAAiB;IAEjB,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,MAAM,IAAI,CAAC,WAAW,CAAC;YACrB,KAAK,EAAE,IAAI;YACX,MAAM,EAAE,IAAI;YACZ,iBAAiB,EAAE,CAAC;YACpB,QAAQ,EAAE,KAAK;YACf,QAAQ,EAAE,KAAK;YACf,WAAW,EAAE,IAAI;SAClB,CAAC,CAAC;QACH,MAAM,IAAI,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC;QACxC,MAAM,IAAI,CAAC,YAAY,CACrB,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,IAAI,UAAU,CAAC,CAAC,CAAC,CAC3E,CAAC;QACF,MAAM,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAAC;QAEtC,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE;YAC9B,SAAS,EAAE,cAAc;YACzB,OAAO,EAAE,KAAK;SACf,CAAC,CAAC;QAEH,IAAI,SAAS,CAAC,MAAM,KAAK,YAAY,EAAE,CAAC;YACtC,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;gBACvB,MAAM,cAAc,GAAG,CAAC,gBAAoC,QAAQ,EAAE,EAAE;oBACtE,MAAM,SAAS,GAAG,aAAa,CAAC,sBAAsB,CAAC,qBAAqB,CAAC,CAAC;oBAE9E,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;wBAC7B,MAAM,UAAU,GAAG,IAAI,UAAU,CAAC,OAAO,EAAE;4BACzC,OAAO,EAAE,IAAI;4BACb,UAAU,EAAE,IAAI;4BAChB,IAAI,EAAE,MAAM;yBACb,CAAC,CAAC;wBACH,IAAI,CAAC,aAAa,CAAC,UAAU,CAAC,CAAC;wBAE/B,MAAM,QAAQ,GAAG,IAAI,CAAC,aAAa,CAAC;wBACpC,IAAI,QAAQ,EAAE,CAAC;4BACb,MAAM,QAAQ,GAAG,QAAQ,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;4BAC9C,IAAI,QAAQ,EAAE,CAAC;gCACb,cAAc,CAAC,QAAQ,CAAC,CAAC;4BAC3B,CAAC;wBACH,CAAC;oBACH,CAAC;gBACH,CAAC,CAAC;gBAEF,cAAc,EAAE,CAAC;YACnB,CAAC,CAAC,CAAC;QACL,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;QAC/D,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;QACnB,OAAO,OAAO,CAAC;IACjB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,yCAAyC,YAAY,EAAE,CAAC,CAAC;IAC3E,CAAC;AACH,CAAC;AAED,KAAK,UAAU,iBAAiB,CAAC,GAAiB;IAChD,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;QAC7B,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;QACrD,CAAC;QACD,OAAO,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;IAC1B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,mCAAmC,YAAY,EAAE,CAAC,CAAC;IACrE,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,GAAiB,EACjB,UAA+B,SAAS;IAExC,IAAI,CAAC;QACH,IAAI,GAAG,GAAuB,SAAS,CAAC;QACxC,IAAI,OAAO,EAAE,CAAC;YACZ,GAAG,GAAG,MAAM,oBAAoB,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QACjD,CAAC;aAAM,CAAC;YACN,GAAG,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,CAAC,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC;QAC/D,CAAC;QACD,IAAI,GAAG;YAAE,OAAO,GAAG,CAAC;QACpB,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;IAC/C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,CAAC,QAAQ,EAAE,GAAG,YAAY,EAAE,CAAC,CAAC;IAChF,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,GAAW;IAC1C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC;QACvD,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;QACrD,CAAC;QAED,MAAM,WAAW,GAAG,MAAM,GAAG,CAAC,WAAW,EAAE,CAAC;QAC5C,MAAM,SAAS,GAAG,IAAI,UAAU,CAAC,WAAW,CAAC,CAAC;QAE9C,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,0CAA0C,YAAY,EAAE,CAAC,CAAC;IAClF,CAAC;AACH,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mintlify/scraping",
3
- "version": "4.0.28",
3
+ "version": "4.0.30",
4
4
  "description": "Scrape documentation frameworks to Mintlify docs",
5
5
  "engines": {
6
6
  "node": ">=18.0.0"
@@ -38,7 +38,7 @@
38
38
  "format:check": "prettier . --check"
39
39
  },
40
40
  "dependencies": {
41
- "@mintlify/common": "1.0.187",
41
+ "@mintlify/common": "1.0.188",
42
42
  "@mintlify/openapi-parser": "^0.0.7",
43
43
  "fs-extra": "^11.1.1",
44
44
  "hast": "^1.0.0",
@@ -60,10 +60,10 @@
60
60
  },
61
61
  "devDependencies": {
62
62
  "@mintlify/eslint-config-typescript": "1.0.13",
63
- "@mintlify/models": "0.0.146",
63
+ "@mintlify/models": "0.0.147",
64
64
  "@mintlify/prettier-config": "1.0.4",
65
65
  "@mintlify/ts-config": "2.0.2",
66
- "@mintlify/validation": "0.1.218",
66
+ "@mintlify/validation": "0.1.219",
67
67
  "@trivago/prettier-plugin-sort-imports": "^4.2.1",
68
68
  "@tsconfig/recommended": "1.x",
69
69
  "@types/node": "^18.7.13",
@@ -78,5 +78,5 @@
78
78
  "typescript": "^5.5.3",
79
79
  "vitest": "^2.0.4"
80
80
  },
81
- "gitHead": "e8725b5e25908dc7d301bb50df5848017aac6bbd"
81
+ "gitHead": "03182511296c892727bca1ccb672bbd8deebe4c5"
82
82
  }
package/src/cli.ts CHANGED
@@ -103,9 +103,11 @@ async function page(url: string) {
103
103
  } else {
104
104
  log(result.message);
105
105
  }
106
+ process.exit(0);
106
107
  } catch (error) {
107
108
  const errorMessage = getErrorMessage(error);
108
109
  log(errorMessage);
110
+ process.exit(1);
109
111
  }
110
112
  }
111
113
 
@@ -122,8 +124,10 @@ async function site(url: string) {
122
124
  } else {
123
125
  log(result.message);
124
126
  }
127
+ process.exit(0);
125
128
  } catch (error) {
126
129
  const errorMessage = getErrorMessage(error);
127
130
  log(errorMessage);
131
+ process.exit(1);
128
132
  }
129
133
  }
@@ -50,18 +50,19 @@ export function processListItem(
50
50
 
51
51
  const sectionHeader = findFirstChild(node, opts.sectionTagName);
52
52
  const childList = findFirstChild(node, opts.childListTagName);
53
+ if (!childList) {
54
+ return linkHref;
55
+ }
56
+
53
57
  let title = opts.title;
54
58
  if (!title) {
55
- title = getText(link) || getText(sectionHeader) || '';
56
59
  if (framework.vendor === 'readme') {
57
60
  title = getText(sectionHeader) || getText(link) || '';
61
+ } else {
62
+ title = getText(link) || getText(sectionHeader) || '';
58
63
  }
59
64
  }
60
65
 
61
- if (!childList) {
62
- return linkHref;
63
- }
64
-
65
66
  let childEntries = retrieveNavItems(childList);
66
67
  const newLink = childEntries.find(
67
68
  (child) => typeof child === 'string' && child.startsWith(linkHref)
@@ -49,8 +49,9 @@ export function retrieveNavItems(rootNode: Element): Array<NavigationEntry> {
49
49
  node.children[0].tagName === 'div' &&
50
50
  node.children[0].children.filter((child) => child.type === 'text').length ===
51
51
  node.children[0].children.length
52
- )
52
+ ) {
53
53
  title = findTitle(node.children[0], { delete: false });
54
+ }
54
55
 
55
56
  if (
56
57
  framework.vendor === 'readme' &&
@@ -9,7 +9,7 @@ import { retrieveRootNavElement } from '../nav/root.js';
9
9
  import type { Result } from '../types/result.js';
10
10
  import { detectFramework, framework } from '../utils/detectFramework.js';
11
11
  import { logErrorResults } from '../utils/errors.js';
12
- import { startPuppeteer } from '../utils/network.js';
12
+ import { fetchPageHtml, startPuppeteer } from '../utils/network.js';
13
13
  import { INDEX_NAMES, iterateThroughReservedNames } from '../utils/reservedNames.js';
14
14
  import { removeTrailingSlash, removeLeadingSlash } from '../utils/strings.js';
15
15
  import { downloadColors } from './color.js';
@@ -32,6 +32,13 @@ export async function scrapeSite(
32
32
 
33
33
  detectFramework(hast);
34
34
 
35
+ if (framework.vendor === 'docusaurus') {
36
+ const browser = await startPuppeteer();
37
+ html = await fetchPageHtml(url, browser);
38
+ hast = htmlToHast(html);
39
+ if (browser) await browser.close();
40
+ }
41
+
35
42
  const sidebar = retrieveRootNavElement(hast);
36
43
  if (!sidebar) return { success: false, message: `${url.toString()}: ${NAV_FAILURE_MSG}` };
37
44
 
@@ -118,8 +125,9 @@ export async function scrapeSite(
118
125
 
119
126
  navItems.forEach((navItem, index) => {
120
127
  if (typeof navItem !== 'string') return;
121
- const name = navItem
122
- .split('-')
128
+ const lastItemInPath = navItem.split('/').pop() || navItem;
129
+ const name = lastItemInPath
130
+ .split(/[-_]/)
123
131
  .map((str) => (str[0] ? `${str[0].toUpperCase()}${str.substring(1)}` : str))
124
132
  .join(' ');
125
133
 
@@ -13,6 +13,9 @@ export async function downloadImage(
13
13
  src: string,
14
14
  rootPath: string
15
15
  ): Promise<Result<[string, string]>> {
16
+ if (src.startsWith('data:image/')) {
17
+ return { success: true, data: [src, src] };
18
+ }
16
19
  try {
17
20
  let filename = await writeImageToFile(src, rootPath);
18
21
  filename = filename.replace(process.cwd(), '');
@@ -1,8 +1,27 @@
1
1
  import { Browser, launch } from 'puppeteer';
2
2
 
3
+ import { framework } from './detectFramework.js';
3
4
  import { getErrorMessage } from './errors.js';
4
5
  import { log } from './log.js';
5
6
 
7
+ const userAgents = [
8
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
9
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
10
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
11
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
12
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
13
+ ] as const;
14
+
15
+ const headers = {
16
+ 'Accept-Language': 'en-US,en;q=0.9',
17
+ Accept:
18
+ 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
19
+ 'User-Agent':
20
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
21
+ 'Accept-Encoding': 'gzip, deflate, br, zstd',
22
+ Connection: 'keep-alive',
23
+ } as const;
24
+
6
25
  async function exponentialBackoff<T>(
7
26
  operation: () => Promise<T>,
8
27
  retries: number = 3,
@@ -41,23 +60,15 @@ export async function getHtmlWithPuppeteer(
41
60
  try {
42
61
  const page = await browser.newPage();
43
62
 
44
- await page.setExtraHTTPHeaders({
45
- 'Accept-Language': 'en-US,en;q=0.9',
46
- Accept:
47
- 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
48
- 'User-Agent':
49
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
50
- 'Accept-Encoding': 'gzip, deflate, br, zstd',
51
- Connection: 'keep-alive',
63
+ await page.setViewport({
64
+ width: 3072,
65
+ height: 2048,
66
+ deviceScaleFactor: 2,
67
+ isMobile: false,
68
+ hasTouch: false,
69
+ isLandscape: true,
52
70
  });
53
- const userAgents = [
54
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
55
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
56
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
57
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
58
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
59
- ] as const;
60
-
71
+ await page.setExtraHTTPHeaders(headers);
61
72
  await page.setUserAgent(
62
73
  userAgents[Math.floor(Math.random() * userAgents.length)] || userAgents[0]
63
74
  );
@@ -68,6 +79,33 @@ export async function getHtmlWithPuppeteer(
68
79
  timeout: 30000,
69
80
  });
70
81
 
82
+ if (framework.vendor === 'docusaurus') {
83
+ await page.evaluate(() => {
84
+ const clickMenuItems = (parentElement: Element | Document = document) => {
85
+ const menuItems = parentElement.getElementsByClassName('menu__link--sublist');
86
+
87
+ for (const item of menuItems) {
88
+ const clickEvent = new MouseEvent('click', {
89
+ bubbles: true,
90
+ cancelable: true,
91
+ view: window,
92
+ });
93
+ item.dispatchEvent(clickEvent);
94
+
95
+ const parentLi = item.parentElement;
96
+ if (parentLi) {
97
+ const nestedUl = parentLi.querySelector('ul');
98
+ if (nestedUl) {
99
+ clickMenuItems(nestedUl);
100
+ }
101
+ }
102
+ }
103
+ };
104
+
105
+ clickMenuItems();
106
+ });
107
+ }
108
+
71
109
  const content = await exponentialBackoff(() => page.content());
72
110
  await page.close();
73
111
  return content;