@mintlify/scraping 4.0.589 → 4.0.591

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -65,6 +65,52 @@ export async function getHtmlWithPuppeteer(browser, url) {
65
65
  clickItems(document);
66
66
  });
67
67
  }
68
+ if (framework.vendor === 'gitbook') {
69
+ for (let round = 0; round < 10; round++) {
70
+ const clickedCount = await page.evaluate(() => {
71
+ const tocEl = document.getElementById('table-of-contents');
72
+ if (!tocEl)
73
+ return 0;
74
+ let count = 0;
75
+ const items = tocEl.querySelectorAll('li.page-document-item');
76
+ items.forEach((li) => {
77
+ const btn = li.querySelector(':scope > a button');
78
+ if (!btn || !(btn instanceof HTMLElement))
79
+ return;
80
+ const anchor = btn.closest('a');
81
+ if (!anchor)
82
+ return;
83
+ const sibling = anchor.nextElementSibling;
84
+ if (sibling instanceof HTMLElement &&
85
+ sibling.style.opacity === '1' &&
86
+ sibling.style.height === 'auto')
87
+ return;
88
+ btn.click();
89
+ count++;
90
+ });
91
+ return count;
92
+ });
93
+ if (clickedCount === 0)
94
+ break;
95
+ await page
96
+ .waitForFunction(() => {
97
+ const tocEl = document.getElementById('table-of-contents');
98
+ if (!tocEl)
99
+ return true;
100
+ const anchors = tocEl.querySelectorAll('li.page-document-item > a');
101
+ return Array.from(anchors).every((a) => {
102
+ const btn = a.querySelector('button');
103
+ if (!btn)
104
+ return true;
105
+ const sibling = a.nextElementSibling;
106
+ if (!sibling || !(sibling instanceof HTMLElement))
107
+ return true;
108
+ return sibling.style.opacity === '1' && sibling.style.height === 'auto';
109
+ });
110
+ }, { timeout: 5000 })
111
+ .catch(() => { });
112
+ }
113
+ }
68
114
  const content = await exponentialBackoff(() => page.content());
69
115
  await page.close();
70
116
  return content;
@@ -1 +1 @@
1
- {"version":3,"file":"network.js","sourceRoot":"","sources":["../../src/utils/network.ts"],"names":[],"mappings":"AAAA,OAAO,EAA6B,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AACnG,OAAO,IAAI,MAAM,SAAS,CAAC;AAE3B,OAAO,EAAW,MAAM,EAAE,MAAM,WAAW,CAAC;AAE5C,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAE/B,MAAM,UAAU,GAAG;IACjB,uHAAuH;IACvH,uHAAuH;IACvH,uHAAuH;IACvH,uHAAuH;IACvH,uHAAuH;CAC/G,CAAC;AAEX,MAAM,OAAO,GAAG;IACd,iBAAiB,EAAE,gBAAgB;IACnC,MAAM,EACJ,yIAAyI;IAC3I,YAAY,EACV,uHAAuH;IACzH,iBAAiB,EAAE,yBAAyB;IAC5C,UAAU,EAAE,YAAY;CAChB,CAAC;AAEX,MAAM,CAAC,KAAK,UAAU,cAAc;IAClC,IAAI,CAAC;QACH,OAAO,MAAM,MAAM,CAAC;YAClB,QAAQ,EAAE,IAAI;YACd,iBAAiB,EAAE,IAAI;SACxB,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,GAAG,CAAC,wCAAwC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC/D,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAgB,EAChB,GAAiB;IAEjB,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,MAAM,IAAI,CAAC,WAAW,CAAC;YACrB,KAAK,EAAE,IAAI;YACX,MAAM,EAAE,IAAI;YACZ,iBAAiB,EAAE,CAAC;YACpB,QAAQ,EAAE,KAAK;YACf,QAAQ,EAAE,KAAK;YACf,WAAW,EAAE,IAAI;SAClB,CAAC,CAAC;QACH,MAAM,IAAI,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC;QACxC,MAAM,IAAI,CAAC,YAAY,CACrB,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,IAAI,UAAU,CAAC,CAAC,CAAC,CAC3E,CAAC;QACF,MAAM,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAAC;QAEtC,MAAM,kBAAkB,CAAC,GAAG,EAAE,CAC5B,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE;YACxB,SAAS,EAAE,cAAc;YACzB,OAAO,EAAE,KAAK;SACf,CAAC,CACH,CAAC;QAEF,IAAI,SAAS,CAAC,MAAM,KAAK,YAAY,EAAE,CAAC;YACtC,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;gBACvB,QAAQ,CAAC,gBAAgB,CACvB,OAAO,EACP,CAAC,CAAC,EAAE,EAAE;oBACJ,IAAI,CAAC,CAAC,MAAM,YAAY,OAAO,IAAI,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,QAAQ,CAAC,qBAAqB,CAAC;wBACnF,CAAC,CAAC,cAAc,EAAE,CAAC;gBACvB,CAAC,EACD,IAAI,CACL,CAAC;gBAEF,SAAS,UAAU,CAAC,EAA0B;oBAC5C,MAAM,SAAS,GAAG,EAAE,CAAC,sBAAsB,CACzC,qBAAqB,CACW,CAAC;oBACnC,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;wBAC7B,IAAI,CAAC,KAAK,EAAE,CAAC;wBACb,UAAU,CAAC,IAAI,CAAC,CAAC;oBACnB,CAAC;gBACH,CAAC;gBACD,UAAU,CAAC,QAAQ,CAAC,CAAC;YACvB,CAAC,CAAC,CAAC;QACL,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;QAC/D,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;QACnB,OAAO,OAAO,CAAC;IACjB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,yCAAyC,YAAY,EAAE,CAAC,CAAC;IAC3E,CAAC;AACH,CAAC;AAED,KAAK,UAAU,iBAAiB,CAAC,GAAiB;IAChD,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;QAC7B,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;QACrD,CAAC;QACD,OAAO,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;IAC1B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,mCAAmC,YAAY,EAAE,CAAC,CAAC;IACrE,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,GAAiB,EACjB,UAA+B,SAAS;IAExC,IAAI,CAAC;QACH,IAAI,GAAG,GAAuB,SAAS,CAAC;QACxC,IAAI,OAAO,EAAE,CAAC;YACZ,GAAG,GAAG,MAAM,oBAAoB,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QACjD,CAAC;aAAM,CAAC;YACN,GAAG,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,CAAC,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC;QAC/D,CAAC;QACD,IAAI,GAAG;YAAE,OAAO,GAAG,CAAC;QACpB,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;IAC/C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,CAAC,QAAQ,EAAE,GAAG,YAAY,EAAE,CAAC,CAAC;IAChF,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,GAAW;IAC1C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC;QACvD,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;QACrD,CAAC;QAED,MAAM,WAAW,GAAG,MAAM,GAAG,CAAC,WAAW,EAAE,CAAC;QAC5C,MAAM,SAAS,GAAG,IAAI,UAAU,CAAC,WAAW,CAAC,CAAC;QAE9C,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,0CAA0C,YAAY,EAAE,CAAC,CAAC;IAClF,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,GAAQ;IACzC,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,kBAAkB,CAAC,KAAK,IAAI,EAAE;YAC9C,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;YAClC,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,IAAI,KAAK,CAAC,GAAG,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;YAC/D,CAAC;YACD,OAAO,QAAQ,CAAC;QAClB,CAAC,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;QAC9B,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAqB,CAAC;IAC7C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,iDAAiD,YAAY,EAAE,CAAC,CAAC;IACzF,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,GAAQ;IAC1C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,kBAAkB,CAAC,KAAK,IAAI,EAAE;YAC9C,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;YAClC,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,IAAI,KAAK,CAAC,GAAG,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;YAC/D,CAAC;YACD,OAAO,QAAQ,CAAC;QAClB,CAAC,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;QAC9B,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,GAAG,MAAM,gBAAgB,CAAC,IAAI,CAAC,CAAC;QAChE,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,uDAAuD,YAAY,EAAE,CAAC,CAAC;QAC/F,CAAC;QACD,OAAO,QAAQ,CAAC;IAClB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,qDAAqD,YAAY,EAAE,CAAC,CAAC;IAC7F,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"network.js","sourceRoot":"","sources":["../../src/utils/network.ts"],"names":[],"mappings":"AAAA,OAAO,EAA6B,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AACnG,OAAO,IAAI,MAAM,SAAS,CAAC;AAE3B,OAAO,EAAW,MAAM,EAAE,MAAM,WAAW,CAAC;AAE5C,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAE/B,MAAM,UAAU,GAAG;IACjB,uHAAuH;IACvH,uHAAuH;IACvH,uHAAuH;IACvH,uHAAuH;IACvH,uHAAuH;CAC/G,CAAC;AAEX,MAAM,OAAO,GAAG;IACd,iBAAiB,EAAE,gBAAgB;IACnC,MAAM,EACJ,yIAAyI;IAC3I,YAAY,EACV,uHAAuH;IACzH,iBAAiB,EAAE,yBAAyB;IAC5C,UAAU,EAAE,YAAY;CAChB,CAAC;AAEX,MAAM,CAAC,KAAK,UAAU,cAAc;IAClC,IAAI,CAAC;QACH,OAAO,MAAM,MAAM,CAAC;YAClB,QAAQ,EAAE,IAAI;YACd,iBAAiB,EAAE,IAAI;SACxB,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,GAAG,CAAC,wCAAwC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAC/D,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAgB,EAChB,GAAiB;IAEjB,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QAErC,MAAM,IAAI,CAAC,WAAW,CAAC;YACrB,KAAK,EAAE,IAAI;YACX,MAAM,EAAE,IAAI;YACZ,iBAAiB,EAAE,CAAC;YACpB,QAAQ,EAAE,KAAK;YACf,QAAQ,EAAE,KAAK;YACf,WAAW,EAAE,IAAI;SAClB,CAAC,CAAC;QACH,MAAM,IAAI,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC;QACxC,MAAM,IAAI,CAAC,YAAY,CACrB,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,IAAI,UAAU,CAAC,CAAC,CAAC,CAC3E,CAAC;QACF,MAAM,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAAC;QAEtC,MAAM,kBAAkB,CAAC,GAAG,EAAE,CAC5B,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE;YACxB,SAAS,EAAE,cAAc;YACzB,OAAO,EAAE,KAAK;SACf,CAAC,CACH,CAAC;QAEF,IAAI,SAAS,CAAC,MAAM,KAAK,YAAY,EAAE,CAAC;YACtC,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;gBACvB,QAAQ,CAAC,gBAAgB,CACvB,OAAO,EACP,CAAC,CAAC,EAAE,EAAE;oBACJ,IAAI,CAAC,CAAC,MAAM,YAAY,OAAO,IAAI,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,QAAQ,CAAC,qBAAqB,CAAC;wBACnF,CAAC,CAAC,cAAc,EAAE,CAAC;gBACvB,CAAC,EACD,IAAI,CACL,CAAC;gBAEF,SAAS,UAAU,CAAC,EAA0B;oBAC5C,MAAM,SAAS,GAAG,EAAE,CAAC,sBAAsB,CACzC,qBAAqB,CACW,CAAC;oBACnC,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;wBAC7B,IAAI,CAAC,KAAK,EAAE,CAAC;wBACb,UAAU,CAAC,IAAI,CAAC,CAAC;oBACnB,CAAC;gBACH,CAAC;gBACD,UAAU,CAAC,QAAQ,CAAC,CAAC;YACvB,CAAC,CAAC,CAAC;QACL,CAAC;QAED,IAAI,SAAS,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;YACnC,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,EAAE,EAAE,KAAK,EAAE,EAAE,CAAC;gBACxC,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;oBAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,cAAc,CAAC,mBAAmB,CAAC,CAAC;oBAC3D,IAAI,CAAC,KAAK;wBAAE,OAAO,CAAC,CAAC;oBACrB,IAAI,KAAK,GAAG,CAAC,CAAC;oBACd,MAAM,KAAK,GAAG,KAAK,CAAC,gBAAgB,CAAC,uBAAuB,CAAC,CAAC;oBAC9D,KAAK,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE;wBACnB,MAAM,GAAG,GAAG,EAAE,CAAC,aAAa,CAAC,mBAAmB,CAAC,CAAC;wBAClD,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,GAAG,YAAY,WAAW,CAAC;4BAAE,OAAO;wBAClD,MAAM,MAAM,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;wBAChC,IAAI,CAAC,MAAM;4BAAE,OAAO;wBACpB,MAAM,OAAO,GAAG,MAAM,CAAC,kBAAkB,CAAC;wBAC1C,IACE,OAAO,YAAY,WAAW;4BAC9B,OAAO,CAAC,KAAK,CAAC,OAAO,KAAK,GAAG;4BAC7B,OAAO,CAAC,KAAK,CAAC,MAAM,KAAK,MAAM;4BAE/B,OAAO;wBACT,GAAG,CAAC,KAAK,EAAE,CAAC;wBACZ,KAAK,EAAE,CAAC;oBACV,CAAC,CAAC,CAAC;oBACH,OAAO,KAAK,CAAC;gBACf,CAAC,CAAC,CAAC;gBAEH,IAAI,YAAY,KAAK,CAAC;oBAAE,MAAM;gBAE9B,MAAM,IAAI;qBACP,eAAe,CACd,GAAG,EAAE;oBACH,MAAM,KAAK,GAAG,QAAQ,CAAC,cAAc,CAAC,mBAAmB,CAAC,CAAC;oBAC3D,IAAI,CAAC,KAAK;wBAAE,OAAO,IAAI,CAAC;oBACxB,MAAM,OAAO,GAAG,KAAK,CAAC,gBAAgB,CAAC,2BAA2B,CAAC,CAAC;oBACpE,OAAO,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;wBACrC,MAAM,GAAG,GAAG,CAAC,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;wBACtC,IAAI,CAAC,GAAG;4BAAE,OAAO,IAAI,CAAC;wBACtB,MAAM,OAAO,GAAG,CAAC,CAAC,kBAAkB,CAAC;wBACrC,IAAI,CAAC,OAAO,IAAI,CAAC,CAAC,OAAO,YAAY,WAAW,CAAC;4BAAE,OAAO,IAAI,CAAC;wBAC/D,OAAO,OAAO,CAAC,KAAK,CAAC,OAAO,KAAK,GAAG,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,KAAK,MAAM,CAAC;oBAC1E,CAAC,CAAC,CAAC;gBACL,CAAC,EACD,EAAE,OAAO,EAAE,IAAI,EAAE,CAClB;qBACA,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;YACrB,CAAC;QACH,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;QAC/D,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;QACnB,OAAO,OAAO,CAAC;IACjB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,yCAAyC,YAAY,EAAE,CAAC,CAAC;IAC3E,CAAC;AACH,CAAC;AAED,KAAK,UAAU,iBAAiB,CAAC,GAAiB;IAChD,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;QAC7B,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;QACrD,CAAC;QACD,OAAO,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;IAC1B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,mCAAmC,YAAY,EAAE,CAAC,CAAC;IACrE,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,GAAiB,EACjB,UAA+B,SAAS;IAExC,IAAI,CAAC;QACH,IAAI,GAAG,GAAuB,SAAS,CAAC;QACxC,IAAI,OAAO,EAAE,CAAC;YACZ,GAAG,GAAG,MAAM,oBAAoB,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QACjD,CAAC;aAAM,CAAC;YACN,GAAG,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,CAAC,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC;QAC/D,CAAC;QACD,IAAI,GAAG;YAAE,OAAO,GAAG,CAAC;QACpB,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;IAC/C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,CAAC,QAAQ,EAAE,GAAG,YAAY,EAAE,CAAC,CAAC;IAChF,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,GAAW;IAC1C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC;QACvD,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;QACrD,CAAC;QAED,MAAM,WAAW,GAAG,MAAM,GAAG,CAAC,WAAW,EAAE,CAAC;QAC5C,MAAM,SAAS,GAAG,IAAI,UAAU,CAAC,WAAW,CAAC,CAAC;QAE9C,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,0CAA0C,YAAY,EAAE,CAAC,CAAC;IAClF,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,GAAQ;IACzC,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,kBAAkB,CAAC,KAAK,IAAI,EAAE;YAC9C,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;YAClC,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,IAAI,KAAK,CAAC,GAAG,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;YAC/D,CAAC;YACD,OAAO,QAAQ,CAAC;QAClB,CAAC,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;QAC9B,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAqB,CAAC;IAC7C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,iDAAiD,YAAY,EAAE,CAAC,CAAC;IACzF,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,GAAQ;IAC1C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,kBAAkB,CAAC,KAAK,IAAI,EAAE;YAC9C,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;YAClC,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,IAAI,KAAK,CAAC,GAAG,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;YAC/D,CAAC;YACD,OAAO,QAAQ,CAAC;QAClB,CAAC,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;QAC9B,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,GAAG,MAAM,gBAAgB,CAAC,IAAI,CAAC,CAAC;QAChE,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,uDAAuD,YAAY,EAAE,CAAC,CAAC;QAC/F,CAAC;QACD,OAAO,QAAQ,CAAC;IAClB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,IAAI,KAAK,CAAC,GAAG,GAAG,qDAAqD,YAAY,EAAE,CAAC,CAAC;IAC7F,CAAC;AACH,CAAC"}
@@ -1,2 +1,3 @@
1
1
  export declare function removeTrailingSlash(str: string): string;
2
2
  export declare function removeLeadingSlash(str: string): string;
3
+ export declare function optionallyAddLeadingSlash(str: string): string;
@@ -4,4 +4,7 @@ export function removeTrailingSlash(str) {
4
4
  export function removeLeadingSlash(str) {
5
5
  return str.startsWith('/') ? str.substring(1) : str;
6
6
  }
7
+ export function optionallyAddLeadingSlash(str) {
8
+ return str.startsWith('/') ? str : '/' + str;
9
+ }
7
10
  //# sourceMappingURL=strings.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"strings.js","sourceRoot":"","sources":["../../src/utils/strings.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,mBAAmB,CAAC,GAAW;IAC7C,OAAO,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AACpE,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,GAAW;IAC5C,OAAO,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AACtD,CAAC"}
1
+ {"version":3,"file":"strings.js","sourceRoot":"","sources":["../../src/utils/strings.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,mBAAmB,CAAC,GAAW;IAC7C,OAAO,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AACpE,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,GAAW;IAC5C,OAAO,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AACtD,CAAC;AAED,MAAM,UAAU,yBAAyB,CAAC,GAAW;IACnD,OAAO,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,GAAG,CAAC;AAC/C,CAAC"}
package/bin/utils/text.js CHANGED
@@ -1,10 +1,14 @@
1
- import { visit } from 'unist-util-visit';
1
+ import { CONTINUE, SKIP, visit } from 'unist-util-visit';
2
2
  export function getText(element) {
3
3
  if (!element)
4
4
  return '';
5
5
  let text = '';
6
- visit(element, 'text', function (node) {
7
- text += node.value;
6
+ visit(element, function (node) {
7
+ if (node.type === 'element' && node.tagName === 'svg')
8
+ return SKIP;
9
+ if (node.type === 'text')
10
+ text += node.value;
11
+ return CONTINUE;
8
12
  });
9
13
  return text;
10
14
  }
@@ -1 +1 @@
1
- {"version":3,"file":"text.js","sourceRoot":"","sources":["../../src/utils/text.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAEzC,MAAM,UAAU,OAAO,CAAC,OAA4B;IAClD,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IACxB,IAAI,IAAI,GAAG,EAAE,CAAC;IACd,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,UAAU,IAAI;QACnC,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC;IACrB,CAAC,CAAC,CAAC;IACH,OAAO,IAAI,CAAC;AACd,CAAC"}
1
+ {"version":3,"file":"text.js","sourceRoot":"","sources":["../../src/utils/text.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAEzD,MAAM,UAAU,OAAO,CAAC,OAA4B;IAClD,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IACxB,IAAI,IAAI,GAAG,EAAE,CAAC;IACd,KAAK,CAAC,OAAO,EAAE,UAAU,IAAI;QAC3B,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,IAAI,CAAC,OAAO,KAAK,KAAK;YAAE,OAAO,IAAI,CAAC;QACnE,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM;YAAE,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC;QAC7C,OAAO,QAAQ,CAAC;IAClB,CAAC,CAAC,CAAC;IACH,OAAO,IAAI,CAAC;AACd,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mintlify/scraping",
3
- "version": "4.0.589",
3
+ "version": "4.0.591",
4
4
  "description": "Scrape documentation frameworks to Mintlify docs",
5
5
  "engines": {
6
6
  "node": ">=18.0.0"
@@ -43,7 +43,7 @@
43
43
  "format:check": "prettier . --check"
44
44
  },
45
45
  "dependencies": {
46
- "@mintlify/common": "1.0.728",
46
+ "@mintlify/common": "1.0.729",
47
47
  "@mintlify/openapi-parser": "^0.0.8",
48
48
  "fs-extra": "11.1.1",
49
49
  "hast-util-to-mdast": "10.1.0",
@@ -66,7 +66,7 @@
66
66
  "@mintlify/models": "0.0.272",
67
67
  "@mintlify/prettier-config": "1.0.4",
68
68
  "@mintlify/ts-config": "2.0.2",
69
- "@mintlify/validation": "0.1.594",
69
+ "@mintlify/validation": "0.1.595",
70
70
  "@trivago/prettier-plugin-sort-imports": "4.3.0",
71
71
  "@tsconfig/recommended": "1.0.2",
72
72
  "@types/hast": "3.0.4",
@@ -82,5 +82,5 @@
82
82
  "typescript": "5.5.3",
83
83
  "vitest": "2.0.4"
84
84
  },
85
- "gitHead": "fdbcfc062b0d175a55530b0021aca2acaa98906b"
85
+ "gitHead": "c40f23d742edb39a24f4259970f9cd5f0a506f3c"
86
86
  }
package/src/cli.ts CHANGED
@@ -27,8 +27,17 @@ await yargs(hideBin(process.argv))
27
27
  .command(
28
28
  'section <url>',
29
29
  'Scrapes the entire docs site based on the URL provided',
30
- (yargs) => yargs.positional('url', { type: 'string', demandOption: true }).check(checkUrl),
31
- async ({ url }) => await site(url)
30
+ (yargs) =>
31
+ yargs
32
+ .positional('url', { type: 'string', demandOption: true })
33
+ .option('filter', {
34
+ describe:
35
+ 'Only scrape URLs matching this path filter (e.g. /docs will match /docs and /docs/*)',
36
+ type: 'string',
37
+ alias: 'f',
38
+ })
39
+ .check(checkUrl),
40
+ async ({ url, filter }) => await site(url, filter)
32
41
  )
33
42
 
34
43
  .command(
@@ -117,13 +126,13 @@ async function page(url: string) {
117
126
  }
118
127
  }
119
128
 
120
- async function site(url: string) {
129
+ async function site(url: string, filter?: string) {
121
130
  try {
122
131
  const urlObj = new URL(url);
123
132
  const html = await fetchPageHtml(urlObj);
124
133
  log('Successfully retrieved initial HTML from src: ' + urlObj.toString());
125
134
 
126
- const result = await scrapeAllSiteTabs(html, urlObj);
135
+ const result = await scrapeAllSiteTabs(html, urlObj, { filter });
127
136
  if (result.success) {
128
137
  const mintConfig = result.data as MintConfigType;
129
138
  const docsConfig = upgradeToDocsConfig(mintConfig, {
package/src/constants.ts CHANGED
@@ -1,5 +1,3 @@
1
- import { activeColors } from './utils/log.js';
2
-
3
1
  export const OVERVIEW_PAGE_SLUG = '/mintie_overview';
4
2
 
5
3
  export const SUPPORTED_MEDIA_EXTENSIONS = [
@@ -50,6 +48,4 @@ ${SPACES}We currently support: ReadMe, GitBook, and Docusaurus`;
50
48
 
51
49
  export const MDAST_FAILURE_MSG = 'failed to convert MDAST to Markdown string';
52
50
 
53
- export const FINAL_SUCCESS_MESSAGE = `We've successfully scraped your docs site.
54
- ${SPACES}We've downloaded the ${activeColors.cyan}\`navigation\`${activeColors.default} array (and if necessary, the ${activeColors.cyan}\`tabs\`${activeColors.default} array)
55
- ${SPACES}into ${activeColors.blue}\`docs.json\`${activeColors.default}.`;
51
+ export const FINAL_SUCCESS_MESSAGE = `We've successfully scraped your docs site. We've downloaded the \`navigation\` array (and if necessary, the \`tabs\` array) into \`docs.json\`.`;
package/src/index.ts CHANGED
@@ -2,3 +2,13 @@ export { generateOpenApiPages } from './openapi/generateOpenApiPages.js';
2
2
  export { generateOpenApiPagesForDocsConfig } from './openapi/generateOpenApiPagesForDocsConfig.js';
3
3
  export * from './utils/log.js';
4
4
  export { generateAsyncApiPagesForDocsConfig } from './asyncapi/generateAsyncApiPagesForDocsConfig.js';
5
+
6
+ export { scrapePageGroup } from './pipeline/group.js';
7
+ export { scrapeAllSiteTabs } from './pipeline/tabs.js';
8
+ export { htmlToHast } from './pipeline/root.js';
9
+ export { detectFramework, framework } from './utils/detectFramework.js';
10
+ export { fetchPageHtml } from './utils/network.js';
11
+ export { write } from './utils/file.js';
12
+ export { getErrorMessage } from './utils/errors.js';
13
+ export { checkUrl } from './utils/url.js';
14
+ export { FINAL_SUCCESS_MESSAGE } from './constants.js';
@@ -39,6 +39,28 @@ export function retrieveNavItems(rootNode: Element): Array<NavigationEntry> {
39
39
  if (node.tagName === rootSectionTagName) node.tagName = 'li';
40
40
  if (node.tagName !== 'li') return CONTINUE;
41
41
 
42
+ const className = node.properties.className;
43
+ if (
44
+ framework.vendor === 'gitbook' &&
45
+ Array.isArray(className) &&
46
+ className.includes('page-group-item')
47
+ ) {
48
+ const titleDiv = node.children.find(
49
+ (child) => child.type === 'element' && child.tagName === 'div'
50
+ );
51
+ const childList = node.children.find(
52
+ (child) => child.type === 'element' && child.tagName === 'ul'
53
+ );
54
+ if (titleDiv && titleDiv.type === 'element' && childList && childList.type === 'element') {
55
+ const title = findTitle(titleDiv, { delete: false });
56
+ const childEntries = retrieveNavItems(childList);
57
+ if (title && childEntries.length > 0) {
58
+ result.push({ group: title, pages: childEntries });
59
+ }
60
+ }
61
+ return SKIP;
62
+ }
63
+
42
64
  let title: string | undefined = undefined;
43
65
  if (
44
66
  node.children[0] &&
package/src/nav/root.ts CHANGED
@@ -38,6 +38,17 @@ export function retrieveRootNavElement(rootNode: HastRoot): Element | undefined
38
38
  let element: Element | undefined = undefined;
39
39
  visit(rootNode, 'element', function (node) {
40
40
  const { className } = node.properties;
41
+
42
+ if (
43
+ framework.vendor === 'gitbook' &&
44
+ node.tagName === 'aside' &&
45
+ (node.properties.id === 'table-of-contents' ||
46
+ node.properties.dataTestid === 'table-of-contents')
47
+ ) {
48
+ element = node;
49
+ return EXIT;
50
+ }
51
+
41
52
  if (
42
53
  node.tagName === rootTagName &&
43
54
  Array.isArray(className) &&
@@ -12,7 +12,11 @@ import { detectFramework, framework } from '../utils/detectFramework.js';
12
12
  import { logErrorResults } from '../utils/errors.js';
13
13
  import { fetchPageHtml, startPuppeteer } from '../utils/network.js';
14
14
  import { INDEX_NAMES, iterateThroughReservedNames } from '../utils/reservedNames.js';
15
- import { removeTrailingSlash, removeLeadingSlash } from '../utils/strings.js';
15
+ import {
16
+ removeTrailingSlash,
17
+ removeLeadingSlash,
18
+ optionallyAddLeadingSlash,
19
+ } from '../utils/strings.js';
16
20
  import { downloadColors } from './color.js';
17
21
  import { scrapePageGroup } from './group.js';
18
22
  import { downloadFavicon } from './icon.js';
@@ -20,10 +24,18 @@ import { downloadLogos } from './logo.js';
20
24
  import { htmlToHast } from './root.js';
21
25
  import { downloadTitle } from './title.js';
22
26
 
27
+ function matchesFilter(pathname: string, filter: string): boolean {
28
+ const normalizedPathname = removeTrailingSlash(pathname);
29
+ const normalizedFilter = removeTrailingSlash(optionallyAddLeadingSlash(filter));
30
+ return (
31
+ normalizedPathname === normalizedFilter || normalizedPathname.startsWith(normalizedFilter + '/')
32
+ );
33
+ }
34
+
23
35
  export async function scrapeSite(
24
36
  html: string,
25
37
  url: string | URL,
26
- opts: { hast?: HastRoot; tabs?: Array<Tab> } = {}
38
+ opts: { hast?: HastRoot; tabs?: Array<Tab>; filter?: string } = {}
27
39
  ): Promise<Result<MintConfig>> {
28
40
  let hast = opts.hast;
29
41
  if (!hast) hast = htmlToHast(html);
@@ -33,7 +45,7 @@ export async function scrapeSite(
33
45
 
34
46
  if (!framework.vendor) detectFramework(hast);
35
47
 
36
- if (framework.vendor === 'docusaurus') {
48
+ if (framework.vendor === 'docusaurus' || framework.vendor === 'gitbook') {
37
49
  const browser = await startPuppeteer();
38
50
  html = await fetchPageHtml(url, browser);
39
51
  hast = htmlToHast(html);
@@ -57,12 +69,16 @@ export async function scrapeSite(
57
69
  const needsBrowser = framework.vendor === 'gitbook';
58
70
 
59
71
  const externalLinks = listOfLinks.filter((url) => url.origin !== origin);
60
- const internalLinks = listOfLinks.filter(
61
- (url) => url.origin === origin && removeTrailingSlash(url.toString()) !== origin
62
- );
63
- const rootLinks = listOfLinks.filter(
64
- (url) => url.origin === origin && removeTrailingSlash(url.toString()) === origin
65
- );
72
+ const internalLinks = listOfLinks.filter((url) => {
73
+ if (url.origin !== origin || removeTrailingSlash(url.toString()) === origin) return false;
74
+ if (opts.filter && !matchesFilter(url.pathname, opts.filter)) return false;
75
+ return true;
76
+ });
77
+ const rootLinks = listOfLinks.filter((url) => {
78
+ if (url.origin !== origin || removeTrailingSlash(url.toString()) !== origin) return false;
79
+ if (opts.filter && !matchesFilter('/', opts.filter)) return false;
80
+ return true;
81
+ });
66
82
 
67
83
  const allPathnames = [
68
84
  ...internalLinks.map((url) => url.toString()),
@@ -157,14 +173,24 @@ export async function scrapeSite(
157
173
  })
158
174
  .filter(Boolean);
159
175
 
176
+ function filterErroredOrFilteredPaths(value: string) {
177
+ if (allErroredPaths.includes(value)) return true;
178
+ if (opts.filter && !matchesFilter('/' + value, opts.filter)) return true;
179
+ return false;
180
+ }
181
+
160
182
  traverse(navItems).forEach(function (value) {
161
- if (typeof value === 'string' && allErroredPaths.includes(value)) {
183
+ if (
184
+ typeof value === 'string' &&
185
+ this.key !== 'group' &&
186
+ filterErroredOrFilteredPaths(value)
187
+ ) {
162
188
  this.remove();
163
189
  } else if (Array.isArray(value)) {
164
190
  this.update(
165
191
  value
166
192
  .filter((item) =>
167
- typeof item === 'string' && allErroredPaths.includes(item) ? undefined : item
193
+ typeof item === 'string' && filterErroredOrFilteredPaths(item) ? undefined : item
168
194
  )
169
195
  .filter(Boolean)
170
196
  );
@@ -199,15 +225,12 @@ export async function scrapeSite(
199
225
  typeof val === 'string' && (val.startsWith('https://') || val.startsWith('http://'))
200
226
  )
201
227
  ) {
202
- this.update(
203
- value.filter(
204
- (val) =>
205
- !(
206
- typeof val === 'string' &&
207
- (val.startsWith('https://') || val.startsWith('http://'))
208
- )
209
- )
228
+ const newPages = value.filter(
229
+ (val) =>
230
+ !(typeof val === 'string' && (val.startsWith('https://') || val.startsWith('http://')))
210
231
  );
232
+ if (newPages.length) this.update(newPages);
233
+ else this.parent?.remove();
211
234
  }
212
235
  });
213
236
 
@@ -16,7 +16,8 @@ import { downloadTitle } from './title.js';
16
16
 
17
17
  export async function scrapeAllSiteTabs(
18
18
  html: string,
19
- url: string | URL
19
+ url: string | URL,
20
+ opts: { filter?: string } = {}
20
21
  ): Promise<Result<MintConfig>> {
21
22
  const hast = htmlToHast(html);
22
23
  url = new URL(url);
@@ -34,7 +35,7 @@ export async function scrapeAllSiteTabs(
34
35
  !links.length ||
35
36
  (links.length === 1 && links[0] && links[0].url === url.pathname)
36
37
  )
37
- return scrapeSite(html, url, { hast });
38
+ return scrapeSite(html, url, { hast, filter: opts.filter });
38
39
 
39
40
  if (!links.find((link) => url.pathname.startsWith(link.url))) {
40
41
  links.push({
@@ -49,7 +50,7 @@ export async function scrapeAllSiteTabs(
49
50
  newUrl.pathname = tabEntry.url;
50
51
  try {
51
52
  const newHtml = await fetchPageHtml(newUrl, undefined);
52
- return await scrapeSite(newHtml, newUrl, { tabs: [tabEntry] });
53
+ return await scrapeSite(newHtml, newUrl, { tabs: [tabEntry], filter: opts.filter });
53
54
  } catch (error) {
54
55
  return { success: false as const, message: getErrorMessage(error) };
55
56
  }
@@ -95,5 +96,5 @@ export async function scrapeAllSiteTabs(
95
96
  };
96
97
  }
97
98
 
98
- return scrapeSite(html, url, { hast });
99
+ return scrapeSite(html, url, { hast, filter: opts.filter });
99
100
  }
@@ -90,6 +90,53 @@ export async function getHtmlWithPuppeteer(
90
90
  });
91
91
  }
92
92
 
93
+ if (framework.vendor === 'gitbook') {
94
+ for (let round = 0; round < 10; round++) {
95
+ const clickedCount = await page.evaluate(() => {
96
+ const tocEl = document.getElementById('table-of-contents');
97
+ if (!tocEl) return 0;
98
+ let count = 0;
99
+ const items = tocEl.querySelectorAll('li.page-document-item');
100
+ items.forEach((li) => {
101
+ const btn = li.querySelector(':scope > a button');
102
+ if (!btn || !(btn instanceof HTMLElement)) return;
103
+ const anchor = btn.closest('a');
104
+ if (!anchor) return;
105
+ const sibling = anchor.nextElementSibling;
106
+ if (
107
+ sibling instanceof HTMLElement &&
108
+ sibling.style.opacity === '1' &&
109
+ sibling.style.height === 'auto'
110
+ )
111
+ return;
112
+ btn.click();
113
+ count++;
114
+ });
115
+ return count;
116
+ });
117
+
118
+ if (clickedCount === 0) break;
119
+
120
+ await page
121
+ .waitForFunction(
122
+ () => {
123
+ const tocEl = document.getElementById('table-of-contents');
124
+ if (!tocEl) return true;
125
+ const anchors = tocEl.querySelectorAll('li.page-document-item > a');
126
+ return Array.from(anchors).every((a) => {
127
+ const btn = a.querySelector('button');
128
+ if (!btn) return true;
129
+ const sibling = a.nextElementSibling;
130
+ if (!sibling || !(sibling instanceof HTMLElement)) return true;
131
+ return sibling.style.opacity === '1' && sibling.style.height === 'auto';
132
+ });
133
+ },
134
+ { timeout: 5000 }
135
+ )
136
+ .catch(() => {});
137
+ }
138
+ }
139
+
93
140
  const content = await exponentialBackoff(() => page.content());
94
141
  await page.close();
95
142
  return content;
@@ -5,3 +5,7 @@ export function removeTrailingSlash(str: string): string {
5
5
  export function removeLeadingSlash(str: string): string {
6
6
  return str.startsWith('/') ? str.substring(1) : str;
7
7
  }
8
+
9
+ export function optionallyAddLeadingSlash(str: string): string {
10
+ return str.startsWith('/') ? str : '/' + str;
11
+ }
package/src/utils/text.ts CHANGED
@@ -1,11 +1,13 @@
1
1
  import type { Element } from 'hast';
2
- import { visit } from 'unist-util-visit';
2
+ import { CONTINUE, SKIP, visit } from 'unist-util-visit';
3
3
 
4
4
  export function getText(element: Element | undefined): string {
5
5
  if (!element) return '';
6
6
  let text = '';
7
- visit(element, 'text', function (node) {
8
- text += node.value;
7
+ visit(element, function (node) {
8
+ if (node.type === 'element' && node.tagName === 'svg') return SKIP;
9
+ if (node.type === 'text') text += node.value;
10
+ return CONTINUE;
9
11
  });
10
12
  return text;
11
13
  }