@mintlify/scraping 3.0.186 → 3.0.188

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344) hide show
  1. package/bin/assert.d.ts +5 -0
  2. package/bin/assert.js +13 -0
  3. package/bin/assert.js.map +1 -0
  4. package/bin/cli.js +43 -72
  5. package/bin/cli.js.map +1 -1
  6. package/bin/components/Accordion.d.ts +5 -0
  7. package/bin/components/Accordion.js +54 -0
  8. package/bin/components/Accordion.js.map +1 -0
  9. package/bin/components/AccordionGroup.d.ts +5 -0
  10. package/bin/components/AccordionGroup.js +52 -0
  11. package/bin/components/AccordionGroup.js.map +1 -0
  12. package/bin/components/Callout.d.ts +5 -0
  13. package/bin/components/Callout.js +114 -0
  14. package/bin/components/Callout.js.map +1 -0
  15. package/bin/components/Card.d.ts +5 -0
  16. package/bin/components/Card.js +135 -0
  17. package/bin/components/Card.js.map +1 -0
  18. package/bin/components/CardGroup.d.ts +5 -0
  19. package/bin/components/CardGroup.js +52 -0
  20. package/bin/components/CardGroup.js.map +1 -0
  21. package/bin/components/CodeGroup.d.ts +5 -0
  22. package/bin/components/CodeGroup.js +166 -0
  23. package/bin/components/CodeGroup.js.map +1 -0
  24. package/bin/components/Frame.d.ts +5 -0
  25. package/bin/components/Frame.js +51 -0
  26. package/bin/components/Frame.js.map +1 -0
  27. package/bin/components/Tabs.d.ts +5 -0
  28. package/bin/components/Tabs.js +122 -0
  29. package/bin/components/Tabs.js.map +1 -0
  30. package/bin/components/link.d.ts +2 -0
  31. package/bin/components/link.js +16 -0
  32. package/bin/components/link.js.map +1 -0
  33. package/bin/constants.d.ts +6 -7
  34. package/bin/constants.js +31 -12
  35. package/bin/constants.js.map +1 -1
  36. package/bin/customComponents/create.d.ts +10 -0
  37. package/bin/customComponents/create.js +69 -0
  38. package/bin/customComponents/create.js.map +1 -0
  39. package/bin/customComponents/plugin.d.ts +2 -0
  40. package/bin/customComponents/plugin.js +26 -0
  41. package/bin/customComponents/plugin.js.map +1 -0
  42. package/bin/customComponents/selective.d.ts +6 -0
  43. package/bin/customComponents/selective.js +29 -0
  44. package/bin/customComponents/selective.js.map +1 -0
  45. package/bin/nav/iterate.d.ts +2 -0
  46. package/bin/nav/iterate.js +15 -0
  47. package/bin/nav/iterate.js.map +1 -0
  48. package/bin/nav/listItems.d.ts +8 -0
  49. package/bin/nav/listItems.js +62 -0
  50. package/bin/nav/listItems.js.map +1 -0
  51. package/bin/nav/retrieve.d.ts +3 -0
  52. package/bin/nav/retrieve.js +75 -0
  53. package/bin/nav/retrieve.js.map +1 -0
  54. package/bin/nav/root.d.ts +2 -0
  55. package/bin/nav/root.js +40 -0
  56. package/bin/nav/root.js.map +1 -0
  57. package/bin/openapi/generateOpenApiPages.js +18 -5
  58. package/bin/openapi/generateOpenApiPages.js.map +1 -1
  59. package/bin/root/retrieve.d.ts +2 -0
  60. package/bin/root/retrieve.js +46 -0
  61. package/bin/root/retrieve.js.map +1 -0
  62. package/bin/scrapingPipeline/group.d.ts +5 -0
  63. package/bin/scrapingPipeline/group.js +46 -0
  64. package/bin/scrapingPipeline/group.js.map +1 -0
  65. package/bin/scrapingPipeline/icon.d.ts +2 -0
  66. package/bin/scrapingPipeline/icon.js +22 -0
  67. package/bin/scrapingPipeline/icon.js.map +1 -0
  68. package/bin/scrapingPipeline/images.d.ts +3 -0
  69. package/bin/scrapingPipeline/images.js +50 -0
  70. package/bin/scrapingPipeline/images.js.map +1 -0
  71. package/bin/scrapingPipeline/logo.d.ts +5 -0
  72. package/bin/scrapingPipeline/logo.js +92 -0
  73. package/bin/scrapingPipeline/logo.js.map +1 -0
  74. package/bin/scrapingPipeline/page.d.ts +6 -0
  75. package/bin/scrapingPipeline/page.js +102 -0
  76. package/bin/scrapingPipeline/page.js.map +1 -0
  77. package/bin/scrapingPipeline/root.d.ts +2 -0
  78. package/bin/scrapingPipeline/root.js +8 -0
  79. package/bin/scrapingPipeline/root.js.map +1 -0
  80. package/bin/scrapingPipeline/site.d.ts +7 -0
  81. package/bin/scrapingPipeline/site.js +129 -0
  82. package/bin/scrapingPipeline/site.js.map +1 -0
  83. package/bin/scrapingPipeline/tabs.d.ts +3 -0
  84. package/bin/scrapingPipeline/tabs.js +67 -0
  85. package/bin/scrapingPipeline/tabs.js.map +1 -0
  86. package/bin/tabs/retrieveReadme.d.ts +3 -0
  87. package/bin/tabs/retrieveReadme.js +78 -0
  88. package/bin/tabs/retrieveReadme.js.map +1 -0
  89. package/bin/tsconfig.build.tsbuildinfo +1 -1
  90. package/bin/types/components.d.ts +2 -0
  91. package/bin/types/components.js +2 -0
  92. package/bin/types/components.js.map +1 -0
  93. package/bin/types/framework.d.ts +8 -0
  94. package/bin/types/framework.js +3 -0
  95. package/bin/types/framework.js.map +1 -0
  96. package/bin/types/hast.d.ts +6 -0
  97. package/bin/types/hast.js +2 -0
  98. package/bin/types/hast.js.map +1 -0
  99. package/bin/types/result.d.ts +7 -0
  100. package/bin/types/result.js +2 -0
  101. package/bin/types/result.js.map +1 -0
  102. package/bin/types/scrapeFunc.d.ts +3 -0
  103. package/bin/types/scrapeFunc.js +2 -0
  104. package/bin/types/scrapeFunc.js.map +1 -0
  105. package/bin/utils/append.d.ts +1 -0
  106. package/bin/utils/append.js +12 -0
  107. package/bin/utils/append.js.map +1 -0
  108. package/bin/utils/children.d.ts +5 -0
  109. package/bin/utils/children.js +35 -0
  110. package/bin/utils/children.js.map +1 -0
  111. package/bin/utils/className.d.ts +3 -0
  112. package/bin/utils/className.js +13 -0
  113. package/bin/utils/className.js.map +1 -0
  114. package/bin/utils/detectFramework.d.ts +4 -0
  115. package/bin/utils/detectFramework.js +60 -0
  116. package/bin/utils/detectFramework.js.map +1 -0
  117. package/bin/utils/emptyParagraphs.d.ts +3 -0
  118. package/bin/utils/emptyParagraphs.js +19 -0
  119. package/bin/utils/emptyParagraphs.js.map +1 -0
  120. package/bin/utils/errors.d.ts +3 -0
  121. package/bin/utils/errors.js +16 -0
  122. package/bin/utils/errors.js.map +1 -0
  123. package/bin/utils/escape.d.ts +2 -0
  124. package/bin/utils/escape.js +25 -0
  125. package/bin/utils/escape.js.map +1 -0
  126. package/bin/utils/extension.d.ts +3 -0
  127. package/bin/utils/extension.js +18 -0
  128. package/bin/utils/extension.js.map +1 -0
  129. package/bin/utils/file.d.ts +4 -0
  130. package/bin/utils/file.js +43 -0
  131. package/bin/utils/file.js.map +1 -0
  132. package/bin/utils/firstChild.d.ts +2 -0
  133. package/bin/utils/firstChild.js +12 -0
  134. package/bin/utils/firstChild.js.map +1 -0
  135. package/bin/utils/images.d.ts +5 -0
  136. package/bin/utils/images.js +86 -0
  137. package/bin/utils/images.js.map +1 -0
  138. package/bin/utils/img.d.ts +2 -0
  139. package/bin/utils/img.js +15 -0
  140. package/bin/utils/img.js.map +1 -0
  141. package/bin/utils/log.d.ts +18 -0
  142. package/bin/utils/log.js +68 -0
  143. package/bin/utils/log.js.map +1 -0
  144. package/bin/utils/nestedRoots.d.ts +7 -0
  145. package/bin/utils/nestedRoots.js +19 -0
  146. package/bin/utils/nestedRoots.js.map +1 -0
  147. package/bin/utils/network.d.ts +5 -0
  148. package/bin/utils/network.js +82 -0
  149. package/bin/utils/network.js.map +1 -0
  150. package/bin/utils/path.d.ts +1 -0
  151. package/bin/utils/path.js +22 -0
  152. package/bin/utils/path.js.map +1 -0
  153. package/bin/utils/position.d.ts +3 -0
  154. package/bin/utils/position.js +12 -0
  155. package/bin/utils/position.js.map +1 -0
  156. package/bin/utils/reservedNames.d.ts +4 -0
  157. package/bin/utils/reservedNames.js +27 -0
  158. package/bin/utils/reservedNames.js.map +1 -0
  159. package/bin/utils/strings.d.ts +2 -0
  160. package/bin/utils/strings.js +7 -0
  161. package/bin/utils/strings.js.map +1 -0
  162. package/bin/utils/text.d.ts +2 -0
  163. package/bin/utils/text.js +11 -0
  164. package/bin/utils/text.js.map +1 -0
  165. package/bin/utils/title.d.ts +10 -0
  166. package/bin/utils/title.js +58 -0
  167. package/bin/utils/title.js.map +1 -0
  168. package/bin/utils/url.d.ts +3 -0
  169. package/bin/utils/url.js +10 -0
  170. package/bin/utils/url.js.map +1 -0
  171. package/package.json +18 -9
  172. package/src/assert.ts +15 -0
  173. package/src/cli.ts +53 -90
  174. package/src/components/Accordion.ts +84 -0
  175. package/src/components/AccordionGroup.ts +69 -0
  176. package/src/components/Callout.ts +159 -0
  177. package/src/components/Card.ts +168 -0
  178. package/src/components/CardGroup.ts +69 -0
  179. package/src/components/CodeGroup.ts +209 -0
  180. package/src/components/Frame.ts +86 -0
  181. package/src/components/Tabs.ts +154 -0
  182. package/src/components/link.ts +17 -0
  183. package/src/constants.ts +37 -19
  184. package/src/customComponents/create.ts +106 -0
  185. package/src/customComponents/plugin.ts +31 -0
  186. package/src/customComponents/selective.ts +37 -0
  187. package/src/nav/iterate.ts +18 -0
  188. package/src/nav/listItems.ts +82 -0
  189. package/src/nav/retrieve.ts +88 -0
  190. package/src/nav/root.ts +47 -0
  191. package/src/openapi/generateOpenApiPages.ts +19 -4
  192. package/src/root/retrieve.ts +52 -0
  193. package/src/scrapingPipeline/group.ts +62 -0
  194. package/src/scrapingPipeline/icon.ts +26 -0
  195. package/src/scrapingPipeline/images.ts +67 -0
  196. package/src/scrapingPipeline/logo.ts +127 -0
  197. package/src/scrapingPipeline/page.ts +130 -0
  198. package/src/scrapingPipeline/root.ts +10 -0
  199. package/src/scrapingPipeline/site.ts +161 -0
  200. package/src/scrapingPipeline/tabs.ts +87 -0
  201. package/src/tabs/retrieveReadme.ts +99 -0
  202. package/src/types/components.ts +3 -0
  203. package/src/types/framework.ts +10 -0
  204. package/src/types/hast.ts +12 -0
  205. package/src/types/result.ts +1 -0
  206. package/src/types/scrapeFunc.ts +9 -0
  207. package/src/utils/append.ts +9 -0
  208. package/src/utils/children.ts +51 -0
  209. package/src/utils/className.ts +14 -0
  210. package/src/utils/detectFramework.ts +72 -0
  211. package/src/utils/emptyParagraphs.ts +21 -0
  212. package/src/utils/errors.ts +24 -0
  213. package/src/utils/escape.ts +30 -0
  214. package/src/utils/extension.ts +19 -0
  215. package/src/utils/file.ts +58 -0
  216. package/src/utils/firstChild.ts +13 -0
  217. package/src/utils/images.ts +101 -0
  218. package/src/utils/img.ts +17 -0
  219. package/src/utils/log.ts +82 -0
  220. package/src/utils/nestedRoots.ts +20 -0
  221. package/src/utils/network.ts +95 -0
  222. package/src/utils/path.ts +27 -0
  223. package/src/utils/position.ts +14 -0
  224. package/src/utils/reservedNames.ts +31 -0
  225. package/src/utils/strings.ts +7 -0
  226. package/src/utils/text.ts +11 -0
  227. package/src/utils/title.ts +68 -0
  228. package/src/utils/url.ts +8 -0
  229. package/bin/browser.d.ts +0 -2
  230. package/bin/browser.js +0 -24
  231. package/bin/browser.js.map +0 -1
  232. package/bin/checks.d.ts +0 -8
  233. package/bin/checks.js +0 -24
  234. package/bin/checks.js.map +0 -1
  235. package/bin/downloadImage.d.ts +0 -5
  236. package/bin/downloadImage.js +0 -88
  237. package/bin/downloadImage.js.map +0 -1
  238. package/bin/scraping/combineNavWithEmptyGroupTitles.d.ts +0 -2
  239. package/bin/scraping/combineNavWithEmptyGroupTitles.js +0 -20
  240. package/bin/scraping/combineNavWithEmptyGroupTitles.js.map +0 -1
  241. package/bin/scraping/detectFramework.d.ts +0 -9
  242. package/bin/scraping/detectFramework.js +0 -36
  243. package/bin/scraping/detectFramework.js.map +0 -1
  244. package/bin/scraping/downloadAllImages.d.ts +0 -4
  245. package/bin/scraping/downloadAllImages.js +0 -36
  246. package/bin/scraping/downloadAllImages.js.map +0 -1
  247. package/bin/scraping/downloadLogoImage.d.ts +0 -1
  248. package/bin/scraping/downloadLogoImage.js +0 -12
  249. package/bin/scraping/downloadLogoImage.js.map +0 -1
  250. package/bin/scraping/replaceImagePaths.d.ts +0 -1
  251. package/bin/scraping/replaceImagePaths.js +0 -14
  252. package/bin/scraping/replaceImagePaths.js.map +0 -1
  253. package/bin/scraping/scrapeFileGettingFileNameFromUrl.d.ts +0 -6
  254. package/bin/scraping/scrapeFileGettingFileNameFromUrl.js +0 -46
  255. package/bin/scraping/scrapeFileGettingFileNameFromUrl.js.map +0 -1
  256. package/bin/scraping/scrapeGettingFileNameFromUrl.d.ts +0 -6
  257. package/bin/scraping/scrapeGettingFileNameFromUrl.js +0 -13
  258. package/bin/scraping/scrapeGettingFileNameFromUrl.js.map +0 -1
  259. package/bin/scraping/scrapePage.d.ts +0 -8
  260. package/bin/scraping/scrapePage.js +0 -10
  261. package/bin/scraping/scrapePage.js.map +0 -1
  262. package/bin/scraping/scrapePageCommands.d.ts +0 -7
  263. package/bin/scraping/scrapePageCommands.js +0 -50
  264. package/bin/scraping/scrapePageCommands.js.map +0 -1
  265. package/bin/scraping/scrapeSection.d.ts +0 -3
  266. package/bin/scraping/scrapeSection.js +0 -12
  267. package/bin/scraping/scrapeSection.js.map +0 -1
  268. package/bin/scraping/scrapeSectionCommands.d.ts +0 -6
  269. package/bin/scraping/scrapeSectionCommands.js +0 -63
  270. package/bin/scraping/scrapeSectionCommands.js.map +0 -1
  271. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.d.ts +0 -5
  272. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js +0 -29
  273. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js.map +0 -1
  274. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.d.ts +0 -2
  275. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js +0 -31
  276. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js.map +0 -1
  277. package/bin/scraping/site-scrapers/alternateGroupTitle.d.ts +0 -3
  278. package/bin/scraping/site-scrapers/alternateGroupTitle.js +0 -9
  279. package/bin/scraping/site-scrapers/alternateGroupTitle.js.map +0 -1
  280. package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.d.ts +0 -5
  281. package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js +0 -33
  282. package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js.map +0 -1
  283. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.d.ts +0 -3
  284. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js +0 -35
  285. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js.map +0 -1
  286. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.d.ts +0 -3
  287. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js +0 -33
  288. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js.map +0 -1
  289. package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.d.ts +0 -2
  290. package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js +0 -30
  291. package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js.map +0 -1
  292. package/bin/scraping/site-scrapers/openNestedGitbookMenus.d.ts +0 -2
  293. package/bin/scraping/site-scrapers/openNestedGitbookMenus.js +0 -21
  294. package/bin/scraping/site-scrapers/openNestedGitbookMenus.js.map +0 -1
  295. package/bin/scraping/site-scrapers/scrapeDocusaurusPage.d.ts +0 -5
  296. package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js +0 -53
  297. package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js.map +0 -1
  298. package/bin/scraping/site-scrapers/scrapeDocusaurusSection.d.ts +0 -2
  299. package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js +0 -32
  300. package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js.map +0 -1
  301. package/bin/scraping/site-scrapers/scrapeGitBookPage.d.ts +0 -5
  302. package/bin/scraping/site-scrapers/scrapeGitBookPage.js +0 -56
  303. package/bin/scraping/site-scrapers/scrapeGitBookPage.js.map +0 -1
  304. package/bin/scraping/site-scrapers/scrapeGitBookSection.d.ts +0 -2
  305. package/bin/scraping/site-scrapers/scrapeGitBookSection.js +0 -42
  306. package/bin/scraping/site-scrapers/scrapeGitBookSection.js.map +0 -1
  307. package/bin/scraping/site-scrapers/scrapeReadMePage.d.ts +0 -5
  308. package/bin/scraping/site-scrapers/scrapeReadMePage.js +0 -38
  309. package/bin/scraping/site-scrapers/scrapeReadMePage.js.map +0 -1
  310. package/bin/scraping/site-scrapers/scrapeReadMeSection.d.ts +0 -2
  311. package/bin/scraping/site-scrapers/scrapeReadMeSection.js +0 -39
  312. package/bin/scraping/site-scrapers/scrapeReadMeSection.js.map +0 -1
  313. package/bin/util.d.ts +0 -29
  314. package/bin/util.js +0 -97
  315. package/bin/util.js.map +0 -1
  316. package/src/browser.ts +0 -24
  317. package/src/checks.ts +0 -32
  318. package/src/downloadImage.ts +0 -102
  319. package/src/scraping/combineNavWithEmptyGroupTitles.ts +0 -21
  320. package/src/scraping/detectFramework.ts +0 -55
  321. package/src/scraping/downloadAllImages.ts +0 -61
  322. package/src/scraping/downloadLogoImage.ts +0 -24
  323. package/src/scraping/replaceImagePaths.ts +0 -17
  324. package/src/scraping/scrapeFileGettingFileNameFromUrl.ts +0 -84
  325. package/src/scraping/scrapeGettingFileNameFromUrl.ts +0 -56
  326. package/src/scraping/scrapePage.ts +0 -40
  327. package/src/scraping/scrapePageCommands.ts +0 -68
  328. package/src/scraping/scrapeSection.ts +0 -30
  329. package/src/scraping/scrapeSectionCommands.ts +0 -98
  330. package/src/scraping/site-scrapers/Intercom/scrapeIntercomPage.ts +0 -52
  331. package/src/scraping/site-scrapers/Intercom/scrapeIntercomSection.ts +0 -54
  332. package/src/scraping/site-scrapers/alternateGroupTitle.ts +0 -11
  333. package/src/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.ts +0 -45
  334. package/src/scraping/site-scrapers/links-per-group/getLinksRecursively.ts +0 -47
  335. package/src/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.ts +0 -44
  336. package/src/scraping/site-scrapers/openNestedDocusaurusMenus.ts +0 -42
  337. package/src/scraping/site-scrapers/openNestedGitbookMenus.ts +0 -27
  338. package/src/scraping/site-scrapers/scrapeDocusaurusPage.ts +0 -85
  339. package/src/scraping/site-scrapers/scrapeDocusaurusSection.ts +0 -63
  340. package/src/scraping/site-scrapers/scrapeGitBookPage.ts +0 -82
  341. package/src/scraping/site-scrapers/scrapeGitBookSection.ts +0 -69
  342. package/src/scraping/site-scrapers/scrapeReadMePage.ts +0 -56
  343. package/src/scraping/site-scrapers/scrapeReadMeSection.ts +0 -66
  344. package/src/util.ts +0 -122
@@ -1 +0,0 @@
1
- {"version":3,"file":"scrapeSectionCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapeSectionCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAG1B,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACvC,OAAO,EAAE,eAAe,EAA4B,MAAM,sBAAsB,CAAC;AACjF,OAAO,EAAmB,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACpE,OAAO,EAAE,qBAAqB,EAAE,MAAM,mDAAmD,CAAC;AAC1F,OAAO,yBAAyB,MAAM,8CAA8C,CAAC;AACrF,OAAO,sBAAsB,MAAM,2CAA2C,CAAC;AAC/E,OAAO,EAAE,uBAAuB,EAAE,MAAM,4CAA4C,CAAC;AACrF,OAAO,EAAE,oBAAoB,EAAE,MAAM,yCAAyC,CAAC;AAC/E,OAAO,EAAE,mBAAmB,EAAE,MAAM,wCAAwC,CAAC;AAE7E,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAC7C,GAAW,EACX,SAAkB,EAClB,UAA2B;IAE3B,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACjC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,MAAM,aAAa,CAAC,UAAU,EAAE,IAAI,EAAE,SAAS,CAAC,GAAG,CAAC,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;IAC5E,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,8BAA8B,CAClD,GAAW,EACX,SAAkB,EAClB,OAA2B,CAAC,kBAAkB;;IAE9C,MAAM,6BAA6B,CACjC,GAAG,EACH,SAAS,EACT,yBAAyB,EACzB,uBAAuB,EACvB,OAAO,CACR,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,GAAW,EAAE,SAAkB;IAC/E,MAAM,6BAA6B,CAAC,GAAG,EAAE,SAAS,EAAE,sBAAsB,EAAE,oBAAoB,CAAC,CAAC;AACpG,CAAC;AAED,KAAK,UAAU,6BAA6B,CAC1C,GAAW,EACX,SAAkB,EAClB,SAA0C,EAC1C,UAA2B,EAC3B,OAAgB;IAEhB,MAAM,OAAO,GAAG,MAAM,YAAY,EAAE,CAAC;IACrC,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;IACrC,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE;QACnB,SAAS,EAAE,cAAc;KAC1B,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,CAAC;IACnC,KAAK,OAAO,CAAC,KAAK,EAAE,CAAC;IACrB,MAAM,aAAa,CAAC,UAAU,EAAE,IAAI,EAAE,SAAS,CAAC,GAAG,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;IAC1E,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAC9C,GAAW,EACX,SAAkB,EAClB,aAA4B;IAE5B,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACjC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,aAAa,GAAG,aAAa,CAAC,SAAS,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;IAEhF,iBAAiB,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;IAC3C,OAAO,CAAC,GAAG,CAAC,sBAAsB,GAAG,aAAa,CAAC,SAAS,CAAC,CAAC;IAE9D,QAAQ,aAAa,CAAC,SAAS,EAAE,CAAC;QAChC,KAAK,YAAY;YACf,MAAM,8BAA8B,CAAC,GAAG,EAAE,SAAS,EAAE,aAAa,CAAC,OAAO,CAAC,CAAC;YAC5E,MAAM;QACR,KAAK,SAAS;YACZ,MAAM,2BAA2B,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;YAClD,MAAM;QACR,KAAK,QAAQ;YACX,MAAM,yBAAyB,CAAC,GAAG,EAAE,SAAS,EAAE,mBAAmB,CAAC,CAAC;YACrE,MAAM;QACR,KAAK,UAAU;YACb,MAAM,yBAAyB,CAAC,GAAG,EAAE,SAAS,EAAE,qBAAqB,CAAC,CAAC;YACvE,MAAM;IACV,CAAC;AACH,CAAC;AAED,SAAS,iBAAiB,CAAC,SAAgC;IACzD,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,OAAO,CAAC,GAAG,CACT,4GAA4G,CAC7G,CAAC;QACF,OAAO,CAAC,IAAI,EAAE,CAAC;IACjB,CAAC;AACH,CAAC"}
@@ -1,5 +0,0 @@
1
- export declare function scrapeIntercomPage(html: string, origin: string, cliDir: string, imageBaseDir: string, overwrite: boolean, _: string | undefined): Promise<{
2
- title: string;
3
- description: string;
4
- markdown: string;
5
- }>;
@@ -1,29 +0,0 @@
1
- import * as cheerio from 'cheerio';
2
- import { NodeHtmlMarkdown } from 'node-html-markdown';
3
- import downloadAllImages from '../../downloadAllImages.js';
4
- import replaceImagePaths from '../../replaceImagePaths.js';
5
- export async function scrapeIntercomPage(html, origin, cliDir, imageBaseDir, overwrite, _ // version
6
- ) {
7
- const $ = cheerio.load(html);
8
- const titleComponent = $('.t__h1').first();
9
- const title = titleComponent.text().trim();
10
- const description = $('.article__desc', titleComponent.parent()).text().trim();
11
- const content = $('article').first();
12
- const contentHtml = $.html(content);
13
- const origToWritePath = await downloadAllImages($, content, origin, imageBaseDir, overwrite, undefined);
14
- const nhm = new NodeHtmlMarkdown({ useInlineLinks: false });
15
- let markdown = nhm.translate(contentHtml);
16
- // Keep headers on one line
17
- markdown = markdown.replace(/# \n\n/g, '# ');
18
- // Remove unnecessary nonwidth blank space characters
19
- markdown = markdown.replace(/\u200b/g, '');
20
- // Reduce unnecessary blank lines
21
- markdown = markdown.replace(/\n\n\n/g, '\n\n');
22
- // Mintlify doesn't support bolded headers, remove the asterisks
23
- markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, '$1 $2\n');
24
- if (origToWritePath) {
25
- markdown = replaceImagePaths(origToWritePath, cliDir, markdown);
26
- }
27
- return { title, description, markdown };
28
- }
29
- //# sourceMappingURL=scrapeIntercomPage.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"scrapeIntercomPage.js","sourceRoot":"","sources":["../../../../src/scraping/site-scrapers/Intercom/scrapeIntercomPage.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAEtD,OAAO,iBAAiB,MAAM,4BAA4B,CAAC;AAC3D,OAAO,iBAAiB,MAAM,4BAA4B,CAAC;AAE3D,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,CAAqB,CAAC,UAAU;;IAEhC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,cAAc,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,KAAK,EAAE,CAAC;IAC3C,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAC3C,MAAM,WAAW,GAAG,CAAC,CAAC,gBAAgB,EAAE,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAE/E,MAAM,OAAO,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC;IACrC,MAAM,WAAW,GAAG,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAEpC,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,OAAO,EACP,MAAM,EACN,YAAY,EACZ,SAAS,EACT,SAAS,CACV,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,gBAAgB,CAAC,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;IAC5D,IAAI,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAE1C,2BAA2B;IAC3B,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IAE7C,qDAAqD;IACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAE3C,iCAAiC;IACjC,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,gEAAgE;IAChE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;IACjE,IAAI,eAAe,EAAE,CAAC;QACpB,QAAQ,GAAG,iBAAiB,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAClE,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC"}
@@ -1,2 +0,0 @@
1
- import { NavigationEntry } from '@mintlify/models';
2
- export declare function scrapeIntercomSection(html: string, origin: string, cliDir: string, imageBaseDir: string, overwrite: boolean, version: string | undefined): Promise<NavigationEntry[]>;
@@ -1,31 +0,0 @@
1
- import axios from 'axios';
2
- import * as cheerio from 'cheerio';
3
- import downloadLogoImage from '../../downloadLogoImage.js';
4
- import { scrapeGettingFileNameFromUrl } from '../../scrapeGettingFileNameFromUrl.js';
5
- import { scrapeIntercomPage } from './scrapeIntercomPage.js';
6
- export async function scrapeIntercomSection(html, origin, cliDir, imageBaseDir, overwrite, version) {
7
- let $ = cheerio.load(html);
8
- const logoSrc = $('.header__logo img').first().attr('src');
9
- void downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite);
10
- const collectionsLink = $('.section .g__space a');
11
- const collectionsMap = collectionsLink.toArray().map(async (s) => {
12
- const href = $(s).attr('href');
13
- const res = await axios.get(`${origin}${href}`);
14
- const html = res.data;
15
- $ = cheerio.load(html);
16
- const sectionTitle = $('.collection h1').first().text().trim();
17
- const sectionPages = $('.section .g__space a')
18
- .toArray()
19
- .map((s) => $(s).attr('href'))
20
- .filter((page) => page !== undefined);
21
- return {
22
- group: sectionTitle,
23
- pages: sectionPages,
24
- };
25
- });
26
- const collections = await Promise.all(collectionsMap);
27
- return await Promise.all(collections.map(async (entry) => {
28
- return await scrapeGettingFileNameFromUrl(entry, cliDir, origin, overwrite, scrapeIntercomPage, false, version);
29
- }));
30
- }
31
- //# sourceMappingURL=scrapeIntercomSection.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"scrapeIntercomSection.js","sourceRoot":"","sources":["../../../../src/scraping/site-scrapers/Intercom/scrapeIntercomSection.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,OAAO,iBAAiB,MAAM,4BAA4B,CAAC;AAC3D,OAAO,EAAE,4BAA4B,EAAE,MAAM,uCAAuC,CAAC;AACrF,OAAO,EAAE,kBAAkB,EAAE,MAAM,yBAAyB,CAAC;AAE7D,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,OAA2B;IAE3B,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE3B,MAAM,OAAO,GAAG,CAAC,CAAC,mBAAmB,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC3D,KAAK,iBAAiB,CAAC,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;IAEjE,MAAM,eAAe,GAAG,CAAC,CAAC,sBAAsB,CAAC,CAAC;IAClD,MAAM,cAAc,GAAG,eAAe,CAAC,OAAO,EAAE,CAAC,GAAG,CAAC,KAAK,EAAE,CAAkB,EAAE,EAAE;QAChF,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC/B,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,GAAG,MAAM,GAAG,IAAI,EAAE,CAAC,CAAC;QAChD,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;QACtB,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvB,MAAM,YAAY,GAAG,CAAC,CAAC,gBAAgB,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QAC/D,MAAM,YAAY,GAAG,CAAC,CAAC,sBAAsB,CAAC;aAC3C,OAAO,EAAE;aACT,GAAG,CAAC,CAAC,CAAkB,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;aAC9C,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,SAAS,CAAa,CAAC;QACpD,OAAO;YACL,KAAK,EAAE,YAAY;YACnB,KAAK,EAAE,YAAY;SACpB,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,MAAM,WAAW,GAAe,MAAM,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;IAElE,OAAO,MAAM,OAAO,CAAC,GAAG,CACtB,WAAW,CAAC,GAAG,CAAC,KAAK,EAAE,KAAsB,EAAE,EAAE;QAC/C,OAAO,MAAM,4BAA4B,CACvC,KAAK,EACL,MAAM,EACN,MAAM,EACN,SAAS,EACT,kBAAkB,EAClB,KAAK,EACL,OAAO,CACR,CAAC;IACJ,CAAC,CAAC,CACH,CAAC;AACJ,CAAC"}
@@ -1,3 +0,0 @@
1
- import { NavigationEntry } from '@mintlify/models';
2
- import { Cheerio, Element } from 'cheerio';
3
- export default function alternateGroupTitle(firstLink: Cheerio<Element>, pages: NavigationEntry[]): string;
@@ -1,9 +0,0 @@
1
- export default function alternateGroupTitle(firstLink, pages) {
2
- // Only assign titles to nested navigation menus outside a section.
3
- // Others should not have a title so we can merge them into one section.
4
- if (pages.length > 0) {
5
- return firstLink.text();
6
- }
7
- return '';
8
- }
9
- //# sourceMappingURL=alternateGroupTitle.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"alternateGroupTitle.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/alternateGroupTitle.ts"],"names":[],"mappings":"AAGA,MAAM,CAAC,OAAO,UAAU,mBAAmB,CAAC,SAA2B,EAAE,KAAwB;IAC/F,mEAAmE;IACnE,wEAAwE;IACxE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,SAAS,CAAC,IAAI,EAAE,CAAC;IAC1B,CAAC;IACD,OAAO,EAAE,CAAC;AACZ,CAAC"}
@@ -1,5 +0,0 @@
1
- import { Cheerio, CheerioAPI, Element } from 'cheerio';
2
- export declare function getDocusaurusLinksPerGroup(navigationSections: Cheerio<Element>, $: CheerioAPI, version: string | undefined): {
3
- group: string;
4
- pages: import("@mintlify/models").NavigationEntry[];
5
- }[];
@@ -1,33 +0,0 @@
1
- import alternateGroupTitle from '../alternateGroupTitle.js';
2
- import getLinksRecursively from './getLinksRecursively.js';
3
- export function getDocusaurusLinksPerGroup(navigationSections, $, version) {
4
- if (version === '3' || version === '2') {
5
- return getDocusaurusLinksPerGroupLoop(navigationSections, $);
6
- }
7
- return [];
8
- }
9
- function getDocusaurusLinksPerGroupLoop(navigationSections, $) {
10
- return navigationSections.toArray().map((s) => {
11
- const section = $(s);
12
- // Links without a group
13
- if (section.hasClass('theme-doc-sidebar-item-link') || section.hasClass('menu__link')) {
14
- const linkHref = section.find('a[href]').first().attr('href');
15
- return {
16
- group: '',
17
- pages: linkHref !== undefined ? [linkHref] : [],
18
- };
19
- }
20
- const firstLink = !section.find('.menu__list-item-collapsible').first().find('a[href]').length
21
- ? section.find('.menu__link--sublist').first().find('a[href]')
22
- : section.find('.menu__list-item-collapsible').first().find('a[href]');
23
- const sectionTitle = firstLink.text();
24
- const firstHref = firstLink.attr('href');
25
- const linkSections = section.children().eq(1).children();
26
- const pages = getLinksRecursively(linkSections, $);
27
- return {
28
- group: sectionTitle || alternateGroupTitle(firstLink, pages),
29
- pages: firstHref ? [firstHref, ...pages] : pages,
30
- };
31
- });
32
- }
33
- //# sourceMappingURL=getDocusaurusLinksPerGroup.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"getDocusaurusLinksPerGroup.js","sourceRoot":"","sources":["../../../../src/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.ts"],"names":[],"mappings":"AAEA,OAAO,mBAAmB,MAAM,2BAA2B,CAAC;AAC5D,OAAO,mBAAmB,MAAM,0BAA0B,CAAC;AAE3D,MAAM,UAAU,0BAA0B,CACxC,kBAAoC,EACpC,CAAa,EACb,OAA2B;IAE3B,IAAI,OAAO,KAAK,GAAG,IAAI,OAAO,KAAK,GAAG,EAAE,CAAC;QACvC,OAAO,8BAA8B,CAAC,kBAAkB,EAAE,CAAC,CAAC,CAAC;IAC/D,CAAC;IACD,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,SAAS,8BAA8B,CAAC,kBAAoC,EAAE,CAAa;IACzF,OAAO,kBAAkB,CAAC,OAAO,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QAC5C,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAErB,wBAAwB;QACxB,IAAI,OAAO,CAAC,QAAQ,CAAC,6BAA6B,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;YACtF,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC9D,OAAO;gBACL,KAAK,EAAE,EAAE;gBACT,KAAK,EAAE,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE;aAChD,CAAC;QACJ,CAAC;QAED,MAAM,SAAS,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,MAAM;YAC5F,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC;YAC9D,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAEzE,MAAM,YAAY,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;QACtC,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACzC,MAAM,YAAY,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;QAEzD,MAAM,KAAK,GAAG,mBAAmB,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC;QAEnD,OAAO;YACL,KAAK,EAAE,YAAY,IAAI,mBAAmB,CAAC,SAAS,EAAE,KAAK,CAAC;YAC5D,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK;SACjD,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -1,3 +0,0 @@
1
- import { NavigationEntry } from '@mintlify/models';
2
- import { Cheerio, CheerioAPI, Element } from 'cheerio';
3
- export default function getLinksRecursively(linkSections: Cheerio<Element>, $: CheerioAPI): NavigationEntry[];
@@ -1,35 +0,0 @@
1
- // Used by Docusaurus and ReadMe section scrapers
2
- export default function getLinksRecursively(linkSections, $) {
3
- return linkSections
4
- .map((_, s) => {
5
- const subsection = $(s);
6
- let link = subsection.children().first();
7
- if (!link.attr('href')) {
8
- // Docusaurus nests the <a> inside a <div>
9
- link = link.find('a[href]').first();
10
- }
11
- const linkHref = link.attr('href');
12
- // Skip missing links. For example, GitBook uses
13
- // empty divs are used for styling a line beside the nav.
14
- // Skip external links until Mintlify supports them
15
- if (!linkHref ||
16
- linkHref === '#' ||
17
- linkHref.startsWith('https://') ||
18
- linkHref.startsWith('http://')) {
19
- return undefined;
20
- }
21
- const childLinks = subsection.children().eq(1).children();
22
- if (childLinks.length > 0) {
23
- // Put the section link in the list of pages.
24
- // When we support the section itself being a link we should update this
25
- return {
26
- group: link.text(),
27
- pages: [linkHref, ...getLinksRecursively(childLinks, $)],
28
- };
29
- }
30
- return linkHref;
31
- })
32
- .toArray()
33
- .filter(Boolean);
34
- }
35
- //# sourceMappingURL=getLinksRecursively.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"getLinksRecursively.js","sourceRoot":"","sources":["../../../../src/scraping/site-scrapers/links-per-group/getLinksRecursively.ts"],"names":[],"mappings":"AAGA,iDAAiD;AACjD,MAAM,CAAC,OAAO,UAAU,mBAAmB,CACzC,YAA8B,EAC9B,CAAa;IAEb,OAAO,YAAY;SAChB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACZ,MAAM,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACxB,IAAI,IAAI,GAAG,UAAU,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,CAAC;QAEzC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;YACvB,0CAA0C;YAC1C,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC;QACtC,CAAC;QACD,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEnC,gDAAgD;QAChD,yDAAyD;QACzD,mDAAmD;QACnD,IACE,CAAC,QAAQ;YACT,QAAQ,KAAK,GAAG;YAChB,QAAQ,CAAC,UAAU,CAAC,UAAU,CAAC;YAC/B,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,EAC9B,CAAC;YACD,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,MAAM,UAAU,GAAG,UAAU,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;QAE1D,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,6CAA6C;YAC7C,wEAAwE;YACxE,OAAO;gBACL,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE;gBAClB,KAAK,EAAE,CAAC,QAAQ,EAAE,GAAG,mBAAmB,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;aACzD,CAAC;QACJ,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC,CAAC;SACD,OAAO,EAAE;SACT,MAAM,CAAC,OAAO,CAAC,CAAC;AACrB,CAAC"}
@@ -1,3 +0,0 @@
1
- import { NavigationEntry } from '@mintlify/models';
2
- import { Cheerio, CheerioAPI, Element } from 'cheerio';
3
- export default function getLinksRecursivelyGitBook(linkSections: Cheerio<Element>, $: CheerioAPI): NavigationEntry[];
@@ -1,33 +0,0 @@
1
- // Used by GitBook section scraper
2
- export default function getLinksRecursivelyGitBook(linkSections, $) {
3
- return linkSections
4
- .map((_, s) => {
5
- const subsection = $(s);
6
- const sectionHeader = subsection.find('div').first();
7
- const link = subsection.find('a').first();
8
- const linkHref = link.attr('href');
9
- // Skip missing links. For example, GitBook uses
10
- // empty divs are used for styling a line beside the nav.
11
- // Skip external links until Mintlify supports them
12
- if (!linkHref ||
13
- linkHref === '#' ||
14
- linkHref.startsWith('https://') ||
15
- linkHref.startsWith('http://')) {
16
- return undefined;
17
- }
18
- const childLinks = subsection.find('ul').first().children();
19
- const title = link.text() ? link.text() : sectionHeader.text() ? sectionHeader.text() : '';
20
- if (childLinks.length > 0) {
21
- // Put the section link in the list of pages.
22
- // When we support the section itself being a link we should update this
23
- return {
24
- group: title,
25
- pages: [linkHref, ...getLinksRecursivelyGitBook(childLinks, $)],
26
- };
27
- }
28
- return linkHref;
29
- })
30
- .toArray()
31
- .filter(Boolean);
32
- }
33
- //# sourceMappingURL=getLinksRecursivelyGitBook.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"getLinksRecursivelyGitBook.js","sourceRoot":"","sources":["../../../../src/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.ts"],"names":[],"mappings":"AAGA,kCAAkC;AAClC,MAAM,CAAC,OAAO,UAAU,0BAA0B,CAChD,YAA8B,EAC9B,CAAa;IAEb,OAAO,YAAY;SAChB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACZ,MAAM,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACxB,MAAM,aAAa,GAAG,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,CAAC;QACrD,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC;QAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEnC,gDAAgD;QAChD,yDAAyD;QACzD,mDAAmD;QACnD,IACE,CAAC,QAAQ;YACT,QAAQ,KAAK,GAAG;YAChB,QAAQ,CAAC,UAAU,CAAC,UAAU,CAAC;YAC/B,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,EAC9B,CAAC;YACD,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,MAAM,UAAU,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,QAAQ,EAAE,CAAC;QAC5D,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAE3F,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,6CAA6C;YAC7C,wEAAwE;YACxE,OAAO;gBACL,KAAK,EAAE,KAAK;gBACZ,KAAK,EAAE,CAAC,QAAQ,EAAE,GAAG,0BAA0B,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;aAChE,CAAC;QACJ,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC,CAAC;SACD,OAAO,EAAE;SACT,MAAM,CAAC,OAAO,CAAC,CAAC;AACrB,CAAC"}
@@ -1,2 +0,0 @@
1
- import { Page } from 'puppeteer';
2
- export default function openNestedDocusaurusMenus(page: Page): Promise<string>;
@@ -1,30 +0,0 @@
1
- export default async function openNestedDocusaurusMenus(page) {
2
- let prevEncountered = [];
3
- let encounteredHref = ['fake-href-to-make-loop-run-at-least-once'];
4
- // Loop until we've encountered every link
5
- while (!encounteredHref.every((href) => prevEncountered.includes(href))) {
6
- prevEncountered = encounteredHref;
7
- encounteredHref = await page.evaluate((encounteredHref) => {
8
- const collapsible = Array.from(document.querySelectorAll('.menu__link.menu__link--sublist'));
9
- const linksFound = [];
10
- collapsible.forEach((collapsibleItem) => {
11
- const href = collapsibleItem.getAttribute('href');
12
- // Should never occur but we keep it as a fail-safe
13
- if (href?.startsWith('https://') || href?.startsWith('http://')) {
14
- return;
15
- }
16
- // Click any links we haven't seen before
17
- if (href && !encounteredHref.includes(href)) {
18
- collapsibleItem.click();
19
- }
20
- if (href) {
21
- linksFound.push(href);
22
- }
23
- });
24
- return linksFound;
25
- }, encounteredHref // Need to pass array into the browser
26
- );
27
- }
28
- return await page.content();
29
- }
30
- //# sourceMappingURL=openNestedDocusaurusMenus.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"openNestedDocusaurusMenus.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/openNestedDocusaurusMenus.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,OAAO,CAAC,KAAK,UAAU,yBAAyB,CAAC,IAAU;IAChE,IAAI,eAAe,GAAa,EAAE,CAAC;IACnC,IAAI,eAAe,GAAG,CAAC,0CAA0C,CAAC,CAAC;IAEnE,0CAA0C;IAC1C,OAAO,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;QACxE,eAAe,GAAG,eAAe,CAAC;QAClC,eAAe,GAAG,MAAM,IAAI,CAAC,QAAQ,CACnC,CAAC,eAAe,EAAE,EAAE;YAClB,MAAM,WAAW,GAAkB,KAAK,CAAC,IAAI,CAC3C,QAAQ,CAAC,gBAAgB,CAAC,iCAAiC,CAAC,CAC7D,CAAC;YAEF,MAAM,UAAU,GAAa,EAAE,CAAC;YAChC,WAAW,CAAC,OAAO,CAAC,CAAC,eAAe,EAAE,EAAE;gBACtC,MAAM,IAAI,GAAG,eAAe,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;gBAElD,mDAAmD;gBACnD,IAAI,IAAI,EAAE,UAAU,CAAC,UAAU,CAAC,IAAI,IAAI,EAAE,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;oBAChE,OAAO;gBACT,CAAC;gBAED,yCAAyC;gBACzC,IAAI,IAAI,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC5C,eAAe,CAAC,KAAK,EAAE,CAAC;gBAC1B,CAAC;gBAED,IAAI,IAAI,EAAE,CAAC;oBACT,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACxB,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,OAAO,UAAU,CAAC;QACpB,CAAC,EACD,eAAe,CAAC,sCAAsC;SACvD,CAAC;IACJ,CAAC;IAED,OAAO,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;AAC9B,CAAC"}
@@ -1,2 +0,0 @@
1
- import { Page } from 'puppeteer';
2
- export default function openNestedGitbookMenus(page: Page): Promise<string>;
@@ -1,21 +0,0 @@
1
- export default async function openNestedGitbookMenus(page) {
2
- let clickedAny = true;
3
- // Loop until we've encountered every closed menu
4
- while (clickedAny) {
5
- clickedAny = await page.evaluate(() => {
6
- let clicked = false;
7
- // Right pointing arrow. Only menus have this icon
8
- const icons = document.querySelectorAll('div > a > span > svg[style*="mask-image:url(https://ka-p.fontawesome.com/releases/v6.6.0/svgs/regular/chevron-right.svg?v=1&token=a463935e93)"]');
9
- icons.forEach((icon) => {
10
- const span = icon.parentElement;
11
- if (span && span.className.includes('rotate-0')) {
12
- span.click();
13
- clicked = true;
14
- }
15
- });
16
- return clicked;
17
- });
18
- }
19
- return await page.content();
20
- }
21
- //# sourceMappingURL=openNestedGitbookMenus.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"openNestedGitbookMenus.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/openNestedGitbookMenus.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,OAAO,CAAC,KAAK,UAAU,sBAAsB,CAAC,IAAU;IAC7D,IAAI,UAAU,GAAG,IAAI,CAAC;IAEtB,iDAAiD;IACjD,OAAO,UAAU,EAAE,CAAC;QAClB,UAAU,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;YACpC,IAAI,OAAO,GAAG,KAAK,CAAC;YACpB,kDAAkD;YAClD,MAAM,KAAK,GAAG,QAAQ,CAAC,gBAAgB,CACrC,iJAAiJ,CAClJ,CAAC;YAEF,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE;gBACrB,MAAM,IAAI,GAAG,IAAI,CAAC,aAAa,CAAC;gBAChC,IAAI,IAAI,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;oBAChD,IAAI,CAAC,KAAK,EAAE,CAAC;oBACb,OAAO,GAAG,IAAI,CAAC;gBACjB,CAAC;YACH,CAAC,CAAC,CAAC;YACH,OAAO,OAAO,CAAC;QACjB,CAAC,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;AAC9B,CAAC"}
@@ -1,5 +0,0 @@
1
- export declare function scrapeDocusaurusPage(html: string, origin: string, cliDir: string, imageBaseDir: string, overwrite: boolean, version: string | undefined): Promise<{
2
- title: string;
3
- description?: string;
4
- markdown?: string;
5
- }>;
@@ -1,53 +0,0 @@
1
- import * as cheerio from 'cheerio';
2
- import { NodeHtmlMarkdown } from 'node-html-markdown';
3
- import downloadAllImages from '../downloadAllImages.js';
4
- import replaceImagePaths from '../replaceImagePaths.js';
5
- export async function scrapeDocusaurusPage(html, origin, cliDir, imageBaseDir, overwrite, version // expects "2", or "3". Have not written support for "1" yet
6
- ) {
7
- const $ = cheerio.load(html);
8
- const article = version === '3' ? $('.theme-doc-markdown').first() : $('article').first();
9
- if (article.length === 0) {
10
- // Index pages with no additional text don't have the markdown class
11
- return {
12
- title: '',
13
- };
14
- }
15
- const titleComponent = article.find('h1');
16
- const title = titleComponent.text().trim();
17
- // Do not include title in the content when we insert it in our metadata
18
- titleComponent.remove();
19
- const markdownContent = version === '3' ? article : article.find('.markdown').first();
20
- const origToWritePath = await downloadAllImages($, markdownContent, origin, imageBaseDir, overwrite);
21
- const markdownHtml = markdownContent.html();
22
- const nhm = new NodeHtmlMarkdown({ useInlineLinks: false });
23
- let markdown = markdownHtml ? nhm.translate(markdownHtml) : null;
24
- if (markdown == null) {
25
- console.error('We do not support scraping this page. Content will be empty');
26
- return { title, description: undefined, markdown: '' };
27
- }
28
- // Description only exists in meta tags. The code is commented out because its prone to incorrectly
29
- // including a description if the first line of text had markdown annotations like `.
30
- // The commented out alternative is to ignore description if it's the first line of text,
31
- // this means it was not set in the metadata and Docusaurus defaulted to the text.
32
- const description = undefined;
33
- // let description = $('meta[property="og:description"]').attr("content");
34
- // if (markdown.startsWith(description)) {
35
- // description = null;
36
- // }
37
- // Remove Docusaurus links from headers
38
- // When we parse their HTML the parser adds things like:
39
- // [](#setup "Direct link to heading")
40
- // to the end of each header.
41
- markdown = markdown.replace(/\[\]\(#.+ ".+"\)\n/g, '\n');
42
- // Remove unnecessary nonwidth blank space characters
43
- markdown = markdown.replace(/\u200b/g, '');
44
- // Reduce unnecessary blank lines
45
- markdown = markdown.replace(/\n\n\n/g, '\n\n');
46
- // Mintlify doesn't support bolded headers, remove the asterisks
47
- markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, '$1 $2\n');
48
- if (origToWritePath) {
49
- markdown = replaceImagePaths(origToWritePath, cliDir, markdown);
50
- }
51
- return { title, description, markdown };
52
- }
53
- //# sourceMappingURL=scrapeDocusaurusPage.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"scrapeDocusaurusPage.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeDocusaurusPage.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAEtD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,OAA2B,CAAC,4DAA4D;;IAMxF,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,OAAO,GAAG,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC;IAE1F,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,oEAAoE;QACpE,OAAO;YACL,KAAK,EAAE,EAAE;SACV,CAAC;IACJ,CAAC;IAED,MAAM,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAE3C,wEAAwE;IACxE,cAAc,CAAC,MAAM,EAAE,CAAC;IAExB,MAAM,eAAe,GAAG,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,KAAK,EAAE,CAAC;IAEtF,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,eAAe,EACf,MAAM,EACN,YAAY,EACZ,SAAS,CACV,CAAC;IAEF,MAAM,YAAY,GAAG,eAAe,CAAC,IAAI,EAAE,CAAC;IAE5C,MAAM,GAAG,GAAG,IAAI,gBAAgB,CAAC,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;IAC5D,IAAI,QAAQ,GAAG,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAEjE,IAAI,QAAQ,IAAI,IAAI,EAAE,CAAC;QACrB,OAAO,CAAC,KAAK,CAAC,6DAA6D,CAAC,CAAC;QAC7E,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;IACzD,CAAC;IAED,mGAAmG;IACnG,qFAAqF;IACrF,yFAAyF;IACzF,kFAAkF;IAClF,MAAM,WAAW,GAAG,SAAS,CAAC;IAC9B,0EAA0E;IAC1E,0CAA0C;IAC1C,wBAAwB;IACxB,IAAI;IAEJ,uCAAuC;IACvC,wDAAwD;IACxD,sCAAsC;IACtC,6BAA6B;IAC7B,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,qBAAqB,EAAE,IAAI,CAAC,CAAC;IAEzD,qDAAqD;IACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAE3C,iCAAiC;IACjC,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,gEAAgE;IAChE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;IACjE,IAAI,eAAe,EAAE,CAAC;QACpB,QAAQ,GAAG,iBAAiB,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAClE,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC"}
@@ -1,2 +0,0 @@
1
- import { Navigation } from '@mintlify/models';
2
- export declare function scrapeDocusaurusSection(html: string, origin: string, cliDir: string, imageBaseDir: string, overwrite: boolean, version?: string): Promise<Navigation>;
@@ -1,32 +0,0 @@
1
- import * as cheerio from 'cheerio';
2
- import combineNavWithEmptyGroupTitles from '../combineNavWithEmptyGroupTitles.js';
3
- import downloadLogoImage from '../downloadLogoImage.js';
4
- import { scrapeGettingFileNameFromUrl } from '../scrapeGettingFileNameFromUrl.js';
5
- import { getDocusaurusLinksPerGroup } from './links-per-group/getDocusaurusLinksPerGroup.js';
6
- import { scrapeDocusaurusPage } from './scrapeDocusaurusPage.js';
7
- export async function scrapeDocusaurusSection(html, origin, cliDir, imageBaseDir, overwrite, version) {
8
- const $ = cheerio.load(html);
9
- // Download the logo
10
- const logoSrc = $('.navbar__logo img').attr('src');
11
- void downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite);
12
- // Get all the navigation sections
13
- const navigationSections = !$('.theme-doc-sidebar-menu').first().children().length
14
- ? $('.main-wrapper').first().find('.menu__list').first().children()
15
- : $('.theme-doc-sidebar-menu').first().children();
16
- // Get all links per group
17
- const groupsConfig = getDocusaurusLinksPerGroup(navigationSections, $, version);
18
- // Merge groups with empty titles together
19
- const reducedGroupsConfig = combineNavWithEmptyGroupTitles(groupsConfig);
20
- // Scrape each link in the navigation.
21
- const groupsConfigCleanPaths = await Promise.all(reducedGroupsConfig.map(async (groupConfig) => {
22
- groupConfig.pages = (await Promise.all(groupConfig.pages.map(async (navEntry) =>
23
- // Docusaurus requires a directory on all sections wheras we use root.
24
- // /docs is their default directory so we remove it
25
- scrapeGettingFileNameFromUrl(navEntry, cliDir, origin, overwrite, scrapeDocusaurusPage, false, version, '/docs'))))
26
- // Remove skipped index pages (they return undefined from the above function)
27
- .filter(Boolean);
28
- return groupConfig;
29
- }));
30
- return groupsConfigCleanPaths;
31
- }
32
- //# sourceMappingURL=scrapeDocusaurusSection.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"scrapeDocusaurusSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeDocusaurusSection.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,OAAO,8BAA8B,MAAM,sCAAsC,CAAC;AAClF,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,EAAE,0BAA0B,EAAE,MAAM,iDAAiD,CAAC;AAC7F,OAAO,EAAE,oBAAoB,EAAE,MAAM,2BAA2B,CAAC;AAEjE,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAC3C,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,OAAgB;IAEhB,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,oBAAoB;IACpB,MAAM,OAAO,GAAG,CAAC,CAAC,mBAAmB,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACnD,KAAK,iBAAiB,CAAC,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;IAEjE,kCAAkC;IAClC,MAAM,kBAAkB,GAAG,CAAC,CAAC,CAAC,yBAAyB,CAAC,CAAC,KAAK,EAAE,CAAC,QAAQ,EAAE,CAAC,MAAM;QAChF,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,KAAK,EAAE,CAAC,QAAQ,EAAE;QACnE,CAAC,CAAC,CAAC,CAAC,yBAAyB,CAAC,CAAC,KAAK,EAAE,CAAC,QAAQ,EAAE,CAAC;IAEpD,0BAA0B;IAC1B,MAAM,YAAY,GAAe,0BAA0B,CAAC,kBAAkB,EAAE,CAAC,EAAE,OAAO,CAAC,CAAC;IAE5F,0CAA0C;IAC1C,MAAM,mBAAmB,GAAG,8BAA8B,CAAC,YAAY,CAAC,CAAC;IAEzE,sCAAsC;IACtC,MAAM,sBAAsB,GAAG,MAAM,OAAO,CAAC,GAAG,CAC9C,mBAAmB,CAAC,GAAG,CAAC,KAAK,EAAE,WAAW,EAAE,EAAE;QAC5C,WAAW,CAAC,KAAK,GAAG,CAClB,MAAM,OAAO,CAAC,GAAG,CACf,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,QAAyB,EAAE,EAAE;QACxD,sEAAsE;QACtE,mDAAmD;QACnD,4BAA4B,CAC1B,QAAQ,EACR,MAAM,EACN,MAAM,EACN,SAAS,EACT,oBAAoB,EACpB,KAAK,EACL,OAAO,EACP,OAAO,CACR,CACF,CACF,CACF;YACC,6EAA6E;aAC5E,MAAM,CAAC,OAAO,CAAC,CAAC;QACnB,OAAO,WAAW,CAAC;IACrB,CAAC,CAAC,CACH,CAAC;IAEF,OAAO,sBAAsB,CAAC;AAChC,CAAC"}
@@ -1,5 +0,0 @@
1
- export declare function scrapeGitBookPage(html: string, origin: string, cliDir: string, imageBaseDir: string, overwrite: boolean, _: string | undefined): Promise<{
2
- title: string;
3
- description: string;
4
- markdown: string;
5
- }>;
@@ -1,56 +0,0 @@
1
- import * as cheerio from 'cheerio';
2
- import { NodeHtmlMarkdown } from 'node-html-markdown';
3
- import { SUPPORTED_MEDIA_EXTENSIONS } from '../../constants.js';
4
- import { getLengthUntilMetadata } from '../../downloadImage.js';
5
- import downloadAllImages from '../downloadAllImages.js';
6
- import replaceImagePaths from '../replaceImagePaths.js';
7
- export async function scrapeGitBookPage(html, origin, cliDir, imageBaseDir, overwrite, _ // version
8
- ) {
9
- const $ = cheerio.load(html);
10
- const mainContent = $('body > div > div > div > div > main');
11
- const titleAndDescription = mainContent.children('header');
12
- const titleComponent = titleAndDescription.children('h1');
13
- const description = titleAndDescription.text().replace(titleComponent.text(), '').trim();
14
- const title = titleComponent.text().trim();
15
- const content = titleAndDescription.next('div');
16
- // Replace code blocks with parseable html
17
- const codeBlocks = content.find('pre > code');
18
- codeBlocks.each((_, c) => {
19
- const code = $(c);
20
- const codeContent = code
21
- .children()
22
- .toArray()
23
- .map((d) => $(d).text())
24
- .filter((text) => text !== '')
25
- .join('\n');
26
- code.replaceWith(`<pre><code>${codeContent}</code></pre>`);
27
- });
28
- const contentHtml = $.html(content);
29
- const modifyFileName = (fileName) => {
30
- // Remove GitBook metadata from the start
31
- // The first four %2F split metadata fields. Remaining ones are part of the file name.
32
- for (const ext of SUPPORTED_MEDIA_EXTENSIONS) {
33
- if (fileName.includes(`.${ext}`)) {
34
- const splitFileName = fileName.split('%2F').slice(4).join('%2F');
35
- return getLengthUntilMetadata(splitFileName, ext);
36
- }
37
- }
38
- return fileName.split('%2F').slice(4).join('%2F');
39
- };
40
- const origToWritePath = await downloadAllImages($, content, origin, imageBaseDir, overwrite, modifyFileName);
41
- const nhm = new NodeHtmlMarkdown({ useInlineLinks: false });
42
- let markdown = nhm.translate(contentHtml);
43
- // Keep headers on one line
44
- markdown = markdown.replace(/# \n\n/g, '# ');
45
- // Remove unnecessary nonwidth blank space characters
46
- markdown = markdown.replace(/\u200b/g, '');
47
- // Reduce unnecessary blank lines
48
- markdown = markdown.replace(/\n\n\n/g, '\n\n');
49
- // Mintlify doesn't support bolded headers, remove the asterisks
50
- markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, '$1 $2\n');
51
- if (origToWritePath) {
52
- markdown = replaceImagePaths(origToWritePath, cliDir, markdown);
53
- }
54
- return { title, description, markdown };
55
- }
56
- //# sourceMappingURL=scrapeGitBookPage.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"scrapeGitBookPage.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeGitBookPage.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAEtD,OAAO,EAAE,0BAA0B,EAAE,MAAM,oBAAoB,CAAC;AAChE,OAAO,EAAE,sBAAsB,EAAE,MAAM,wBAAwB,CAAC;AAChE,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,CAAqB,CAAC,UAAU;;IAEhC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,WAAW,GAAG,CAAC,CAAC,qCAAqC,CAAC,CAAC;IAC7D,MAAM,mBAAmB,GAAG,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAC3D,MAAM,cAAc,GAAG,mBAAmB,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC1D,MAAM,WAAW,GAAG,mBAAmB,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IACzF,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAE3C,MAAM,OAAO,GAAG,mBAAmB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAEhD,0CAA0C;IAC1C,MAAM,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAC9C,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACvB,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAClB,MAAM,WAAW,GAAG,IAAI;aACrB,QAAQ,EAAE;aACV,OAAO,EAAE;aACT,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;aACvB,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,EAAE,CAAC;aAC7B,IAAI,CAAC,IAAI,CAAC,CAAC;QACd,IAAI,CAAC,WAAW,CAAC,cAAc,WAAW,eAAe,CAAC,CAAC;IAC7D,CAAC,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAEpC,MAAM,cAAc,GAAG,CAAC,QAAgB,EAAE,EAAE;QAC1C,yCAAyC;QACzC,sFAAsF;QACtF,KAAK,MAAM,GAAG,IAAI,0BAA0B,EAAE,CAAC;YAC7C,IAAI,QAAQ,CAAC,QAAQ,CAAC,IAAI,GAAG,EAAE,CAAC,EAAE,CAAC;gBACjC,MAAM,aAAa,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACjE,OAAO,sBAAsB,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC;YACpD,CAAC;QACH,CAAC;QACD,OAAO,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACpD,CAAC,CAAC;IAEF,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,OAAO,EACP,MAAM,EACN,YAAY,EACZ,SAAS,EACT,cAAc,CACf,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,gBAAgB,CAAC,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;IAC5D,IAAI,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAE1C,2BAA2B;IAC3B,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IAE7C,qDAAqD;IACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAE3C,iCAAiC;IACjC,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,gEAAgE;IAChE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;IACjE,IAAI,eAAe,EAAE,CAAC;QACpB,QAAQ,GAAG,iBAAiB,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAClE,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC"}
@@ -1,2 +0,0 @@
1
- import { NavigationEntry } from '@mintlify/models';
2
- export declare function scrapeGitBookSection(html: string, origin: string, cliDir: string, imageBaseDir: string, overwrite: boolean, version: string | undefined): Promise<NavigationEntry[]>;
@@ -1,42 +0,0 @@
1
- import * as cheerio from 'cheerio';
2
- import combineNavWithEmptyGroupTitles from '../combineNavWithEmptyGroupTitles.js';
3
- import downloadLogoImage from '../downloadLogoImage.js';
4
- import { scrapeGettingFileNameFromUrl } from '../scrapeGettingFileNameFromUrl.js';
5
- import alternateGroupTitle from './alternateGroupTitle.js';
6
- import getLinksRecursivelyGitBook from './links-per-group/getLinksRecursivelyGitBook.js';
7
- import { scrapeGitBookPage } from './scrapeGitBookPage.js';
8
- export async function scrapeGitBookSection(html, origin, cliDir, imageBaseDir, overwrite, version) {
9
- const $ = cheerio.load(html);
10
- // Download the logo
11
- const logoSrc = $('body > header > div > div > div > a > img').first().attr('src');
12
- downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite).catch(console.error);
13
- // Get all the navigation sections
14
- // Some variants of the GitBook UI show the logo and search base in the side navigation bar,
15
- // but the navigation sections are always the last value.
16
- const navigationSections = $('body > div > div > aside > div > ul > li');
17
- // Get all links per group
18
- const groupsConfig = navigationSections
19
- .toArray()
20
- .map((s) => {
21
- const section = $(s);
22
- const sectionHeader = section.children('div').first();
23
- const sectionTitle = sectionHeader.text();
24
- // Only present if the nested navigation is not in a group
25
- const firstLink = section.find('li > div > a').first();
26
- const firstHref = firstLink.attr('href') || '/';
27
- const linkSections = section.find('ul').first().children();
28
- const pages = getLinksRecursivelyGitBook(linkSections, $);
29
- return {
30
- group: sectionTitle || alternateGroupTitle(firstLink, pages),
31
- pages: firstHref ? [firstHref, ...pages] : pages,
32
- };
33
- })
34
- .filter(Boolean);
35
- // Merge groups with empty titles together
36
- const reducedGroupsConfig = combineNavWithEmptyGroupTitles(groupsConfig);
37
- // Scrape each link in the navigation.
38
- return Promise.all(reducedGroupsConfig.map(async (navEntry) => {
39
- return await scrapeGettingFileNameFromUrl(navEntry, cliDir, origin, overwrite, scrapeGitBookPage, true, version);
40
- }));
41
- }
42
- //# sourceMappingURL=scrapeGitBookSection.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"scrapeGitBookSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeGitBookSection.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,OAAO,8BAA8B,MAAM,sCAAsC,CAAC;AAClF,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,mBAAmB,MAAM,0BAA0B,CAAC;AAC3D,OAAO,0BAA0B,MAAM,iDAAiD,CAAC;AACzF,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAE3D,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,OAA2B;IAE3B,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,oBAAoB;IACpB,MAAM,OAAO,GAAG,CAAC,CAAC,2CAA2C,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACnF,iBAAiB,CAAC,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IAEjF,kCAAkC;IAClC,4FAA4F;IAC5F,yDAAyD;IACzD,MAAM,kBAAkB,GAAG,CAAC,CAAC,0CAA0C,CAAC,CAAC;IAEzE,0BAA0B;IAC1B,MAAM,YAAY,GAAe,kBAAkB;SAChD,OAAO,EAAE;SACT,GAAG,CAAC,CAAC,CAAkB,EAAE,EAAE;QAC1B,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,aAAa,GAAG,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,CAAC;QACtD,MAAM,YAAY,GAAG,aAAa,CAAC,IAAI,EAAE,CAAC;QAE1C,0DAA0D;QAC1D,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,KAAK,EAAE,CAAC;QACvD,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC;QAEhD,MAAM,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,QAAQ,EAAE,CAAC;QAC3D,MAAM,KAAK,GAAG,0BAA0B,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC;QAE1D,OAAO;YACL,KAAK,EAAE,YAAY,IAAI,mBAAmB,CAAC,SAAS,EAAE,KAAK,CAAC;YAC5D,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK;SACjD,CAAC;IACJ,CAAC,CAAC;SACD,MAAM,CAAC,OAAO,CAAC,CAAC;IAEnB,0CAA0C;IAC1C,MAAM,mBAAmB,GAAG,8BAA8B,CAAC,YAAY,CAAC,CAAC;IAEzE,sCAAsC;IACtC,OAAO,OAAO,CAAC,GAAG,CAChB,mBAAmB,CAAC,GAAG,CAAC,KAAK,EAAE,QAAyB,EAAE,EAAE;QAC1D,OAAO,MAAM,4BAA4B,CACvC,QAAQ,EACR,MAAM,EACN,MAAM,EACN,SAAS,EACT,iBAAiB,EACjB,IAAI,EACJ,OAAO,CACR,CAAC;IACJ,CAAC,CAAC,CACH,CAAC;AACJ,CAAC"}
@@ -1,5 +0,0 @@
1
- export declare function scrapeReadMePage(html: string, origin: string, cliDir: string, imageBaseDir: string, overwrite: boolean, _: string | undefined): Promise<{
2
- title: string;
3
- description: string;
4
- markdown: string;
5
- }>;
@@ -1,38 +0,0 @@
1
- import * as cheerio from 'cheerio';
2
- import { NodeHtmlMarkdown } from 'node-html-markdown';
3
- import downloadAllImages from '../downloadAllImages.js';
4
- import replaceImagePaths from '../replaceImagePaths.js';
5
- export async function scrapeReadMePage(html, origin, cliDir, imageBaseDir, overwrite, _ // version
6
- ) {
7
- const $ = cheerio.load(html);
8
- const titleComponent = $('h1').first();
9
- const title = titleComponent.text().trim();
10
- let description = $('.markdown-body', titleComponent.parent()).text().trim();
11
- if (!description) {
12
- description = $('.rm-Article > header p').text().trim();
13
- }
14
- let content = $('.content-body .markdown-body').first();
15
- if (content.length === 0) {
16
- content = $('.rm-Article > .markdown-body');
17
- }
18
- // API Pages don't have a markdown body in the same position so there's no HTML
19
- const contentHtml = content.html() || '';
20
- const origToWritePath = await downloadAllImages($, content, origin, imageBaseDir, overwrite);
21
- const nhm = new NodeHtmlMarkdown({ useInlineLinks: false });
22
- let markdown = nhm.translate(contentHtml);
23
- // Keep headers on one line and increase their depth by one
24
- markdown = markdown.replace(/# \n\n/g, '## ');
25
- // Remove unnecessary nonwidth blank space characters
26
- markdown = markdown.replace(/\u200b/g, '');
27
- // Remove ReadMe anchor links
28
- markdown = markdown.replace(/\n\[\]\(#.+\)\n/g, '\n');
29
- // Reduce unnecessary blank lines
30
- markdown = markdown.replace(/\n\n\n/g, '\n\n');
31
- // Mintlify doesn't support bolded headers, remove the asterisks
32
- markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, '$1 $2\n');
33
- if (origToWritePath) {
34
- markdown = replaceImagePaths(origToWritePath, cliDir, markdown);
35
- }
36
- return { title, description, markdown };
37
- }
38
- //# sourceMappingURL=scrapeReadMePage.js.map