@mintlify/scraping 3.0.187 → 3.0.189

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (345) hide show
  1. package/README.md +0 -5
  2. package/bin/assert.d.ts +5 -0
  3. package/bin/assert.js +13 -0
  4. package/bin/assert.js.map +1 -0
  5. package/bin/cli.js +43 -72
  6. package/bin/cli.js.map +1 -1
  7. package/bin/components/Accordion.d.ts +5 -0
  8. package/bin/components/Accordion.js +54 -0
  9. package/bin/components/Accordion.js.map +1 -0
  10. package/bin/components/AccordionGroup.d.ts +5 -0
  11. package/bin/components/AccordionGroup.js +52 -0
  12. package/bin/components/AccordionGroup.js.map +1 -0
  13. package/bin/components/Callout.d.ts +5 -0
  14. package/bin/components/Callout.js +114 -0
  15. package/bin/components/Callout.js.map +1 -0
  16. package/bin/components/Card.d.ts +5 -0
  17. package/bin/components/Card.js +135 -0
  18. package/bin/components/Card.js.map +1 -0
  19. package/bin/components/CardGroup.d.ts +5 -0
  20. package/bin/components/CardGroup.js +52 -0
  21. package/bin/components/CardGroup.js.map +1 -0
  22. package/bin/components/CodeGroup.d.ts +5 -0
  23. package/bin/components/CodeGroup.js +166 -0
  24. package/bin/components/CodeGroup.js.map +1 -0
  25. package/bin/components/Frame.d.ts +5 -0
  26. package/bin/components/Frame.js +51 -0
  27. package/bin/components/Frame.js.map +1 -0
  28. package/bin/components/Tabs.d.ts +5 -0
  29. package/bin/components/Tabs.js +122 -0
  30. package/bin/components/Tabs.js.map +1 -0
  31. package/bin/components/link.d.ts +2 -0
  32. package/bin/components/link.js +16 -0
  33. package/bin/components/link.js.map +1 -0
  34. package/bin/constants.d.ts +6 -7
  35. package/bin/constants.js +31 -12
  36. package/bin/constants.js.map +1 -1
  37. package/bin/customComponents/create.d.ts +10 -0
  38. package/bin/customComponents/create.js +69 -0
  39. package/bin/customComponents/create.js.map +1 -0
  40. package/bin/customComponents/plugin.d.ts +2 -0
  41. package/bin/customComponents/plugin.js +26 -0
  42. package/bin/customComponents/plugin.js.map +1 -0
  43. package/bin/customComponents/selective.d.ts +6 -0
  44. package/bin/customComponents/selective.js +29 -0
  45. package/bin/customComponents/selective.js.map +1 -0
  46. package/bin/nav/iterate.d.ts +2 -0
  47. package/bin/nav/iterate.js +15 -0
  48. package/bin/nav/iterate.js.map +1 -0
  49. package/bin/nav/listItems.d.ts +8 -0
  50. package/bin/nav/listItems.js +62 -0
  51. package/bin/nav/listItems.js.map +1 -0
  52. package/bin/nav/retrieve.d.ts +3 -0
  53. package/bin/nav/retrieve.js +75 -0
  54. package/bin/nav/retrieve.js.map +1 -0
  55. package/bin/nav/root.d.ts +2 -0
  56. package/bin/nav/root.js +40 -0
  57. package/bin/nav/root.js.map +1 -0
  58. package/bin/openapi/generateOpenApiPages.js +2 -2
  59. package/bin/openapi/generateOpenApiPages.js.map +1 -1
  60. package/bin/root/retrieve.d.ts +2 -0
  61. package/bin/root/retrieve.js +46 -0
  62. package/bin/root/retrieve.js.map +1 -0
  63. package/bin/scrapingPipeline/group.d.ts +5 -0
  64. package/bin/scrapingPipeline/group.js +46 -0
  65. package/bin/scrapingPipeline/group.js.map +1 -0
  66. package/bin/scrapingPipeline/icon.d.ts +2 -0
  67. package/bin/scrapingPipeline/icon.js +22 -0
  68. package/bin/scrapingPipeline/icon.js.map +1 -0
  69. package/bin/scrapingPipeline/images.d.ts +3 -0
  70. package/bin/scrapingPipeline/images.js +50 -0
  71. package/bin/scrapingPipeline/images.js.map +1 -0
  72. package/bin/scrapingPipeline/logo.d.ts +5 -0
  73. package/bin/scrapingPipeline/logo.js +92 -0
  74. package/bin/scrapingPipeline/logo.js.map +1 -0
  75. package/bin/scrapingPipeline/page.d.ts +6 -0
  76. package/bin/scrapingPipeline/page.js +102 -0
  77. package/bin/scrapingPipeline/page.js.map +1 -0
  78. package/bin/scrapingPipeline/root.d.ts +2 -0
  79. package/bin/scrapingPipeline/root.js +8 -0
  80. package/bin/scrapingPipeline/root.js.map +1 -0
  81. package/bin/scrapingPipeline/site.d.ts +7 -0
  82. package/bin/scrapingPipeline/site.js +129 -0
  83. package/bin/scrapingPipeline/site.js.map +1 -0
  84. package/bin/scrapingPipeline/tabs.d.ts +3 -0
  85. package/bin/scrapingPipeline/tabs.js +67 -0
  86. package/bin/scrapingPipeline/tabs.js.map +1 -0
  87. package/bin/tabs/retrieveReadme.d.ts +3 -0
  88. package/bin/tabs/retrieveReadme.js +78 -0
  89. package/bin/tabs/retrieveReadme.js.map +1 -0
  90. package/bin/tsconfig.build.tsbuildinfo +1 -1
  91. package/bin/types/components.d.ts +2 -0
  92. package/bin/types/components.js +2 -0
  93. package/bin/types/components.js.map +1 -0
  94. package/bin/types/framework.d.ts +8 -0
  95. package/bin/types/framework.js +3 -0
  96. package/bin/types/framework.js.map +1 -0
  97. package/bin/types/hast.d.ts +6 -0
  98. package/bin/types/hast.js +2 -0
  99. package/bin/types/hast.js.map +1 -0
  100. package/bin/types/result.d.ts +7 -0
  101. package/bin/types/result.js +2 -0
  102. package/bin/types/result.js.map +1 -0
  103. package/bin/types/scrapeFunc.d.ts +3 -0
  104. package/bin/types/scrapeFunc.js +2 -0
  105. package/bin/types/scrapeFunc.js.map +1 -0
  106. package/bin/utils/append.d.ts +1 -0
  107. package/bin/utils/append.js +12 -0
  108. package/bin/utils/append.js.map +1 -0
  109. package/bin/utils/children.d.ts +5 -0
  110. package/bin/utils/children.js +35 -0
  111. package/bin/utils/children.js.map +1 -0
  112. package/bin/utils/className.d.ts +3 -0
  113. package/bin/utils/className.js +13 -0
  114. package/bin/utils/className.js.map +1 -0
  115. package/bin/utils/detectFramework.d.ts +4 -0
  116. package/bin/utils/detectFramework.js +60 -0
  117. package/bin/utils/detectFramework.js.map +1 -0
  118. package/bin/utils/emptyParagraphs.d.ts +3 -0
  119. package/bin/utils/emptyParagraphs.js +19 -0
  120. package/bin/utils/emptyParagraphs.js.map +1 -0
  121. package/bin/utils/errors.d.ts +3 -0
  122. package/bin/utils/errors.js +16 -0
  123. package/bin/utils/errors.js.map +1 -0
  124. package/bin/utils/escape.d.ts +2 -0
  125. package/bin/utils/escape.js +25 -0
  126. package/bin/utils/escape.js.map +1 -0
  127. package/bin/utils/extension.d.ts +3 -0
  128. package/bin/utils/extension.js +18 -0
  129. package/bin/utils/extension.js.map +1 -0
  130. package/bin/utils/file.d.ts +4 -0
  131. package/bin/utils/file.js +43 -0
  132. package/bin/utils/file.js.map +1 -0
  133. package/bin/utils/firstChild.d.ts +2 -0
  134. package/bin/utils/firstChild.js +12 -0
  135. package/bin/utils/firstChild.js.map +1 -0
  136. package/bin/utils/images.d.ts +5 -0
  137. package/bin/utils/images.js +86 -0
  138. package/bin/utils/images.js.map +1 -0
  139. package/bin/utils/img.d.ts +2 -0
  140. package/bin/utils/img.js +15 -0
  141. package/bin/utils/img.js.map +1 -0
  142. package/bin/utils/log.d.ts +18 -0
  143. package/bin/utils/log.js +68 -0
  144. package/bin/utils/log.js.map +1 -0
  145. package/bin/utils/nestedRoots.d.ts +7 -0
  146. package/bin/utils/nestedRoots.js +19 -0
  147. package/bin/utils/nestedRoots.js.map +1 -0
  148. package/bin/utils/network.d.ts +5 -0
  149. package/bin/utils/network.js +82 -0
  150. package/bin/utils/network.js.map +1 -0
  151. package/bin/utils/path.d.ts +1 -0
  152. package/bin/utils/path.js +22 -0
  153. package/bin/utils/path.js.map +1 -0
  154. package/bin/utils/position.d.ts +3 -0
  155. package/bin/utils/position.js +12 -0
  156. package/bin/utils/position.js.map +1 -0
  157. package/bin/utils/reservedNames.d.ts +4 -0
  158. package/bin/utils/reservedNames.js +27 -0
  159. package/bin/utils/reservedNames.js.map +1 -0
  160. package/bin/utils/strings.d.ts +2 -0
  161. package/bin/utils/strings.js +7 -0
  162. package/bin/utils/strings.js.map +1 -0
  163. package/bin/utils/text.d.ts +2 -0
  164. package/bin/utils/text.js +11 -0
  165. package/bin/utils/text.js.map +1 -0
  166. package/bin/utils/title.d.ts +10 -0
  167. package/bin/utils/title.js +58 -0
  168. package/bin/utils/title.js.map +1 -0
  169. package/bin/utils/url.d.ts +3 -0
  170. package/bin/utils/url.js +10 -0
  171. package/bin/utils/url.js.map +1 -0
  172. package/package.json +20 -11
  173. package/src/assert.ts +15 -0
  174. package/src/cli.ts +53 -90
  175. package/src/components/Accordion.ts +84 -0
  176. package/src/components/AccordionGroup.ts +69 -0
  177. package/src/components/Callout.ts +159 -0
  178. package/src/components/Card.ts +168 -0
  179. package/src/components/CardGroup.ts +69 -0
  180. package/src/components/CodeGroup.ts +209 -0
  181. package/src/components/Frame.ts +86 -0
  182. package/src/components/Tabs.ts +154 -0
  183. package/src/components/link.ts +17 -0
  184. package/src/constants.ts +37 -19
  185. package/src/customComponents/create.ts +106 -0
  186. package/src/customComponents/plugin.ts +31 -0
  187. package/src/customComponents/selective.ts +37 -0
  188. package/src/nav/iterate.ts +18 -0
  189. package/src/nav/listItems.ts +82 -0
  190. package/src/nav/retrieve.ts +88 -0
  191. package/src/nav/root.ts +47 -0
  192. package/src/openapi/generateOpenApiPages.ts +2 -2
  193. package/src/root/retrieve.ts +52 -0
  194. package/src/scrapingPipeline/group.ts +62 -0
  195. package/src/scrapingPipeline/icon.ts +26 -0
  196. package/src/scrapingPipeline/images.ts +67 -0
  197. package/src/scrapingPipeline/logo.ts +127 -0
  198. package/src/scrapingPipeline/page.ts +130 -0
  199. package/src/scrapingPipeline/root.ts +10 -0
  200. package/src/scrapingPipeline/site.ts +161 -0
  201. package/src/scrapingPipeline/tabs.ts +87 -0
  202. package/src/tabs/retrieveReadme.ts +99 -0
  203. package/src/types/components.ts +3 -0
  204. package/src/types/framework.ts +10 -0
  205. package/src/types/hast.ts +12 -0
  206. package/src/types/result.ts +1 -0
  207. package/src/types/scrapeFunc.ts +9 -0
  208. package/src/utils/append.ts +9 -0
  209. package/src/utils/children.ts +51 -0
  210. package/src/utils/className.ts +14 -0
  211. package/src/utils/detectFramework.ts +72 -0
  212. package/src/utils/emptyParagraphs.ts +21 -0
  213. package/src/utils/errors.ts +24 -0
  214. package/src/utils/escape.ts +30 -0
  215. package/src/utils/extension.ts +19 -0
  216. package/src/utils/file.ts +58 -0
  217. package/src/utils/firstChild.ts +13 -0
  218. package/src/utils/images.ts +101 -0
  219. package/src/utils/img.ts +17 -0
  220. package/src/utils/log.ts +82 -0
  221. package/src/utils/nestedRoots.ts +20 -0
  222. package/src/utils/network.ts +95 -0
  223. package/src/utils/path.ts +27 -0
  224. package/src/utils/position.ts +14 -0
  225. package/src/utils/reservedNames.ts +31 -0
  226. package/src/utils/strings.ts +7 -0
  227. package/src/utils/text.ts +11 -0
  228. package/src/utils/title.ts +68 -0
  229. package/src/utils/url.ts +8 -0
  230. package/bin/browser.d.ts +0 -2
  231. package/bin/browser.js +0 -24
  232. package/bin/browser.js.map +0 -1
  233. package/bin/checks.d.ts +0 -8
  234. package/bin/checks.js +0 -24
  235. package/bin/checks.js.map +0 -1
  236. package/bin/downloadImage.d.ts +0 -5
  237. package/bin/downloadImage.js +0 -88
  238. package/bin/downloadImage.js.map +0 -1
  239. package/bin/scraping/combineNavWithEmptyGroupTitles.d.ts +0 -2
  240. package/bin/scraping/combineNavWithEmptyGroupTitles.js +0 -20
  241. package/bin/scraping/combineNavWithEmptyGroupTitles.js.map +0 -1
  242. package/bin/scraping/detectFramework.d.ts +0 -9
  243. package/bin/scraping/detectFramework.js +0 -36
  244. package/bin/scraping/detectFramework.js.map +0 -1
  245. package/bin/scraping/downloadAllImages.d.ts +0 -4
  246. package/bin/scraping/downloadAllImages.js +0 -36
  247. package/bin/scraping/downloadAllImages.js.map +0 -1
  248. package/bin/scraping/downloadLogoImage.d.ts +0 -1
  249. package/bin/scraping/downloadLogoImage.js +0 -12
  250. package/bin/scraping/downloadLogoImage.js.map +0 -1
  251. package/bin/scraping/replaceImagePaths.d.ts +0 -1
  252. package/bin/scraping/replaceImagePaths.js +0 -14
  253. package/bin/scraping/replaceImagePaths.js.map +0 -1
  254. package/bin/scraping/scrapeFileGettingFileNameFromUrl.d.ts +0 -6
  255. package/bin/scraping/scrapeFileGettingFileNameFromUrl.js +0 -46
  256. package/bin/scraping/scrapeFileGettingFileNameFromUrl.js.map +0 -1
  257. package/bin/scraping/scrapeGettingFileNameFromUrl.d.ts +0 -6
  258. package/bin/scraping/scrapeGettingFileNameFromUrl.js +0 -13
  259. package/bin/scraping/scrapeGettingFileNameFromUrl.js.map +0 -1
  260. package/bin/scraping/scrapePage.d.ts +0 -8
  261. package/bin/scraping/scrapePage.js +0 -10
  262. package/bin/scraping/scrapePage.js.map +0 -1
  263. package/bin/scraping/scrapePageCommands.d.ts +0 -7
  264. package/bin/scraping/scrapePageCommands.js +0 -50
  265. package/bin/scraping/scrapePageCommands.js.map +0 -1
  266. package/bin/scraping/scrapeSection.d.ts +0 -3
  267. package/bin/scraping/scrapeSection.js +0 -12
  268. package/bin/scraping/scrapeSection.js.map +0 -1
  269. package/bin/scraping/scrapeSectionCommands.d.ts +0 -6
  270. package/bin/scraping/scrapeSectionCommands.js +0 -63
  271. package/bin/scraping/scrapeSectionCommands.js.map +0 -1
  272. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.d.ts +0 -5
  273. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js +0 -29
  274. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js.map +0 -1
  275. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.d.ts +0 -2
  276. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js +0 -31
  277. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js.map +0 -1
  278. package/bin/scraping/site-scrapers/alternateGroupTitle.d.ts +0 -3
  279. package/bin/scraping/site-scrapers/alternateGroupTitle.js +0 -9
  280. package/bin/scraping/site-scrapers/alternateGroupTitle.js.map +0 -1
  281. package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.d.ts +0 -5
  282. package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js +0 -33
  283. package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js.map +0 -1
  284. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.d.ts +0 -3
  285. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js +0 -35
  286. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js.map +0 -1
  287. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.d.ts +0 -3
  288. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js +0 -33
  289. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js.map +0 -1
  290. package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.d.ts +0 -2
  291. package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js +0 -30
  292. package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js.map +0 -1
  293. package/bin/scraping/site-scrapers/openNestedGitbookMenus.d.ts +0 -2
  294. package/bin/scraping/site-scrapers/openNestedGitbookMenus.js +0 -21
  295. package/bin/scraping/site-scrapers/openNestedGitbookMenus.js.map +0 -1
  296. package/bin/scraping/site-scrapers/scrapeDocusaurusPage.d.ts +0 -5
  297. package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js +0 -53
  298. package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js.map +0 -1
  299. package/bin/scraping/site-scrapers/scrapeDocusaurusSection.d.ts +0 -2
  300. package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js +0 -32
  301. package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js.map +0 -1
  302. package/bin/scraping/site-scrapers/scrapeGitBookPage.d.ts +0 -5
  303. package/bin/scraping/site-scrapers/scrapeGitBookPage.js +0 -56
  304. package/bin/scraping/site-scrapers/scrapeGitBookPage.js.map +0 -1
  305. package/bin/scraping/site-scrapers/scrapeGitBookSection.d.ts +0 -2
  306. package/bin/scraping/site-scrapers/scrapeGitBookSection.js +0 -42
  307. package/bin/scraping/site-scrapers/scrapeGitBookSection.js.map +0 -1
  308. package/bin/scraping/site-scrapers/scrapeReadMePage.d.ts +0 -5
  309. package/bin/scraping/site-scrapers/scrapeReadMePage.js +0 -38
  310. package/bin/scraping/site-scrapers/scrapeReadMePage.js.map +0 -1
  311. package/bin/scraping/site-scrapers/scrapeReadMeSection.d.ts +0 -2
  312. package/bin/scraping/site-scrapers/scrapeReadMeSection.js +0 -39
  313. package/bin/scraping/site-scrapers/scrapeReadMeSection.js.map +0 -1
  314. package/bin/util.d.ts +0 -29
  315. package/bin/util.js +0 -97
  316. package/bin/util.js.map +0 -1
  317. package/src/browser.ts +0 -24
  318. package/src/checks.ts +0 -32
  319. package/src/downloadImage.ts +0 -102
  320. package/src/scraping/combineNavWithEmptyGroupTitles.ts +0 -21
  321. package/src/scraping/detectFramework.ts +0 -55
  322. package/src/scraping/downloadAllImages.ts +0 -61
  323. package/src/scraping/downloadLogoImage.ts +0 -24
  324. package/src/scraping/replaceImagePaths.ts +0 -17
  325. package/src/scraping/scrapeFileGettingFileNameFromUrl.ts +0 -84
  326. package/src/scraping/scrapeGettingFileNameFromUrl.ts +0 -56
  327. package/src/scraping/scrapePage.ts +0 -40
  328. package/src/scraping/scrapePageCommands.ts +0 -68
  329. package/src/scraping/scrapeSection.ts +0 -30
  330. package/src/scraping/scrapeSectionCommands.ts +0 -98
  331. package/src/scraping/site-scrapers/Intercom/scrapeIntercomPage.ts +0 -52
  332. package/src/scraping/site-scrapers/Intercom/scrapeIntercomSection.ts +0 -54
  333. package/src/scraping/site-scrapers/alternateGroupTitle.ts +0 -11
  334. package/src/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.ts +0 -45
  335. package/src/scraping/site-scrapers/links-per-group/getLinksRecursively.ts +0 -47
  336. package/src/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.ts +0 -44
  337. package/src/scraping/site-scrapers/openNestedDocusaurusMenus.ts +0 -42
  338. package/src/scraping/site-scrapers/openNestedGitbookMenus.ts +0 -27
  339. package/src/scraping/site-scrapers/scrapeDocusaurusPage.ts +0 -85
  340. package/src/scraping/site-scrapers/scrapeDocusaurusSection.ts +0 -63
  341. package/src/scraping/site-scrapers/scrapeGitBookPage.ts +0 -82
  342. package/src/scraping/site-scrapers/scrapeGitBookSection.ts +0 -69
  343. package/src/scraping/site-scrapers/scrapeReadMePage.ts +0 -56
  344. package/src/scraping/site-scrapers/scrapeReadMeSection.ts +0 -66
  345. package/src/util.ts +0 -122
@@ -1,40 +0,0 @@
1
- import path from 'path';
2
-
3
- import { createPage, getOrigin } from '../util.js';
4
-
5
- type ScrapePageResult = {
6
- title: string;
7
- description?: string;
8
- markdown?: string;
9
- };
10
-
11
- export type ScrapePageFn = (
12
- html: string,
13
- origin: string,
14
- cliDir: string,
15
- imageBaseDir: string,
16
- overwrite: boolean,
17
- version: string | undefined
18
- ) => Promise<ScrapePageResult>;
19
-
20
- export async function scrapePage(
21
- scrapeFunc: ScrapePageFn,
22
- href: string,
23
- html: string,
24
- overwrite: boolean,
25
- version: string | undefined
26
- ) {
27
- const origin = getOrigin(href);
28
- const cwd = process.cwd();
29
- const imageBaseDir = path.join(cwd, 'images');
30
-
31
- const { title, description, markdown } = await scrapeFunc(
32
- html,
33
- origin,
34
- cwd,
35
- imageBaseDir,
36
- overwrite,
37
- version
38
- );
39
- createPage(title, description, markdown, overwrite, process.cwd());
40
- }
@@ -1,68 +0,0 @@
1
- import axios from 'axios';
2
-
3
- import { getHtmlWithPuppeteer } from '../browser.js';
4
- import { detectFramework, Framework, FrameworkHint, frameworks } from './detectFramework.js';
5
- import { scrapePage, ScrapePageFn } from './scrapePage.js';
6
- import { scrapeIntercomPage } from './site-scrapers/Intercom/scrapeIntercomPage.js';
7
- import { scrapeDocusaurusPage } from './site-scrapers/scrapeDocusaurusPage.js';
8
- import { scrapeGitBookPage } from './site-scrapers/scrapeGitBookPage.js';
9
- import { scrapeReadMePage } from './site-scrapers/scrapeReadMePage.js';
10
-
11
- function validateFramework(framework: Framework | undefined) {
12
- if (!framework) {
13
- console.log(
14
- `Could not detect the framework automatically. Please use the -t flag to specify one of: ${frameworks.join(
15
- ', '
16
- )}`
17
- );
18
- return process.exit(1);
19
- }
20
- }
21
-
22
- export async function scrapePageWrapper(
23
- url: string,
24
- overwrite: boolean,
25
- scrapeFunc: ScrapePageFn,
26
- options?: { version?: string; puppeteer?: boolean }
27
- ) {
28
- let html: string;
29
- if (options?.puppeteer) {
30
- html = await getHtmlWithPuppeteer(url);
31
- } else {
32
- const res = await axios.get(url);
33
- html = res.data;
34
- }
35
- await scrapePage(scrapeFunc, url, html, overwrite, options?.version);
36
- process.exit(0);
37
- }
38
-
39
- export async function scrapePageAutomatically(
40
- url: string,
41
- overwrite: boolean,
42
- frameworkHint: FrameworkHint
43
- ) {
44
- const res = await axios.get(url);
45
- const html = res.data;
46
- frameworkHint = frameworkHint.framework ? frameworkHint : detectFramework(html);
47
-
48
- validateFramework(frameworkHint.framework);
49
-
50
- console.log('Detected framework: ' + frameworkHint.framework);
51
-
52
- switch (frameworkHint.framework) {
53
- case 'docusaurus':
54
- await scrapePageWrapper(url, overwrite, scrapeDocusaurusPage, {
55
- version: frameworkHint.version,
56
- });
57
- break;
58
- case 'gitbook':
59
- await scrapePageWrapper(url, overwrite, scrapeGitBookPage, { puppeteer: true });
60
- break;
61
- case 'readme':
62
- await scrapePageWrapper(url, overwrite, scrapeReadMePage);
63
- break;
64
- case 'intercom':
65
- await scrapePageWrapper(url, overwrite, scrapeIntercomPage);
66
- break;
67
- }
68
- }
@@ -1,30 +0,0 @@
1
- import { NavigationEntry } from '@mintlify/models';
2
- import path from 'path';
3
-
4
- import { objToReadableString } from '../util.js';
5
-
6
- export type ScrapeSectionFn = (
7
- html: string,
8
- origin: string,
9
- cliDir: string,
10
- imageBaseDir: string,
11
- overwrite: boolean,
12
- version: string | undefined
13
- ) => Promise<NavigationEntry[]>;
14
-
15
- export async function scrapeSection(
16
- scrapeFunc: ScrapeSectionFn,
17
- html: string,
18
- origin: string,
19
- overwrite: boolean,
20
- version: string | undefined
21
- ) {
22
- console.log(`Started scraping${overwrite ? ', overwrite mode is on' : ''}...`);
23
- const cwd = process.cwd();
24
- const imageBaseDir = path.join(cwd, 'images');
25
-
26
- const groupsConfig = await scrapeFunc(html, origin, cwd, imageBaseDir, overwrite, version);
27
- console.log('Finished scraping.');
28
- console.log('Add the following to your navigation in mint.json:');
29
- console.log(objToReadableString(groupsConfig));
30
- }
@@ -1,98 +0,0 @@
1
- import axios from 'axios';
2
- import { Page } from 'puppeteer';
3
-
4
- import { startBrowser } from '../browser.js';
5
- import { getOrigin } from '../util.js';
6
- import { detectFramework, Framework, FrameworkHint } from './detectFramework.js';
7
- import { ScrapeSectionFn, scrapeSection } from './scrapeSection.js';
8
- import { scrapeIntercomSection } from './site-scrapers/Intercom/scrapeIntercomSection.js';
9
- import openNestedDocusaurusMenus from './site-scrapers/openNestedDocusaurusMenus.js';
10
- import openNestedGitbookMenus from './site-scrapers/openNestedGitbookMenus.js';
11
- import { scrapeDocusaurusSection } from './site-scrapers/scrapeDocusaurusSection.js';
12
- import { scrapeGitBookSection } from './site-scrapers/scrapeGitBookSection.js';
13
- import { scrapeReadMeSection } from './site-scrapers/scrapeReadMeSection.js';
14
-
15
- export async function scrapeSectionAxiosWrapper(
16
- url: string,
17
- overwrite: boolean,
18
- scrapeFunc: ScrapeSectionFn
19
- ) {
20
- const res = await axios.get(url);
21
- const html = res.data;
22
- await scrapeSection(scrapeFunc, html, getOrigin(url), overwrite, undefined);
23
- process.exit(0);
24
- }
25
-
26
- export async function scrapeDocusaurusSectionCommand(
27
- url: string,
28
- overwrite: boolean,
29
- version: string | undefined // "1" | "2" | "3"
30
- ) {
31
- await scrapeSectionOpeningAllNested(
32
- url,
33
- overwrite,
34
- openNestedDocusaurusMenus,
35
- scrapeDocusaurusSection,
36
- version
37
- );
38
- }
39
-
40
- export async function scrapeGitbookSectionCommand(url: string, overwrite: boolean) {
41
- await scrapeSectionOpeningAllNested(url, overwrite, openNestedGitbookMenus, scrapeGitBookSection);
42
- }
43
-
44
- async function scrapeSectionOpeningAllNested(
45
- url: string,
46
- overwrite: boolean,
47
- openLinks: (page: Page) => Promise<string>,
48
- scrapeFunc: ScrapeSectionFn,
49
- version?: string
50
- ) {
51
- const browser = await startBrowser();
52
- const page = await browser.newPage();
53
- await page.goto(url, {
54
- waitUntil: 'networkidle2',
55
- });
56
-
57
- const html = await openLinks(page);
58
- void browser.close();
59
- await scrapeSection(scrapeFunc, html, getOrigin(url), overwrite, version);
60
- process.exit(0);
61
- }
62
-
63
- export async function scrapeSectionAutomatically(
64
- url: string,
65
- overwrite: boolean,
66
- frameworkHint: FrameworkHint
67
- ) {
68
- const res = await axios.get(url);
69
- const html = res.data;
70
- frameworkHint = frameworkHint.framework ? frameworkHint : detectFramework(html);
71
-
72
- validateFramework(frameworkHint.framework);
73
- console.log('Detected framework: ' + frameworkHint.framework);
74
-
75
- switch (frameworkHint.framework) {
76
- case 'docusaurus':
77
- await scrapeDocusaurusSectionCommand(url, overwrite, frameworkHint.version);
78
- break;
79
- case 'gitbook':
80
- await scrapeGitbookSectionCommand(url, overwrite);
81
- break;
82
- case 'readme':
83
- await scrapeSectionAxiosWrapper(url, overwrite, scrapeReadMeSection);
84
- break;
85
- case 'intercom':
86
- await scrapeSectionAxiosWrapper(url, overwrite, scrapeIntercomSection);
87
- break;
88
- }
89
- }
90
-
91
- function validateFramework(framework: Framework | undefined) {
92
- if (!framework) {
93
- console.log(
94
- 'Could not detect the framework automatically. We only support Docusaurus (V2 and V3), GitBook, and ReadMe.'
95
- );
96
- process.exit();
97
- }
98
- }
@@ -1,52 +0,0 @@
1
- import * as cheerio from 'cheerio';
2
- import { NodeHtmlMarkdown } from 'node-html-markdown';
3
-
4
- import downloadAllImages from '../../downloadAllImages.js';
5
- import replaceImagePaths from '../../replaceImagePaths.js';
6
-
7
- export async function scrapeIntercomPage(
8
- html: string,
9
- origin: string,
10
- cliDir: string,
11
- imageBaseDir: string,
12
- overwrite: boolean,
13
- _: string | undefined // version
14
- ) {
15
- const $ = cheerio.load(html);
16
-
17
- const titleComponent = $('.t__h1').first();
18
- const title = titleComponent.text().trim();
19
- const description = $('.article__desc', titleComponent.parent()).text().trim();
20
-
21
- const content = $('article').first();
22
- const contentHtml = $.html(content);
23
-
24
- const origToWritePath = await downloadAllImages(
25
- $,
26
- content,
27
- origin,
28
- imageBaseDir,
29
- overwrite,
30
- undefined
31
- );
32
-
33
- const nhm = new NodeHtmlMarkdown({ useInlineLinks: false });
34
- let markdown = nhm.translate(contentHtml);
35
-
36
- // Keep headers on one line
37
- markdown = markdown.replace(/# \n\n/g, '# ');
38
-
39
- // Remove unnecessary nonwidth blank space characters
40
- markdown = markdown.replace(/\u200b/g, '');
41
-
42
- // Reduce unnecessary blank lines
43
- markdown = markdown.replace(/\n\n\n/g, '\n\n');
44
-
45
- // Mintlify doesn't support bolded headers, remove the asterisks
46
- markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, '$1 $2\n');
47
- if (origToWritePath) {
48
- markdown = replaceImagePaths(origToWritePath, cliDir, markdown);
49
- }
50
-
51
- return { title, description, markdown };
52
- }
@@ -1,54 +0,0 @@
1
- import { Navigation, NavigationEntry } from '@mintlify/models';
2
- import axios from 'axios';
3
- import * as cheerio from 'cheerio';
4
-
5
- import downloadLogoImage from '../../downloadLogoImage.js';
6
- import { scrapeGettingFileNameFromUrl } from '../../scrapeGettingFileNameFromUrl.js';
7
- import { scrapeIntercomPage } from './scrapeIntercomPage.js';
8
-
9
- export async function scrapeIntercomSection(
10
- html: string,
11
- origin: string,
12
- cliDir: string,
13
- imageBaseDir: string,
14
- overwrite: boolean,
15
- version: string | undefined
16
- ): Promise<NavigationEntry[]> {
17
- let $ = cheerio.load(html);
18
-
19
- const logoSrc = $('.header__logo img').first().attr('src');
20
- void downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite);
21
-
22
- const collectionsLink = $('.section .g__space a');
23
- const collectionsMap = collectionsLink.toArray().map(async (s: cheerio.Element) => {
24
- const href = $(s).attr('href');
25
- const res = await axios.get(`${origin}${href}`);
26
- const html = res.data;
27
- $ = cheerio.load(html);
28
- const sectionTitle = $('.collection h1').first().text().trim();
29
- const sectionPages = $('.section .g__space a')
30
- .toArray()
31
- .map((s: cheerio.Element) => $(s).attr('href'))
32
- .filter((page) => page !== undefined) as string[];
33
- return {
34
- group: sectionTitle,
35
- pages: sectionPages,
36
- };
37
- });
38
-
39
- const collections: Navigation = await Promise.all(collectionsMap);
40
-
41
- return await Promise.all(
42
- collections.map(async (entry: NavigationEntry) => {
43
- return await scrapeGettingFileNameFromUrl(
44
- entry,
45
- cliDir,
46
- origin,
47
- overwrite,
48
- scrapeIntercomPage,
49
- false,
50
- version
51
- );
52
- })
53
- );
54
- }
@@ -1,11 +0,0 @@
1
- import { NavigationEntry } from '@mintlify/models';
2
- import { Cheerio, Element } from 'cheerio';
3
-
4
- export default function alternateGroupTitle(firstLink: Cheerio<Element>, pages: NavigationEntry[]) {
5
- // Only assign titles to nested navigation menus outside a section.
6
- // Others should not have a title so we can merge them into one section.
7
- if (pages.length > 0) {
8
- return firstLink.text();
9
- }
10
- return '';
11
- }
@@ -1,45 +0,0 @@
1
- import { Cheerio, CheerioAPI, Element } from 'cheerio';
2
-
3
- import alternateGroupTitle from '../alternateGroupTitle.js';
4
- import getLinksRecursively from './getLinksRecursively.js';
5
-
6
- export function getDocusaurusLinksPerGroup(
7
- navigationSections: Cheerio<Element>,
8
- $: CheerioAPI,
9
- version: string | undefined
10
- ) {
11
- if (version === '3' || version === '2') {
12
- return getDocusaurusLinksPerGroupLoop(navigationSections, $);
13
- }
14
- return [];
15
- }
16
-
17
- function getDocusaurusLinksPerGroupLoop(navigationSections: Cheerio<Element>, $: CheerioAPI) {
18
- return navigationSections.toArray().map((s) => {
19
- const section = $(s);
20
-
21
- // Links without a group
22
- if (section.hasClass('theme-doc-sidebar-item-link') || section.hasClass('menu__link')) {
23
- const linkHref = section.find('a[href]').first().attr('href');
24
- return {
25
- group: '',
26
- pages: linkHref !== undefined ? [linkHref] : [],
27
- };
28
- }
29
-
30
- const firstLink = !section.find('.menu__list-item-collapsible').first().find('a[href]').length
31
- ? section.find('.menu__link--sublist').first().find('a[href]')
32
- : section.find('.menu__list-item-collapsible').first().find('a[href]');
33
-
34
- const sectionTitle = firstLink.text();
35
- const firstHref = firstLink.attr('href');
36
- const linkSections = section.children().eq(1).children();
37
-
38
- const pages = getLinksRecursively(linkSections, $);
39
-
40
- return {
41
- group: sectionTitle || alternateGroupTitle(firstLink, pages),
42
- pages: firstHref ? [firstHref, ...pages] : pages,
43
- };
44
- });
45
- }
@@ -1,47 +0,0 @@
1
- import { NavigationEntry } from '@mintlify/models';
2
- import { Cheerio, CheerioAPI, Element } from 'cheerio';
3
-
4
/**
 * Recursively collect navigation entries from a list of sidebar items.
 * Used by the Docusaurus and ReadMe section scrapers.
 *
 * @param linkSections - The sidebar items to walk.
 * @param $ - The Cheerio instance the items belong to.
 * @returns For each item with a usable link, either the link's href or,
 *   when the item has nested items, a `{ group, pages }` entry whose first
 *   page is the item's own href.
 */
export default function getLinksRecursively(
  linkSections: Cheerio<Element>,
  $: CheerioAPI
): NavigationEntry[] {
  const entries: NavigationEntry[] = [];

  for (const element of linkSections.toArray()) {
    const item = $(element);

    // The link is normally the first child; Docusaurus nests the <a> inside a <div>.
    const firstChild = item.children().first();
    const anchor = firstChild.attr('href') ? firstChild : firstChild.find('a[href]').first();
    const href = anchor.attr('href');

    // Skip missing links. For example, GitBook uses empty divs to style
    // a line beside the nav.
    // Skip external links until Mintlify supports them.
    if (!href || href === '#' || href.startsWith('https://') || href.startsWith('http://')) {
      continue;
    }

    const nestedItems = item.children().eq(1).children();

    if (nestedItems.length === 0) {
      entries.push(href);
      continue;
    }

    // Put the section link in the list of pages.
    // When we support the section itself being a link we should update this.
    entries.push({
      group: anchor.text(),
      pages: [href, ...getLinksRecursively(nestedItems, $)],
    });
  }

  return entries;
}
@@ -1,44 +0,0 @@
1
- import { NavigationEntry } from '@mintlify/models';
2
- import { Cheerio, CheerioAPI, Element } from 'cheerio';
3
-
4
/**
 * Recursively collect navigation entries from GitBook sidebar items.
 * Used by the GitBook section scraper.
 *
 * @param linkSections - The sidebar items to walk.
 * @param $ - The Cheerio instance the items belong to.
 * @returns For each item with a usable link, either the link's href or,
 *   when the item contains a nested `<ul>` with children, a
 *   `{ group, pages }` entry whose first page is the item's own href.
 */
export default function getLinksRecursivelyGitBook(
  linkSections: Cheerio<Element>,
  $: CheerioAPI
): NavigationEntry[] {
  return linkSections
    .map((_, element) => {
      const item = $(element);
      const header = item.find('div').first();
      const anchor = item.find('a').first();
      const href = anchor.attr('href');

      // Skip missing links. For example, GitBook uses empty divs to style
      // a line beside the nav.
      if (!href || href === '#') {
        return undefined;
      }

      // Skip external links until Mintlify supports them.
      const isExternal = href.startsWith('https://') || href.startsWith('http://');
      if (isExternal) {
        return undefined;
      }

      const nestedItems = item.find('ul').first().children();
      if (nestedItems.length === 0) {
        return href;
      }

      // Prefer the link text as the group title, then the header text.
      const title = anchor.text() || header.text() || '';

      // Put the section link in the list of pages.
      // When we support the section itself being a link we should update this.
      return {
        group: title,
        pages: [href, ...getLinksRecursivelyGitBook(nestedItems, $)],
      };
    })
    .toArray()
    .filter(Boolean);
}
@@ -1,42 +0,0 @@
1
- import { Page } from 'puppeteer';
2
-
3
/**
 * Click open the collapsible sidebar menus of a Docusaurus page, then
 * return the page's HTML.
 *
 * Each pass collects the hrefs of all `.menu__link.menu__link--sublist`
 * elements and clicks those whose href was not seen in the previous pass.
 * The passes stop once a pass finds no href that the previous pass had not
 * already found.
 *
 * @param page - The Puppeteer page showing the Docusaurus site.
 * @returns The page content after the menus have been clicked.
 */
export default async function openNestedDocusaurusMenus(page: Page) {
  let previousHrefs: string[] = [];
  // Placeholder entry so the loop body runs at least once.
  let currentHrefs = ['fake-href-to-make-loop-run-at-least-once'];

  // Loop until we've encountered every link
  while (currentHrefs.some((href) => !previousHrefs.includes(href))) {
    previousHrefs = currentHrefs;
    currentHrefs = await page.evaluate(
      (alreadySeen) => {
        // This callback runs in the browser, so it must stay self-contained.
        const menuLinks: HTMLElement[] = Array.from(
          document.querySelectorAll('.menu__link.menu__link--sublist')
        );
        const found: string[] = [];

        for (const menuLink of menuLinks) {
          const href = menuLink.getAttribute('href');
          if (!href) {
            continue;
          }

          // Should never occur but we keep it as a fail-safe
          if (href.startsWith('https://') || href.startsWith('http://')) {
            continue;
          }

          // Click any links we haven't seen before
          if (!alreadySeen.includes(href)) {
            menuLink.click();
          }

          found.push(href);
        }

        return found;
      },
      currentHrefs // Need to pass array into the browser
    );
  }

  return await page.content();
}
@@ -1,27 +0,0 @@
1
- import { Page } from 'puppeteer';
2
-
3
/**
 * Click open the closed sidebar menus of a GitBook page, then return the
 * page's HTML.
 *
 * Each pass clicks the parent `<span>` of every matching chevron icon whose
 * class name contains `rotate-0`. Passes repeat until one clicks nothing.
 *
 * @param page - The Puppeteer page showing the GitBook site.
 * @returns The page content after the menus have been clicked.
 */
export default async function openNestedGitbookMenus(page: Page) {
  let clickedAny: boolean;

  // Loop until we've encountered every closed menu
  do {
    clickedAny = await page.evaluate(() => {
      // Right pointing arrow. Only menus have this icon
      const icons = Array.from(
        document.querySelectorAll(
          'div > a > span > svg[style*="mask-image:url(https://ka-p.fontawesome.com/releases/v6.6.0/svgs/regular/chevron-right.svg?v=1&token=a463935e93)"]'
        )
      );

      let clicked = false;
      for (const icon of icons) {
        const span = icon.parentElement;
        if (span !== null && span.className.includes('rotate-0')) {
          span.click();
          clicked = true;
        }
      }
      return clicked;
    });
  } while (clickedAny);

  return await page.content();
}
@@ -1,85 +0,0 @@
1
- import * as cheerio from 'cheerio';
2
- import { NodeHtmlMarkdown } from 'node-html-markdown';
3
-
4
- import downloadAllImages from '../downloadAllImages.js';
5
- import replaceImagePaths from '../replaceImagePaths.js';
6
-
7
/**
 * Convert the HTML of a Docusaurus page into a title and markdown body.
 *
 * The page's `<h1>` text becomes the title and is removed from the content.
 * Images inside the content are downloaded and, when any were written,
 * their paths in the markdown are rewritten via `replaceImagePaths`.
 *
 * @param html - Full HTML of the page.
 * @param origin - Passed through to `downloadAllImages`.
 * @param cliDir - Passed through to `replaceImagePaths`.
 * @param imageBaseDir - Passed through to `downloadAllImages`.
 * @param overwrite - Passed through to `downloadAllImages`.
 * @param version - Docusaurus major version. `'3'` reads the content from
 *   `.theme-doc-markdown`; any other value reads `.markdown` inside the
 *   first `<article>`.
 * @returns The title plus the markdown. `description` is always `undefined`.
 *   When no article element is found only an empty title is returned; when
 *   the content has no HTML the markdown is an empty string.
 */
export async function scrapeDocusaurusPage(
  html: string,
  origin: string,
  cliDir: string,
  imageBaseDir: string,
  overwrite: boolean,
  version: string | undefined // expects "2", or "3". Have not written support for "1" yet
): Promise<{
  title: string;
  description?: string;
  markdown?: string;
}> {
  const $ = cheerio.load(html);

  const article = version === '3' ? $('.theme-doc-markdown').first() : $('article').first();

  if (article.length === 0) {
    // Index pages with no additional text don't have the markdown class
    return {
      title: '',
    };
  }

  const titleComponent = article.find('h1');
  const title = titleComponent.text().trim();

  // Do not include title in the content when we insert it in our metadata
  titleComponent.remove();

  const markdownContent = version === '3' ? article : article.find('.markdown').first();

  const origToWritePath = await downloadAllImages(
    $,
    markdownContent,
    origin,
    imageBaseDir,
    overwrite
  );

  const markdownHtml = markdownContent.html();

  if (!markdownHtml) {
    console.error('We do not support scraping this page. Content will be empty');
    return { title, description: undefined, markdown: '' };
  }

  // Description only exists in meta tags. It is deliberately left unset:
  // reading it from `og:description` is prone to picking up the first line
  // of body text (Docusaurus defaults to that when no description is set in
  // the metadata), and comparing it with the markdown to detect that case
  // breaks when the first line has markdown annotations like `.
  const description = undefined;

  const nhm = new NodeHtmlMarkdown({ useInlineLinks: false });
  let markdown = cleanDocusaurusMarkdown(nhm.translate(markdownHtml));

  if (origToWritePath) {
    markdown = replaceImagePaths(origToWritePath, cliDir, markdown);
  }

  return { title, description, markdown };
}

/** Strip Docusaurus-specific artifacts from markdown converted from HTML. */
function cleanDocusaurusMarkdown(markdown: string): string {
  return (
    markdown
      // Remove Docusaurus links from headers.
      // When we parse their HTML the parser adds things like:
      // [](#setup "Direct link to heading")
      // to the end of each header.
      .replace(/\[\]\(#.+ ".+"\)\n/g, '\n')
      // Remove unnecessary zero-width space characters
      .replace(/\u200b/g, '')
      // Reduce unnecessary blank lines. The quantifier collapses a run of
      // any length in one pass, so runs of four or more newlines do not
      // leave extra blank lines behind.
      .replace(/\n{3,}/g, '\n\n')
      // Mintlify doesn't support bolded headers, remove the asterisks
      .replace(/(\n#+) \*\*(.*)\*\*\n/g, '$1 $2\n')
  );
}