@mintlify/scraping 3.0.187 → 3.0.188

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (344) hide show
  1. package/bin/assert.d.ts +5 -0
  2. package/bin/assert.js +13 -0
  3. package/bin/assert.js.map +1 -0
  4. package/bin/cli.js +43 -72
  5. package/bin/cli.js.map +1 -1
  6. package/bin/components/Accordion.d.ts +5 -0
  7. package/bin/components/Accordion.js +54 -0
  8. package/bin/components/Accordion.js.map +1 -0
  9. package/bin/components/AccordionGroup.d.ts +5 -0
  10. package/bin/components/AccordionGroup.js +52 -0
  11. package/bin/components/AccordionGroup.js.map +1 -0
  12. package/bin/components/Callout.d.ts +5 -0
  13. package/bin/components/Callout.js +114 -0
  14. package/bin/components/Callout.js.map +1 -0
  15. package/bin/components/Card.d.ts +5 -0
  16. package/bin/components/Card.js +135 -0
  17. package/bin/components/Card.js.map +1 -0
  18. package/bin/components/CardGroup.d.ts +5 -0
  19. package/bin/components/CardGroup.js +52 -0
  20. package/bin/components/CardGroup.js.map +1 -0
  21. package/bin/components/CodeGroup.d.ts +5 -0
  22. package/bin/components/CodeGroup.js +166 -0
  23. package/bin/components/CodeGroup.js.map +1 -0
  24. package/bin/components/Frame.d.ts +5 -0
  25. package/bin/components/Frame.js +51 -0
  26. package/bin/components/Frame.js.map +1 -0
  27. package/bin/components/Tabs.d.ts +5 -0
  28. package/bin/components/Tabs.js +122 -0
  29. package/bin/components/Tabs.js.map +1 -0
  30. package/bin/components/link.d.ts +2 -0
  31. package/bin/components/link.js +16 -0
  32. package/bin/components/link.js.map +1 -0
  33. package/bin/constants.d.ts +6 -7
  34. package/bin/constants.js +31 -12
  35. package/bin/constants.js.map +1 -1
  36. package/bin/customComponents/create.d.ts +10 -0
  37. package/bin/customComponents/create.js +69 -0
  38. package/bin/customComponents/create.js.map +1 -0
  39. package/bin/customComponents/plugin.d.ts +2 -0
  40. package/bin/customComponents/plugin.js +26 -0
  41. package/bin/customComponents/plugin.js.map +1 -0
  42. package/bin/customComponents/selective.d.ts +6 -0
  43. package/bin/customComponents/selective.js +29 -0
  44. package/bin/customComponents/selective.js.map +1 -0
  45. package/bin/nav/iterate.d.ts +2 -0
  46. package/bin/nav/iterate.js +15 -0
  47. package/bin/nav/iterate.js.map +1 -0
  48. package/bin/nav/listItems.d.ts +8 -0
  49. package/bin/nav/listItems.js +62 -0
  50. package/bin/nav/listItems.js.map +1 -0
  51. package/bin/nav/retrieve.d.ts +3 -0
  52. package/bin/nav/retrieve.js +75 -0
  53. package/bin/nav/retrieve.js.map +1 -0
  54. package/bin/nav/root.d.ts +2 -0
  55. package/bin/nav/root.js +40 -0
  56. package/bin/nav/root.js.map +1 -0
  57. package/bin/openapi/generateOpenApiPages.js +2 -2
  58. package/bin/openapi/generateOpenApiPages.js.map +1 -1
  59. package/bin/root/retrieve.d.ts +2 -0
  60. package/bin/root/retrieve.js +46 -0
  61. package/bin/root/retrieve.js.map +1 -0
  62. package/bin/scrapingPipeline/group.d.ts +5 -0
  63. package/bin/scrapingPipeline/group.js +46 -0
  64. package/bin/scrapingPipeline/group.js.map +1 -0
  65. package/bin/scrapingPipeline/icon.d.ts +2 -0
  66. package/bin/scrapingPipeline/icon.js +22 -0
  67. package/bin/scrapingPipeline/icon.js.map +1 -0
  68. package/bin/scrapingPipeline/images.d.ts +3 -0
  69. package/bin/scrapingPipeline/images.js +50 -0
  70. package/bin/scrapingPipeline/images.js.map +1 -0
  71. package/bin/scrapingPipeline/logo.d.ts +5 -0
  72. package/bin/scrapingPipeline/logo.js +92 -0
  73. package/bin/scrapingPipeline/logo.js.map +1 -0
  74. package/bin/scrapingPipeline/page.d.ts +6 -0
  75. package/bin/scrapingPipeline/page.js +102 -0
  76. package/bin/scrapingPipeline/page.js.map +1 -0
  77. package/bin/scrapingPipeline/root.d.ts +2 -0
  78. package/bin/scrapingPipeline/root.js +8 -0
  79. package/bin/scrapingPipeline/root.js.map +1 -0
  80. package/bin/scrapingPipeline/site.d.ts +7 -0
  81. package/bin/scrapingPipeline/site.js +129 -0
  82. package/bin/scrapingPipeline/site.js.map +1 -0
  83. package/bin/scrapingPipeline/tabs.d.ts +3 -0
  84. package/bin/scrapingPipeline/tabs.js +67 -0
  85. package/bin/scrapingPipeline/tabs.js.map +1 -0
  86. package/bin/tabs/retrieveReadme.d.ts +3 -0
  87. package/bin/tabs/retrieveReadme.js +78 -0
  88. package/bin/tabs/retrieveReadme.js.map +1 -0
  89. package/bin/tsconfig.build.tsbuildinfo +1 -1
  90. package/bin/types/components.d.ts +2 -0
  91. package/bin/types/components.js +2 -0
  92. package/bin/types/components.js.map +1 -0
  93. package/bin/types/framework.d.ts +8 -0
  94. package/bin/types/framework.js +3 -0
  95. package/bin/types/framework.js.map +1 -0
  96. package/bin/types/hast.d.ts +6 -0
  97. package/bin/types/hast.js +2 -0
  98. package/bin/types/hast.js.map +1 -0
  99. package/bin/types/result.d.ts +7 -0
  100. package/bin/types/result.js +2 -0
  101. package/bin/types/result.js.map +1 -0
  102. package/bin/types/scrapeFunc.d.ts +3 -0
  103. package/bin/types/scrapeFunc.js +2 -0
  104. package/bin/types/scrapeFunc.js.map +1 -0
  105. package/bin/utils/append.d.ts +1 -0
  106. package/bin/utils/append.js +12 -0
  107. package/bin/utils/append.js.map +1 -0
  108. package/bin/utils/children.d.ts +5 -0
  109. package/bin/utils/children.js +35 -0
  110. package/bin/utils/children.js.map +1 -0
  111. package/bin/utils/className.d.ts +3 -0
  112. package/bin/utils/className.js +13 -0
  113. package/bin/utils/className.js.map +1 -0
  114. package/bin/utils/detectFramework.d.ts +4 -0
  115. package/bin/utils/detectFramework.js +60 -0
  116. package/bin/utils/detectFramework.js.map +1 -0
  117. package/bin/utils/emptyParagraphs.d.ts +3 -0
  118. package/bin/utils/emptyParagraphs.js +19 -0
  119. package/bin/utils/emptyParagraphs.js.map +1 -0
  120. package/bin/utils/errors.d.ts +3 -0
  121. package/bin/utils/errors.js +16 -0
  122. package/bin/utils/errors.js.map +1 -0
  123. package/bin/utils/escape.d.ts +2 -0
  124. package/bin/utils/escape.js +25 -0
  125. package/bin/utils/escape.js.map +1 -0
  126. package/bin/utils/extension.d.ts +3 -0
  127. package/bin/utils/extension.js +18 -0
  128. package/bin/utils/extension.js.map +1 -0
  129. package/bin/utils/file.d.ts +4 -0
  130. package/bin/utils/file.js +43 -0
  131. package/bin/utils/file.js.map +1 -0
  132. package/bin/utils/firstChild.d.ts +2 -0
  133. package/bin/utils/firstChild.js +12 -0
  134. package/bin/utils/firstChild.js.map +1 -0
  135. package/bin/utils/images.d.ts +5 -0
  136. package/bin/utils/images.js +86 -0
  137. package/bin/utils/images.js.map +1 -0
  138. package/bin/utils/img.d.ts +2 -0
  139. package/bin/utils/img.js +15 -0
  140. package/bin/utils/img.js.map +1 -0
  141. package/bin/utils/log.d.ts +18 -0
  142. package/bin/utils/log.js +68 -0
  143. package/bin/utils/log.js.map +1 -0
  144. package/bin/utils/nestedRoots.d.ts +7 -0
  145. package/bin/utils/nestedRoots.js +19 -0
  146. package/bin/utils/nestedRoots.js.map +1 -0
  147. package/bin/utils/network.d.ts +5 -0
  148. package/bin/utils/network.js +82 -0
  149. package/bin/utils/network.js.map +1 -0
  150. package/bin/utils/path.d.ts +1 -0
  151. package/bin/utils/path.js +22 -0
  152. package/bin/utils/path.js.map +1 -0
  153. package/bin/utils/position.d.ts +3 -0
  154. package/bin/utils/position.js +12 -0
  155. package/bin/utils/position.js.map +1 -0
  156. package/bin/utils/reservedNames.d.ts +4 -0
  157. package/bin/utils/reservedNames.js +27 -0
  158. package/bin/utils/reservedNames.js.map +1 -0
  159. package/bin/utils/strings.d.ts +2 -0
  160. package/bin/utils/strings.js +7 -0
  161. package/bin/utils/strings.js.map +1 -0
  162. package/bin/utils/text.d.ts +2 -0
  163. package/bin/utils/text.js +11 -0
  164. package/bin/utils/text.js.map +1 -0
  165. package/bin/utils/title.d.ts +10 -0
  166. package/bin/utils/title.js +58 -0
  167. package/bin/utils/title.js.map +1 -0
  168. package/bin/utils/url.d.ts +3 -0
  169. package/bin/utils/url.js +10 -0
  170. package/bin/utils/url.js.map +1 -0
  171. package/package.json +17 -8
  172. package/src/assert.ts +15 -0
  173. package/src/cli.ts +53 -90
  174. package/src/components/Accordion.ts +84 -0
  175. package/src/components/AccordionGroup.ts +69 -0
  176. package/src/components/Callout.ts +159 -0
  177. package/src/components/Card.ts +168 -0
  178. package/src/components/CardGroup.ts +69 -0
  179. package/src/components/CodeGroup.ts +209 -0
  180. package/src/components/Frame.ts +86 -0
  181. package/src/components/Tabs.ts +154 -0
  182. package/src/components/link.ts +17 -0
  183. package/src/constants.ts +37 -19
  184. package/src/customComponents/create.ts +106 -0
  185. package/src/customComponents/plugin.ts +31 -0
  186. package/src/customComponents/selective.ts +37 -0
  187. package/src/nav/iterate.ts +18 -0
  188. package/src/nav/listItems.ts +82 -0
  189. package/src/nav/retrieve.ts +88 -0
  190. package/src/nav/root.ts +47 -0
  191. package/src/openapi/generateOpenApiPages.ts +2 -2
  192. package/src/root/retrieve.ts +52 -0
  193. package/src/scrapingPipeline/group.ts +62 -0
  194. package/src/scrapingPipeline/icon.ts +26 -0
  195. package/src/scrapingPipeline/images.ts +67 -0
  196. package/src/scrapingPipeline/logo.ts +127 -0
  197. package/src/scrapingPipeline/page.ts +130 -0
  198. package/src/scrapingPipeline/root.ts +10 -0
  199. package/src/scrapingPipeline/site.ts +161 -0
  200. package/src/scrapingPipeline/tabs.ts +87 -0
  201. package/src/tabs/retrieveReadme.ts +99 -0
  202. package/src/types/components.ts +3 -0
  203. package/src/types/framework.ts +10 -0
  204. package/src/types/hast.ts +12 -0
  205. package/src/types/result.ts +1 -0
  206. package/src/types/scrapeFunc.ts +9 -0
  207. package/src/utils/append.ts +9 -0
  208. package/src/utils/children.ts +51 -0
  209. package/src/utils/className.ts +14 -0
  210. package/src/utils/detectFramework.ts +72 -0
  211. package/src/utils/emptyParagraphs.ts +21 -0
  212. package/src/utils/errors.ts +24 -0
  213. package/src/utils/escape.ts +30 -0
  214. package/src/utils/extension.ts +19 -0
  215. package/src/utils/file.ts +58 -0
  216. package/src/utils/firstChild.ts +13 -0
  217. package/src/utils/images.ts +101 -0
  218. package/src/utils/img.ts +17 -0
  219. package/src/utils/log.ts +82 -0
  220. package/src/utils/nestedRoots.ts +20 -0
  221. package/src/utils/network.ts +95 -0
  222. package/src/utils/path.ts +27 -0
  223. package/src/utils/position.ts +14 -0
  224. package/src/utils/reservedNames.ts +31 -0
  225. package/src/utils/strings.ts +7 -0
  226. package/src/utils/text.ts +11 -0
  227. package/src/utils/title.ts +68 -0
  228. package/src/utils/url.ts +8 -0
  229. package/bin/browser.d.ts +0 -2
  230. package/bin/browser.js +0 -24
  231. package/bin/browser.js.map +0 -1
  232. package/bin/checks.d.ts +0 -8
  233. package/bin/checks.js +0 -24
  234. package/bin/checks.js.map +0 -1
  235. package/bin/downloadImage.d.ts +0 -5
  236. package/bin/downloadImage.js +0 -88
  237. package/bin/downloadImage.js.map +0 -1
  238. package/bin/scraping/combineNavWithEmptyGroupTitles.d.ts +0 -2
  239. package/bin/scraping/combineNavWithEmptyGroupTitles.js +0 -20
  240. package/bin/scraping/combineNavWithEmptyGroupTitles.js.map +0 -1
  241. package/bin/scraping/detectFramework.d.ts +0 -9
  242. package/bin/scraping/detectFramework.js +0 -36
  243. package/bin/scraping/detectFramework.js.map +0 -1
  244. package/bin/scraping/downloadAllImages.d.ts +0 -4
  245. package/bin/scraping/downloadAllImages.js +0 -36
  246. package/bin/scraping/downloadAllImages.js.map +0 -1
  247. package/bin/scraping/downloadLogoImage.d.ts +0 -1
  248. package/bin/scraping/downloadLogoImage.js +0 -12
  249. package/bin/scraping/downloadLogoImage.js.map +0 -1
  250. package/bin/scraping/replaceImagePaths.d.ts +0 -1
  251. package/bin/scraping/replaceImagePaths.js +0 -14
  252. package/bin/scraping/replaceImagePaths.js.map +0 -1
  253. package/bin/scraping/scrapeFileGettingFileNameFromUrl.d.ts +0 -6
  254. package/bin/scraping/scrapeFileGettingFileNameFromUrl.js +0 -46
  255. package/bin/scraping/scrapeFileGettingFileNameFromUrl.js.map +0 -1
  256. package/bin/scraping/scrapeGettingFileNameFromUrl.d.ts +0 -6
  257. package/bin/scraping/scrapeGettingFileNameFromUrl.js +0 -13
  258. package/bin/scraping/scrapeGettingFileNameFromUrl.js.map +0 -1
  259. package/bin/scraping/scrapePage.d.ts +0 -8
  260. package/bin/scraping/scrapePage.js +0 -10
  261. package/bin/scraping/scrapePage.js.map +0 -1
  262. package/bin/scraping/scrapePageCommands.d.ts +0 -7
  263. package/bin/scraping/scrapePageCommands.js +0 -50
  264. package/bin/scraping/scrapePageCommands.js.map +0 -1
  265. package/bin/scraping/scrapeSection.d.ts +0 -3
  266. package/bin/scraping/scrapeSection.js +0 -12
  267. package/bin/scraping/scrapeSection.js.map +0 -1
  268. package/bin/scraping/scrapeSectionCommands.d.ts +0 -6
  269. package/bin/scraping/scrapeSectionCommands.js +0 -63
  270. package/bin/scraping/scrapeSectionCommands.js.map +0 -1
  271. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.d.ts +0 -5
  272. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js +0 -29
  273. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js.map +0 -1
  274. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.d.ts +0 -2
  275. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js +0 -31
  276. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js.map +0 -1
  277. package/bin/scraping/site-scrapers/alternateGroupTitle.d.ts +0 -3
  278. package/bin/scraping/site-scrapers/alternateGroupTitle.js +0 -9
  279. package/bin/scraping/site-scrapers/alternateGroupTitle.js.map +0 -1
  280. package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.d.ts +0 -5
  281. package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js +0 -33
  282. package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js.map +0 -1
  283. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.d.ts +0 -3
  284. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js +0 -35
  285. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js.map +0 -1
  286. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.d.ts +0 -3
  287. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js +0 -33
  288. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js.map +0 -1
  289. package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.d.ts +0 -2
  290. package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js +0 -30
  291. package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js.map +0 -1
  292. package/bin/scraping/site-scrapers/openNestedGitbookMenus.d.ts +0 -2
  293. package/bin/scraping/site-scrapers/openNestedGitbookMenus.js +0 -21
  294. package/bin/scraping/site-scrapers/openNestedGitbookMenus.js.map +0 -1
  295. package/bin/scraping/site-scrapers/scrapeDocusaurusPage.d.ts +0 -5
  296. package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js +0 -53
  297. package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js.map +0 -1
  298. package/bin/scraping/site-scrapers/scrapeDocusaurusSection.d.ts +0 -2
  299. package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js +0 -32
  300. package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js.map +0 -1
  301. package/bin/scraping/site-scrapers/scrapeGitBookPage.d.ts +0 -5
  302. package/bin/scraping/site-scrapers/scrapeGitBookPage.js +0 -56
  303. package/bin/scraping/site-scrapers/scrapeGitBookPage.js.map +0 -1
  304. package/bin/scraping/site-scrapers/scrapeGitBookSection.d.ts +0 -2
  305. package/bin/scraping/site-scrapers/scrapeGitBookSection.js +0 -42
  306. package/bin/scraping/site-scrapers/scrapeGitBookSection.js.map +0 -1
  307. package/bin/scraping/site-scrapers/scrapeReadMePage.d.ts +0 -5
  308. package/bin/scraping/site-scrapers/scrapeReadMePage.js +0 -38
  309. package/bin/scraping/site-scrapers/scrapeReadMePage.js.map +0 -1
  310. package/bin/scraping/site-scrapers/scrapeReadMeSection.d.ts +0 -2
  311. package/bin/scraping/site-scrapers/scrapeReadMeSection.js +0 -39
  312. package/bin/scraping/site-scrapers/scrapeReadMeSection.js.map +0 -1
  313. package/bin/util.d.ts +0 -29
  314. package/bin/util.js +0 -97
  315. package/bin/util.js.map +0 -1
  316. package/src/browser.ts +0 -24
  317. package/src/checks.ts +0 -32
  318. package/src/downloadImage.ts +0 -102
  319. package/src/scraping/combineNavWithEmptyGroupTitles.ts +0 -21
  320. package/src/scraping/detectFramework.ts +0 -55
  321. package/src/scraping/downloadAllImages.ts +0 -61
  322. package/src/scraping/downloadLogoImage.ts +0 -24
  323. package/src/scraping/replaceImagePaths.ts +0 -17
  324. package/src/scraping/scrapeFileGettingFileNameFromUrl.ts +0 -84
  325. package/src/scraping/scrapeGettingFileNameFromUrl.ts +0 -56
  326. package/src/scraping/scrapePage.ts +0 -40
  327. package/src/scraping/scrapePageCommands.ts +0 -68
  328. package/src/scraping/scrapeSection.ts +0 -30
  329. package/src/scraping/scrapeSectionCommands.ts +0 -98
  330. package/src/scraping/site-scrapers/Intercom/scrapeIntercomPage.ts +0 -52
  331. package/src/scraping/site-scrapers/Intercom/scrapeIntercomSection.ts +0 -54
  332. package/src/scraping/site-scrapers/alternateGroupTitle.ts +0 -11
  333. package/src/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.ts +0 -45
  334. package/src/scraping/site-scrapers/links-per-group/getLinksRecursively.ts +0 -47
  335. package/src/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.ts +0 -44
  336. package/src/scraping/site-scrapers/openNestedDocusaurusMenus.ts +0 -42
  337. package/src/scraping/site-scrapers/openNestedGitbookMenus.ts +0 -27
  338. package/src/scraping/site-scrapers/scrapeDocusaurusPage.ts +0 -85
  339. package/src/scraping/site-scrapers/scrapeDocusaurusSection.ts +0 -63
  340. package/src/scraping/site-scrapers/scrapeGitBookPage.ts +0 -82
  341. package/src/scraping/site-scrapers/scrapeGitBookSection.ts +0 -69
  342. package/src/scraping/site-scrapers/scrapeReadMePage.ts +0 -56
  343. package/src/scraping/site-scrapers/scrapeReadMeSection.ts +0 -66
  344. package/src/util.ts +0 -122
package/src/cli.ts CHANGED
@@ -2,106 +2,69 @@
2
2
  import yargs from 'yargs';
3
3
  import { hideBin } from 'yargs/helpers';
4
4
 
5
- import { checkUrl, checkVersion } from './checks.js';
6
- import { generateOpenApiPages } from './openapi/generateOpenApiPages.js';
7
- import { FrameworkHint, frameworks } from './scraping/detectFramework.js';
8
- import { scrapePageAutomatically } from './scraping/scrapePageCommands.js';
9
- import { scrapeSectionAutomatically } from './scraping/scrapeSectionCommands.js';
5
+ import { FINAL_SUCCESS_MESSAGE } from './constants.js';
6
+ import { scrapePageGroup } from './scrapingPipeline/group.js';
7
+ import { htmlToHast } from './scrapingPipeline/root.js';
8
+ import { scrapeAllSiteTabs } from './scrapingPipeline/tabs.js';
9
+ import { detectFramework, framework } from './utils/detectFramework.js';
10
+ import { write } from './utils/file.js';
11
+ import { log } from './utils/log.js';
12
+ import { fetchPageHtml } from './utils/network.js';
13
+ import { checkUrl } from './utils/url.js';
10
14
 
11
15
  await yargs(hideBin(process.argv))
12
16
  .command(
13
17
  'page <url>',
14
- 'Scrapes a page',
15
- (yargs) =>
16
- yargs
17
- .positional('url', { type: 'string', demandOption: true })
18
- .check(checkUrl)
19
- .option('overwrite', { alias: 'O', type: 'boolean', default: false })
20
- .option('tool', { alias: 't', choices: frameworks })
21
- .option('docusaurusVersion', {
22
- alias: 'd',
23
- type: 'string',
24
- choices: ['1', '2', '3'] as const,
25
- })
26
- .check(checkVersion),
27
- async ({ url, overwrite, tool, docusaurusVersion }) => {
28
- const frameworkHint: FrameworkHint = {
29
- framework: tool,
30
- version: docusaurusVersion ?? '3',
31
- };
32
-
33
- await scrapePageAutomatically(url, overwrite, frameworkHint);
34
- }
18
+ 'Scrapes the docs page for the URL provided',
19
+ (yargs) => yargs.positional('url', { type: 'string', demandOption: true }).check(checkUrl),
20
+ async ({ url }) => await page(url)
35
21
  )
36
- .command(
37
- 'section <url>',
38
- 'Scrapes the docs in the section',
39
- (yargs) =>
40
- yargs
41
- .positional('url', { type: 'string', demandOption: true })
42
- .check(checkUrl)
43
- .option('overwrite', { alias: 'O', type: 'boolean', default: false })
44
- .option('tool', { alias: 't', choices: frameworks })
45
- .option('docusaurusVersion', {
46
- alias: 'd',
47
- type: 'string',
48
- choices: ['1', '2', '3'] as const,
49
- })
50
- .check(checkVersion),
51
- async ({ url, overwrite, tool, docusaurusVersion }) => {
52
- const frameworkHint: FrameworkHint = {
53
- framework: tool,
54
- version: docusaurusVersion ?? '3',
55
- };
56
22
 
57
- await scrapeSectionAutomatically(url, overwrite, frameworkHint);
58
- }
59
- )
60
23
  .command(
61
- 'openapi-file <openapiFilename>',
62
- 'Creates MDX files from an OpenAPI spec',
63
- (yargs) =>
64
- yargs
65
- .positional('openapiFilename', {
66
- describe: 'The filename of the OpenAPI spec',
67
- type: 'string',
68
- demandOption: true,
69
- })
70
- .option('writeFiles', {
71
- describe: 'Whether or not to write the frontmatter files',
72
- default: true,
73
- type: 'boolean',
74
- alias: 'w',
75
- })
76
- .option('outDir', {
77
- describe: 'The folder in which to write any created frontmatter files',
78
- type: 'string',
79
- alias: 'o',
80
- }),
81
- async (argv) => {
82
- try {
83
- const { nav } = await generateOpenApiPages(
84
- argv.openapiFilename,
85
- argv.writeFiles,
86
- argv.outDir
87
- );
88
- console.log('navigation object suggestion:');
89
- console.log(JSON.stringify(nav, undefined, 2));
90
- } catch (error) {
91
- if (error instanceof Error) {
92
- console.error(error.message);
93
- } else {
94
- console.error(error);
95
- }
96
- }
97
- }
24
+ 'section <url>',
25
+ 'Scrapes the entire docs site based on the URL provided',
26
+ (yargs) => yargs.positional('url', { type: 'string', demandOption: true }).check(checkUrl),
27
+ async ({ url }) => await site(url)
98
28
  )
99
- // Print the help menu when the user enters an invalid command.
29
+
100
30
  .strictCommands()
101
31
  .demandCommand(1, 'Unknown command. See above for the list of supported commands.')
102
-
103
- // Alias option flags --help = -h, --version = -v
104
32
  .alias('h', 'help')
105
33
  .alias('v', 'version')
106
-
107
34
  .parse();
35
+
36
+ async function page(url: string) {
37
+ const urlObj = new URL(url);
38
+ const html = await fetchPageHtml(urlObj);
39
+ log('Successfully retrieved initial HTML from src: ' + urlObj.toString());
40
+
41
+ const hast = htmlToHast(html);
42
+ detectFramework(hast);
43
+
44
+ const needsBrowser = framework.vendor === 'gitbook';
45
+ const results = await scrapePageGroup([urlObj], needsBrowser);
46
+ const result = results[0] || {
47
+ success: false,
48
+ message: `An unknown error occurred when scraping ${url}`,
49
+ };
50
+
51
+ if (result.success) {
52
+ log(`Successfully scraped ${url} ${result.data ? `into ${result.data[1]}` : ''}`);
53
+ } else {
54
+ log(result.message);
55
+ }
56
+ }
57
+
58
+ async function site(url: string) {
59
+ const urlObj = new URL(url);
60
+ const html = await fetchPageHtml(urlObj);
61
+ log('Successfully retrieved initial HTML from src: ' + urlObj.toString());
62
+
63
+ const result = await scrapeAllSiteTabs(html, urlObj);
64
+ if (result.success) {
65
+ write('mint.json', JSON.stringify(result.data, undefined, 2));
66
+ log(FINAL_SUCCESS_MESSAGE);
67
+ } else {
68
+ log(result.message);
69
+ }
70
+ }
@@ -0,0 +1,84 @@
1
+ import type { Element, ElementContent } from 'hast';
2
+
3
+ import { assertIsDefined, assertIsNumber } from '../assert.js';
4
+ import type { HastNode, HastNodeIndex, HastNodeParent } from '../types/hast.js';
5
+ import { turnChildrenIntoMdx } from '../utils/children.js';
6
+ import { findTitle } from '../utils/title.js';
7
+
8
+ export function gitBookScrapeAccordion(
9
+ node: HastNode,
10
+ _: HastNodeIndex,
11
+ __: HastNodeParent
12
+ ): Element | undefined {
13
+ if (node.tagName !== 'details') {
14
+ return undefined;
15
+ }
16
+
17
+ const title = findTitle(node, { delete: true, nodeType: 'element', tagName: 'summary' });
18
+
19
+ const newNode: Element = {
20
+ type: 'element',
21
+ tagName: 'Accordion',
22
+ properties: {
23
+ title: title,
24
+ },
25
+ children: turnChildrenIntoMdx(node.children) as Array<ElementContent>,
26
+ };
27
+
28
+ return newNode;
29
+ }
30
+
31
+ export function readmeScrapeAccordion(
32
+ node: HastNode,
33
+ index: HastNodeIndex,
34
+ parent: HastNodeParent
35
+ ): Element | undefined {
36
+ if (
37
+ node.tagName !== 'button' ||
38
+ !node.properties.className ||
39
+ !(node.properties.className as Array<string>).includes('accordion')
40
+ ) {
41
+ return undefined;
42
+ }
43
+
44
+ assertIsNumber(index);
45
+ assertIsDefined(parent);
46
+
47
+ const title = findTitle(node);
48
+
49
+ parent.children.shift();
50
+
51
+ const newNode: Element = {
52
+ type: 'element',
53
+ tagName: 'Accordion',
54
+ properties: {
55
+ title: title,
56
+ },
57
+ children: turnChildrenIntoMdx(parent.children) as Array<ElementContent>,
58
+ };
59
+
60
+ return newNode;
61
+ }
62
+
63
+ export function docusaurusScrapeAccordion(
64
+ node: HastNode,
65
+ _: HastNodeIndex,
66
+ __: HastNodeParent
67
+ ): Element | undefined {
68
+ if (node.tagName !== 'details') {
69
+ return undefined;
70
+ }
71
+
72
+ const title = findTitle(node, { delete: true, nodeType: 'element', tagName: 'summary' });
73
+
74
+ const newNode: Element = {
75
+ type: 'element',
76
+ tagName: 'Accordion',
77
+ properties: {
78
+ title: title,
79
+ },
80
+ children: turnChildrenIntoMdx(node.children) as Array<ElementContent>,
81
+ };
82
+
83
+ return newNode;
84
+ }
@@ -0,0 +1,69 @@
1
+ import type { Element } from 'hast';
2
+
3
+ import type { HastNode, HastNodeIndex, HastNodeParent } from '../types/hast.js';
4
+
5
+ export function gitBookScrapeAccordionGroup(
6
+ node: HastNode,
7
+ _: HastNodeIndex,
8
+ parent: HastNodeParent
9
+ ): Element | undefined {
10
+ if (node.tagName !== 'Accordion') return undefined;
11
+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
12
+ if (!parent || !parent.children) return undefined;
13
+
14
+ let accordionCount = 0;
15
+ for (const child of parent.children) {
16
+ if (child.type === 'element' && child.tagName === 'Accordion') accordionCount++;
17
+ }
18
+
19
+ if (accordionCount === parent.children.length) {
20
+ parent.type = 'element';
21
+ (parent as Element).tagName = 'AccordionGroup';
22
+ }
23
+
24
+ return undefined;
25
+ }
26
+
27
+ export function readmeScrapeAccordionGroup(
28
+ node: HastNode,
29
+ _: HastNodeIndex,
30
+ parent: HastNodeParent
31
+ ): Element | undefined {
32
+ if (node.tagName !== 'Accordion') return undefined;
33
+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
34
+ if (!parent || !parent.children) return undefined;
35
+
36
+ let accordionCount = 0;
37
+ for (const child of parent.children) {
38
+ if (child.type === 'element' && child.tagName === 'Accordion') accordionCount++;
39
+ }
40
+
41
+ if (accordionCount === parent.children.length) {
42
+ parent.type = 'element';
43
+ (parent as Element).tagName = 'AccordionGroup';
44
+ }
45
+
46
+ return undefined;
47
+ }
48
+
49
+ export function docusaurusScrapeAccordionGroup(
50
+ node: HastNode,
51
+ _: HastNodeIndex,
52
+ parent: HastNodeParent
53
+ ): Element | undefined {
54
+ if (node.tagName !== 'Accordion') return undefined;
55
+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
56
+ if (!parent || !parent.children) return undefined;
57
+
58
+ let accordionCount = 0;
59
+ for (const child of parent.children) {
60
+ if (child.type === 'element' && child.tagName === 'Accordion') accordionCount++;
61
+ }
62
+
63
+ if (accordionCount === parent.children.length) {
64
+ parent.type = 'element';
65
+ (parent as Element).tagName = 'AccordionGroup';
66
+ }
67
+
68
+ return undefined;
69
+ }
@@ -0,0 +1,159 @@
1
+ import type { Element, ElementContent } from 'hast';
2
+
3
+ import { assertIsStringArray } from '../assert.js';
4
+ import type { HastNode, HastNodeIndex, HastNodeParent } from '../types/hast.js';
5
+ import { turnChildrenIntoMdx } from '../utils/children.js';
6
+
7
+ export function gitBookScrapeCallout(
8
+ node: HastNode,
9
+ _: HastNodeIndex,
10
+ __: HastNodeParent
11
+ ): Element | undefined {
12
+ if (
13
+ node.tagName !== 'div' ||
14
+ !node.properties.className ||
15
+ !Array.isArray(node.properties.className) ||
16
+ !node.properties.className
17
+ .join(' ')
18
+ .startsWith(
19
+ 'px-4 py-4 transition-colors rounded-md straight-corners:rounded-none bg-gradient-to-b'
20
+ )
21
+ ) {
22
+ return undefined;
23
+ }
24
+
25
+ assertIsStringArray(node.properties.className);
26
+
27
+ const className =
28
+ node.properties.className
29
+ .find((className) => /^from-[a-z]+\/[0-9]$/.test(className))
30
+ ?.replace(/^from-|\/[0-9]$/g, '') || null;
31
+
32
+ let tagName = 'Note';
33
+ switch (className) {
34
+ case 'periwinkle':
35
+ tagName = 'Info';
36
+ break;
37
+ case 'teal':
38
+ tagName = 'Check';
39
+ break;
40
+ case 'yellow':
41
+ case 'pomegranate':
42
+ tagName = 'Warning';
43
+ break;
44
+ default:
45
+ tagName = 'Info';
46
+ break;
47
+ }
48
+
49
+ const newNode: Element = {
50
+ type: 'element',
51
+ tagName: tagName,
52
+ properties: {},
53
+ children: turnChildrenIntoMdx(node.children) as Array<ElementContent>,
54
+ };
55
+
56
+ return newNode;
57
+ }
58
+
59
+ export function readmeScrapeCallout(
60
+ node: HastNode,
61
+ _: HastNodeIndex,
62
+ __: HastNodeParent
63
+ ): Element | undefined {
64
+ if (
65
+ node.tagName !== 'blockquote' ||
66
+ !node.properties.className ||
67
+ !Array.isArray(node.properties.className) ||
68
+ !node.properties.className.includes('callout')
69
+ ) {
70
+ return undefined;
71
+ }
72
+
73
+ node.children.shift();
74
+
75
+ assertIsStringArray(node.properties.className);
76
+ const calloutClassNames = node.properties.className.filter((className) =>
77
+ className.includes('callout_')
78
+ );
79
+ const calloutClassName: string = calloutClassNames[0] ? calloutClassNames[0] : 'callout_info';
80
+
81
+ let tagName = 'Note';
82
+ switch (calloutClassName) {
83
+ case 'callout_default':
84
+ case 'callout_info':
85
+ tagName = 'Info';
86
+ break;
87
+ case 'callout_warn':
88
+ case 'callout_error':
89
+ tagName = 'Warning';
90
+ break;
91
+ case 'callout_okay':
92
+ tagName = 'Check';
93
+ break;
94
+ default:
95
+ tagName = 'Info';
96
+ break;
97
+ }
98
+
99
+ const newNode: Element = {
100
+ type: 'element',
101
+ tagName: tagName,
102
+ properties: {},
103
+ children: turnChildrenIntoMdx(node.children) as Array<ElementContent>,
104
+ };
105
+
106
+ return newNode;
107
+ }
108
+
109
+ export function docusaurusScrapeCallout(
110
+ node: HastNode,
111
+ _: HastNodeIndex,
112
+ __: HastNodeParent
113
+ ): Element | undefined {
114
+ if (
115
+ node.tagName !== 'div' ||
116
+ !node.properties.className ||
117
+ !Array.isArray(node.properties.className) ||
118
+ (!node.properties.className.includes('admonition') &&
119
+ !node.properties.className.includes('theme-admonition'))
120
+ ) {
121
+ return undefined;
122
+ }
123
+
124
+ node.children.shift();
125
+
126
+ const calloutClassNames = node.properties.className.filter(
127
+ (className) => typeof className === 'string' && className.includes('alert--')
128
+ );
129
+ const calloutClassName = calloutClassNames.length ? calloutClassNames[0] : 'alert--info';
130
+
131
+ let tagName = 'Note';
132
+ switch (calloutClassName) {
133
+ case 'alert--info':
134
+ tagName = 'Info';
135
+ break;
136
+ case 'alert--secondary':
137
+ tagName = 'Note';
138
+ break;
139
+ case 'alert--danger':
140
+ case 'alert--warning':
141
+ tagName = 'Warning';
142
+ break;
143
+ case 'alert--success':
144
+ tagName = 'Check';
145
+ break;
146
+ default:
147
+ tagName = 'Info';
148
+ break;
149
+ }
150
+
151
+ const newNode: Element = {
152
+ type: 'element',
153
+ tagName: tagName,
154
+ properties: {},
155
+ children: turnChildrenIntoMdx(node.children) as Array<ElementContent>,
156
+ };
157
+
158
+ return newNode;
159
+ }
@@ -0,0 +1,168 @@
1
+ import type { Element, ElementContent } from 'hast';
2
+ import { visit, EXIT, CONTINUE } from 'unist-util-visit';
3
+
4
+ import { assertIsDefined } from '../assert.js';
5
+ import type { HastNode, HastNodeIndex, HastNodeParent } from '../types/hast.js';
6
+ import { turnChildrenIntoMdx } from '../utils/children.js';
7
+ import { findImg } from '../utils/img.js';
8
+ import { findTitle } from '../utils/title.js';
9
+
10
/**
 * Converts a GitBook card (an `a` or `div` whose joined class list starts with
 * the GitBook card class prefix) into a `Card` element.
 *
 * The first descendant whose joined class list equals the text-container
 * class string is detached from its parent (mutating the tree) and handed to
 * `findTitle`; an image source is looked up on the whole node with `findImg`.
 * The node's remaining children become the card body.
 *
 * @param node - Candidate hast element.
 * @param _ - Unused.
 * @param __ - Unused.
 * @returns A `Card` element with `title` and, when available, `href` and
 *   `img` properties; `undefined` when the node is not a GitBook card.
 */
export function gitBookScrapeCard(
  node: HastNode,
  _: HastNodeIndex,
  __: HastNodeParent
): Element | undefined {
  if (node.tagName !== 'a' && node.tagName !== 'div') return undefined;

  const classList = node.properties.className;
  if (!classList || !Array.isArray(classList)) return undefined;

  const cardClassPrefix =
    'group grid shadow-1xs shadow-dark/[0.02] rounded-md straight-corners:rounded-none dark:shadow-transparent';
  if (!classList.join(' ').startsWith(cardClassPrefix)) return undefined;

  const textContainerClasses = 'w-full space-y-2 lg:space-y-3 leading-normal';
  let textContainer: Element | undefined = undefined;
  visit(node, 'element', (candidate, position, owner) => {
    const candidateClasses = candidate.properties.className;
    const isTextContainer =
      Array.isArray(candidateClasses) && candidateClasses.join(' ') === textContainerClasses;
    if (!isTextContainer) return CONTINUE;

    textContainer = candidate;
    // Detach the text container so it is not repeated in the card body.
    if (owner && typeof position === 'number') {
      owner.children.splice(position, 1);
    }
    return EXIT;
  });

  const title = findTitle(textContainer);
  const imgSrc = findImg(node);

  const card: Element = {
    type: 'element',
    tagName: 'Card',
    properties: { title },
    children: turnChildrenIntoMdx(node.children) as Array<ElementContent>,
  };

  const linkTarget = node.properties.href;
  if (linkTarget) card.properties.href = linkTarget;
  if (imgSrc) card.properties.img = imgSrc;

  return card;
}
61
+
62
/**
 * Converts a ReadMe card-like element into a `Card` element.
 *
 * A node qualifies when it is a `div` or `a` whose `className` array contains
 * `Tile`, `card`, `Card` or `docs-card`, or whose joined class list contains
 * `_card` or `-card`.
 *
 * The link target is resolved in this order:
 *   1. the node's own `href`;
 *   2. the node's own string `onclick`, using the text that follows its first
 *      single quote (up to the next one), prefixed with `./`;
 *   3. the first descendant carrying an `href` or a string `onclick`, resolved
 *      the same way.
 *
 * @param node - Candidate hast element.
 * @param _ - Unused.
 * @param parent - Parent of `node`; passed to `assertIsDefined` before use
 *   (presumably throws when undefined — confirm in `../assert.js`).
 * @returns A `Card` element with `title` and `href` properties, or
 *   `undefined` when the node does not look like a card.
 */
export function readmeScrapeCard(
  node: HastNode,
  _: HastNodeIndex,
  parent: HastNodeParent
): Element | undefined {
  if (
    (node.tagName !== 'div' && node.tagName !== 'a') ||
    !node.properties.className ||
    !Array.isArray(node.properties.className)
  ) {
    return undefined;
  }

  const classNames = node.properties.className;
  const joinedClassNames = classNames.join(' ');
  const looksLikeCard =
    classNames.includes('Tile') ||
    classNames.includes('card') ||
    classNames.includes('Card') ||
    classNames.includes('docs-card') ||
    joinedClassNames.includes('_card') ||
    joinedClassNames.includes('-card');
  if (!looksLikeCard) {
    return undefined;
  }

  const title = findTitle(node);

  let href: string | undefined = undefined;
  if (node.properties.href) {
    href = node.properties.href as string;
  } else if (node.properties.onclick && typeof node.properties.onclick === 'string') {
    const str = node.properties.onclick.split("'")[1];
    href = str ? `./${str}` : undefined;
  } else {
    visit(node, 'element', function (subNode) {
      if (subNode.properties.href) {
        href = subNode.properties.href as string;
        return EXIT;
      } else if (subNode.properties.onclick && typeof subNode.properties.onclick === 'string') {
        // Read the handler from the descendant itself. Checking the outer
        // node here could never succeed, because this branch is only reached
        // when the outer node has no usable string `onclick`.
        const str = subNode.properties.onclick.split("'")[1];
        href = str ? `./${str}` : undefined;
        return EXIT;
      }
      return CONTINUE;
    });
  }

  assertIsDefined(parent);
  const newNode: Element = {
    type: 'element',
    tagName: 'Card',
    properties: {
      title: title,
      href: href,
    },
    // NOTE(review): the card body is built from the parent's children, not
    // from `node.children` — confirm this is intentional.
    children: turnChildrenIntoMdx(parent.children as Array<Element>) as Array<ElementContent>,
  };

  return newNode;
}
115
+
116
/**
 * Converts a Docusaurus card-like element into a `Card` element.
 *
 * A node qualifies when it is a `div` or `a` whose `className` array contains
 * `Tile`, `card`, `Card` or `docs-card`, or whose joined class list contains
 * `_card` or `-card`.
 *
 * The link target comes from the node's own `href`, else from its own string
 * `onclick` (the text after the first single quote, up to the next one,
 * prefixed with `./`), else from the first descendant that carries an `href`
 * or a string `onclick`, resolved the same way.
 *
 * @param node - Candidate hast element.
 * @param _ - Unused.
 * @param parent - Parent of `node`; passed to `assertIsDefined` before use
 *   (presumably throws when undefined — confirm in `../assert.js`).
 * @returns A `Card` element with `title` and `href` properties, or
 *   `undefined` when the node does not look like a card.
 */
export function docusaurusScrapeCard(
  node: HastNode,
  _: HastNodeIndex,
  parent: HastNodeParent
): Element | undefined {
  if (
    (node.tagName !== 'div' && node.tagName !== 'a') ||
    !node.properties.className ||
    !Array.isArray(node.properties.className)
  ) {
    return undefined;
  }

  const classNames = node.properties.className;
  const joinedClassNames = classNames.join(' ');
  const looksLikeCard =
    classNames.includes('Tile') ||
    classNames.includes('card') ||
    classNames.includes('Card') ||
    classNames.includes('docs-card') ||
    joinedClassNames.includes('_card') ||
    joinedClassNames.includes('-card');
  if (!looksLikeCard) {
    return undefined;
  }

  const title = findTitle(node);

  let href: string | undefined = undefined;
  if (node.properties.href) {
    href = node.properties.href as string;
  } else if (node.properties.onclick && typeof node.properties.onclick === 'string') {
    const str = node.properties.onclick.split("'")[1];
    href = str ? `./${str}` : undefined;
  } else {
    visit(node, 'element', function (subNode) {
      if (subNode.properties.href) {
        href = subNode.properties.href as string;
        return EXIT;
      } else if (subNode.properties.onclick && typeof subNode.properties.onclick === 'string') {
        // Use the descendant's own handler. The outer node's `onclick` was
        // already ruled out before this traversal started, so testing it
        // here would make this branch dead.
        const str = subNode.properties.onclick.split("'")[1];
        href = str ? `./${str}` : undefined;
        return EXIT;
      }
      return CONTINUE;
    });
  }

  assertIsDefined(parent);
  const newNode: Element = {
    type: 'element',
    tagName: 'Card',
    properties: {
      title: title,
      href: href,
    },
    // NOTE(review): the card body is built from the parent's children, not
    // from `node.children` — confirm this is intentional.
    children: turnChildrenIntoMdx(parent.children as Array<Element>) as Array<ElementContent>,
  };

  return newNode;
}