@mintlify/scraping 3.0.187 → 3.0.189

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (345) hide show
  1. package/README.md +0 -5
  2. package/bin/assert.d.ts +5 -0
  3. package/bin/assert.js +13 -0
  4. package/bin/assert.js.map +1 -0
  5. package/bin/cli.js +43 -72
  6. package/bin/cli.js.map +1 -1
  7. package/bin/components/Accordion.d.ts +5 -0
  8. package/bin/components/Accordion.js +54 -0
  9. package/bin/components/Accordion.js.map +1 -0
  10. package/bin/components/AccordionGroup.d.ts +5 -0
  11. package/bin/components/AccordionGroup.js +52 -0
  12. package/bin/components/AccordionGroup.js.map +1 -0
  13. package/bin/components/Callout.d.ts +5 -0
  14. package/bin/components/Callout.js +114 -0
  15. package/bin/components/Callout.js.map +1 -0
  16. package/bin/components/Card.d.ts +5 -0
  17. package/bin/components/Card.js +135 -0
  18. package/bin/components/Card.js.map +1 -0
  19. package/bin/components/CardGroup.d.ts +5 -0
  20. package/bin/components/CardGroup.js +52 -0
  21. package/bin/components/CardGroup.js.map +1 -0
  22. package/bin/components/CodeGroup.d.ts +5 -0
  23. package/bin/components/CodeGroup.js +166 -0
  24. package/bin/components/CodeGroup.js.map +1 -0
  25. package/bin/components/Frame.d.ts +5 -0
  26. package/bin/components/Frame.js +51 -0
  27. package/bin/components/Frame.js.map +1 -0
  28. package/bin/components/Tabs.d.ts +5 -0
  29. package/bin/components/Tabs.js +122 -0
  30. package/bin/components/Tabs.js.map +1 -0
  31. package/bin/components/link.d.ts +2 -0
  32. package/bin/components/link.js +16 -0
  33. package/bin/components/link.js.map +1 -0
  34. package/bin/constants.d.ts +6 -7
  35. package/bin/constants.js +31 -12
  36. package/bin/constants.js.map +1 -1
  37. package/bin/customComponents/create.d.ts +10 -0
  38. package/bin/customComponents/create.js +69 -0
  39. package/bin/customComponents/create.js.map +1 -0
  40. package/bin/customComponents/plugin.d.ts +2 -0
  41. package/bin/customComponents/plugin.js +26 -0
  42. package/bin/customComponents/plugin.js.map +1 -0
  43. package/bin/customComponents/selective.d.ts +6 -0
  44. package/bin/customComponents/selective.js +29 -0
  45. package/bin/customComponents/selective.js.map +1 -0
  46. package/bin/nav/iterate.d.ts +2 -0
  47. package/bin/nav/iterate.js +15 -0
  48. package/bin/nav/iterate.js.map +1 -0
  49. package/bin/nav/listItems.d.ts +8 -0
  50. package/bin/nav/listItems.js +62 -0
  51. package/bin/nav/listItems.js.map +1 -0
  52. package/bin/nav/retrieve.d.ts +3 -0
  53. package/bin/nav/retrieve.js +75 -0
  54. package/bin/nav/retrieve.js.map +1 -0
  55. package/bin/nav/root.d.ts +2 -0
  56. package/bin/nav/root.js +40 -0
  57. package/bin/nav/root.js.map +1 -0
  58. package/bin/openapi/generateOpenApiPages.js +2 -2
  59. package/bin/openapi/generateOpenApiPages.js.map +1 -1
  60. package/bin/root/retrieve.d.ts +2 -0
  61. package/bin/root/retrieve.js +46 -0
  62. package/bin/root/retrieve.js.map +1 -0
  63. package/bin/scrapingPipeline/group.d.ts +5 -0
  64. package/bin/scrapingPipeline/group.js +46 -0
  65. package/bin/scrapingPipeline/group.js.map +1 -0
  66. package/bin/scrapingPipeline/icon.d.ts +2 -0
  67. package/bin/scrapingPipeline/icon.js +22 -0
  68. package/bin/scrapingPipeline/icon.js.map +1 -0
  69. package/bin/scrapingPipeline/images.d.ts +3 -0
  70. package/bin/scrapingPipeline/images.js +50 -0
  71. package/bin/scrapingPipeline/images.js.map +1 -0
  72. package/bin/scrapingPipeline/logo.d.ts +5 -0
  73. package/bin/scrapingPipeline/logo.js +92 -0
  74. package/bin/scrapingPipeline/logo.js.map +1 -0
  75. package/bin/scrapingPipeline/page.d.ts +6 -0
  76. package/bin/scrapingPipeline/page.js +102 -0
  77. package/bin/scrapingPipeline/page.js.map +1 -0
  78. package/bin/scrapingPipeline/root.d.ts +2 -0
  79. package/bin/scrapingPipeline/root.js +8 -0
  80. package/bin/scrapingPipeline/root.js.map +1 -0
  81. package/bin/scrapingPipeline/site.d.ts +7 -0
  82. package/bin/scrapingPipeline/site.js +129 -0
  83. package/bin/scrapingPipeline/site.js.map +1 -0
  84. package/bin/scrapingPipeline/tabs.d.ts +3 -0
  85. package/bin/scrapingPipeline/tabs.js +67 -0
  86. package/bin/scrapingPipeline/tabs.js.map +1 -0
  87. package/bin/tabs/retrieveReadme.d.ts +3 -0
  88. package/bin/tabs/retrieveReadme.js +78 -0
  89. package/bin/tabs/retrieveReadme.js.map +1 -0
  90. package/bin/tsconfig.build.tsbuildinfo +1 -1
  91. package/bin/types/components.d.ts +2 -0
  92. package/bin/types/components.js +2 -0
  93. package/bin/types/components.js.map +1 -0
  94. package/bin/types/framework.d.ts +8 -0
  95. package/bin/types/framework.js +3 -0
  96. package/bin/types/framework.js.map +1 -0
  97. package/bin/types/hast.d.ts +6 -0
  98. package/bin/types/hast.js +2 -0
  99. package/bin/types/hast.js.map +1 -0
  100. package/bin/types/result.d.ts +7 -0
  101. package/bin/types/result.js +2 -0
  102. package/bin/types/result.js.map +1 -0
  103. package/bin/types/scrapeFunc.d.ts +3 -0
  104. package/bin/types/scrapeFunc.js +2 -0
  105. package/bin/types/scrapeFunc.js.map +1 -0
  106. package/bin/utils/append.d.ts +1 -0
  107. package/bin/utils/append.js +12 -0
  108. package/bin/utils/append.js.map +1 -0
  109. package/bin/utils/children.d.ts +5 -0
  110. package/bin/utils/children.js +35 -0
  111. package/bin/utils/children.js.map +1 -0
  112. package/bin/utils/className.d.ts +3 -0
  113. package/bin/utils/className.js +13 -0
  114. package/bin/utils/className.js.map +1 -0
  115. package/bin/utils/detectFramework.d.ts +4 -0
  116. package/bin/utils/detectFramework.js +60 -0
  117. package/bin/utils/detectFramework.js.map +1 -0
  118. package/bin/utils/emptyParagraphs.d.ts +3 -0
  119. package/bin/utils/emptyParagraphs.js +19 -0
  120. package/bin/utils/emptyParagraphs.js.map +1 -0
  121. package/bin/utils/errors.d.ts +3 -0
  122. package/bin/utils/errors.js +16 -0
  123. package/bin/utils/errors.js.map +1 -0
  124. package/bin/utils/escape.d.ts +2 -0
  125. package/bin/utils/escape.js +25 -0
  126. package/bin/utils/escape.js.map +1 -0
  127. package/bin/utils/extension.d.ts +3 -0
  128. package/bin/utils/extension.js +18 -0
  129. package/bin/utils/extension.js.map +1 -0
  130. package/bin/utils/file.d.ts +4 -0
  131. package/bin/utils/file.js +43 -0
  132. package/bin/utils/file.js.map +1 -0
  133. package/bin/utils/firstChild.d.ts +2 -0
  134. package/bin/utils/firstChild.js +12 -0
  135. package/bin/utils/firstChild.js.map +1 -0
  136. package/bin/utils/images.d.ts +5 -0
  137. package/bin/utils/images.js +86 -0
  138. package/bin/utils/images.js.map +1 -0
  139. package/bin/utils/img.d.ts +2 -0
  140. package/bin/utils/img.js +15 -0
  141. package/bin/utils/img.js.map +1 -0
  142. package/bin/utils/log.d.ts +18 -0
  143. package/bin/utils/log.js +68 -0
  144. package/bin/utils/log.js.map +1 -0
  145. package/bin/utils/nestedRoots.d.ts +7 -0
  146. package/bin/utils/nestedRoots.js +19 -0
  147. package/bin/utils/nestedRoots.js.map +1 -0
  148. package/bin/utils/network.d.ts +5 -0
  149. package/bin/utils/network.js +82 -0
  150. package/bin/utils/network.js.map +1 -0
  151. package/bin/utils/path.d.ts +1 -0
  152. package/bin/utils/path.js +22 -0
  153. package/bin/utils/path.js.map +1 -0
  154. package/bin/utils/position.d.ts +3 -0
  155. package/bin/utils/position.js +12 -0
  156. package/bin/utils/position.js.map +1 -0
  157. package/bin/utils/reservedNames.d.ts +4 -0
  158. package/bin/utils/reservedNames.js +27 -0
  159. package/bin/utils/reservedNames.js.map +1 -0
  160. package/bin/utils/strings.d.ts +2 -0
  161. package/bin/utils/strings.js +7 -0
  162. package/bin/utils/strings.js.map +1 -0
  163. package/bin/utils/text.d.ts +2 -0
  164. package/bin/utils/text.js +11 -0
  165. package/bin/utils/text.js.map +1 -0
  166. package/bin/utils/title.d.ts +10 -0
  167. package/bin/utils/title.js +58 -0
  168. package/bin/utils/title.js.map +1 -0
  169. package/bin/utils/url.d.ts +3 -0
  170. package/bin/utils/url.js +10 -0
  171. package/bin/utils/url.js.map +1 -0
  172. package/package.json +20 -11
  173. package/src/assert.ts +15 -0
  174. package/src/cli.ts +53 -90
  175. package/src/components/Accordion.ts +84 -0
  176. package/src/components/AccordionGroup.ts +69 -0
  177. package/src/components/Callout.ts +159 -0
  178. package/src/components/Card.ts +168 -0
  179. package/src/components/CardGroup.ts +69 -0
  180. package/src/components/CodeGroup.ts +209 -0
  181. package/src/components/Frame.ts +86 -0
  182. package/src/components/Tabs.ts +154 -0
  183. package/src/components/link.ts +17 -0
  184. package/src/constants.ts +37 -19
  185. package/src/customComponents/create.ts +106 -0
  186. package/src/customComponents/plugin.ts +31 -0
  187. package/src/customComponents/selective.ts +37 -0
  188. package/src/nav/iterate.ts +18 -0
  189. package/src/nav/listItems.ts +82 -0
  190. package/src/nav/retrieve.ts +88 -0
  191. package/src/nav/root.ts +47 -0
  192. package/src/openapi/generateOpenApiPages.ts +2 -2
  193. package/src/root/retrieve.ts +52 -0
  194. package/src/scrapingPipeline/group.ts +62 -0
  195. package/src/scrapingPipeline/icon.ts +26 -0
  196. package/src/scrapingPipeline/images.ts +67 -0
  197. package/src/scrapingPipeline/logo.ts +127 -0
  198. package/src/scrapingPipeline/page.ts +130 -0
  199. package/src/scrapingPipeline/root.ts +10 -0
  200. package/src/scrapingPipeline/site.ts +161 -0
  201. package/src/scrapingPipeline/tabs.ts +87 -0
  202. package/src/tabs/retrieveReadme.ts +99 -0
  203. package/src/types/components.ts +3 -0
  204. package/src/types/framework.ts +10 -0
  205. package/src/types/hast.ts +12 -0
  206. package/src/types/result.ts +1 -0
  207. package/src/types/scrapeFunc.ts +9 -0
  208. package/src/utils/append.ts +9 -0
  209. package/src/utils/children.ts +51 -0
  210. package/src/utils/className.ts +14 -0
  211. package/src/utils/detectFramework.ts +72 -0
  212. package/src/utils/emptyParagraphs.ts +21 -0
  213. package/src/utils/errors.ts +24 -0
  214. package/src/utils/escape.ts +30 -0
  215. package/src/utils/extension.ts +19 -0
  216. package/src/utils/file.ts +58 -0
  217. package/src/utils/firstChild.ts +13 -0
  218. package/src/utils/images.ts +101 -0
  219. package/src/utils/img.ts +17 -0
  220. package/src/utils/log.ts +82 -0
  221. package/src/utils/nestedRoots.ts +20 -0
  222. package/src/utils/network.ts +95 -0
  223. package/src/utils/path.ts +27 -0
  224. package/src/utils/position.ts +14 -0
  225. package/src/utils/reservedNames.ts +31 -0
  226. package/src/utils/strings.ts +7 -0
  227. package/src/utils/text.ts +11 -0
  228. package/src/utils/title.ts +68 -0
  229. package/src/utils/url.ts +8 -0
  230. package/bin/browser.d.ts +0 -2
  231. package/bin/browser.js +0 -24
  232. package/bin/browser.js.map +0 -1
  233. package/bin/checks.d.ts +0 -8
  234. package/bin/checks.js +0 -24
  235. package/bin/checks.js.map +0 -1
  236. package/bin/downloadImage.d.ts +0 -5
  237. package/bin/downloadImage.js +0 -88
  238. package/bin/downloadImage.js.map +0 -1
  239. package/bin/scraping/combineNavWithEmptyGroupTitles.d.ts +0 -2
  240. package/bin/scraping/combineNavWithEmptyGroupTitles.js +0 -20
  241. package/bin/scraping/combineNavWithEmptyGroupTitles.js.map +0 -1
  242. package/bin/scraping/detectFramework.d.ts +0 -9
  243. package/bin/scraping/detectFramework.js +0 -36
  244. package/bin/scraping/detectFramework.js.map +0 -1
  245. package/bin/scraping/downloadAllImages.d.ts +0 -4
  246. package/bin/scraping/downloadAllImages.js +0 -36
  247. package/bin/scraping/downloadAllImages.js.map +0 -1
  248. package/bin/scraping/downloadLogoImage.d.ts +0 -1
  249. package/bin/scraping/downloadLogoImage.js +0 -12
  250. package/bin/scraping/downloadLogoImage.js.map +0 -1
  251. package/bin/scraping/replaceImagePaths.d.ts +0 -1
  252. package/bin/scraping/replaceImagePaths.js +0 -14
  253. package/bin/scraping/replaceImagePaths.js.map +0 -1
  254. package/bin/scraping/scrapeFileGettingFileNameFromUrl.d.ts +0 -6
  255. package/bin/scraping/scrapeFileGettingFileNameFromUrl.js +0 -46
  256. package/bin/scraping/scrapeFileGettingFileNameFromUrl.js.map +0 -1
  257. package/bin/scraping/scrapeGettingFileNameFromUrl.d.ts +0 -6
  258. package/bin/scraping/scrapeGettingFileNameFromUrl.js +0 -13
  259. package/bin/scraping/scrapeGettingFileNameFromUrl.js.map +0 -1
  260. package/bin/scraping/scrapePage.d.ts +0 -8
  261. package/bin/scraping/scrapePage.js +0 -10
  262. package/bin/scraping/scrapePage.js.map +0 -1
  263. package/bin/scraping/scrapePageCommands.d.ts +0 -7
  264. package/bin/scraping/scrapePageCommands.js +0 -50
  265. package/bin/scraping/scrapePageCommands.js.map +0 -1
  266. package/bin/scraping/scrapeSection.d.ts +0 -3
  267. package/bin/scraping/scrapeSection.js +0 -12
  268. package/bin/scraping/scrapeSection.js.map +0 -1
  269. package/bin/scraping/scrapeSectionCommands.d.ts +0 -6
  270. package/bin/scraping/scrapeSectionCommands.js +0 -63
  271. package/bin/scraping/scrapeSectionCommands.js.map +0 -1
  272. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.d.ts +0 -5
  273. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js +0 -29
  274. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js.map +0 -1
  275. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.d.ts +0 -2
  276. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js +0 -31
  277. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js.map +0 -1
  278. package/bin/scraping/site-scrapers/alternateGroupTitle.d.ts +0 -3
  279. package/bin/scraping/site-scrapers/alternateGroupTitle.js +0 -9
  280. package/bin/scraping/site-scrapers/alternateGroupTitle.js.map +0 -1
  281. package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.d.ts +0 -5
  282. package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js +0 -33
  283. package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js.map +0 -1
  284. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.d.ts +0 -3
  285. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js +0 -35
  286. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js.map +0 -1
  287. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.d.ts +0 -3
  288. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js +0 -33
  289. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js.map +0 -1
  290. package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.d.ts +0 -2
  291. package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js +0 -30
  292. package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js.map +0 -1
  293. package/bin/scraping/site-scrapers/openNestedGitbookMenus.d.ts +0 -2
  294. package/bin/scraping/site-scrapers/openNestedGitbookMenus.js +0 -21
  295. package/bin/scraping/site-scrapers/openNestedGitbookMenus.js.map +0 -1
  296. package/bin/scraping/site-scrapers/scrapeDocusaurusPage.d.ts +0 -5
  297. package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js +0 -53
  298. package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js.map +0 -1
  299. package/bin/scraping/site-scrapers/scrapeDocusaurusSection.d.ts +0 -2
  300. package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js +0 -32
  301. package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js.map +0 -1
  302. package/bin/scraping/site-scrapers/scrapeGitBookPage.d.ts +0 -5
  303. package/bin/scraping/site-scrapers/scrapeGitBookPage.js +0 -56
  304. package/bin/scraping/site-scrapers/scrapeGitBookPage.js.map +0 -1
  305. package/bin/scraping/site-scrapers/scrapeGitBookSection.d.ts +0 -2
  306. package/bin/scraping/site-scrapers/scrapeGitBookSection.js +0 -42
  307. package/bin/scraping/site-scrapers/scrapeGitBookSection.js.map +0 -1
  308. package/bin/scraping/site-scrapers/scrapeReadMePage.d.ts +0 -5
  309. package/bin/scraping/site-scrapers/scrapeReadMePage.js +0 -38
  310. package/bin/scraping/site-scrapers/scrapeReadMePage.js.map +0 -1
  311. package/bin/scraping/site-scrapers/scrapeReadMeSection.d.ts +0 -2
  312. package/bin/scraping/site-scrapers/scrapeReadMeSection.js +0 -39
  313. package/bin/scraping/site-scrapers/scrapeReadMeSection.js.map +0 -1
  314. package/bin/util.d.ts +0 -29
  315. package/bin/util.js +0 -97
  316. package/bin/util.js.map +0 -1
  317. package/src/browser.ts +0 -24
  318. package/src/checks.ts +0 -32
  319. package/src/downloadImage.ts +0 -102
  320. package/src/scraping/combineNavWithEmptyGroupTitles.ts +0 -21
  321. package/src/scraping/detectFramework.ts +0 -55
  322. package/src/scraping/downloadAllImages.ts +0 -61
  323. package/src/scraping/downloadLogoImage.ts +0 -24
  324. package/src/scraping/replaceImagePaths.ts +0 -17
  325. package/src/scraping/scrapeFileGettingFileNameFromUrl.ts +0 -84
  326. package/src/scraping/scrapeGettingFileNameFromUrl.ts +0 -56
  327. package/src/scraping/scrapePage.ts +0 -40
  328. package/src/scraping/scrapePageCommands.ts +0 -68
  329. package/src/scraping/scrapeSection.ts +0 -30
  330. package/src/scraping/scrapeSectionCommands.ts +0 -98
  331. package/src/scraping/site-scrapers/Intercom/scrapeIntercomPage.ts +0 -52
  332. package/src/scraping/site-scrapers/Intercom/scrapeIntercomSection.ts +0 -54
  333. package/src/scraping/site-scrapers/alternateGroupTitle.ts +0 -11
  334. package/src/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.ts +0 -45
  335. package/src/scraping/site-scrapers/links-per-group/getLinksRecursively.ts +0 -47
  336. package/src/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.ts +0 -44
  337. package/src/scraping/site-scrapers/openNestedDocusaurusMenus.ts +0 -42
  338. package/src/scraping/site-scrapers/openNestedGitbookMenus.ts +0 -27
  339. package/src/scraping/site-scrapers/scrapeDocusaurusPage.ts +0 -85
  340. package/src/scraping/site-scrapers/scrapeDocusaurusSection.ts +0 -63
  341. package/src/scraping/site-scrapers/scrapeGitBookPage.ts +0 -82
  342. package/src/scraping/site-scrapers/scrapeGitBookSection.ts +0 -69
  343. package/src/scraping/site-scrapers/scrapeReadMePage.ts +0 -56
  344. package/src/scraping/site-scrapers/scrapeReadMeSection.ts +0 -66
  345. package/src/util.ts +0 -122
@@ -0,0 +1,92 @@
1
+ import { join } from 'node:path';
2
+ import { EXIT, visit } from 'unist-util-visit';
3
+ import { framework } from '../utils/detectFramework.js';
4
+ import { downloadImage } from '../utils/images.js';
5
+ import { fetchPageHtml } from '../utils/network.js';
6
+ import { htmlToHast } from './root.js';
7
/**
 * Collects every `<img>` element in the tree whose class list contains
 * `rm-Logo-img`.
 *
 * @param root - hast tree to search.
 * @returns The matching element nodes, or `undefined` when none were found.
 */
function findReadmeLogoNodes(root) {
    const logoImages = [];
    visit(root, 'element', (element) => {
        if (element.tagName !== 'img') {
            return;
        }
        const classNames = element.properties.className;
        if (!Array.isArray(classNames)) {
            return;
        }
        if (classNames.includes('rm-Logo-img')) {
            logoImages.push(element);
        }
    });
    if (logoImages.length === 0) {
        return undefined;
    }
    return logoImages;
}
17
/**
 * Collects every `<img>` element in the tree whose `alt` property is exactly
 * `Logo`.
 *
 * @param root - hast tree to search.
 * @returns The matching element nodes, or `undefined` when none were found.
 */
function findGitBookLogoNodes(root) {
    const logoImages = [];
    visit(root, 'element', (element) => {
        const isLogoImage = element.tagName === 'img' && element.properties.alt === 'Logo';
        if (!isLogoImage) {
            return;
        }
        logoImages.push(element);
    });
    if (logoImages.length === 0) {
        return undefined;
    }
    return logoImages;
}
26
/**
 * Finds the first `<div>` whose class list contains `navbar__brand` and
 * collects every `<img>` element inside it. The outer traversal stops as soon
 * as that first brand container has been processed.
 *
 * @param root - hast tree to search.
 * @returns The `<img>` element nodes found inside the brand container, or
 *   `undefined` when there are none.
 */
function findDocusaurusLogoNodes(root) {
    const logoImages = [];
    visit(root, 'element', (element) => {
        if (element.tagName !== 'div') {
            return undefined;
        }
        const classNames = element.properties.className;
        if (!Array.isArray(classNames) || !classNames.includes('navbar__brand')) {
            return undefined;
        }
        // Walk only the brand container's subtree looking for images.
        visit(element, 'element', (descendant) => {
            if (descendant.tagName === 'img') {
                logoImages.push(descendant);
            }
        });
        // Only the first brand container is considered.
        return EXIT;
    });
    return logoImages.length > 0 ? logoImages : undefined;
}
41
/**
 * Parses `html`, locates logo `<img>` nodes with `downloadFn`, downloads each
 * image into the `images` directory under the current working directory, and
 * appends the resulting file paths to `filepaths`.
 *
 * Failed downloads are reported by `downloadImage` as unsuccessful results and
 * contribute no entry. Any falsy entry already present in `filepaths` is
 * removed as well, so the accumulator only ever holds usable paths.
 *
 * @param {string} html - Raw HTML of the page to inspect.
 * @param {Function} downloadFn - Receives the parsed hast root and returns an
 *   array of `<img>` element nodes, or `undefined` when no logo was found.
 * @param {Array<string>} filepaths - Accumulator, mutated in place.
 * @returns {Promise<void>}
 */
async function findLogosFromHtml(html, downloadFn, filepaths) {
    const hast = htmlToHast(html);
    const imgNodes = downloadFn(hast);
    if (imgNodes) {
        const imagesDir = join(process.cwd(), 'images');
        const downloaded = await Promise.all(imgNodes.map(async (node) => {
            const res = await downloadImage(node.properties.src, imagesDir);
            return res.success && res.data ? res.data[1] : '';
        }));
        filepaths.push(...downloaded);
    }
    // Compact the accumulator in place. Splicing inside `forEach` (the previous
    // approach) skips the element that follows each removal, so runs of
    // consecutive empty entries were only partially removed.
    let kept = 0;
    for (let index = 0; index < filepaths.length; index += 1) {
        const filepath = filepaths[index];
        if (filepath) {
            filepaths[kept] = filepath;
            kept += 1;
        }
    }
    filepaths.length = kept;
}
60
/**
 * Downloads the logo image(s) of the site at `url`.
 *
 * When a `browser` is supplied, the page is loaded in it, its HTML is captured
 * before and after clicking the `.rm-ThemeToggle` element, and both snapshots
 * are searched with `findReadmeLogoNodes`. Without a browser the page HTML is
 * fetched directly and searched with the GitBook finder when
 * `framework.vendor === 'gitbook'`, otherwise with the Docusaurus finder.
 *
 * The supplied browser is always closed before this function settles, including
 * when navigation, the toggle click, or a download throws.
 *
 * @param {string | URL} url - Address of the page to inspect.
 * @param {object} [browser] - Browser instance exposing `newPage()` and
 *   `close()`; ownership passes to this function.
 * @returns {Promise<string | { light: string, dark: string } | undefined>} The
 *   single logo path, a light/dark pair (first two unique paths, in the order
 *   they were collected), or `undefined` when no logo was downloaded.
 */
export async function downloadLogos(url, browser) {
    const pageUrl = new URL(url);
    const filepaths = [];
    try {
        if (browser) {
            const page = await browser.newPage();
            await page.goto(pageUrl.toString(), {
                waitUntil: 'networkidle2',
            });
            const htmls = [await page.content()];
            await page.click('.rm-ThemeToggle');
            htmls.push(await page.content());
            // Process snapshots one after the other: they all append to the same
            // array, so running them in parallel would make the light/dark order
            // depend on which download happens to finish first.
            for (const html of htmls) {
                await findLogosFromHtml(html, findReadmeLogoNodes, filepaths);
            }
        }
        else {
            const html = await fetchPageHtml(pageUrl);
            await findLogosFromHtml(html, framework.vendor === 'gitbook' ? findGitBookLogoNodes : findDocusaurusLogoNodes, filepaths);
        }
    }
    finally {
        // Release the browser on failure as well as on success.
        if (browser)
            await browser.close();
    }
    const uniqueFilepaths = [...new Set(filepaths)];
    if (uniqueFilepaths.length === 1) {
        return uniqueFilepaths[0];
    }
    if (uniqueFilepaths.length > 1) {
        return {
            light: uniqueFilepaths[0],
            dark: uniqueFilepaths[1],
        };
    }
    return undefined;
}
92
+ //# sourceMappingURL=logo.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"logo.js","sourceRoot":"","sources":["../../src/scrapingPipeline/logo.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAE/C,OAAO,EAAE,SAAS,EAAE,MAAM,6BAA6B,CAAC;AACxD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAEvC,SAAS,mBAAmB,CAAC,IAAc;IACzC,MAAM,QAAQ,GAAmB,EAAE,CAAC;IACpC,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,IAAI;QACnC,IACE,IAAI,CAAC,OAAO,KAAK,KAAK;YACtB,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;YACxC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC;YAEjD,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACxB,CAAC,CAAC,CAAC;IACH,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC;AAChD,CAAC;AAED,SAAS,oBAAoB,CAAC,IAAc;IAC1C,MAAM,QAAQ,GAAmB,EAAE,CAAC;IACpC,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,IAAI;QACnC,IAAI,IAAI,CAAC,OAAO,KAAK,KAAK,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,KAAK,MAAM,EAAE,CAAC;YAC7D,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACtB,CAAC;IACH,CAAC,CAAC,CAAC;IACH,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC;AAChD,CAAC;AAED,SAAS,uBAAuB,CAAC,IAAc;IAC7C,MAAM,QAAQ,GAAmB,EAAE,CAAC;IACpC,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,IAAI;QACnC,IACE,IAAI,CAAC,OAAO,KAAK,KAAK;YACtB,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;YACxC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,eAAe,CAAC,EACnD,CAAC;YACD,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,OAAO;gBACtC,IAAI,OAAO,CAAC,OAAO,KAAK,KAAK;oBAAE,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACxD,CAAC,CAAC,CAAC;YACH,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC,CAAC,CAAC;IACH,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC;AAChD,CAAC;AAED,KAAK,UAAU,iBAAiB,CAC9B,IAAY,EACZ,UAA0D,EAC1D,SAAwB;IAExB,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;IAC9B,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;IAElC,IAAI,QAAQ,EAAE,CAAC;QACb,SAAS,CAAC,IAAI,CACZ,GAAG,CAAC,MAAM,OAAO,CAAC,GAAG,CACnB,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;YAC1B,MAAM,GAAG,GAAG,MAAM,aAAa,CAC7B,IAAI,CAAC,UAAU,CAAC,
GAAa,EAC7B,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,QAAQ,CAAC,CAC9B,CAAC;YAEF,IAAI,GAAG,CAAC,OAAO,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC;gBAC5B,OAAO,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACrB,CAAC;iBAAM,CAAC;gBACN,OAAO,EAAE,CAAC;YACZ,CAAC;QACH,CAAC,CAAC,CACH,CAAC,CACH,CAAC;IACJ,CAAC;IAED,SAAS,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,KAAK,EAAE,EAAE;QACpC,IAAI,CAAC,QAAQ;YAAE,SAAS,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,GAAiB,EACjB,OAA4B;IAE5B,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IACnB,MAAM,SAAS,GAAkB,EAAE,CAAC;IACpC,IAAI,OAAO,EAAE,CAAC;QACZ,MAAM,KAAK,GAAkB,EAAE,CAAC;QAEhC,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;QACrC,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE;YAC9B,SAAS,EAAE,cAAc;SAC1B,CAAC,CAAC;QAEH,KAAK,CAAC,IAAI,CAAC,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;QACjC,MAAM,IAAI,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;QACpC,KAAK,CAAC,IAAI,CAAC,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;QAEjC,MAAM,OAAO,CAAC,GAAG,CACf,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;YACvB,OAAO,MAAM,iBAAiB,CAAC,IAAI,EAAE,mBAAmB,EAAE,SAAS,CAAC,CAAC;QACvE,CAAC,CAAC,CACH,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,GAAG,MAAM,aAAa,CAAC,GAAG,CAAC,CAAC;QACtC,MAAM,iBAAiB,CACrB,IAAI,EACJ,SAAS,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,uBAAuB,EAC/E,SAAS,CACV,CAAC;IACJ,CAAC;IAED,IAAI,OAAO;QAAE,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;IAEnC,MAAM,eAAe,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;IAEzD,OAAO,eAAe,CAAC,MAAM,KAAK,CAAC;QACjC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC;QACpB,CAAC,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC;YAC1B,CAAC,CAAC;gBACE,KAAK,EAAE,eAAe,CAAC,CAAC,CAAW;gBACnC,IAAI,EAAE,eAAe,CAAC,CAAC,CAAW;aACnC;YACH,CAAC,CAAC,SAAS,CAAC;AAClB,CAAC"}
@@ -0,0 +1,6 @@
1
+ import type { Result } from '../types/result.js';
2
/**
 * Converts one page to MDX and writes it to disk.
 *
 * `html` is the page markup, except when `opts.externalLink` is true: then the
 * implementation uses it as the output filename (with `.mdx` appended) and
 * writes a page pointing at `url`.
 *
 * On success `data` is `[url, filename]` for external links,
 * `[pathname of url, opts.rootPath]` when `opts.rootPath` is set, and
 * `undefined` otherwise. On failure the result carries a message prefixed with
 * the page URL.
 */
+ export declare function scrapePage(html: string, url: string | URL, opts?: {
3
+ externalLink: boolean;
4
+ isOverviewPage?: boolean;
5
+ rootPath?: string;
6
+ }): Promise<Result<[string, string]>>;
@@ -0,0 +1,102 @@
1
+ import remarkGfm from 'remark-gfm';
2
+ import remarkMdx from 'remark-mdx';
3
+ import remarkStringify from 'remark-stringify';
4
+ import { unified } from 'unified';
5
+ import { convertHeaderLinksToText } from '../components/link.js';
6
+ import { CONTENT_FAILURE_MSG, MDAST_FAILURE_MSG } from '../constants.js';
7
+ import { createCallout, createCard, createAccordion, createAccordionGroup, createFrame, createCodeGroup, createTabs, createCardGroup, } from '../customComponents/create.js';
8
+ import { rehypeToRemarkCustomComponents } from '../customComponents/plugin.js';
9
+ import { selectiveRehypeRemark } from '../customComponents/selective.js';
10
+ import { retrieveRootContent } from '../root/retrieve.js';
11
+ import { unifiedRemoveClassNames } from '../utils/className.js';
12
+ import { detectFramework, framework } from '../utils/detectFramework.js';
13
+ import { unifiedRemoveEmptyParagraphs } from '../utils/emptyParagraphs.js';
14
+ import { getErrorMessage, logErrorResults } from '../utils/errors.js';
15
+ import { escapeCharactersOutsideCodeBlocks } from '../utils/escape.js';
16
+ import { write, writePage } from '../utils/file.js';
17
+ import { log } from '../utils/log.js';
18
+ import { unifiedRemoveNestedRoots } from '../utils/nestedRoots.js';
19
+ import { unifiedRemovePositions } from '../utils/position.js';
20
+ import { removeLeadingSlash, removeTrailingSlash } from '../utils/strings.js';
21
+ import { getDescriptionFromRoot, getTitleFromHeading } from '../utils/title.js';
22
+ import { downloadImagesFromFile } from './images.js';
23
+ import { htmlToHast } from './root.js';
24
/**
 * Converts a scraped HTML page into an MDX file on disk.
 *
 * For external links (`opts.externalLink`), `html` is used as the file name and
 * a page referencing `url` is written without any conversion. Otherwise the
 * HTML is parsed, the root content is extracted, run through the component and
 * clean-up plugins, its images are downloaded, and the tree is stringified to
 * MDX and written out.
 *
 * @param html - Page HTML, or the target file name for external links.
 * @param url - Address the page was scraped from.
 * @param opts - `externalLink`, `isOverviewPage` and `rootPath` switches.
 * @returns A result object: `{ success: true, data }` or
 *   `{ success: false, message }`.
 * @throws Rethrows any error raised while downloading the page's images.
 */
export async function scrapePage(html, url, opts = { externalLink: false }) {
    const pageUrl = new URL(url);
    const urlStr = pageUrl.toString();

    if (opts.externalLink) {
        const filename = html;
        writePage(`${filename}.mdx`, '', '', '', urlStr);
        return { success: true, data: [urlStr, filename] };
    }

    const hast = htmlToHast(html);
    if (!framework.vendor) {
        detectFramework(hast);
    }

    const content = retrieveRootContent(hast);
    if (!content) {
        return { success: false, message: `${urlStr}: ${CONTENT_FAILURE_MSG}` };
    }

    // Order matters: component creation first, then clean-up, then conversion.
    const plugins = [
        createCard,
        createAccordion,
        createFrame,
        createTabs,
        createCallout,
        createCardGroup,
        createAccordionGroup,
        createCodeGroup,
        unifiedRemoveClassNames,
        unifiedRemovePositions,
        unifiedRemoveEmptyParagraphs,
        escapeCharactersOutsideCodeBlocks,
        selectiveRehypeRemark,
        // Cleans up any nested components left untouched
        // by `selectiveRehypeRemark`, and converts them to
        // MDX compatible components
        rehypeToRemarkCustomComponents,
        convertHeaderLinksToText,
        unifiedRemoveNestedRoots,
    ];
    let processor = unified();
    for (const plugin of plugins) {
        processor = processor.use(plugin);
    }
    const mdastTree = processor.runSync({
        type: 'root',
        children: [content],
    });

    try {
        const imageResults = await downloadImagesFromFile(mdastTree, pageUrl);
        logErrorResults(`scraping images from ${pageUrl.toString()}`, imageResults);
    }
    catch (error) {
        const errorMessage = getErrorMessage(error);
        log(`We encountered an error when scraping the images from ${pageUrl.toString()}${errorMessage}`);
        throw error;
    }

    const title = getTitleFromHeading(mdastTree);
    const description = getDescriptionFromRoot(mdastTree);

    try {
        const stringifier = unified().use(remarkMdx).use(remarkGfm).use(remarkStringify);
        const result = stringifier.stringify(mdastTree);

        // Decide where the page is written: an explicit root path wins, and a
        // bare-origin URL is stored as `home`.
        let outputUrl = pageUrl;
        if (opts.rootPath) {
            outputUrl = new URL(opts.rootPath, pageUrl.origin);
        }
        else if (pageUrl.origin === removeTrailingSlash(pageUrl.toString())) {
            outputUrl = new URL('home', pageUrl.origin);
        }

        const pageTitle = opts.isOverviewPage ? 'Overview' : title;
        writePage(outputUrl, pageTitle, description, String(result));

        let data;
        if (opts.rootPath) {
            const originalPath = new URL(urlStr).pathname;
            data = [removeLeadingSlash(removeTrailingSlash(originalPath)), opts.rootPath];
        }
        return { success: true, data };
    }
    catch (error) {
        // Dump the offending tree for debugging before reporting the failure.
        write('error.json', JSON.stringify(mdastTree, undefined, 2));
        const errorMessage = getErrorMessage(error);
        return { success: false, message: `${urlStr}: ${MDAST_FAILURE_MSG}${errorMessage}` };
    }
}
102
+ //# sourceMappingURL=page.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"page.js","sourceRoot":"","sources":["../../src/scrapingPipeline/page.ts"],"names":[],"mappings":"AAEA,OAAO,SAAS,MAAM,YAAY,CAAC;AACnC,OAAO,SAAS,MAAM,YAAY,CAAC;AACnC,OAAO,eAAe,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAElC,OAAO,EAAE,wBAAwB,EAAE,MAAM,uBAAuB,CAAC;AACjE,OAAO,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AACzE,OAAO,EACL,aAAa,EACb,UAAU,EACV,eAAe,EACf,oBAAoB,EACpB,WAAW,EACX,eAAe,EACf,UAAU,EACV,eAAe,GAChB,MAAM,+BAA+B,CAAC;AACvC,OAAO,EAAE,8BAA8B,EAAE,MAAM,+BAA+B,CAAC;AAC/E,OAAO,EAAE,qBAAqB,EAAE,MAAM,kCAAkC,CAAC;AACzE,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAE1D,OAAO,EAAE,uBAAuB,EAAE,MAAM,uBAAuB,CAAC;AAChE,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,6BAA6B,CAAC;AACzE,OAAO,EAAE,4BAA4B,EAAE,MAAM,6BAA6B,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACtE,OAAO,EAAE,iCAAiC,EAAE,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AACtC,OAAO,EAAE,wBAAwB,EAAE,MAAM,yBAAyB,CAAC;AACnE,OAAO,EAAE,sBAAsB,EAAE,MAAM,sBAAsB,CAAC;AAC9D,OAAO,EAAE,kBAAkB,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAC9E,OAAO,EAAE,sBAAsB,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAChF,OAAO,EAAE,sBAAsB,EAAE,MAAM,aAAa,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAEvC,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,IAAY,EACZ,GAAiB,EACjB,OAII,EAAE,YAAY,EAAE,KAAK,EAAE;IAE3B,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IAEnB,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;QACtB,MAAM,QAAQ,GAAG,IAAI,CAAC;QACtB,MAAM,eAAe,GAAG,GAAG,QAAQ,MAAM,CAAC;QAC1C,SAAS,CAAC,eAAe,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC;QACvD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,QAAQ,CAAC,EAAE,CAAC;IAC7D,CAAC;IAED,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;IAE9B,IAAI,CAAC,SAAS,CAAC,MAAM;QAAE,eAAe,CAAC,IAAI,CAAC,CAAC;IAE7C,MAAM,MAAM,GAAG,GAAG,CAAC,QAAQ,EAAE,CAAC;IAC9B,MAAM,OAAO,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAC;IAC1C,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,GAAG,MAAM,KAAK,mBAAmB,EAAE,EAAE,CAAC;IAEtF,MAAM,aAAa,GAAa;QAC9B,
IAAI,EAAE,MAAM;QACZ,QAAQ,EAAE,CAAC,OAAO,CAAC;KACpB,CAAC;IAEF,MAAM,SAAS,GAAc,OAAO,EAAE;SACnC,GAAG,CAAC,UAAU,CAAC;SACf,GAAG,CAAC,eAAe,CAAC;SACpB,GAAG,CAAC,WAAW,CAAC;SAChB,GAAG,CAAC,UAAU,CAAC;SACf,GAAG,CAAC,aAAa,CAAC;SAClB,GAAG,CAAC,eAAe,CAAC;SACpB,GAAG,CAAC,oBAAoB,CAAC;SACzB,GAAG,CAAC,eAAe,CAAC;SACpB,GAAG,CAAC,uBAAuB,CAAC;SAC5B,GAAG,CAAC,sBAAsB,CAAC;SAC3B,GAAG,CAAC,4BAA4B,CAAC;SACjC,GAAG,CAAC,iCAAiC,CAAC;SACtC,GAAG,CAAC,qBAAqB,CAAC;QAE3B,iDAAiD;QACjD,mDAAmD;QACnD,4BAA4B;SAC3B,GAAG,CAAC,8BAA8B,CAAC;SACnC,GAAG,CAAC,wBAAwB,CAAC;SAC7B,GAAG,CAAC,wBAAwB,CAAC;SAC7B,OAAO,CAAC,aAAa,CAAc,CAAC;IAEvC,IAAI,CAAC;QACH,MAAM,YAAY,GAAG,MAAM,sBAAsB,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;QAClE,eAAe,CAAC,wBAAwB,GAAG,CAAC,QAAQ,EAAE,EAAE,EAAE,YAAY,CAAC,CAAC;IAC1E,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,GAAG,CAAC,yDAAyD,GAAG,CAAC,QAAQ,EAAE,GAAG,YAAY,EAAE,CAAC,CAAC;QAC9F,MAAM,KAAK,CAAC;IACd,CAAC;IAED,MAAM,KAAK,GAAG,mBAAmB,CAAC,SAAS,CAAC,CAAC;IAC7C,MAAM,WAAW,GAAG,sBAAsB,CAAC,SAAS,CAAC,CAAC;IAEtD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,OAAO,EAAE;aACrB,GAAG,CAAC,SAAS,CAAC;aACd,GAAG,CAAC,SAAS,CAAC;YACf,kGAAkG;aACjG,GAAG,CAAC,eAAe,CAAC;aACpB,SAAS,CAAC,SAAS,CAAC,CAAC;QAExB,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,GAAG,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;QAC3C,CAAC;aAAM,IAAI,GAAG,CAAC,MAAM,KAAK,mBAAmB,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC;YAC9D,GAAG,GAAG,IAAI,GAAG,CAAC,MAAM,EAAE,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC;QAC7C,CAAC;QAED,SAAS,CAAC,GAAG,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,EAAE,WAAW,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;QACtF,OAAO;YACL,OAAO,EAAE,IAAI;YACb,IAAI,EAAE,IAAI,CAAC,QAAQ;gBACjB,CAAC,CAAC,CAAC,kBAAkB,CAAC,mBAAmB,CAAC,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,IAAI,CAAC,QAAQ,CAAC;gBACpF,CAAC,CAAC,SAAS;SACd,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,KAAK,CAAC,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC;QAC7D,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,OAAO,EAAE,OAAO,EAAE,KA
AK,EAAE,OAAO,EAAE,GAAG,MAAM,KAAK,iBAAiB,GAAG,YAAY,EAAE,EAAE,CAAC;IACvF,CAAC;AACH,CAAC"}
@@ -0,0 +1,2 @@
1
+ import type { Root as HastRoot } from 'hast';
2
+ export declare function htmlToHast(html: string): HastRoot;
@@ -0,0 +1,8 @@
1
+ import rehypeParse from 'rehype-parse';
2
+ import { unified } from 'unified';
3
+ import { unifiedRemovePositions } from '../utils/position.js';
4
/**
 * Parse an HTML string into a syntax tree.
 *
 * The tree is produced by a unified processor configured with `rehypeParse`
 * followed by `unifiedRemovePositions`.
 *
 * @param {string} html - HTML markup to parse.
 * @returns The tree returned by the processor's `parse()` call.
 */
export function htmlToHast(html) {
    const processor = unified().use(rehypeParse).use(unifiedRemovePositions);
    // NOTE(review): `parse()` performs only the parse phase of a unified
    // processor. If `unifiedRemovePositions` is implemented as a transformer
    // it would not run here - confirm against utils/position.js.
    return processor.parse(html);
}
8
+ //# sourceMappingURL=root.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"root.js","sourceRoot":"","sources":["../../src/scrapingPipeline/root.ts"],"names":[],"mappings":"AACA,OAAO,WAAW,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAElC,OAAO,EAAE,sBAAsB,EAAE,MAAM,sBAAsB,CAAC;AAE9D,MAAM,UAAU,UAAU,CAAC,IAAY;IACrC,kGAAkG;IAClG,OAAO,OAAO,EAAE,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAC,KAAK,CAAC,IAAI,CAAa,CAAC;AACxF,CAAC"}
@@ -0,0 +1,7 @@
1
+ import { MintConfig, Tab } from '@mintlify/models';
2
+ import type { Root as HastRoot } from 'hast';
3
+ import type { Result } from '../types/result.js';
4
+ export declare function scrapeSite(html: string, url: string | URL, opts?: {
5
+ hast?: HastRoot;
6
+ tabs?: Array<Tab>;
7
+ }): Promise<Result<MintConfig>>;
@@ -0,0 +1,129 @@
1
+ import traverse from 'traverse';
2
+ import { NAV_FAILURE_MSG } from '../constants.js';
3
+ import { iterateOverNavItems } from '../nav/iterate.js';
4
+ import { retrieveNavItems } from '../nav/retrieve.js';
5
+ import { retrieveRootNavElement } from '../nav/root.js';
6
+ import { detectFramework, framework } from '../utils/detectFramework.js';
7
+ import { logErrorResults } from '../utils/errors.js';
8
+ import { startPuppeteer } from '../utils/network.js';
9
+ import { INDEX_NAMES, iterateThroughReservedNames } from '../utils/reservedNames.js';
10
+ import { removeTrailingSlash } from '../utils/strings.js';
11
+ import { scrapePageGroup } from './group.js';
12
+ import { downloadFavicon } from './icon.js';
13
+ import { downloadLogos } from './logo.js';
14
+ import { htmlToHast } from './root.js';
15
/**
 * Scrape a documentation site's navigation and build a config object from it.
 *
 * Steps, in order:
 *   1. Parse `html` (unless `opts.hast` is supplied) and detect the framework.
 *   2. Locate the root navigation element and extract its nav items.
 *   3. Split the discovered links into external, internal and root links and
 *      scrape each group with `scrapePageGroup`.
 *   4. Rewrite nav entries using the replacement pairs returned for external
 *      and root links, strip the `/mintie_overview` marker, and wrap top-level
 *      string entries into single-page groups.
 *   5. Download the favicon and logos and return the assembled config.
 *
 * @param {string} html - Raw HTML of the page to scrape.
 * @param {string | URL} url - Address of the page; only its origin is used to
 *   classify links.
 * @param {{ hast?: object, tabs?: Array<object> }} [opts] - Optional
 *   pre-parsed tree and tabs to copy into the resulting config.
 * @returns {Promise<object>} `{ success: true, data }` on success, otherwise
 *   `{ success: false, message }`. Errors thrown while scraping are converted
 *   into failure results rather than rejected.
 */
export async function scrapeSite(html, url, opts = {}) {
    const hast = opts.hast ?? htmlToHast(html);

    // An unparsable URL is reported through the result object, like every
    // other failure of this function, instead of rejecting the promise.
    let siteUrl;
    try {
        siteUrl = new URL(url);
    }
    catch {
        return { success: false, message: `invalid URL provided to scrape site: ${url}` };
    }
    const origin = siteUrl.origin;

    detectFramework(hast);
    const sidebar = retrieveRootNavElement(hast);
    if (!sidebar) {
        return { success: false, message: `${siteUrl.toString()}: ${NAV_FAILURE_MSG}` };
    }
    const navItems = retrieveNavItems(sidebar);

    // `URL#origin` is never the empty string: URLs without a usable origin
    // (e.g. `file:` or `data:`) serialize it as the literal string "null".
    if (origin === '' || origin === 'null') {
        return { success: false, message: `invalid URL provided to scrape site: ${siteUrl}` };
    }

    const listOfLinks = iterateOverNavItems(navItems, origin);
    if (listOfLinks.length === 0) {
        return { success: false, message: `no navigation links were able to be found: ${siteUrl}` };
    }

    const needsBrowser = framework.vendor === 'gitbook';

    // A "root" link is a same-origin link that points at the origin itself.
    const pointsAtOrigin = (link) => removeTrailingSlash(link.toString()) === origin;
    const externalLinks = listOfLinks.filter((link) => link.origin !== origin);
    const internalLinks = listOfLinks.filter((link) => link.origin === origin && !pointsAtOrigin(link));
    const rootLinks = listOfLinks.filter((link) => link.origin === origin && pointsAtOrigin(link));

    // Every root link gets its own reserved name; each chosen name is appended
    // to the list handed to the next lookup.
    const allPathnames = [
        ...internalLinks.map((link) => link.toString()),
        ...rootLinks.map((link) => link.toString()),
    ];
    const rootPaths = rootLinks.map(() => {
        const name = iterateThroughReservedNames(INDEX_NAMES, allPathnames);
        allPathnames.push(name);
        return name;
    });

    try {
        const externalResults = await scrapePageGroup(externalLinks, needsBrowser, {
            externalLinks: true,
        });
        const internalResults = await scrapePageGroup(internalLinks, needsBrowser);
        const rootResults = await scrapePageGroup(rootLinks, needsBrowser, {
            externalLinks: false,
            rootPaths,
        });

        // Successful results carry a pair in `data`, used as a Map entry
        // (original nav value -> replacement).
        const toReplaceMap = (results) => new Map(results.filter((result) => result.success).map((result) => result.data));
        const externalLinkReplaceMap = toReplaceMap(externalResults);
        const rootPathReplaceMap = toReplaceMap(rootResults);

        // `traverse` binds `this` to the visit context, so a regular function
        // (not an arrow) is required for `this.update`.
        traverse(navItems).forEach(function (value) {
            if (typeof value === 'string') {
                if (externalLinkReplaceMap.has(value)) {
                    this.update(externalLinkReplaceMap.get(value) ?? value);
                }
                else if (rootPathReplaceMap.has(value)) {
                    this.update(rootPathReplaceMap.get(value) ?? value);
                }
            }
            else if (Array.isArray(value)) {
                // `some` rather than `find`: only a boolean is needed, and
                // `find` would report "no match" for a matching falsy item.
                if (value.some((item) => externalLinkReplaceMap.has(item))) {
                    this.update(value.map((item) => externalLinkReplaceMap.get(item) ?? item));
                }
                else if (value.some((item) => rootPathReplaceMap.has(item))) {
                    this.update(value.map((item) => rootPathReplaceMap.get(item) ?? item));
                }
            }
        });

        // Remove the first `/mintie_overview` occurrence from every string entry.
        traverse(navItems).forEach(function (value) {
            if (typeof value === 'string') {
                this.update(value.replace('/mintie_overview', ''));
            }
            else if (Array.isArray(value)) {
                this.update(value.map((item) => typeof item === 'string' ? item.replace('/mintie_overview', '') : item));
            }
        });

        // Top-level string entries become single-page groups whose title is
        // the entry split on '-' with each part capitalized.
        navItems.forEach((navItem, index) => {
            if (typeof navItem !== 'string')
                return;
            const name = navItem
                .split('-')
                .map((part) => (part[0] ? `${part[0].toUpperCase()}${part.substring(1)}` : part))
                .join(' ');
            navItems[index] = {
                group: name,
                pages: [navItem],
            };
        });

        logErrorResults('linking to external pages', externalResults);
        logErrorResults('scraping your docs', [...internalResults, ...rootResults]);

        // NOTE(review): the browser started here is never closed in this
        // function - confirm whether `downloadLogos` (or the caller) owns its
        // lifetime; otherwise it should be closed in a `finally`.
        const browser = needsBrowser ? await startPuppeteer() : undefined;
        const favicon = await downloadFavicon(hast);
        const logo = await downloadLogos(siteUrl, browser);

        return {
            success: true,
            data: {
                $schema: 'https://mintlify.com/schema.json',
                name: '',
                logo,
                colors: {
                    primary: '',
                },
                favicon: favicon ?? '',
                navigation: navItems,
                tabs: opts.tabs,
            },
        };
    }
    catch (error) {
        if (error instanceof Error) {
            return { success: false, message: error.message };
        }
        return {
            success: false,
            message: 'An unknown error occurred when scraping this site. Please try again.',
        };
    }
}
129
+ //# sourceMappingURL=site.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"site.js","sourceRoot":"","sources":["../../src/scrapingPipeline/site.ts"],"names":[],"mappings":"AAEA,OAAO,QAAQ,MAAM,UAAU,CAAC;AAEhC,OAAO,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAClD,OAAO,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,EAAE,sBAAsB,EAAE,MAAM,gBAAgB,CAAC;AAExD,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,6BAA6B,CAAC;AACzE,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,WAAW,EAAE,2BAA2B,EAAE,MAAM,2BAA2B,CAAC;AACrF,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAC1D,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAEvC,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,IAAY,EACZ,GAAiB,EACjB,OAA+C,EAAE;IAEjD,IAAI,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;IACrB,IAAI,CAAC,IAAI;QAAE,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;IAEnC,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IACnB,MAAM,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC;IAE1B,eAAe,CAAC,IAAI,CAAC,CAAC;IAEtB,MAAM,OAAO,GAAG,sBAAsB,CAAC,IAAI,CAAC,CAAC;IAC7C,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,GAAG,GAAG,CAAC,QAAQ,EAAE,KAAK,eAAe,EAAE,EAAE,CAAC;IAE1F,MAAM,QAAQ,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;IAE3C,IAAI,MAAM,KAAK,EAAE,EAAE,CAAC;QAClB,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,wCAAwC,GAAG,EAAE,EAAE,CAAC;IACpF,CAAC;IAED,MAAM,WAAW,GAAG,mBAAmB,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAC1D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7B,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,8CAA8C,GAAG,EAAE,EAAE,CAAC;IAC1F,CAAC;IAED,MAAM,YAAY,GAAG,SAAS,CAAC,MAAM,KAAK,SAAS,CAAC;IAEpD,MAAM,aAAa,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC;IACzE,MAAM,aAAa,GAAG,WAAW,CAAC,MAAM,CACtC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,KAAK,MAAM,IAAI,mBAAmB,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,KAAK,MAAM,CACjF,CAAC;IACF,MAAM,SAAS,GAAG,WAAW,CAAC,MAAM,CAClC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,KAAK,MAAM,IAAI,mBAAmB,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,KAAK,MAAM,CACjF,CAAC;
IAEF,MAAM,YAAY,GAAG;QACnB,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;QAC7C,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;KAC1C,CAAC;IACF,MAAM,SAAS,GAAG,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE;QACnC,MAAM,IAAI,GAAG,2BAA2B,CAAC,WAAW,EAAE,YAAY,CAAC,CAAC;QACpE,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxB,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC;QACH,MAAM,eAAe,GAAG,MAAM,eAAe,CAAC,aAAa,EAAE,YAAY,EAAE;YACzE,aAAa,EAAE,IAAI;SACpB,CAAC,CAAC;QACH,MAAM,eAAe,GAAG,MAAM,eAAe,CAAC,aAAa,EAAE,YAAY,CAAC,CAAC;QAC3E,MAAM,WAAW,GAAG,MAAM,eAAe,CAAC,SAAS,EAAE,YAAY,EAAE;YACjE,aAAa,EAAE,KAAK;YACpB,SAAS;SACV,CAAC,CAAC;QAEH,MAAM,sBAAsB,GAAG,IAAI,GAAG,CACpC,eAAe;aACZ,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC;aAClC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,IAAwB,CAAC,CACpD,CAAC;QAEF,MAAM,kBAAkB,GAAG,IAAI,GAAG,CAChC,WAAW;aACR,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC;aAClC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,IAAwB,CAAC,CACpD,CAAC;QAEF,QAAQ,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,UAAU,KAAK;YACxC,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;gBAC9B,IAAI,sBAAsB,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;oBACtC,IAAI,CAAC,MAAM,CAAC,sBAAsB,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC;gBAC1D,CAAC;qBAAM,IAAI,kBAAkB,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;oBACzC,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC;gBACtD,CAAC;YACH,CAAC;iBAAM,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;gBAChC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,sBAAsB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;oBAC3D,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,sBAAsB,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC;gBAC7E,CAAC;qBAAM,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,kBAAkB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;oBAC9D,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,kBAAkB,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC;gBACzE,CAAC;YACH,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,QAAQ,CAAC,QAAQ,CAAC,CAAC,OAAO,
CAAC,UAAU,KAAK;YACxC,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;gBAC9B,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC,CAAC;YACrD,CAAC;iBAAM,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;gBAChC,IAAI,CAAC,MAAM,CACT,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CACjB,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CACvE,CACF,CAAC;YACJ,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,QAAQ,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,KAAK,EAAE,EAAE;YAClC,IAAI,OAAO,OAAO,KAAK,QAAQ;gBAAE,OAAO;YACxC,MAAM,IAAI,GAAG,OAAO;iBACjB,KAAK,CAAC,GAAG,CAAC;iBACV,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;iBAC3E,IAAI,CAAC,GAAG,CAAC,CAAC;YAEb,QAAQ,CAAC,KAAK,CAAC,GAAG;gBAChB,KAAK,EAAE,IAAI;gBACX,KAAK,EAAE,CAAC,OAAO,CAAC;aACjB,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,eAAe,CAAC,2BAA2B,EAAE,eAAe,CAAC,CAAC;QAC9D,eAAe,CAAC,oBAAoB,EAAE,CAAC,GAAG,eAAe,EAAE,GAAG,WAAW,CAAC,CAAC,CAAC;QAE5E,MAAM,OAAO,GAAG,YAAY,CAAC,CAAC,CAAC,MAAM,cAAc,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;QAElE,MAAM,OAAO,GAAG,MAAM,eAAe,CAAC,IAAI,CAAC,CAAC;QAC5C,MAAM,IAAI,GAAG,MAAM,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QAE/C,OAAO;YACL,OAAO,EAAE,IAAI;YACb,IAAI,EAAE;gBACJ,OAAO,EAAE,kCAAkC;gBAC3C,IAAI,EAAE,EAAE;gBACR,IAAI;gBACJ,MAAM,EAAE;oBACN,OAAO,EAAE,EAAE;iBACZ;gBACD,OAAO,EAAE,OAAO,IAAI,EAAE;gBACtB,UAAU,EAAE,QAAsB;gBAClC,IAAI,EAAE,IAAI,CAAC,IAAI;aAChB;SACF,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,CAAC,OAAO,EAAE,CAAC;QACpD,CAAC;QACD,OAAO;YACL,OAAO,EAAE,KAAK;YACd,OAAO,EAAE,sEAAsE;SAChF,CAAC;IACJ,CAAC;AACH,CAAC"}
@@ -0,0 +1,3 @@
1
+ import { MintConfig } from '@mintlify/models';
2
+ import type { Result } from '../types/result.js';
3
+ export declare function scrapeAllSiteTabs(html: string, url: string | URL): Promise<Result<MintConfig>>;
@@ -0,0 +1,67 @@
1
+ import { retrieveTabLinks } from '../tabs/retrieveReadme.js';
2
+ import { detectFramework, framework } from '../utils/detectFramework.js';
3
+ import { log } from '../utils/log.js';
4
+ import { fetchPageHtml, startPuppeteer } from '../utils/network.js';
5
+ import { getTitleFromLink } from '../utils/title.js';
6
+ import { downloadFavicon } from './icon.js';
7
+ import { downloadLogos } from './logo.js';
8
+ import { htmlToHast } from './root.js';
9
+ import { scrapeSite } from './site.js';
10
/**
 * Scrape a documentation site, following its top-level tabs when the detected
 * framework is `readme` or `docusaurus`.
 *
 * When no tab links are found (or the only tab is the current page), or for
 * any other framework, the work is delegated to `scrapeSite` for the single
 * page. Otherwise every tab is fetched and scraped in parallel and the
 * resulting navigations and tabs are merged into one config.
 *
 * @param {string} html - Raw HTML of the entry page.
 * @param {string | URL} url - Address of the entry page.
 * @returns {Promise<object>} `{ success: true, data }` with the merged config,
 *   or `{ success: false, message }` when the URL is invalid or no tab could
 *   be scraped.
 */
export async function scrapeAllSiteTabs(html, url) {
    const hast = htmlToHast(html);

    let pageUrl;
    try {
        pageUrl = new URL(url);
    }
    catch {
        return { success: false, message: `invalid URL provided to scrape site: ${url}` };
    }

    detectFramework(hast);
    const supportsTabs = framework.vendor === 'readme' || framework.vendor === 'docusaurus';
    const foundLinks = supportsTabs ? retrieveTabLinks(hast) : undefined;
    const onlyCurrentPage = foundLinks?.length === 1 && foundLinks[0]?.url === pageUrl.pathname;

    // Single-page path: `scrapeSite` downloads the favicon/logos (and starts
    // a browser when needed) itself, so nothing is fetched here beforehand.
    if (!foundLinks || foundLinks.length === 0 || onlyCurrentPage) {
        return scrapeSite(html, pageUrl, { hast });
    }

    // Make sure the page we started from is represented by a tab.
    const tabLinks = [...foundLinks];
    if (!tabLinks.some((link) => pageUrl.pathname.startsWith(link.url))) {
        tabLinks.push({
            name: getTitleFromLink(pageUrl.pathname),
            url: pageUrl.pathname,
        });
    }

    // Only the `gitbook` vendor needs a browser, and this branch is reached
    // solely for `readme`/`docusaurus`, so no browser is started.
    const favicon = await downloadFavicon(hast);
    const logo = await downloadLogos(pageUrl, undefined);

    const results = await Promise.all(tabLinks.map(async (tabEntry) => {
        const tabUrl = new URL(pageUrl);
        tabUrl.pathname = tabEntry.url;
        try {
            const tabHtml = await fetchPageHtml(tabUrl, undefined);
            return await scrapeSite(tabHtml, tabUrl, { tabs: [tabEntry] });
        }
        catch (error) {
            // A tab that cannot be fetched must not abort the other tabs;
            // report it as a failed result so it is logged below.
            const reason = error instanceof Error ? error.message : String(error);
            return { success: false, message: `${tabUrl.toString()}: ${reason}` };
        }
    }));

    const navigations = [];
    const tabs = [];
    const failureMessages = [];
    let successCount = 0;
    for (const result of results) {
        if (!result.success) {
            failureMessages.push(result.message);
            log(`Failed to scrape tab: ${result.message}`);
            continue;
        }
        successCount += 1;
        if (!result.data)
            continue;
        navigations.push(...result.data.navigation);
        if (result.data.tabs)
            tabs.push(...result.data.tabs);
    }

    // Reporting success with nothing scraped would hide the failures above.
    if (successCount === 0) {
        return {
            success: false,
            message: `failed to scrape any tab of ${pageUrl.toString()}: ${failureMessages.join('; ')}`,
        };
    }

    return {
        success: true,
        data: {
            $schema: 'https://mintlify.com/schema.json',
            name: '',
            logo,
            colors: {
                primary: '',
            },
            favicon: favicon ?? '',
            navigation: navigations,
            tabs,
        },
    };
}
67
+ //# sourceMappingURL=tabs.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tabs.js","sourceRoot":"","sources":["../../src/scrapingPipeline/tabs.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAE7D,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,6BAA6B,CAAC;AACzE,OAAO,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AACtC,OAAO,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACpE,OAAO,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AACvC,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAEvC,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,IAAY,EACZ,GAAiB;IAEjB,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;IAC9B,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IAEnB,eAAe,CAAC,IAAI,CAAC,CAAC;IAEtB,MAAM,YAAY,GAAG,SAAS,CAAC,MAAM,KAAK,SAAS,CAAC;IACpD,MAAM,OAAO,GAAG,YAAY,CAAC,CAAC,CAAC,MAAM,cAAc,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IAElE,MAAM,OAAO,GAAG,MAAM,eAAe,CAAC,IAAI,CAAC,CAAC;IAC5C,MAAM,IAAI,GAAG,MAAM,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IAE/C,IAAI,SAAS,CAAC,MAAM,KAAK,QAAQ,IAAI,SAAS,CAAC,MAAM,KAAK,YAAY,EAAE,CAAC;QACvE,MAAM,KAAK,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;QACrC,IACE,CAAC,KAAK;YACN,CAAC,KAAK,CAAC,MAAM;YACb,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,KAAK,GAAG,CAAC,QAAQ,CAAC;YAEjE,OAAO,UAAU,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC;QAEzC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAC7D,KAAK,CAAC,IAAI,CAAC;gBACT,IAAI,EAAE,gBAAgB,CAAC,GAAG,CAAC,QAAQ,CAAC;gBACpC,GAAG,EAAE,GAAG,CAAC,QAAQ;aAClB,CAAC,CAAC;QACL,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAC/B,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE;YAC3B,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;YAC5B,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC,GAAG,CAAC;YAC/B,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;YACvD,OAAO,MAAM,UAAU,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QACjE,CAAC,CAAC,CACH,CAAC;QAEF,MAAM,WAAW,GAA2B,EAAE,CAAC;QAC/C,MAAM,IAAI,GAAe,EAAE,CAAC;QAE5B,MAAM,SAA
S,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC7D,SAAS,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,EAAE;YAC3B,IAAI,CAAC,MAAM,CAAC,IAAI;gBAAE,OAAO;YACzB,WAAW,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAC5C,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI;gBAAE,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvD,CAAC,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC7D,QAAQ,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,EAAE;YAC1B,GAAG,CAAC,wBAAwB,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;QAEH,OAAO;YACL,OAAO,EAAE,IAAI;YACb,IAAI,EAAE;gBACJ,OAAO,EAAE,kCAAkC;gBAC3C,IAAI,EAAE,EAAE;gBACR,IAAI;gBACJ,MAAM,EAAE;oBACN,OAAO,EAAE,EAAE;iBACZ;gBACD,OAAO,EAAE,OAAO,IAAI,EAAE;gBACtB,UAAU,EAAE,WAAyB;gBACrC,IAAI;aACL;SACF,CAAC;IACJ,CAAC;IAED,OAAO,UAAU,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC;AACzC,CAAC"}
@@ -0,0 +1,3 @@
1
+ import { Tab } from '@mintlify/models';
2
+ import type { Root as HastRoot } from 'hast';
3
+ export declare function retrieveTabLinks(rootNode: HastRoot): Array<Tab> | undefined;
@@ -0,0 +1,78 @@
1
+ import { visit, EXIT, CONTINUE } from 'unist-util-visit';
2
+ import { framework } from '../utils/detectFramework.js';
3
+ import { findTitle, getTitleFromLink } from '../utils/title.js';
4
/**
 * Collect the top-level tab links from a page's header.
 *
 * Only the `readme` and `docusaurus` vendors are handled:
 *   - readme: the header is `header.rm-Header`; links are taken from every
 *     `nav` element and every `div.rm-Header-right` inside it.
 *   - docusaurus: the header is `nav.navbar`; links are taken from `nav`
 *     elements, and only anchors whose parent element has exactly the single
 *     class `navbar__items` are kept.
 *
 * Anchors without a non-empty string `href`, or whose `href` starts with
 * `http`, are ignored.
 *
 * @param {object} rootNode - Root of the parsed page tree.
 * @returns {Array<{ name: string, url: string }> | undefined} The links in
 *   document order, or `undefined` when the vendor is not supported or no
 *   header element is found.
 */
export function retrieveTabLinks(rootNode) {
    const vendor = framework.vendor;
    if (vendor !== 'readme' && vendor !== 'docusaurus')
        return undefined;
    const isReadme = vendor === 'readme';

    const hasClass = (node, className) => Array.isArray(node.properties.className) && node.properties.className.includes(className);

    // Locate the first element that acts as the header for this vendor.
    const isHeader = isReadme
        ? (node) => node.tagName === 'header' && hasClass(node, 'rm-Header')
        : (node) => node.tagName === 'nav' && hasClass(node, 'navbar');
    let element = undefined;
    visit(rootNode, 'element', (node) => {
        if (!isHeader(node))
            return CONTINUE;
        element = node;
        return EXIT;
    });
    if (!element)
        return undefined;

    // Elements whose descendant anchors are candidate tab links.
    const isLinkContainer = isReadme
        ? (node) => node.tagName === 'nav' || (node.tagName === 'div' && hasClass(node, 'rm-Header-right'))
        : (node) => node.tagName === 'nav';

    const isLocalAnchor = (node) => node.tagName === 'a' &&
        typeof node.properties.href === 'string' &&
        node.properties.href !== '' &&
        !node.properties.href.startsWith('http');

    // docusaurus only: the anchor must sit directly inside an element whose
    // class list is exactly ['navbar__items'] (this already excludes lists
    // that also carry 'navbar__items--right').
    const isInTabBar = (parent) => Boolean(parent) &&
        parent.type === 'element' &&
        Array.isArray(parent.properties.className) &&
        parent.properties.className.length === 1 &&
        parent.properties.className[0] === 'navbar__items';

    const links = [];
    // The outer walk keeps descending after a container matched, so a
    // container nested inside another one is walked again; remembering the
    // anchors already collected prevents duplicate tab entries.
    const collected = new Set();
    visit(element, 'element', (container) => {
        if (!isLinkContainer(container))
            return CONTINUE;
        visit(container, 'element', (anchor, _index, parent) => {
            if (!isLocalAnchor(anchor) || collected.has(anchor))
                return CONTINUE;
            if (!isReadme && !isInTabBar(parent))
                return CONTINUE;
            collected.add(anchor);
            const href = anchor.properties.href;
            links.push({
                name: findTitle(anchor) || getTitleFromLink(href),
                url: href,
            });
            return CONTINUE;
        });
        return CONTINUE;
    });
    return links;
}
78
+ //# sourceMappingURL=retrieveReadme.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"retrieveReadme.js","sourceRoot":"","sources":["../../src/tabs/retrieveReadme.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAEzD,OAAO,EAAE,SAAS,EAAE,MAAM,6BAA6B,CAAC;AACxD,OAAO,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAEhE,MAAM,UAAU,gBAAgB,CAAC,QAAkB;IACjD,IAAI,SAAS,CAAC,MAAM,KAAK,QAAQ,IAAI,SAAS,CAAC,MAAM,KAAK,YAAY;QAAE,OAAO,SAAS,CAAC;IAEzF,IAAI,OAAO,GAAwB,SAAS,CAAC;IAC7C,KAAK,CAAC,QAAQ,EAAE,SAAS,EAAE,UAAU,IAAI;QACvC,IAAI,SAAS,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;YAClC,IACE,IAAI,CAAC,OAAO,KAAK,QAAQ;gBACzB,IAAI,CAAC,UAAU,CAAC,SAAS;gBACzB,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;gBACxC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,WAAW,CAAC,EAC/C,CAAC;gBACD,OAAO,GAAG,IAAI,CAAC;gBACf,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;QAED,IAAI,SAAS,CAAC,MAAM,KAAK,YAAY,EAAE,CAAC;YACtC,IACE,IAAI,CAAC,OAAO,KAAK,KAAK;gBACtB,IAAI,CAAC,UAAU,CAAC,SAAS;gBACzB,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;gBACxC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAC5C,CAAC;gBACD,OAAO,GAAG,IAAI,CAAC;gBACf,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,uEAAuE;IACvE,IAAI,CAAC,OAAO;QAAE,OAAO,SAAS,CAAC;IAE/B,MAAM,KAAK,GAAe,EAAE,CAAC;IAC7B,KAAK,CAAC,OAAkB,EAAE,SAAS,EAAE,UAAU,IAAI;QACjD,IAAI,SAAS,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;YAClC,IACE,IAAI,CAAC,OAAO,KAAK,KAAK;gBACtB,CAAC,CACC,IAAI,CAAC,OAAO,KAAK,KAAK;oBACtB,IAAI,CAAC,UAAU,CAAC,SAAS;oBACzB,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;oBACxC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CACtD;gBAED,OAAO,QAAQ,CAAC;YAElB,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,OAAO;gBACtC,IACE,OAAO,CAAC,OAAO,KAAK,GAAG;oBACvB,CAAC,OAAO,CAAC,UAAU,CAAC,IAAI;oBACxB,OAAO,OAAO,CAAC,UAAU,CAAC,IAAI,KAAK,QAAQ;oBAC3C,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC;oBAE1C,OAAO,QAAQ,CAAC;gBAClB,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;gBACjC,KAAK,CAAC,IAAI,CAAC;oBACT,IAAI,EAAE,KAAK,IAAI,gBAAgB,CAAC,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;oBACxD,GAAG,EAAE,OAAO,CAAC,UAAU,CAAC,IAAI;iBAC7B,CAAC,CAAC;YACL,C
AAC,CAAC,CAAC;QACL,CAAC;QAED,IAAI,SAAS,CAAC,MAAM,KAAK,YAAY,EAAE,CAAC;YACtC,IAAI,IAAI,CAAC,OAAO,KAAK,KAAK;gBAAE,OAAO,QAAQ,CAAC;YAE5C,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,OAAO,EAAE,CAAC,EAAE,MAAM;gBACjD,IACE,OAAO,CAAC,OAAO,KAAK,GAAG;oBACvB,CAAC,OAAO,CAAC,UAAU,CAAC,IAAI;oBACxB,OAAO,OAAO,CAAC,UAAU,CAAC,IAAI,KAAK,QAAQ;oBAC3C,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC;oBAC1C,CAAC,MAAM;oBACP,MAAM,CAAC,IAAI,KAAK,SAAS;oBACzB,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC;oBAC3C,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,MAAM,KAAK,CAAC;oBACxC,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,eAAe;oBAClD,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,sBAAsB,CAAC;oBAE5D,OAAO,QAAQ,CAAC;gBAElB,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;gBACjC,KAAK,CAAC,IAAI,CAAC;oBACT,IAAI,EAAE,KAAK,IAAI,gBAAgB,CAAC,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;oBACxD,GAAG,EAAE,OAAO,CAAC,UAAU,CAAC,IAAI;iBAC7B,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC;AACf,CAAC"}