mintlify 3.0.7 → 3.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/bin/browser.js +24 -0
  2. package/bin/browser.js.map +1 -0
  3. package/bin/constants.js +32 -0
  4. package/bin/constants.js.map +1 -0
  5. package/bin/downloadImage.js +85 -0
  6. package/bin/downloadImage.js.map +1 -0
  7. package/bin/index.js +49 -0
  8. package/bin/index.js.map +1 -0
  9. package/bin/local-preview/helper-commands/installDepsCommand.js +12 -0
  10. package/bin/local-preview/helper-commands/installDepsCommand.js.map +1 -0
  11. package/bin/local-preview/index.js +154 -0
  12. package/bin/local-preview/index.js.map +1 -0
  13. package/bin/local-preview/listener/categorize.js +95 -0
  14. package/bin/local-preview/listener/categorize.js.map +1 -0
  15. package/bin/local-preview/listener/categorizeFiles.js +47 -0
  16. package/bin/local-preview/listener/categorizeFiles.js.map +1 -0
  17. package/bin/local-preview/listener/generate.js +89 -0
  18. package/bin/local-preview/listener/generate.js.map +1 -0
  19. package/bin/local-preview/listener/index.js +200 -0
  20. package/bin/local-preview/listener/index.js.map +1 -0
  21. package/bin/local-preview/listener/update.js +24 -0
  22. package/bin/local-preview/listener/update.js.map +1 -0
  23. package/bin/local-preview/listener/utils/createPage.js +167 -0
  24. package/bin/local-preview/listener/utils/createPage.js.map +1 -0
  25. package/bin/local-preview/listener/utils/fileIsMdxOrMd.js +12 -0
  26. package/bin/local-preview/listener/utils/fileIsMdxOrMd.js.map +1 -0
  27. package/bin/local-preview/listener/utils/getOpenApiContext.js +57 -0
  28. package/bin/local-preview/listener/utils/getOpenApiContext.js.map +1 -0
  29. package/bin/local-preview/listener/utils/mintConfigFile.js +22 -0
  30. package/bin/local-preview/listener/utils/mintConfigFile.js.map +1 -0
  31. package/bin/local-preview/listener/utils/toTitleCase.js +36 -0
  32. package/bin/local-preview/listener/utils/toTitleCase.js.map +1 -0
  33. package/bin/local-preview/listener/utils/types.js +2 -0
  34. package/bin/local-preview/listener/utils/types.js.map +1 -0
  35. package/bin/local-preview/listener/utils.js +67 -0
  36. package/bin/local-preview/listener/utils.js.map +1 -0
  37. package/bin/local-preview/utils/categorizeFiles.js +63 -0
  38. package/bin/local-preview/utils/categorizeFiles.js.map +1 -0
  39. package/bin/local-preview/utils/getOpenApiContext.js +58 -0
  40. package/bin/local-preview/utils/getOpenApiContext.js.map +1 -0
  41. package/bin/local-preview/utils/injectFavicons.js +72 -0
  42. package/bin/local-preview/utils/injectFavicons.js.map +1 -0
  43. package/bin/local-preview/utils/listener.js +116 -0
  44. package/bin/local-preview/utils/listener.js.map +1 -0
  45. package/bin/local-preview/utils/metadata.js +118 -0
  46. package/bin/local-preview/utils/metadata.js.map +1 -0
  47. package/bin/local-preview/utils/mintConfigFile.js +43 -0
  48. package/bin/local-preview/utils/mintConfigFile.js.map +1 -0
  49. package/bin/local-preview/utils/openApiCheck.js +15 -0
  50. package/bin/local-preview/utils/openApiCheck.js.map +1 -0
  51. package/bin/local-preview/utils/slugToTitle.js +8 -0
  52. package/bin/local-preview/utils/slugToTitle.js.map +1 -0
  53. package/bin/navigation.js +4 -0
  54. package/bin/navigation.js.map +1 -0
  55. package/bin/pageTemplate.js +30 -0
  56. package/bin/pageTemplate.js.map +1 -0
  57. package/bin/scraping/combineNavWithEmptyGroupTitles.js +20 -0
  58. package/bin/scraping/combineNavWithEmptyGroupTitles.js.map +1 -0
  59. package/bin/scraping/detectFramework.js +39 -0
  60. package/bin/scraping/detectFramework.js.map +1 -0
  61. package/bin/scraping/downloadAllImages.js +33 -0
  62. package/bin/scraping/downloadAllImages.js.map +1 -0
  63. package/bin/scraping/downloadLogoImage.js +13 -0
  64. package/bin/scraping/downloadLogoImage.js.map +1 -0
  65. package/bin/scraping/getSitemapLinks.js +18 -0
  66. package/bin/scraping/getSitemapLinks.js.map +1 -0
  67. package/bin/scraping/replaceImagePaths.js +17 -0
  68. package/bin/scraping/replaceImagePaths.js.map +1 -0
  69. package/bin/scraping/scrapeFileGettingFileNameFromUrl.js +43 -0
  70. package/bin/scraping/scrapeFileGettingFileNameFromUrl.js.map +1 -0
  71. package/bin/scraping/scrapeGettingFileNameFromUrl.js +13 -0
  72. package/bin/scraping/scrapeGettingFileNameFromUrl.js.map +1 -0
  73. package/bin/scraping/scrapePage.js +10 -0
  74. package/bin/scraping/scrapePage.js.map +1 -0
  75. package/bin/scraping/scrapePageCommands.js +55 -0
  76. package/bin/scraping/scrapePageCommands.js.map +1 -0
  77. package/bin/scraping/scrapeSection.js +12 -0
  78. package/bin/scraping/scrapeSection.js.map +1 -0
  79. package/bin/scraping/scrapeSectionCommands.js +66 -0
  80. package/bin/scraping/scrapeSectionCommands.js.map +1 -0
  81. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js +27 -0
  82. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js.map +1 -0
  83. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js +32 -0
  84. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js.map +1 -0
  85. package/bin/scraping/site-scrapers/alternateGroupTitle.js +9 -0
  86. package/bin/scraping/site-scrapers/alternateGroupTitle.js.map +1 -0
  87. package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js +34 -0
  88. package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js.map +1 -0
  89. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js +38 -0
  90. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js.map +1 -0
  91. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js +38 -0
  92. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js.map +1 -0
  93. package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js +30 -0
  94. package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js.map +1 -0
  95. package/bin/scraping/site-scrapers/openNestedGitbookMenus.js +17 -0
  96. package/bin/scraping/site-scrapers/openNestedGitbookMenus.js.map +1 -0
  97. package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js +49 -0
  98. package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js.map +1 -0
  99. package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js +30 -0
  100. package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js.map +1 -0
  101. package/bin/scraping/site-scrapers/scrapeGitBookPage.js +47 -0
  102. package/bin/scraping/site-scrapers/scrapeGitBookPage.js.map +1 -0
  103. package/bin/scraping/site-scrapers/scrapeGitBookSection.js +52 -0
  104. package/bin/scraping/site-scrapers/scrapeGitBookSection.js.map +1 -0
  105. package/bin/scraping/site-scrapers/scrapeReadMePage.js +36 -0
  106. package/bin/scraping/site-scrapers/scrapeReadMePage.js.map +1 -0
  107. package/bin/scraping/site-scrapers/scrapeReadMeSection.js +44 -0
  108. package/bin/scraping/site-scrapers/scrapeReadMeSection.js.map +1 -0
  109. package/bin/util.js +129 -0
  110. package/bin/util.js.map +1 -0
  111. package/bin/validation/isValidLink.js +11 -0
  112. package/bin/validation/isValidLink.js.map +1 -0
  113. package/bin/validation/stopIfInvalidLink.js +9 -0
  114. package/bin/validation/stopIfInvalidLink.js.map +1 -0
  115. package/package.json +2 -2
@@ -0,0 +1,30 @@
1
/**
 * Repeatedly clicks collapsible Docusaurus sidebar entries inside the browser
 * page until no new internal links are discovered, then returns the fully
 * expanded page HTML.
 *
 * @param page - Puppeteer-style page exposing `evaluate` and `content`.
 * @returns the page HTML after all nested menus have been opened.
 */
export default async function openNestedDocusaurusMenus(page) {
    let prevEncountered = [];
    let encounteredHref = ["fake-href-to-make-loop-run-at-least-once"];
    // Loop until we've encountered every link
    while (!encounteredHref.every((href) => prevEncountered.includes(href))) {
        prevEncountered = encounteredHref;
        encounteredHref = await page.evaluate((encounteredHref) => {
            const collapsible = Array.from(document.querySelectorAll(".menu__link.menu__link--sublist"));
            const linksFound = [];
            // Synchronous callback on purpose: click() returns void, and an async
            // callback inside forEach is never awaited, so the original `async`
            // here was a no-op anti-pattern.
            collapsible.forEach((collapsibleItem) => {
                const href = collapsibleItem?.getAttribute("href");
                // Should never occur but we keep it as a fail-safe
                if (href?.startsWith("https://") || href?.startsWith("http://")) {
                    return;
                }
                // Click any links we haven't seen before
                if (href && !encounteredHref.includes(href)) {
                    collapsibleItem?.click();
                }
                if (href) {
                    linksFound.push(href);
                }
            });
            return linksFound;
        }, encounteredHref // Need to pass array into the browser
        );
    }
    return await page.content();
}
//# sourceMappingURL=openNestedDocusaurusMenus.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"openNestedDocusaurusMenus.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/openNestedDocusaurusMenus.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,OAAO,CAAC,KAAK,UAAU,yBAAyB,CAAC,IAAU;IAChE,IAAI,eAAe,GAAa,EAAE,CAAC;IACnC,IAAI,eAAe,GAAG,CAAC,0CAA0C,CAAC,CAAC;IAEnE,0CAA0C;IAC1C,OAAO,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,EAAE;QACvE,eAAe,GAAG,eAAe,CAAC;QAClC,eAAe,GAAG,MAAM,IAAI,CAAC,QAAQ,CACnC,CAAC,eAAe,EAAE,EAAE;YAClB,MAAM,WAAW,GAAkB,KAAK,CAAC,IAAI,CAC3C,QAAQ,CAAC,gBAAgB,CAAC,iCAAiC,CAAC,CAC7D,CAAC;YAEF,MAAM,UAAU,GAAa,EAAE,CAAC;YAChC,WAAW,CAAC,OAAO,CAAC,KAAK,EAAE,eAA4B,EAAE,EAAE;gBACzD,MAAM,IAAI,GAAG,eAAe,EAAE,YAAY,CAAC,MAAM,CAAC,CAAC;gBAEnD,mDAAmD;gBACnD,IAAI,IAAI,EAAE,UAAU,CAAC,UAAU,CAAC,IAAI,IAAI,EAAE,UAAU,CAAC,SAAS,CAAC,EAAE;oBAC/D,OAAO;iBACR;gBAED,yCAAyC;gBACzC,IAAI,IAAI,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE;oBAC3C,eAAe,EAAE,KAAK,EAAE,CAAC;iBAC1B;gBAED,IAAI,IAAI,EAAE;oBACR,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBACvB;YACH,CAAC,CAAC,CAAC;YAEH,OAAO,UAAU,CAAC;QACpB,CAAC,EACD,eAAe,CAAC,sCAAsC;SACvD,CAAC;KACH;IAED,OAAO,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;AAC9B,CAAC"}
@@ -0,0 +1,17 @@
1
/**
 * Expands every closed GitBook sidebar menu by clicking the chevron icon of
 * each closed entry, looping until a pass clicks nothing, then returns the
 * fully expanded page HTML.
 *
 * @param page - Puppeteer-style page exposing `evaluate` and `content`.
 * @returns the page HTML after all nested menus have been opened.
 */
export default async function openNestedGitbookMenus(page) {
    let clickedAny = true;
    // Loop until we've encountered every closed menu
    while (clickedAny) {
        clickedAny = await page.evaluate(() => {
            // Right pointing arrow. Only closed menus have this icon
            const icons = Array.from(document.querySelectorAll('path[d="M9 18l6-6-6-6"]'));
            // Synchronous callback: click() returns void, so the original
            // `async` callback was a no-op anti-pattern.
            icons.forEach((icon) => {
                const toClick = icon?.parentElement?.parentElement;
                // Guard the call too: the chain above can yield null/undefined,
                // and the original unguarded `toClick.click()` threw a TypeError
                // in that case.
                toClick?.click();
            });
            return icons.length > 0;
        });
    }
    return await page.content();
}
//# sourceMappingURL=openNestedGitbookMenus.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"openNestedGitbookMenus.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/openNestedGitbookMenus.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,OAAO,CAAC,KAAK,UAAU,sBAAsB,CAAC,IAAU;IAC7D,IAAI,UAAU,GAAG,IAAI,CAAC;IAEtB,iDAAiD;IACjD,OAAO,UAAU,EAAE;QACjB,UAAU,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;YACpC,yDAAyD;YACzD,MAAM,KAAK,GAAkB,KAAK,CAAC,IAAI,CACrC,QAAQ,CAAC,gBAAgB,CAAC,yBAAyB,CAAC,CACrD,CAAC;YAEF,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,IAAiB,EAAE,EAAE;gBACxC,MAAM,OAAO,GAAG,IAAI,EAAE,aAAa,EAAE,aAAa,CAAC;gBACnD,OAAO,CAAC,KAAK,EAAE,CAAC;YAClB,CAAC,CAAC,CAAC;YAEH,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;QAC1B,CAAC,CAAC,CAAC;KACJ;IAED,OAAO,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;AAC9B,CAAC"}
@@ -0,0 +1,49 @@
1
+ import cheerio from "cheerio";
2
+ import { NodeHtmlMarkdown } from "node-html-markdown";
3
+ import downloadAllImages from "../downloadAllImages.js";
4
+ import replaceImagePaths from "../replaceImagePaths.js";
5
/**
 * Scrapes a single Docusaurus documentation page.
 *
 * @param html - full page HTML to parse.
 * @param origin - site origin, used to resolve and download images.
 * @param cliDir - directory the CLI writes to; used to rewrite image paths.
 * @param imageBaseDir - directory images are downloaded into.
 * @param overwrite - whether to overwrite previously downloaded images.
 * @param version - expects "2", or "3". Have not written support for "1" yet.
 * @returns `{ title, description, markdown }`, or `{}` for empty index pages.
 */
export async function scrapeDocusaurusPage(html, origin, cliDir, imageBaseDir, overwrite, version // expects "2", or "3". Have not written support for "1" yet
) {
    const $ = cheerio.load(html);
    // v3 marks the content with .theme-doc-markdown; v2 uses an <article> wrapper
    const article = version === "3" ? $(".theme-doc-markdown").first() : $("article").first();
    if (article.length === 0) {
        // Index pages with no additional text don't have the markdown class
        return {};
    }
    const titleComponent = article.find("h1");
    const title = titleComponent.text().trim();
    // Do not include title in the content when we insert it in our metadata
    titleComponent.remove();
    const markdownContent = version === "3" ? article : article.find(".markdown").first();
    const origToWritePath = await downloadAllImages($, markdownContent, origin, imageBaseDir, overwrite);
    const markdownHtml = markdownContent.html();
    const nhm = new NodeHtmlMarkdown({ useInlineLinks: false });
    let markdown = nhm.translate(markdownHtml);
    if (markdown == null) {
        console.error("We do not support scraping this page. Content will be empty");
        return { title, description: null, markdown: "" };
    }
    // Description only exists in meta tags. The code is commented out because it's prone
    // to incorrectly including a description if the first line of text had markdown annotations like `.
    // The commented out alternative is to ignore description if it's the first line of text,
    // this means it was not set in the metadata and Docusaurus defaulted to the text.
    const description = null;
    // let description = $('meta[property="og:description"]').attr("content");
    // if (markdown.startsWith(description)) {
    //   description = null;
    // }
    // Remove Docusaurus links from headers
    // When we parse their HTML the parser adds things like:
    // [](#setup "Direct link to heading")
    // to the end of each header.
    markdown = markdown.replace(/\[\]\(#.+ ".+"\)\n/g, "\n");
    // Remove unnecessary nonwidth blank space characters
    markdown = markdown.replace(/\u200b/g, "");
    // Reduce unnecessary blank lines. Use /\n{3,}/ so runs of four or more
    // newlines collapse in a single pass — the old /\n\n\n/ pattern left
    // "\n\n\n\n" as "\n\n\n" because replacements do not overlap.
    markdown = markdown.replace(/\n{3,}/g, "\n\n");
    // Mintlify doesn't support bolded headers, remove the asterisks
    markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, "$1 $2\n");
    markdown = replaceImagePaths(origToWritePath, cliDir, markdown);
    return { title, description, markdown };
}
//# sourceMappingURL=scrapeDocusaurusPage.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scrapeDocusaurusPage.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeDocusaurusPage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,OAA2B,CAAC,4DAA4D;;IAExF,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,OAAO,GACX,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC;IAE5E,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE;QACxB,oEAAoE;QACpE,OAAO,EAAE,CAAC;KACX;IAED,MAAM,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAE3C,wEAAwE;IACxE,cAAc,CAAC,MAAM,EAAE,CAAC;IAExB,MAAM,eAAe,GACnB,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,KAAK,EAAE,CAAC;IAEhE,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,eAAe,EACf,MAAM,EACN,YAAY,EACZ,SAAS,CACV,CAAC;IAEF,MAAM,YAAY,GAAG,eAAe,CAAC,IAAI,EAAE,CAAC;IAE5C,MAAM,GAAG,GAAG,IAAI,gBAAgB,CAAC,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;IAC5D,IAAI,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;IAE3C,IAAI,QAAQ,IAAI,IAAI,EAAE;QACpB,OAAO,CAAC,KAAK,CACX,6DAA6D,CAC9D,CAAC;QACF,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;KACnD;IAED,mGAAmG;IACnG,qFAAqF;IACrF,yFAAyF;IACzF,kFAAkF;IAClF,MAAM,WAAW,GAAG,IAAI,CAAC;IACzB,0EAA0E;IAC1E,0CAA0C;IAC1C,wBAAwB;IACxB,IAAI;IAEJ,uCAAuC;IACvC,wDAAwD;IACxD,sCAAsC;IACtC,6BAA6B;IAC7B,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,qBAAqB,EAAE,IAAI,CAAC,CAAC;IAEzD,qDAAqD;IACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAE3C,iCAAiC;IACjC,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,gEAAgE;IAChE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;IAEjE,QAAQ,GAAG,iBAAiB,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAEhE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC"}
@@ -0,0 +1,30 @@
1
+ import cheerio from "cheerio";
2
+ import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js";
3
+ import combineNavWithEmptyGroupTitles from "../combineNavWithEmptyGroupTitles.js";
4
+ import { scrapeDocusaurusPage } from "./scrapeDocusaurusPage.js";
5
+ import { getDocusaurusLinksPerGroup } from "./links-per-group/getDocusaurusLinksPerGroup.js";
6
+ import downloadLogoImage from "../downloadLogoImage.js";
7
/**
 * Scrapes an entire Docusaurus docs section: downloads the site logo, reads
 * the sidebar navigation into groups, and scrapes every page of every group.
 *
 * @returns a mint.json-style array of `{ group, pages }` entries whose pages
 *          have been scraped and rewritten to clean local paths.
 */
export async function scrapeDocusaurusSection(html, origin, cliDir, imageBaseDir, overwrite, version) {
    const $ = cheerio.load(html);
    // Download the logo
    const logoSrc = $(".navbar__logo img").attr("src");
    downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite);
    // Read the sidebar and turn it into one { group, pages } entry per section
    const sidebarSections = $(".theme-doc-sidebar-menu").first().children();
    const groupsConfig = getDocusaurusLinksPerGroup(sidebarSections, $, version);
    // Groups with empty titles are merged into their neighbors
    const mergedGroups = combineNavWithEmptyGroupTitles(groupsConfig);
    // Scrape every link of every group in parallel
    const scrapedGroups = await Promise.all(mergedGroups.map(async (groupConfig) => {
        const scrapedPages = await Promise.all(groupConfig.pages.map((navEntry) =>
            // Docusaurus requires a directory on all sections whereas we use root.
            // /docs is their default directory so we remove it
            scrapeGettingFileNameFromUrl(navEntry, cliDir, origin, overwrite, scrapeDocusaurusPage, false, version, "/docs")));
        // Skipped index pages resolve to undefined; drop them
        groupConfig.pages = scrapedPages.filter(Boolean);
        return groupConfig;
    }));
    return scrapedGroups;
}
//# sourceMappingURL=scrapeDocusaurusSection.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scrapeDocusaurusSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeDocusaurusSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,8BAA8B,MAAM,sCAAsC,CAAC;AAClF,OAAO,EAAE,oBAAoB,EAAE,MAAM,2BAA2B,CAAC;AACjE,OAAO,EAAE,0BAA0B,EAAE,MAAM,iDAAiD,CAAC;AAC7F,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAC3C,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,OAAe;IAEf,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,oBAAoB;IACpB,MAAM,OAAO,GAAG,CAAC,CAAC,mBAAmB,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACnD,iBAAiB,CAAC,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;IAE5D,kCAAkC;IAClC,MAAM,kBAAkB,GAAG,CAAC,CAAC,yBAAyB,CAAC,CAAC,KAAK,EAAE,CAAC,QAAQ,EAAE,CAAC;IAE3E,0BAA0B;IAC1B,MAAM,YAAY,GAAqB,0BAA0B,CAC/D,kBAAkB,EAClB,CAAC,EACD,OAAO,CACR,CAAC;IAEF,0CAA0C;IAC1C,MAAM,mBAAmB,GAAG,8BAA8B,CAAC,YAAY,CAAC,CAAC;IAEzE,sCAAsC;IACtC,MAAM,sBAAsB,GAAG,MAAM,OAAO,CAAC,GAAG,CAC9C,mBAAmB,CAAC,GAAG,CAAC,KAAK,EAAE,WAAW,EAAE,EAAE;QAC5C,WAAW,CAAC,KAAK,GAAG,CAClB,MAAM,OAAO,CAAC,GAAG,CACf,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,QAA6B,EAAE,EAAE;QAC5D,sEAAsE;QACtE,mDAAmD;QACnD,4BAA4B,CAC1B,QAAQ,EACR,MAAM,EACN,MAAM,EACN,SAAS,EACT,oBAAoB,EACpB,KAAK,EACL,OAAO,EACP,OAAO,CACR,CACF,CACF,CACF;YACC,6EAA6E;aAC5E,MAAM,CAAC,OAAO,CAAC,CAAC;QACnB,OAAO,WAAW,CAAC;IACrB,CAAC,CAAC,CACH,CAAC;IAEF,OAAO,sBAAsB,CAAC;AAChC,CAAC"}
@@ -0,0 +1,47 @@
1
+ import cheerio from "cheerio";
2
+ import { NodeHtmlMarkdown } from "node-html-markdown";
3
+ import downloadAllImages from "../downloadAllImages.js";
4
+ import replaceImagePaths from "../replaceImagePaths.js";
5
/**
 * Scrapes a single GitBook page.
 *
 * @param html - full page HTML to parse.
 * @param origin - site origin, used to resolve and download images.
 * @param cliDir - directory the CLI writes to; used to rewrite image paths.
 * @param imageBaseDir - directory images are downloaded into.
 * @param overwrite - whether to overwrite previously downloaded images.
 * @param _ - unused version parameter, kept for a uniform scraper signature.
 * @returns `{ title, description, markdown }`.
 */
export async function scrapeGitBookPage(html, origin, cliDir, imageBaseDir, overwrite, _ // version
) {
    const $ = cheerio.load(html);
    const titleComponent = $('[data-testid="page.title"]').first();
    // GitBook renders the description alongside the title; read the shared
    // ancestor's text and strip the title out to isolate the description.
    const titleAndDescription = titleComponent.parent().parent().parent().text();
    const description = titleAndDescription
        .replace(titleComponent.text(), "")
        .trim();
    const title = titleComponent.text().trim();
    const content = $('[data-testid="page.contentEditor"]').first();
    // Replace code blocks with parseable html
    const codeBlocks = content.find('[spellcheck="false"] div');
    codeBlocks.each((i, c) => {
        const code = $(c);
        code.find('[contenteditable="false"]').empty();
        const codeContent = code
            .children()
            .toArray()
            .map((d) => $(d).text())
            .filter((text) => text !== "")
            .join("\n");
        code.replaceWith(`<pre><code>${codeContent}</code></pre>`);
    });
    const contentHtml = $.html(content);
    const modifyFileName = (fileName) =>
        // Remove GitBook metadata from the start
        // The first four %2F split metadata fields. Remaining ones are part of the file name.
        fileName.split("%2F").slice(4).join("%2F");
    const origToWritePath = await downloadAllImages($, content, origin, imageBaseDir, overwrite, modifyFileName);
    const nhm = new NodeHtmlMarkdown({ useInlineLinks: false });
    let markdown = nhm.translate(contentHtml);
    // Keep headers on one line
    markdown = markdown.replace(/# \n\n/g, "# ");
    // Remove unnecessary nonwidth blank space characters
    markdown = markdown.replace(/\u200b/g, "");
    // Reduce unnecessary blank lines. Use /\n{3,}/ so runs of four or more
    // newlines collapse in a single pass — the old /\n\n\n/ pattern left
    // "\n\n\n\n" as "\n\n\n" because replacements do not overlap.
    markdown = markdown.replace(/\n{3,}/g, "\n\n");
    // Mintlify doesn't support bolded headers, remove the asterisks
    markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, "$1 $2\n");
    markdown = replaceImagePaths(origToWritePath, cliDir, markdown);
    return { title, description, markdown };
}
//# sourceMappingURL=scrapeGitBookPage.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scrapeGitBookPage.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeGitBookPage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,CAAqB,CAAC,UAAU;;IAEhC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,cAAc,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,KAAK,EAAE,CAAC;IAC/D,MAAM,mBAAmB,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC;IAE7E,MAAM,WAAW,GAAG,mBAAmB;SACpC,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC;SAClC,IAAI,EAAE,CAAC;IACV,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAE3C,MAAM,OAAO,GAAG,CAAC,CAAC,oCAAoC,CAAC,CAAC,KAAK,EAAE,CAAC;IAEhE,0CAA0C;IAC1C,MAAM,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;IAC5D,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACvB,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAClB,IAAI,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC,KAAK,EAAE,CAAC;QAC/C,MAAM,WAAW,GAAG,IAAI;aACrB,QAAQ,EAAE;aACV,OAAO,EAAE;aACT,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;aACvB,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,EAAE,CAAC;aAC7B,IAAI,CAAC,IAAI,CAAC,CAAC;QACd,IAAI,CAAC,WAAW,CAAC,cAAc,WAAW,eAAe,CAAC,CAAC;IAC7D,CAAC,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAEpC,MAAM,cAAc,GAAG,CAAC,QAAQ,EAAE,EAAE;IAClC,yCAAyC;IACzC,sFAAsF;IACtF,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAE7C,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,OAAO,EACP,MAAM,EACN,YAAY,EACZ,SAAS,EACT,cAAc,CACf,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,gBAAgB,CAAC,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;IAC5D,IAAI,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAE1C,2BAA2B;IAC3B,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IAE7C,qDAAqD;IACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAE3C,iCAAiC;IACjC,QAAQ,GAAG,QA
AQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,gEAAgE;IAChE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;IAEjE,QAAQ,GAAG,iBAAiB,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAEhE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC"}
@@ -0,0 +1,52 @@
1
+ import cheerio from "cheerio";
2
+ import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js";
3
+ import { scrapeGitBookPage } from "./scrapeGitBookPage.js";
4
+ import combineNavWithEmptyGroupTitles from "../combineNavWithEmptyGroupTitles.js";
5
+ import getLinksRecursivelyGitBook from "./links-per-group/getLinksRecursivelyGitBook.js";
6
+ import alternateGroupTitle from "./alternateGroupTitle.js";
7
+ import downloadLogoImage from "../downloadLogoImage.js";
8
/**
 * Scrapes an entire GitBook docs section: downloads the logo, walks the table
 * of contents into groups, and scrapes every page of every group.
 *
 * @returns a mint.json-style array of `{ group, pages }` entries with scraped,
 *          cleaned page paths.
 */
export async function scrapeGitBookSection(html, origin, cliDir, imageBaseDir, overwrite, version) {
    const $ = cheerio.load(html);
    // Download the logo
    const logoSrc = $('a[data-testid="public.headerHomeLink"] img')
        .first()
        .attr("src");
    downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite);
    // Get all the navigation sections.
    // Some variants of the GitBook UI show the logo and search box in the side
    // navigation bar, but the navigation sections are always the last value.
    const tocRoot = $('div[data-testid="page.desktopTableOfContents"] > nav > div:first-child');
    const navigationSections = tocRoot.children().eq(-1).children().first().children();
    // Build one { group, pages } entry per sidebar section
    const groupsConfig = navigationSections
        .toArray()
        .map((sectionEl) => {
            const section = $(sectionEl);
            const sectionTitle = $(section)
                .find('div > div[dir="auto"]')
                .first()
                .text();
            // Only present if the nested navigation is not in a group
            const firstLink = section.children().eq(0);
            const firstHref = firstLink.attr("href");
            const linkSections = section.children().eq(1).children();
            const pages = getLinksRecursivelyGitBook(linkSections, $);
            return {
                group: sectionTitle || alternateGroupTitle(firstLink, pages),
                pages: firstHref ? [firstHref, ...pages] : pages,
            };
        })
        .filter(Boolean);
    // Merge groups with empty titles together
    const reducedGroupsConfig = combineNavWithEmptyGroupTitles(groupsConfig);
    // Scrape each link in the navigation in parallel.
    const groupsConfigCleanPaths = await Promise.all(reducedGroupsConfig.map((navEntry) =>
        scrapeGettingFileNameFromUrl(navEntry, cliDir, origin, overwrite, scrapeGitBookPage, true, version)));
    return groupsConfigCleanPaths;
}
//# sourceMappingURL=scrapeGitBookSection.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scrapeGitBookSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeGitBookSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAC3D,OAAO,8BAA8B,MAAM,sCAAsC,CAAC;AAClF,OAAO,0BAA0B,MAAM,iDAAiD,CAAC;AACzF,OAAO,mBAAmB,MAAM,0BAA0B,CAAC;AAC3D,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,OAA2B;IAE3B,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,oBAAoB;IACpB,MAAM,OAAO,GAAG,CAAC,CAAC,4CAA4C,CAAC;SAC5D,KAAK,EAAE;SACP,IAAI,CAAC,KAAK,CAAC,CAAC;IACf,iBAAiB,CAAC,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;IAE5D,kCAAkC;IAClC,4FAA4F;IAC5F,yDAAyD;IACzD,MAAM,kBAAkB,GAAG,CAAC,CAC1B,wEAAwE,CACzE;SACE,QAAQ,EAAE;SACV,EAAE,CAAC,CAAC,CAAC,CAAC;SACN,QAAQ,EAAE;SACV,KAAK,EAAE;SACP,QAAQ,EAAE,CAAC;IAEd,0BAA0B;IAC1B,MAAM,YAAY,GAAqB,kBAAkB;SACtD,OAAO,EAAE;SACT,GAAG,CAAC,CAAC,CAAkB,EAAE,EAAE;QAC1B,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,YAAY,GAAG,CAAC,CAAC,OAAO,CAAC;aAC5B,IAAI,CAAC,uBAAuB,CAAC;aAC7B,KAAK,EAAE;aACP,IAAI,EAAE,CAAC;QAEV,0DAA0D;QAC1D,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC3C,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEzC,MAAM,YAAY,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;QACzD,MAAM,KAAK,GAAG,0BAA0B,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC;QAE1D,OAAO;YACL,KAAK,EAAE,YAAY,IAAI,mBAAmB,CAAC,SAAS,EAAE,KAAK,CAAC;YAC5D,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK;SACjD,CAAC;IACJ,CAAC,CAAC;SACD,MAAM,CAAC,OAAO,CAAC,CAAC;IAEnB,0CAA0C;IAC1C,MAAM,mBAAmB,GAAG,8BAA8B,CAAC,YAAY,CAAC,CAAC;IAEzE,sCAAsC;IACtC,MAAM,sBAAsB,GAAG,MAAM,OAAO,CAAC,GAAG,CAC9C,mBAAmB,CAAC,GAAG,CAAC,KAAK,EAAE,QAA6B,EAAE,EAAE;QAC9D,OAAO,MAAM,4BAA4B,CACvC,QAAQ,EACR,MAAM,EACN,MAAM,EACN,SAAS,EACT,iBAAiB,EACjB,IAAI,EACJ,OAAO,CACR,CAAC;IACJ,CAAC,CAAC,CACH,CAAC;IAEF,OAAO,sBAAsB,CAAC;AAChC,CAAC"}
@@ -0,0 +1,36 @@
1
+ import cheerio from "cheerio";
2
+ import { NodeHtmlMarkdown } from "node-html-markdown";
3
+ import downloadAllImages from "../downloadAllImages.js";
4
+ import replaceImagePaths from "../replaceImagePaths.js";
5
/**
 * Scrapes a single ReadMe page.
 *
 * @param html - full page HTML to parse.
 * @param origin - site origin, used to resolve and download images.
 * @param cliDir - directory the CLI writes to; used to rewrite image paths.
 * @param imageBaseDir - directory images are downloaded into.
 * @param overwrite - whether to overwrite previously downloaded images.
 * @param _ - unused version parameter, kept for a uniform scraper signature.
 * @returns `{ title, description, markdown }`.
 */
export async function scrapeReadMePage(html, origin, cliDir, imageBaseDir, overwrite, _ // version
) {
    const $ = cheerio.load(html);
    const titleComponent = $("h1").first();
    const title = titleComponent.text().trim();
    let description = $(".markdown-body", titleComponent.parent()).text().trim();
    if (!description) {
        // Fall back to the article header paragraph used by some ReadMe layouts
        description = $(".rm-Article > header p").text().trim();
    }
    let content = $(".content-body .markdown-body").first();
    if (content.length === 0) {
        content = $(".rm-Article > .markdown-body");
    }
    // API Pages don't have a markdown body in the same position so there's no HTML
    let contentHtml = content.html() || "";
    const origToWritePath = await downloadAllImages($, content, origin, imageBaseDir, overwrite);
    const nhm = new NodeHtmlMarkdown({ useInlineLinks: false });
    let markdown = nhm.translate(contentHtml);
    // Keep headers on one line and increase their depth by one
    markdown = markdown.replace(/# \n\n/g, "## ");
    // Remove unnecessary nonwidth blank space characters
    markdown = markdown.replace(/\u200b/g, "");
    // Remove ReadMe anchor links
    markdown = markdown.replace(/\n\[\]\(#.+\)\n/g, "\n");
    // Reduce unnecessary blank lines. Use /\n{3,}/ so runs of four or more
    // newlines collapse in a single pass — the old /\n\n\n/ pattern left
    // "\n\n\n\n" as "\n\n\n" because replacements do not overlap.
    markdown = markdown.replace(/\n{3,}/g, "\n\n");
    // Mintlify doesn't support bolded headers, remove the asterisks
    markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, "$1 $2\n");
    markdown = replaceImagePaths(origToWritePath, cliDir, markdown);
    return { title, description, markdown };
}
//# sourceMappingURL=scrapeReadMePage.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scrapeReadMePage.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeReadMePage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,CAAqB,CAAC,UAAU;;IAEhC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,cAAc,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;IACvC,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAC3C,IAAI,WAAW,GAAG,CAAC,CAAC,gBAAgB,EAAE,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAC7E,IAAI,CAAC,WAAW,EAAE;QAChB,WAAW,GAAG,CAAC,CAAC,wBAAwB,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;KACzD;IAED,IAAI,OAAO,GAAG,CAAC,CAAC,8BAA8B,CAAC,CAAC,KAAK,EAAE,CAAC;IACxD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE;QACxB,OAAO,GAAG,CAAC,CAAC,8BAA8B,CAAC,CAAC;KAC7C;IAED,+EAA+E;IAC/E,IAAI,WAAW,GAAG,OAAO,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC;IAEvC,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,OAAO,EACP,MAAM,EACN,YAAY,EACZ,SAAS,CACV,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,gBAAgB,CAAC,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;IAC5D,IAAI,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAE1C,2DAA2D;IAC3D,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IAE9C,qDAAqD;IACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAE3C,6BAA6B;IAC7B,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,kBAAkB,EAAE,IAAI,CAAC,CAAC;IAEtD,iCAAiC;IACjC,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,gEAAgE;IAChE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;IAEjE,QAAQ,GAAG,iBAAiB,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAEhE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC"}
@@ -0,0 +1,44 @@
1
+ import cheerio from "cheerio";
2
+ import { scrapeReadMePage } from "./scrapeReadMePage.js";
3
+ import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js";
4
+ import getLinksRecursively from "./links-per-group/getLinksRecursively.js";
5
+ import downloadLogoImage from "../downloadLogoImage.js";
6
/**
 * Scrapes a ReadMe.com documentation site section: downloads the site
 * logo, reads the first sidebar to build a mint.json-style navigation
 * (groups of pages), then scrapes every linked page.
 *
 * @param {string} html - Full HTML of a ReadMe docs page.
 * @param {string} origin - Site origin, e.g. https://docs.example.com.
 * @param {string} cliDir - Directory the CLI writes output into.
 * @param {string} imageBaseDir - Directory where images are saved.
 * @param {boolean} overwrite - Whether to clobber existing files.
 * @param {string|undefined} version - Docs version segment, if any.
 * @returns navigation groups with cleaned page paths.
 */
export async function scrapeReadMeSection(html, origin, cliDir, imageBaseDir, overwrite, version) {
    const $ = cheerio.load(html);

    // Download the logo.
    // NOTE(review): the result is intentionally not awaited — confirm
    // downloadLogoImage reports its own errors.
    const logoSrc = $(".rm-Logo-img").first().attr("src");
    downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite);

    // Only the first sidebar matters: the HTML repeats the sidebar for
    // mobile responsiveness, but every copy carries the same links.
    const sidebarSections = $(".rm-Sidebar").first().find(".rm-Sidebar-section");

    const groupsConfig = sidebarSections.toArray().map((rawSection) => {
        const section = $(rawSection);
        const group = section.find("h3").first().text();

        // Collect every link, then drop duplicates with an index check.
        // Nested navigation repeats links, eg:
        // subgroupTitle -> /first-page
        // -- First Page -> /first-page ** DUPLICATE **
        // -- Second Page -> /second-page
        const linkSections = section.find(".rm-Sidebar-list").first().children();
        const pages = getLinksRecursively(linkSections, $).filter(
            (page, i, all) => all.indexOf(page) === i
        );

        // Follows the same structure as mint.json
        return { group, pages };
    });

    // Scrape each link in the navigation, in parallel.
    return Promise.all(
        groupsConfig.map((navEntry) =>
            scrapeGettingFileNameFromUrl(
                // ReadMe requires a directory on all sections whereas we use root.
                // /docs is their default directory so we remove it
                navEntry,
                cliDir,
                origin,
                overwrite,
                scrapeReadMePage,
                false,
                version,
                "/docs"
            )
        )
    );
}
44
+ //# sourceMappingURL=scrapeReadMeSection.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scrapeReadMeSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeReadMeSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,mBAAmB,MAAM,0CAA0C,CAAC;AAC3E,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,OAA2B;IAE3B,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,oBAAoB;IACpB,MAAM,OAAO,GAAG,CAAC,CAAC,cAAc,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACtD,iBAAiB,CAAC,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;IAE5D,2DAA2D;IAC3D,2DAA2D;IAC3D,mDAAmD;IACnD,MAAM,kBAAkB,GAAG,CAAC,CAAC,aAAa,CAAC;SACxC,KAAK,EAAE;SACP,IAAI,CAAC,qBAAqB,CAAC,CAAC;IAE/B,MAAM,YAAY,GAAqB,kBAAkB;SACtD,OAAO,EAAE;SACT,GAAG,CAAC,CAAC,CAAkB,EAAE,EAAE;QAC1B,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC;QAEvD,uDAAuD;QACvD,yDAAyD;QACzD,+BAA+B;QAC/B,iDAAiD;QACjD,iCAAiC;QACjC,MAAM,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC,KAAK,EAAE,CAAC,QAAQ,EAAE,CAAC;QACzE,MAAM,KAAK,GAAG,mBAAmB,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,MAAM,CACvD,CAAC,KAAa,EAAE,KAAa,EAAE,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,KAAK,CACtE,CAAC;QAEF,0CAA0C;QAC1C,OAAO;YACL,KAAK,EAAE,YAAY;YACnB,KAAK,EAAE,KAAK;SACb,CAAC;IACJ,CAAC,CAAC,CAAC;IAEL,sCAAsC;IACtC,MAAM,sBAAsB,GAAG,MAAM,OAAO,CAAC,GAAG,CAC9C,YAAY,CAAC,GAAG,CAAC,KAAK,EAAE,QAA6B,EAAE,EAAE;QACvD,OAAO,MAAM,4BAA4B;QACvC,mEAAmE;QACnE,mDAAmD;QACnD,QAAQ,EACR,MAAM,EACN,MAAM,EACN,SAAS,EACT,gBAAgB,EAChB,KAAK,EACL,OAAO,EACP,OAAO,CACR,CAAC;IACJ,CAAC,CAAC,CACH,CAAC;IAEF,OAAO,sBAAsB,CAAC;AAChC,CAAC"}
package/bin/util.js ADDED
@@ -0,0 +1,129 @@
1
+ import { mkdirSync, writeFileSync } from "fs";
2
+ import Ora from "ora";
3
+ import path from "path";
4
+ import shell from "shelljs";
5
+ import stopIfInvalidLink from "./validation/stopIfInvalidLink.js";
6
// Builds the default mint.json configuration object for a freshly
// scraped site: branding placeholders, top-bar CTA, and a single
// "Home" navigation group that contains the given page.
export const MintConfig = (name, color, ctaName, ctaUrl, filename) => ({
    name,
    logo: "",
    favicon: "",
    colors: {
        primary: color,
    },
    topbarLinks: [],
    topbarCtaButton: {
        name: ctaName,
        url: ctaUrl,
    },
    anchors: [],
    navigation: [
        {
            group: "Home",
            pages: [filename],
        },
    ],
    // footerSocials: {}, // support object type for footer types
});
29
// Renders a page as an MDX string with YAML frontmatter.
// The title is wrapped in double quotes; a description line is added
// only when description is truthy.
export const Page = (title, description, markdown) => {
    // Check both ends up front: if we quoted as we went, an empty
    // title's freshly-added opening quote would be mistaken for a
    // closing quote.
    const startsWithQuote = title.startsWith('"');
    // BUG FIX: this previously used startsWith('"') too, so a title
    // like `foo"` ended up as `"foo""` and `"foo` was never closed.
    const endsWithQuote = title.endsWith('"');
    if (!startsWithQuote) {
        title = '"' + title;
    }
    if (!endsWithQuote) {
        title = title + '"';
    }
    const optionalDescription = description
        ? `\ndescription: "${description}"`
        : "";
    return `---\ntitle: ${title}${optionalDescription}\n---\n\n${markdown}`;
};
46
// Returns only the scheme + host of a URL, discarding path/query/hash.
// eg. https://google.com -> https://google.com
//     https://google.com/page -> https://google.com
export function getOrigin(url) {
    const { origin } = new URL(url);
    return origin;
}
51
// Pretty-prints each object with two-space indentation and joins the
// results with ",\n" so they can be embedded in a generated file.
export function objToReadableString(objs) {
    const pretty = [];
    for (const obj of objs) {
        pretty.push(JSON.stringify(obj, null, 2));
    }
    return pretty.join(",\n");
}
55
// Converts a page title into a lowercase, dash-separated slug.
// Special characters become spaces first, so trim() also strips any
// that sat at the start or end of the name.
export const toFilename = (title) => {
    const spaced = title.replace(/[^a-z0-9]/gi, " ");
    return spaced.trim().replace(/ /g, "-").toLowerCase();
};
64
// Appends the .mdx extension unless the file name already ends in it.
export const addMdx = (fileName) =>
    fileName.endsWith(".mdx") ? fileName : `${fileName}.mdx`;
70
// Writes one scraped page to disk as <rootDir>/<fileName|slug(title)>.mdx.
// title/description/markdown: page content, rendered via Page().
// overwrite: when true, clobber an existing file; otherwise skip it.
// rootDir: destination directory, created on demand; defaults to cwd.
// fileName: optional explicit file name; falls back to toFilename(title).
export const createPage = (title, description, markdown, overwrite = false, rootDir = "", fileName) => {
    const writePath = path.join(rootDir, addMdx(fileName || toFilename(title)));
    // Create the folders needed if they're missing.
    // BUG FIX: mkdirSync("") throws ENOENT even with recursive: true,
    // so skip it for the default rootDir (write into the cwd directly).
    if (rootDir !== "") {
        mkdirSync(rootDir, { recursive: true });
    }
    // Write the page to disk
    if (overwrite) {
        writeFileSync(writePath, Page(title, description, markdown));
        console.log("✏️ - " + writePath);
    }
    else {
        try {
            // The "wx" flag makes the write fail with EEXIST when the
            // file already exists, instead of overwriting it.
            writeFileSync(writePath, Page(title, description, markdown), {
                flag: "wx",
            });
            console.log("✏️ - " + writePath);
        }
        catch (e) {
            // We do a try-catch instead of an if-statement to avoid a race condition
            // of the file being created after we started writing.
            if (e.code === "EEXIST") {
                console.log(`❌ Skipping existing file ${writePath}`);
            }
            else {
                console.error(e);
            }
        }
    }
};
98
// Pulls the target URL out of parsed CLI arguments; delegates to
// stopIfInvalidLink, which exits the process when the URL is malformed.
export function getHrefFromArgs(argv) {
    const { url: href } = argv;
    stopIfInvalidLink(href);
    return href;
}
103
// Creates and starts an Ora spinner, optionally with initial text.
export const buildLogger = (startText = "") => Ora().start(startText);
107
// Returns the text after the final "." in filename, or undefined when
// there is no dot at all. A trailing dot yields the empty string.
export const getFileExtension = (filename) => {
    const dot = filename.lastIndexOf(".");
    if (dot === -1) {
        return undefined;
    }
    return filename.substring(dot + 1);
};
// True-ish when the file belongs in the pages folder (mdx, md, or tsx).
// Preserves the original truthiness contract: undefined when the file
// has no extension, false otherwise.
export const fileBelongsInPagesFolder = (filename) => {
    const extension = getFileExtension(filename);
    return extension && ["mdx", "md", "tsx"].includes(extension);
};
118
// Verifies that yarn is available on the PATH; if not, reports the
// failure through the given Ora logger and terminates the CLI.
export const ensureYarn = (logger) => {
    if (shell.which("yarn")) {
        return;
    }
    logger.fail(`yarn must be installed, run

npm install --global yarn

`);
    process.exit(1);
};
129
+ //# sourceMappingURL=util.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAC9C,OAAO,GAAG,MAAM,KAAK,CAAC;AACtB,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,SAAS,CAAC;AAC5B,OAAO,iBAAiB,MAAM,mCAAmC,CAAC;AAElE,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,IAAY,EACZ,KAAa,EACb,OAAe,EACf,MAAc,EACd,QAAgB,EAChB,EAAE;IACF,OAAO;QACL,IAAI;QACJ,IAAI,EAAE,EAAE;QACR,OAAO,EAAE,EAAE;QACX,MAAM,EAAE;YACN,OAAO,EAAE,KAAK;SACf;QACD,WAAW,EAAE,EAAE;QACf,eAAe,EAAE;YACf,IAAI,EAAE,OAAO;YACb,GAAG,EAAE,MAAM;SACZ;QACD,OAAO,EAAE,EAAE;QACX,UAAU,EAAE;YACV;gBACE,KAAK,EAAE,MAAM;gBACb,KAAK,EAAE,CAAC,QAAQ,CAAC;aAClB;SACF;QACD,6DAA6D;KAC9D,CAAC;AACJ,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,IAAI,GAAG,CAClB,KAAa,EACb,WAAoB,EACpB,QAAiB,EACjB,EAAE;IACF,uDAAuD;IACvD,yDAAyD;IACzD,wBAAwB;IACxB,MAAM,eAAe,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;IAC9C,MAAM,aAAa,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;IAC5C,IAAI,CAAC,eAAe,EAAE;QACpB,KAAK,GAAG,GAAG,GAAG,KAAK,CAAC;KACrB;IACD,IAAI,CAAC,aAAa,EAAE;QAClB,KAAK,GAAG,KAAK,GAAG,GAAG,CAAC;KACrB;IAED,MAAM,mBAAmB,GAAG,WAAW;QACrC,CAAC,CAAC,mBAAmB,WAAW,GAAG;QACnC,CAAC,CAAC,EAAE,CAAC;IACP,OAAO,eAAe,KAAK,GAAG,mBAAmB,YAAY,QAAQ,EAAE,CAAC;AAC1E,CAAC,CAAC;AAEF,MAAM,UAAU,SAAS,CAAC,GAAW;IACnC,+CAA+C;IAC/C,gDAAgD;IAChD,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;AAC7B,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,IAA2B;IAC7D,4BAA4B;IAC5B,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AACrE,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAAG,CAAC,KAAa,EAAE,EAAE;IAC1C,sDAAsD;IACtD,uDAAuD;IACvD,OAAO,KAAK;SACT,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC;SAC3B,IAAI,EAAE;SACN,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC;SAClB,WAAW,EAAE,CAAC;AACnB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,MAAM,GAAG,CAAC,QAAgB,EAAE,EAAE;IACzC,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE;QAC7B,OAAO,QAAQ,CAAC;KACjB;IACD,OAAO,QAAQ,GAAG,MAAM,CAAC;AAC3B,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,KAAa,EACb,WAAoB,EACpB,QAAiB,EACjB,YAAqB,KAAK,EAC1B,UAAkB
,EAAE,EACpB,QAAiB,EACjB,EAAE;IACF,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,QAAQ,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAE5E,+CAA+C;IAC/C,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAExC,2BAA2B;IAC3B,IAAI,SAAS,EAAE;QACb,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,KAAK,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC;QAC7D,OAAO,CAAC,GAAG,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC;KAClC;SAAM;QACL,IAAI;YACF,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,KAAK,EAAE,WAAW,EAAE,QAAQ,CAAC,EAAE;gBAC3D,IAAI,EAAE,IAAI;aACX,CAAC,CAAC;YACH,OAAO,CAAC,GAAG,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC;SAClC;QAAC,OAAO,CAAC,EAAE;YACV,yEAAyE;YACzE,sDAAsD;YACtD,IAAI,CAAC,CAAC,IAAI,KAAK,QAAQ,EAAE;gBACvB,OAAO,CAAC,GAAG,CAAC,4BAA4B,SAAS,EAAE,CAAC,CAAC;aACtD;iBAAM;gBACL,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;aAClB;SACF;KACF;AACH,CAAC,CAAC;AAEF,MAAM,UAAU,eAAe,CAAC,IAAS;IACvC,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC;IACtB,iBAAiB,CAAC,IAAI,CAAC,CAAC;IACxB,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,YAAoB,EAAE,EAAE,EAAE;IACpD,MAAM,MAAM,GAAG,GAAG,EAAE,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IACtC,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,QAAgB,EAAE,EAAE;IACnD,MAAM,GAAG,GAAG,QAAQ,CAAC,SAAS,CAC5B,QAAQ,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,EAC7B,QAAQ,CAAC,MAAM,CAChB,CAAC;IACF,IAAI,QAAQ,KAAK,GAAG;QAAE,OAAO,SAAS,CAAC;IACvC,OAAO,GAAG,CAAC;AACb,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,wBAAwB,GAAG,CAAC,QAAgB,EAAE,EAAE;IAC3D,MAAM,SAAS,GAAG,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IAC7C,OAAO,CACL,SAAS;QACT,CAAC,SAAS,KAAK,KAAK,IAAI,SAAS,KAAK,IAAI,IAAI,SAAS,KAAK,KAAK,CAAC,CACnE,CAAC;AACJ,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,UAAU,GAAG,CAAC,MAAW,EAAE,EAAE;IACxC,MAAM,aAAa,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC1C,IAAI,CAAC,aAAa,EAAE;QAClB,MAAM,CAAC,IAAI,CAAC;;;;KAIX,CAAC,CAAC;QACH,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;KACjB;AACH,CAAC,CAAC"}
@@ -0,0 +1,11 @@
1
// Returns true when href is a syntactically well-formed absolute URL.
// This checks the link is written correctly, not that the page exists.
export default function isValidLink(href) {
    try {
        new URL(href);
    }
    catch {
        return false;
    }
    return true;
}
11
+ //# sourceMappingURL=isValidLink.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"isValidLink.js","sourceRoot":"","sources":["../../src/validation/isValidLink.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,OAAO,UAAU,WAAW,CAAC,IAAY;IAC9C,uEAAuE;IACvE,IAAI;QACF,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC;QACd,OAAO,IAAI,CAAC;KACb;IAAC,OAAO,CAAC,EAAE;QACV,OAAO,KAAK,CAAC;KACd;AACH,CAAC"}
@@ -0,0 +1,9 @@
1
+ import isValidLink from "./isValidLink.js";
2
// Exits the process with an explanatory message when href is not a
// valid absolute URL; does nothing otherwise.
export default function stopIfInvalidLink(href) {
    if (isValidLink(href)) {
        return;
    }
    console.log("Invalid link: " + href);
    console.log("Make sure the link starts with http:// or https://");
    process.exit(1);
}
9
+ //# sourceMappingURL=stopIfInvalidLink.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stopIfInvalidLink.js","sourceRoot":"","sources":["../../src/validation/stopIfInvalidLink.ts"],"names":[],"mappings":"AAAA,OAAO,WAAW,MAAM,kBAAkB,CAAC;AAE3C,MAAM,CAAC,OAAO,UAAU,iBAAiB,CAAC,IAAY;IACpD,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE;QACtB,OAAO,CAAC,GAAG,CAAC,gBAAgB,GAAG,IAAI,CAAC,CAAC;QACrC,OAAO,CAAC,GAAG,CAAC,oDAAoD,CAAC,CAAC;QAClE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;KACjB;AACH,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mintlify",
3
- "version": "3.0.7",
3
+ "version": "3.0.8",
4
4
  "description": "Alias for @mintlify/cli",
5
5
  "engines": {
6
6
  "node": ">=18.0.0"
@@ -30,6 +30,6 @@
30
30
  "mintlify": "index.js"
31
31
  },
32
32
  "dependencies": {
33
- "@mintlify/cli": "3.0.7"
33
+ "@mintlify/cli": "3.0.8"
34
34
  }
35
35
  }