mintlify 1.0.6 → 1.0.7

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (61)
  1. package/README.md +8 -0
  2. package/bin/browser.js +24 -0
  3. package/bin/browser.js.map +1 -0
  4. package/bin/downloadImage.js +27 -0
  5. package/bin/downloadImage.js.map +1 -0
  6. package/bin/index.js +144 -22
  7. package/bin/index.js.map +1 -1
  8. package/bin/scraping/detectFramework.js +25 -0
  9. package/bin/scraping/detectFramework.js.map +1 -0
  10. package/bin/scraping/downloadAllImages.js +57 -0
  11. package/bin/scraping/downloadAllImages.js.map +1 -0
  12. package/bin/scraping/getSitemapLinks.js +16 -0
  13. package/bin/scraping/getSitemapLinks.js.map +1 -0
  14. package/bin/scraping/replaceImagePaths.js +17 -0
  15. package/bin/scraping/replaceImagePaths.js.map +1 -0
  16. package/bin/scraping/scrapeGettingFileNameFromUrl.js +43 -0
  17. package/bin/scraping/scrapeGettingFileNameFromUrl.js.map +1 -0
  18. package/bin/scraping/scrapePage.js +9 -0
  19. package/bin/scraping/scrapePage.js.map +1 -0
  20. package/bin/scraping/scrapeSection.js +9 -0
  21. package/bin/scraping/scrapeSection.js.map +1 -0
  22. package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js +43 -0
  23. package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js.map +1 -0
  24. package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js +52 -0
  25. package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js.map +1 -0
  26. package/bin/{scrapeGitBookPage.js → scraping/site-scrapers/scrapeGitBookPage.js} +10 -5
  27. package/bin/scraping/site-scrapers/scrapeGitBookPage.js.map +1 -0
  28. package/bin/scraping/site-scrapers/scrapeGitBookSection.js +74 -0
  29. package/bin/scraping/site-scrapers/scrapeGitBookSection.js.map +1 -0
  30. package/bin/{scrapeReadMePage.js → scraping/site-scrapers/scrapeReadMePage.js} +15 -9
  31. package/bin/scraping/site-scrapers/scrapeReadMePage.js.map +1 -0
  32. package/bin/scraping/site-scrapers/scrapeReadMeSection.js +48 -0
  33. package/bin/scraping/site-scrapers/scrapeReadMeSection.js.map +1 -0
  34. package/bin/util.js +27 -8
  35. package/bin/util.js.map +1 -1
  36. package/package.json +3 -2
  37. package/src/browser.ts +24 -0
  38. package/src/downloadImage.ts +35 -0
  39. package/src/index.ts +173 -22
  40. package/src/scraping/detectFramework.ts +31 -0
  41. package/src/scraping/downloadAllImages.ts +79 -0
  42. package/src/scraping/getSitemapLinks.ts +16 -0
  43. package/src/scraping/replaceImagePaths.ts +21 -0
  44. package/src/scraping/scrapeGettingFileNameFromUrl.ts +81 -0
  45. package/src/scraping/scrapePage.ts +24 -0
  46. package/src/scraping/scrapeSection.ts +16 -0
  47. package/src/scraping/site-scrapers/scrapeDocusaurusPage.ts +67 -0
  48. package/src/scraping/site-scrapers/scrapeDocusaurusSection.ts +80 -0
  49. package/src/{scrapeGitBookPage.ts → scraping/site-scrapers/scrapeGitBookPage.ts} +25 -5
  50. package/src/scraping/site-scrapers/scrapeGitBookSection.ts +116 -0
  51. package/src/{scrapeReadMePage.ts → scraping/site-scrapers/scrapeReadMePage.ts} +28 -10
  52. package/src/scraping/site-scrapers/scrapeReadMeSection.ts +77 -0
  53. package/src/util.ts +25 -7
  54. package/tsconfig.json +1 -1
  55. package/bin/scrapeGitBook.js +0 -28
  56. package/bin/scrapeGitBook.js.map +0 -1
  57. package/bin/scrapeGitBookPage.js.map +0 -1
  58. package/bin/scrapeReadMe.js +0 -60
  59. package/bin/scrapeReadMe.js.map +0 -1
  60. package/bin/scrapeReadMePage.js.map +0 -1
  61. package/src/scrapeReadMe.ts +0 -79
package/bin/{scrapeGitBookPage.js → scraping/site-scrapers/scrapeGitBookPage.js} RENAMED
@@ -1,18 +1,22 @@
-import axios from "axios";
 import cheerio from "cheerio";
 import { NodeHtmlMarkdown } from "node-html-markdown";
-export async function scrapeGitBookPage(url) {
-    const res = await axios.default.get(url);
-    const $ = cheerio.load(res.data);
+import downloadAllImages from "../downloadAllImages.js";
+import replaceImagePaths from "../replaceImagePaths.js";
+export async function scrapeGitBookPage(html, origin, cliDir, imageBaseDir) {
+    const $ = cheerio.load(html);
     const titleComponent = $('[data-testid="page.title"]').first();
     const titleAndDescription = titleComponent.parent().parent().parent().text();
-    console.log(titleAndDescription);
     const description = titleAndDescription
         .replace(titleComponent.text(), "")
         .trim();
     const title = titleComponent.text().trim();
     const content = $('[data-testid="page.contentEditor"]').first();
     const contentHtml = $.html(content);
+    const modifyFileName = (fileName) =>
+    // Remove GitBook metadata from the start
+    // The first four %2F split metadata fields. Remaining ones are part of the file name.
+    fileName.split("%2F").slice(4).join("%2F");
+    const origToWritePath = await downloadAllImages($, content, origin, imageBaseDir, modifyFileName);
     const nhm = new NodeHtmlMarkdown();
     let markdown = nhm.translate(contentHtml);
     // Keep headers on one line and increase their depth by one
@@ -23,6 +27,7 @@ export async function scrapeGitBookPage(url) {
     markdown = markdown.replace(/\n\n\n/g, "\n\n");
     // Mintlify doesn't support bolded headers, remove the asterisks
     markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, "$1 $2\n");
+    markdown = replaceImagePaths(origToWritePath, cliDir, markdown);
     return { title, description, markdown };
 }
 //# sourceMappingURL=scrapeGitBookPage.js.map
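
The practical effect of this rewrite is that the scraper no longer fetches anything itself; the caller supplies pre-fetched HTML plus the origin and output directories. A minimal sketch of the new call pattern, with a hypothetical URL and an assumed images directory (in the real CLI these values come from the entry point, and the HTML comes from Puppeteer because GitBook renders client-side):

    import axios from "axios";
    import { scrapeGitBookPage } from "./scraping/site-scrapers/scrapeGitBookPage.js";

    // Hypothetical page fetch; axios.default mirrors how the package calls axios.
    const href = "https://docs.example.com/getting-started";
    const res = await axios.default.get(href);
    const { title, description, markdown } = await scrapeGitBookPage(
      res.data,                    // html: the page markup to parse
      "https://docs.example.com",  // origin: used to resolve image URLs
      process.cwd(),               // cliDir: base for rewritten image paths (assumed)
      "images"                     // imageBaseDir: assumed download folder
    );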
package/bin/scraping/site-scrapers/scrapeGitBookPage.js.map ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"scrapeGitBookPage.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeGitBookPage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB;IAEpB,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,cAAc,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,KAAK,EAAE,CAAC;IAC/D,MAAM,mBAAmB,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC;IAE7E,MAAM,WAAW,GAAG,mBAAmB;SACpC,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC;SAClC,IAAI,EAAE,CAAC;IACV,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAE3C,MAAM,OAAO,GAAG,CAAC,CAAC,oCAAoC,CAAC,CAAC,KAAK,EAAE,CAAC;IAChE,MAAM,WAAW,GAAG,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAEpC,MAAM,cAAc,GAAG,CAAC,QAAQ,EAAE,EAAE;IAClC,yCAAyC;IACzC,sFAAsF;IACtF,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAE7C,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,OAAO,EACP,MAAM,EACN,YAAY,EACZ,cAAc,CACf,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,gBAAgB,EAAE,CAAC;IACnC,IAAI,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAE1C,2DAA2D;IAC3D,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IAE9C,qDAAqD;IACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAE3C,iCAAiC;IACjC,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,gEAAgE;IAChE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;IAEjE,QAAQ,GAAG,iBAAiB,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAEhE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC"}
package/bin/scraping/site-scrapers/scrapeGitBookSection.js ADDED
@@ -0,0 +1,74 @@
+import cheerio from "cheerio";
+import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js";
+import { getSitemapLinks } from "../getSitemapLinks.js";
+import { scrapeGitBookPage } from "./scrapeGitBookPage.js";
+export async function scrapeGitBookSection(html, origin, cliDir, overwrite) {
+    const $ = cheerio.load(html);
+    // Get all the navigation sections
+    const navigationSections = $('div[data-testid="page.desktopTableOfContents"] > div > div:first-child')
+        .children()
+        .first()
+        .children()
+        .first()
+        .children();
+    // Get all links per group
+    let allNavPathnames = [];
+    const groupsConfig = navigationSections
+        .map((i, section) => {
+        const sectionTitle = $(section)
+            .find('div > div[dir="auto"]')
+            .first()
+            .text();
+        const linkPaths = $(section)
+            .find("a[href]")
+            .map((i, link) => {
+            const linkHref = $(link).attr("href");
+            // Skip external links until Mintlify supports them
+            if (linkHref.startsWith("https://") ||
+                linkHref.startsWith("http://")) {
+                return undefined;
+            }
+            return linkHref;
+        })
+            .toArray();
+        allNavPathnames = allNavPathnames.concat(linkPaths);
+        // Follows the same structure as mint.json
+        return {
+            group: sectionTitle,
+            pages: linkPaths,
+        };
+    })
+        .toArray();
+    // Scrape every link not in the navigation. Nested docs
+    // don't show up in navigation without clicking buttons,
+    // so this lets us download the files for the user to add
+    // manually to mint.json.
+    const sitemapPaths = (await getSitemapLinks(new URL("sitemap.xml", origin)))
+        .map((sitemapLinks) => {
+        return new URL(sitemapLinks).pathname;
+    })
+        .filter((pathname) => !allNavPathnames.includes(pathname));
+    const sitemapPathnamesForConfig = [];
+    for (const pathname of sitemapPaths) {
+        sitemapPathnamesForConfig.push(await scrapeGettingFileNameFromUrl(cliDir, origin, pathname, overwrite, scrapeGitBookPage, true));
+    }
+    // Scrape each link in the navigation.
+    const groupsConfigCleanPaths = await Promise.all(groupsConfig.map(async (groupConfig) => {
+        const newPages = [];
+        for (const pathname of groupConfig.pages) {
+            newPages.push(await scrapeGettingFileNameFromUrl(cliDir, origin, pathname, overwrite, scrapeGitBookPage, true));
+        }
+        groupConfig.pages = newPages;
+        return groupConfig;
+    }));
+    if (sitemapPathnamesForConfig.length > 0) {
+        return groupsConfigCleanPaths.concat([
+            {
+                group: "ATTENTION! WE CANNOT DETECT GROUPS FOR NESTED DOCS. PLEASE MOVE THEM INTO THEIR ORIGINAL GROUPS.",
+                pages: sitemapPathnamesForConfig,
+            },
+        ]);
+    }
+    return groupsConfigCleanPaths;
+}
+//# sourceMappingURL=scrapeGitBookSection.js.map
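
For orientation, the value this function resolves to mirrors the navigation field of mint.json: an array of group objects, with the extra attention group appended when pages were found only via the sitemap. A sketch with illustrative values, not taken from a real site:

    const groupsConfig = [
      { group: "Getting Started", pages: ["introduction", "quickstart"] },
      {
        // Appended only when sitemapPathnamesForConfig is non-empty
        group: "ATTENTION! WE CANNOT DETECT GROUPS FOR NESTED DOCS. PLEASE MOVE THEM INTO THEIR ORIGINAL GROUPS.",
        pages: ["nested-page-found-only-in-the-sitemap"],
      },
    ];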
package/bin/scraping/site-scrapers/scrapeGitBookSection.js.map ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"scrapeGitBookSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeGitBookSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAE3D,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,SAAkB;IAElB,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,kCAAkC;IAClC,MAAM,kBAAkB,GAAG,CAAC,CAC1B,wEAAwE,CACzE;SACE,QAAQ,EAAE;SACV,KAAK,EAAE;SACP,QAAQ,EAAE;SACV,KAAK,EAAE;SACP,QAAQ,EAAE,CAAC;IAEd,0BAA0B;IAC1B,IAAI,eAAe,GAAG,EAAE,CAAC;IACzB,MAAM,YAAY,GAAG,kBAAkB;SACpC,GAAG,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;QAClB,MAAM,YAAY,GAAG,CAAC,CAAC,OAAO,CAAC;aAC5B,IAAI,CAAC,uBAAuB,CAAC;aAC7B,KAAK,EAAE;aACP,IAAI,EAAE,CAAC;QAEV,MAAM,SAAS,GAAG,CAAC,CAAC,OAAO,CAAC;aACzB,IAAI,CAAC,SAAS,CAAC;aACf,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;YACf,MAAM,QAAQ,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAEtC,mDAAmD;YACnD,IACE,QAAQ,CAAC,UAAU,CAAC,UAAU,CAAC;gBAC/B,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,EAC9B;gBACA,OAAO,SAAS,CAAC;aAClB;YAED,OAAO,QAAQ,CAAC;QAClB,CAAC,CAAC;aACD,OAAO,EAAE,CAAC;QAEb,eAAe,GAAG,eAAe,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAEpD,0CAA0C;QAC1C,OAAO;YACL,KAAK,EAAE,YAAY;YACnB,KAAK,EAAE,SAAS;SACjB,CAAC;IACJ,CAAC,CAAC;SACD,OAAO,EAAE,CAAC;IAEb,uDAAuD;IACvD,wDAAwD;IACxD,yDAAyD;IACzD,yBAAyB;IACzB,MAAM,YAAY,GAAG,CAAC,MAAM,eAAe,CAAC,IAAI,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC,CAAC;SACzE,GAAG,CAAC,CAAC,YAAoB,EAAE,EAAE;QAC5B,OAAO,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC,QAAQ,CAAC;IACxC,CAAC,CAAC;SACD,MAAM,CAAC,CAAC,QAAgB,EAAE,EAAE,CAAC,CAAC,eAAe,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC;IAErE,MAAM,yBAAyB,GAAG,EAAE,CAAC;IACrC,KAAK,MAAM,QAAQ,IAAI,YAAY,EAAE;QACnC,yBAAyB,CAAC,IAAI,CAC5B,MAAM,4BAA4B,CAChC,MAAM,EACN,MAAM,EACN,QAAQ,EACR,SAAS,EACT,iBAAiB,EACjB,IAAI,CACL,CACF,CAAC;KACH;IAED,sCAAsC;IACtC,MAAM,sBAAsB,GAAG,MAAM,OAAO,CAAC,GAAG,CAC9C,YAAY,CAAC,GAAG,CAAC,KAAK,EAAE,WAAW,EAAE,EAAE;QACrC,MAAM,QAAQ,GAAG,EAAE,CAAC;QACpB,KAAK,MAAM,QAAQ,IAAI,WAAW,CAAC,KAAK,EAAE;YACxC,QAAQ,CAAC,IAAI,CACX,MAAM,4BAA4B,CAChC,MAAM,EACN,MAAM,EACN,QAAQ,EACR,SAAS,EACT,iBAAiB,EACjB,IAAI,CACL,CACF,CAAC;SACH;QACD,WAAW,CAAC,KAAK,GAAG,QAAQ,CAAC;QAC7B,OAAO,WAAW,CAAC;IACrB,CAAC,CAAC,CACH,CAAC;IAEF,IAAI,yBAAyB,CAAC,MAAM,GAAG,CAAC,EAAE;QACxC,OAAO,sBAAsB,CAAC,MAAM,CAAC;YACnC;gBACE,KAAK,EACH,kGAAkG;gBACpG,KAAK,EAAE,yBAAyB;aACjC;SACF,CAAC,CAAC;KACJ;IAED,OAAO,sBAAsB,CAAC;AAChC,CAAC"}
package/bin/{scrapeReadMePage.js → scraping/site-scrapers/scrapeReadMePage.js} RENAMED
@@ -1,16 +1,21 @@
-import axios from "axios";
 import cheerio from "cheerio";
 import { NodeHtmlMarkdown } from "node-html-markdown";
-export async function scrapeReadMePage(url) {
-    const res = await axios.default.get(url);
-    const $ = cheerio.load(res.data);
+import downloadAllImages from "../downloadAllImages.js";
+import replaceImagePaths from "../replaceImagePaths.js";
+export async function scrapeReadMePage(html, origin, cliDir, imageBaseDir) {
+    const $ = cheerio.load(html);
     const titleComponent = $("h1").first();
     const title = titleComponent.text().trim();
-    const description = $(".markdown-body", titleComponent.parent())
-        .text()
-        .trim();
-    const content = $(".content-body .markdown-body").first();
-    const contentHtml = $.html(content);
+    let description = $(".markdown-body", titleComponent.parent()).text().trim();
+    if (!description) {
+        description = $(".rm-Article > header p").text().trim();
+    }
+    let content = $(".content-body .markdown-body").first();
+    if (content.length === 0) {
+        content = $(".rm-Article > .markdown-body");
+    }
+    const contentHtml = content.html();
+    const origToWritePath = await downloadAllImages($, content, origin, imageBaseDir);
     const nhm = new NodeHtmlMarkdown();
     let markdown = nhm.translate(contentHtml);
     // Keep headers on one line and increase their depth by one
@@ -23,6 +28,7 @@ export async function scrapeReadMePage(url) {
     markdown = markdown.replace(/\n\n\n/g, "\n\n");
     // Mintlify doesn't support bolded headers, remove the asterisks
     markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, "$1 $2\n");
+    markdown = replaceImagePaths(origToWritePath, cliDir, markdown);
     return { title, description, markdown };
 }
 //# sourceMappingURL=scrapeReadMePage.js.map
package/bin/scraping/site-scrapers/scrapeReadMePage.js.map ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"scrapeReadMePage.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeReadMePage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB;IAEpB,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,cAAc,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;IACvC,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAC3C,IAAI,WAAW,GAAG,CAAC,CAAC,gBAAgB,EAAE,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAC7E,IAAI,CAAC,WAAW,EAAE;QAChB,WAAW,GAAG,CAAC,CAAC,wBAAwB,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;KACzD;IAED,IAAI,OAAO,GAAG,CAAC,CAAC,8BAA8B,CAAC,CAAC,KAAK,EAAE,CAAC;IACxD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE;QACxB,OAAO,GAAG,CAAC,CAAC,8BAA8B,CAAC,CAAC;KAC7C;IACD,MAAM,WAAW,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;IAEnC,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,OAAO,EACP,MAAM,EACN,YAAY,CACb,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,gBAAgB,EAAE,CAAC;IACnC,IAAI,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAE1C,2DAA2D;IAC3D,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IAE9C,qDAAqD;IACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAE3C,6BAA6B;IAC7B,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,kBAAkB,EAAE,IAAI,CAAC,CAAC;IAEtD,iCAAiC;IACjC,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,gEAAgE;IAChE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;IAEjE,QAAQ,GAAG,iBAAiB,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAEhE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC"}
package/bin/scraping/site-scrapers/scrapeReadMeSection.js ADDED
@@ -0,0 +1,48 @@
+import cheerio from "cheerio";
+import { scrapeReadMePage } from "./scrapeReadMePage.js";
+import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js";
+export async function scrapeReadMeSection(html, origin, cliDir, overwrite) {
+    const $ = cheerio.load(html);
+    // Get all the navigation sections, but only from the first
+    // sidebar found. There are multiple in the HTML for mobile
+    // responsiveness but they all have the same links.
+    const navigationSections = $(".rm-Sidebar")
+        .first()
+        .find(".rm-Sidebar-section");
+    const groupsConfig = navigationSections
+        .map((i, section) => {
+        const sectionTitle = $(section).find("h3").first().text();
+        // Get all links, then use filter to remove duplicates.
+        // There are duplicates because of nested navigation, eg:
+        // subgroupTitle -> /first-page
+        // -- First Page -> /first-page ** DUPLICATE **
+        // -- Second Page -> /second-page
+        const linkPaths = $(section)
+            .find("a[href]")
+            .map((i, link) => {
+            const linkHref = $(link).attr("href");
+            // Skip external links until Mintlify supports them
+            if (linkHref.startsWith("https://") ||
+                linkHref.startsWith("http://")) {
+                return undefined;
+            }
+            return linkHref;
+        })
+            .toArray()
+            .filter((value, index, self) => self.indexOf(value) === index);
+        // Follows the same structure as mint.json
+        return {
+            group: sectionTitle,
+            pages: linkPaths,
+        };
+    })
+        .toArray();
+    return await Promise.all(groupsConfig.map(async (groupConfig) => {
+        groupConfig.pages = await Promise.all(groupConfig.pages.map(async (pathname) =>
+        // ReadMe requires a directory on all sections wheras we use root.
+        // /docs is their default directory so we remove it
+        scrapeGettingFileNameFromUrl(cliDir, origin, pathname, overwrite, scrapeReadMePage, false, "/docs")));
+        return groupConfig;
+    }));
+}
+//# sourceMappingURL=scrapeReadMeSection.js.map
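
The duplicate-removal idiom in the filter call above is easy to miss; in isolation it keeps only the first occurrence of each value:

    const paths = ["/first-page", "/first-page", "/second-page"];
    // indexOf returns the index of the FIRST occurrence, so later
    // duplicates fail the comparison and are dropped.
    const unique = paths.filter((value, index, self) => self.indexOf(value) === index);
    // unique: ["/first-page", "/second-page"]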
package/bin/scraping/site-scrapers/scrapeReadMeSection.js.map ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"scrapeReadMeSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeReadMeSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAElF,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,SAAkB;IAElB,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,2DAA2D;IAC3D,2DAA2D;IAC3D,mDAAmD;IACnD,MAAM,kBAAkB,GAAG,CAAC,CAAC,aAAa,CAAC;SACxC,KAAK,EAAE;SACP,IAAI,CAAC,qBAAqB,CAAC,CAAC;IAE/B,MAAM,YAAY,GAAG,kBAAkB;SACpC,GAAG,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;QAClB,MAAM,YAAY,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC;QAE1D,uDAAuD;QACvD,yDAAyD;QACzD,+BAA+B;QAC/B,iDAAiD;QACjD,iCAAiC;QACjC,MAAM,SAAS,GAAG,CAAC,CAAC,OAAO,CAAC;aACzB,IAAI,CAAC,SAAS,CAAC;aACf,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE;YACf,MAAM,QAAQ,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAEtC,mDAAmD;YACnD,IACE,QAAQ,CAAC,UAAU,CAAC,UAAU,CAAC;gBAC/B,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,EAC9B;gBACA,OAAO,SAAS,CAAC;aAClB;YAED,OAAO,QAAQ,CAAC;QAClB,CAAC,CAAC;aACD,OAAO,EAAE;aACT,MAAM,CACL,CAAC,KAAa,EAAE,KAAa,EAAE,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,KAAK,CACtE,CAAC;QAEJ,0CAA0C;QAC1C,OAAO;YACL,KAAK,EAAE,YAAY;YACnB,KAAK,EAAE,SAAS;SACjB,CAAC;IACJ,CAAC,CAAC;SACD,OAAO,EAAE,CAAC;IAEb,OAAO,MAAM,OAAO,CAAC,GAAG,CACtB,YAAY,CAAC,GAAG,CAAC,KAAK,EAAE,WAAW,EAAE,EAAE;QACrC,WAAW,CAAC,KAAK,GAAG,MAAM,OAAO,CAAC,GAAG,CACnC,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,QAAgB,EAAE,EAAE;QAC/C,kEAAkE;QAClE,mDAAmD;QACnD,4BAA4B,CAC1B,MAAM,EACN,MAAM,EACN,QAAQ,EACR,SAAS,EACT,gBAAgB,EAChB,KAAK,EACL,OAAO,CACR,CACF,CACF,CAAC;QACF,OAAO,WAAW,CAAC;IACrB,CAAC,CAAC,CACH,CAAC;AACJ,CAAC"}
package/bin/util.js CHANGED
@@ -1,11 +1,10 @@
 import { mkdirSync, writeFileSync } from "fs";
-import { Page } from "./templates.js";
 import path from "path";
-export function getWebsite(url) {
-    // Gets the website
+import { Page } from "./templates.js";
+export function getOrigin(url) {
     // eg. https://google.com -> https://google.com
     // https://google.com/page -> https://google.com
-    return url.split("/").slice(0, 3).join("/");
+    return new URL(url).origin;
 }
 export function objToReadableString(objs) {
     // Two spaces as indentation
@@ -26,12 +25,32 @@ export const addMdx = (fileName) => {
     }
     return fileName + ".mdx";
 };
-export const createPage = (title, description, markdown, rootDir = "", fileName) => {
+export const createPage = (title, description, markdown, overwrite = false, rootDir = "", fileName) => {
+    const writePath = path.join(rootDir, addMdx(fileName || toFilename(title)));
     // Create the folders needed if they're missing
     mkdirSync(rootDir, { recursive: true });
     // Write the page to memory
-    const writePath = path.join(rootDir, addMdx(fileName || toFilename(title)));
-    writeFileSync(writePath, Page(title, description, markdown));
-    console.log("✏️ - " + writePath);
+    if (overwrite) {
+        writeFileSync(writePath, Page(title, description, markdown));
+        console.log("✏️ - " + writePath);
+    }
+    else {
+        try {
+            writeFileSync(writePath, Page(title, description, markdown), {
+                flag: "wx",
+            });
+            console.log("✏️ - " + writePath);
+        }
+        catch (e) {
+            // We do a try-catch instead of an if-statement to avoid a race condition
+            // of the file being created after we started writing.
+            if (e.code === "EEXIST") {
+                console.log(`❌ Skipping existing file ${writePath}`);
+            }
+            else {
+                console.error(e);
+            }
+        }
+    }
 };
 //# sourceMappingURL=util.js.map
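
The new overwrite logic leans on Node's "wx" open flag, which makes the existence check and the write a single operation instead of a racy check-then-write. The flag's behavior in isolation:

    import { writeFileSync } from "fs";

    try {
      // "wx": create and write, but fail if the path already exists
      writeFileSync("page.mdx", "contents", { flag: "wx" });
    } catch (e: any) {
      // Node surfaces the pre-existing file as an EEXIST error
      if (e.code === "EEXIST") {
        console.log("file already exists, skipped");
      }
    }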
package/bin/util.js.map CHANGED
@@ -1 +1 @@
-{"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAC9C,OAAO,EAAE,IAAI,EAAE,MAAM,gBAAgB,CAAC;AACtC,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,MAAM,UAAU,UAAU,CAAC,GAAW;IACpC,mBAAmB;IACnB,+CAA+C;IAC/C,gDAAgD;IAChD,OAAO,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC9C,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,IAAc;IAChD,4BAA4B;IAC5B,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AACrE,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAAG,CAAC,KAAa,EAAE,EAAE;IAC1C,sDAAsD;IACtD,uDAAuD;IACvD,OAAO,KAAK;SACT,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC;SAC3B,IAAI,EAAE;SACN,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC;SAClB,WAAW,EAAE,CAAC;AACnB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,MAAM,GAAG,CAAC,QAAgB,EAAE,EAAE;IACzC,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE;QAC7B,OAAO,QAAQ,CAAC;KACjB;IACD,OAAO,QAAQ,GAAG,MAAM,CAAC;AAC3B,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,KAAa,EACb,WAAoB,EACpB,QAAiB,EACjB,UAAkB,EAAE,EACpB,QAAiB,EACjB,EAAE;IACF,+CAA+C;IAC/C,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAExC,2BAA2B;IAC3B,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,QAAQ,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC5E,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,KAAK,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC;IAC7D,OAAO,CAAC,GAAG,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC;AACnC,CAAC,CAAC"}
+{"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAC9C,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,IAAI,EAAE,MAAM,gBAAgB,CAAC;AAEtC,MAAM,UAAU,SAAS,CAAC,GAAW;IACnC,+CAA+C;IAC/C,gDAAgD;IAChD,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;AAC7B,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,IAAc;IAChD,4BAA4B;IAC5B,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AACrE,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAAG,CAAC,KAAa,EAAE,EAAE;IAC1C,sDAAsD;IACtD,uDAAuD;IACvD,OAAO,KAAK;SACT,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC;SAC3B,IAAI,EAAE;SACN,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC;SAClB,WAAW,EAAE,CAAC;AACnB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,MAAM,GAAG,CAAC,QAAgB,EAAE,EAAE;IACzC,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE;QAC7B,OAAO,QAAQ,CAAC;KACjB;IACD,OAAO,QAAQ,GAAG,MAAM,CAAC;AAC3B,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,KAAa,EACb,WAAoB,EACpB,QAAiB,EACjB,YAAqB,KAAK,EAC1B,UAAkB,EAAE,EACpB,QAAiB,EACjB,EAAE;IACF,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,QAAQ,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAE5E,+CAA+C;IAC/C,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAExC,2BAA2B;IAC3B,IAAI,SAAS,EAAE;QACb,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,KAAK,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC;QAC7D,OAAO,CAAC,GAAG,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC;KAClC;SAAM;QACL,IAAI;YACF,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,KAAK,EAAE,WAAW,EAAE,QAAQ,CAAC,EAAE;gBAC3D,IAAI,EAAE,IAAI;aACX,CAAC,CAAC;YACH,OAAO,CAAC,GAAG,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC;SAClC;QAAC,OAAO,CAAC,EAAE;YACV,yEAAyE;YACzE,sDAAsD;YACtD,IAAI,CAAC,CAAC,IAAI,KAAK,QAAQ,EAAE;gBACvB,OAAO,CAAC,GAAG,CAAC,4BAA4B,SAAS,EAAE,CAAC,CAAC;aACtD;iBAAM;gBACL,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;aAClB;SACF;KACF;AACH,CAAC,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "mintlify",
-  "version": "1.0.6",
+  "version": "1.0.7",
   "description": "Mintlify CLI",
   "engines": {
     "node": ">=14.16"
@@ -19,8 +19,9 @@
     "axios": "^0.27.2",
     "cheerio": "^0.22.0",
     "inquirer": "^9.1.0",
+    "minimist-lite": "^2.2.1",
     "node-html-markdown": "^1.2.0",
-    "puppeteer": "^17.1.2"
+    "puppeteer": "^17.1.3"
   },
   "devDependencies": {
     "@types/inquirer": "^9.0.1",
package/src/browser.ts ADDED
@@ -0,0 +1,24 @@
+import { launch } from "puppeteer";
+
+export async function startBrowser() {
+  try {
+    return await launch({
+      headless: true,
+      ignoreHTTPSErrors: true,
+    });
+  } catch (err) {
+    console.log("Could not create a browser instance: ", err);
+    process.exit(1);
+  }
+}
+
+export async function getHtmlWithPuppeteer(href: string) {
+  const browser = await startBrowser();
+  const page = await browser.newPage();
+  await page.goto(href, {
+    waitUntil: "networkidle2",
+  });
+  const html = await page.content();
+  browser.close();
+  return html;
+}
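
A minimal usage sketch with a hypothetical docs URL: the helper exists so scrapers can read markup that only appears after client-side rendering, which a plain HTTP GET would miss.

    import { getHtmlWithPuppeteer } from "./browser.js";

    // Renders the page in headless Chromium and returns the resulting markup.
    const html = await getHtmlWithPuppeteer("https://docs.example.com");
    console.log(`fetched ${html.length} characters of rendered HTML`);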
package/src/downloadImage.ts ADDED
@@ -0,0 +1,35 @@
+import { existsSync, mkdirSync, createWriteStream } from "fs";
+import path from "path";
+import axios from "axios";
+
+export default async function downloadImage(
+  imageSrc: string,
+  writePath: string
+) {
+  // Avoid unnecessary downloads
+  if (existsSync(writePath)) {
+    return Promise.reject({
+      code: "EEXIST",
+    });
+  }
+
+  // Create the folders needed if they're missing
+  mkdirSync(path.dirname(writePath), { recursive: true });
+
+  const writer = createWriteStream(writePath);
+
+  const response = await axios.default.get(imageSrc, {
+    responseType: "stream",
+  });
+
+  // wx prevents overwriting an image with the exact same name
+  // being created in the time we were downloading
+  response.data.pipe(writer, {
+    flag: "wx",
+  });
+
+  return new Promise((resolve, reject) => {
+    writer.on("finish", resolve);
+    writer.on("error", reject);
+  });
+}
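
Callers are expected to treat an EEXIST rejection as a skip rather than a failure, since the file already being on disk means no work is needed. A sketch with hypothetical paths:

    import downloadImage from "./downloadImage.js";

    try {
      await downloadImage(
        "https://docs.example.com/logo.png", // hypothetical image URL
        "images/logo.png"                    // hypothetical write path
      );
    } catch (err: any) {
      if (err.code === "EEXIST") {
        // The image is already on disk, so the download was skipped.
      } else {
        throw err;
      }
    }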
package/src/index.ts CHANGED
@@ -1,23 +1,37 @@
 #! /usr/bin/env node
 
+import axios from "axios";
 import { writeFileSync } from "fs";
 import inquirer from "inquirer";
+import minimistLite from "minimist-lite";
 import { MintConfig } from "./templates.js";
-import { createPage, toFilename, objToReadableString } from "./util.js";
-import { scrapeGitBookPage } from "./scrapeGitBookPage.js";
-import { scrapeReadMePage } from "./scrapeReadMePage.js";
-import { scrapeReadMe } from "./scrapeReadMe.js";
+import { scrapePage } from "./scraping/scrapePage.js";
+import { scrapeSection } from "./scraping/scrapeSection.js";
+import { createPage, toFilename, getOrigin } from "./util.js";
+import { scrapeDocusaurusPage } from "./scraping/site-scrapers/scrapeDocusaurusPage.js";
+import { scrapeDocusaurusSection } from "./scraping/site-scrapers/scrapeDocusaurusSection.js";
+import { scrapeGitBookPage } from "./scraping/site-scrapers/scrapeGitBookPage.js";
+import { scrapeGitBookSection } from "./scraping/site-scrapers/scrapeGitBookSection.js";
+import { scrapeReadMePage } from "./scraping/site-scrapers/scrapeReadMePage.js";
+import { scrapeReadMeSection } from "./scraping/site-scrapers/scrapeReadMeSection.js";
+import { detectFramework, Frameworks } from "./scraping/detectFramework.js";
+import { startBrowser, getHtmlWithPuppeteer } from "./browser.js";
 
-const args = process.argv.slice(2);
+const argv = minimistLite(process.argv.slice(2), {
+  boolean: ["overwrite"],
+  default: {
+    overwrite: false,
+  },
+});
 
-if (args.length === 0) {
+if (argv._.length === 0) {
   console.error(
     `No command specified. Here are is the list that you can use:\ninit: initialize a Mintlify documentation instance`
   );
   process.exit(1); //an error occurred
 }
 
-const command = args[0];
+const command = argv._[0];
 
 if (command === "init") {
   inquirer
@@ -55,7 +69,7 @@ if (command === "init") {
     .then((answers) => {
       const { name, color, ctaName, ctaUrl, title } = answers;
       writeFileSync(
-        "mint.config.json",
+        "mint.json",
         JSON.stringify(
          MintConfig(name, color, ctaName, ctaUrl, toFilename(title)),
          null,
@@ -100,26 +114,163 @@ if (command === "page") {
     });
 }
 
-if (command === "scrape-gitbook-page") {
-  const url = args[1];
-  const { title, description, markdown } = await scrapeGitBookPage(url);
-  createPage(title, description, markdown, process.cwd());
+function validateFramework(framework) {
+  if (!framework) {
+    console.log(
+      "Could not detect the framework automatically. Please use one of:"
+    );
+    console.log("scrape-page-docusaurus");
+    console.log("scrape-page-gitbook");
+    console.log("scrape-page-readme");
+    return process.exit(1);
+  }
+}
+
+async function scrapePageAutomatically() {
+  const href = argv._[1];
+  const res = await axios.default.get(href);
+  const html = res.data;
+  const framework = detectFramework(html);
+
+  validateFramework(framework);
+
+  console.log("Detected framework: " + framework);
+
+  if (framework === Frameworks.DOCUSAURUS) {
+    await scrapePageWrapper(scrapeDocusaurusPage);
+  } else if (framework === Frameworks.GITBOOK) {
+    await scrapePageWrapper(scrapeGitBookPage, true);
+  } else if (framework === Frameworks.README) {
+    await scrapePageWrapper(scrapeReadMePage);
+  }
+}
+
+async function scrapePageWrapper(scrapeFunc, puppeteer = false) {
+  const href = argv._[1];
+  let html;
+  if (puppeteer) {
+    html = await getHtmlWithPuppeteer(href);
+  } else {
+    const res = await axios.default.get(href);
+    html = res.data;
+  }
+  await scrapePage(scrapeFunc, href, html, argv.overwrite);
   process.exit(1);
 }
 
+if (command === "scrape-page") {
+  await scrapePageAutomatically();
+}
+
+if (command === "scrape-docusaurus-page") {
+  await scrapePageWrapper(scrapeDocusaurusPage);
+}
+
+if (command === "scrape-gitbook-page") {
+  await scrapePageWrapper(scrapeGitBookPage, true);
+}
+
 if (command === "scrape-readme-page") {
-  const url = args[1];
-  const { title, description, markdown } = await scrapeReadMePage(url);
-  createPage(title, description, markdown, process.cwd());
+  await scrapePageWrapper(scrapeReadMePage);
+}
+
+async function scrapeSectionAutomatically() {
+  const href = argv._[1];
+  const res = await axios.default.get(href);
+  const html = res.data;
+  const framework = detectFramework(html);
+
+  validateFramework(framework);
+
+  console.log("Detected framework: " + framework);
+
+  if (framework === Frameworks.DOCUSAURUS) {
+    await scrapeSectionAxiosWrapper(scrapeDocusaurusSection);
+  } else if (framework === Frameworks.GITBOOK) {
+    await scrapeSectionGitBookWrapper(scrapeGitBookSection);
+  } else if (framework === Frameworks.README) {
+    await scrapeSectionAxiosWrapper(scrapeReadMeSection);
+  }
+}
+
+async function scrapeSectionAxiosWrapper(scrapeFunc: any) {
+  const href = argv._[1];
+  const res = await axios.default.get(href);
+  const html = res.data;
+  await scrapeSection(scrapeFunc, html, getOrigin(href), argv.overwrite);
   process.exit(1);
 }
 
-if (command === "scrape-readme-section") {
-  const url = args[1];
-  console.log("Started scraping...");
-  const groupsConfig = await scrapeReadMe(url, process.cwd());
-  console.log("Finished scraping.");
-  console.log("Add the following to your navigation in mint.config.js:");
-  console.log(objToReadableString(groupsConfig));
+async function scrapeSectionGitBookWrapper(scrapeFunc: any) {
+  const href = argv._[1];
+
+  const browser = await startBrowser();
+  const page = await browser.newPage();
+  await page.goto(href, {
+    waitUntil: "networkidle2",
+  });
+
+  let prevEncountered = [];
+  let encounteredHref = ["fake"];
+
+  // Loop until we've encountered every link
+  while (!encounteredHref.every((href) => prevEncountered.includes(href))) {
+    prevEncountered = encounteredHref;
+    encounteredHref = await page.evaluate(
+      (encounteredHref) => {
+        const icons = Array.from(
+          document.querySelectorAll('path[d="M9 18l6-6-6-6"]')
+        );
+
+        const linksFound = [];
+        icons.forEach(async (icon: HTMLElement) => {
+          const toClick = icon.parentElement.parentElement;
+          const link = toClick.parentElement.parentElement;
+
+          // Skip icons not in the side navigation
+          if (!link.hasAttribute("href")) {
+            return;
+          }
+
+          const href = link.getAttribute("href");
+
+          // Should never occur but we keep it as a fail-safe
+          if (href.startsWith("https://") || href.startsWith("http://")) {
+            return;
+          }
+
+          // Click any links we haven't seen before
+          if (!encounteredHref.includes(href)) {
+            toClick.click();
+          }
+
+          linksFound.push(href);
+        });
+
+        return linksFound;
+      },
+      encounteredHref // Need to pass array into the browser
+    );
+  }
+
+  const html = await page.content();
+  browser.close();
+  await scrapeSection(scrapeFunc, html, getOrigin(href), argv.overwrite);
   process.exit(1);
 }
+
+if (command === "scrape-section") {
+  await scrapeSectionAutomatically();
+}
+
+if (command === "scrape-docusaurus-section") {
+  await scrapeSectionAxiosWrapper(scrapeDocusaurusSection);
+}
+
+if (command === "scrape-gitbook-section") {
+  await scrapeSectionGitBookWrapper(scrapeGitBookSection);
+}
+
+if (command === "scrape-readme-section") {
+  await scrapeSectionAxiosWrapper(scrapeReadMeSection);
+}
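
For reference, given an invocation like "mintlify scrape-section https://docs.example.com --overwrite" (hypothetical URL), minimist-lite parses process.argv.slice(2) into roughly this shape, which is what the command dispatch above reads from:

    // argv._ holds the positional arguments; declared booleans become flags.
    const argv = {
      _: ["scrape-section", "https://docs.example.com"],
      overwrite: true,
    };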
package/src/scraping/detectFramework.ts ADDED
@@ -0,0 +1,31 @@
+import cheerio from "cheerio";
+
+export enum Frameworks {
+  DOCUSAURUS = "DOCUSAURUS",
+  GITBOOK = "GITBOOK",
+  README = "README",
+}
+
+export function detectFramework(html) {
+  const $ = cheerio.load(html);
+  const docusaurusMeta = $('meta[name="generator"]');
+
+  if (
+    docusaurusMeta.length > 0 &&
+    docusaurusMeta.attr("content").includes("Docusaurus")
+  ) {
+    return Frameworks.DOCUSAURUS;
+  }
+
+  const isGitBook = $(".gitbook-root").length > 0;
+  if (isGitBook) {
+    return Frameworks.GITBOOK;
+  }
+
+  const isReadMe = $('meta[name="readme-deploy"]').length > 0;
+  if (isReadMe) {
+    return Frameworks.README;
+  }
+
+  return undefined;
+}
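
A short sketch of how the detector slots into a scrape, assuming a hypothetical URL: fetch the HTML once, classify it, then branch on the result the way index.ts does.

    import axios from "axios";
    import { detectFramework, Frameworks } from "./scraping/detectFramework.js";

    const res = await axios.default.get("https://docs.example.com");
    const framework = detectFramework(res.data);

    if (framework === Frameworks.GITBOOK) {
      // GitBook renders client-side, so index.ts refetches with Puppeteer here.
    } else if (framework === undefined) {
      // Unrecognized site; the CLI prints the framework-specific commands and exits.
    }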