mintlify 3.0.7 → 3.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/browser.js +24 -0
- package/bin/browser.js.map +1 -0
- package/bin/constants.js +32 -0
- package/bin/constants.js.map +1 -0
- package/bin/downloadImage.js +85 -0
- package/bin/downloadImage.js.map +1 -0
- package/bin/index.js +49 -0
- package/bin/index.js.map +1 -0
- package/bin/local-preview/helper-commands/installDepsCommand.js +12 -0
- package/bin/local-preview/helper-commands/installDepsCommand.js.map +1 -0
- package/bin/local-preview/index.js +154 -0
- package/bin/local-preview/index.js.map +1 -0
- package/bin/local-preview/listener/categorize.js +95 -0
- package/bin/local-preview/listener/categorize.js.map +1 -0
- package/bin/local-preview/listener/categorizeFiles.js +47 -0
- package/bin/local-preview/listener/categorizeFiles.js.map +1 -0
- package/bin/local-preview/listener/generate.js +89 -0
- package/bin/local-preview/listener/generate.js.map +1 -0
- package/bin/local-preview/listener/index.js +200 -0
- package/bin/local-preview/listener/index.js.map +1 -0
- package/bin/local-preview/listener/update.js +24 -0
- package/bin/local-preview/listener/update.js.map +1 -0
- package/bin/local-preview/listener/utils/createPage.js +167 -0
- package/bin/local-preview/listener/utils/createPage.js.map +1 -0
- package/bin/local-preview/listener/utils/fileIsMdxOrMd.js +12 -0
- package/bin/local-preview/listener/utils/fileIsMdxOrMd.js.map +1 -0
- package/bin/local-preview/listener/utils/getOpenApiContext.js +57 -0
- package/bin/local-preview/listener/utils/getOpenApiContext.js.map +1 -0
- package/bin/local-preview/listener/utils/mintConfigFile.js +22 -0
- package/bin/local-preview/listener/utils/mintConfigFile.js.map +1 -0
- package/bin/local-preview/listener/utils/toTitleCase.js +36 -0
- package/bin/local-preview/listener/utils/toTitleCase.js.map +1 -0
- package/bin/local-preview/listener/utils/types.js +2 -0
- package/bin/local-preview/listener/utils/types.js.map +1 -0
- package/bin/local-preview/listener/utils.js +67 -0
- package/bin/local-preview/listener/utils.js.map +1 -0
- package/bin/local-preview/utils/categorizeFiles.js +63 -0
- package/bin/local-preview/utils/categorizeFiles.js.map +1 -0
- package/bin/local-preview/utils/getOpenApiContext.js +58 -0
- package/bin/local-preview/utils/getOpenApiContext.js.map +1 -0
- package/bin/local-preview/utils/injectFavicons.js +72 -0
- package/bin/local-preview/utils/injectFavicons.js.map +1 -0
- package/bin/local-preview/utils/listener.js +116 -0
- package/bin/local-preview/utils/listener.js.map +1 -0
- package/bin/local-preview/utils/metadata.js +118 -0
- package/bin/local-preview/utils/metadata.js.map +1 -0
- package/bin/local-preview/utils/mintConfigFile.js +43 -0
- package/bin/local-preview/utils/mintConfigFile.js.map +1 -0
- package/bin/local-preview/utils/openApiCheck.js +15 -0
- package/bin/local-preview/utils/openApiCheck.js.map +1 -0
- package/bin/local-preview/utils/slugToTitle.js +8 -0
- package/bin/local-preview/utils/slugToTitle.js.map +1 -0
- package/bin/navigation.js +4 -0
- package/bin/navigation.js.map +1 -0
- package/bin/pageTemplate.js +30 -0
- package/bin/pageTemplate.js.map +1 -0
- package/bin/scraping/combineNavWithEmptyGroupTitles.js +20 -0
- package/bin/scraping/combineNavWithEmptyGroupTitles.js.map +1 -0
- package/bin/scraping/detectFramework.js +39 -0
- package/bin/scraping/detectFramework.js.map +1 -0
- package/bin/scraping/downloadAllImages.js +33 -0
- package/bin/scraping/downloadAllImages.js.map +1 -0
- package/bin/scraping/downloadLogoImage.js +13 -0
- package/bin/scraping/downloadLogoImage.js.map +1 -0
- package/bin/scraping/getSitemapLinks.js +18 -0
- package/bin/scraping/getSitemapLinks.js.map +1 -0
- package/bin/scraping/replaceImagePaths.js +17 -0
- package/bin/scraping/replaceImagePaths.js.map +1 -0
- package/bin/scraping/scrapeFileGettingFileNameFromUrl.js +43 -0
- package/bin/scraping/scrapeFileGettingFileNameFromUrl.js.map +1 -0
- package/bin/scraping/scrapeGettingFileNameFromUrl.js +13 -0
- package/bin/scraping/scrapeGettingFileNameFromUrl.js.map +1 -0
- package/bin/scraping/scrapePage.js +10 -0
- package/bin/scraping/scrapePage.js.map +1 -0
- package/bin/scraping/scrapePageCommands.js +55 -0
- package/bin/scraping/scrapePageCommands.js.map +1 -0
- package/bin/scraping/scrapeSection.js +12 -0
- package/bin/scraping/scrapeSection.js.map +1 -0
- package/bin/scraping/scrapeSectionCommands.js +66 -0
- package/bin/scraping/scrapeSectionCommands.js.map +1 -0
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js +27 -0
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js.map +1 -0
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js +32 -0
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js.map +1 -0
- package/bin/scraping/site-scrapers/alternateGroupTitle.js +9 -0
- package/bin/scraping/site-scrapers/alternateGroupTitle.js.map +1 -0
- package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js +34 -0
- package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js.map +1 -0
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js +38 -0
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js.map +1 -0
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js +38 -0
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js.map +1 -0
- package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js +30 -0
- package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js.map +1 -0
- package/bin/scraping/site-scrapers/openNestedGitbookMenus.js +17 -0
- package/bin/scraping/site-scrapers/openNestedGitbookMenus.js.map +1 -0
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js +49 -0
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js.map +1 -0
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js +30 -0
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js.map +1 -0
- package/bin/scraping/site-scrapers/scrapeGitBookPage.js +47 -0
- package/bin/scraping/site-scrapers/scrapeGitBookPage.js.map +1 -0
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js +52 -0
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js.map +1 -0
- package/bin/scraping/site-scrapers/scrapeReadMePage.js +36 -0
- package/bin/scraping/site-scrapers/scrapeReadMePage.js.map +1 -0
- package/bin/scraping/site-scrapers/scrapeReadMeSection.js +44 -0
- package/bin/scraping/site-scrapers/scrapeReadMeSection.js.map +1 -0
- package/bin/util.js +129 -0
- package/bin/util.js.map +1 -0
- package/bin/validation/isValidLink.js +11 -0
- package/bin/validation/isValidLink.js.map +1 -0
- package/bin/validation/stopIfInvalidLink.js +9 -0
- package/bin/validation/stopIfInvalidLink.js.map +1 -0
- package/package.json +2 -2
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import cheerio from "cheerio";
|
|
2
|
+
/**
 * Documentation frameworks that the scraper can identify.
 * Every member maps its own name to an identical string value, so members
 * can be compared and printed directly.
 */
export var Frameworks;
(function (members) {
  for (const label of ["DOCUSAURUS", "GITBOOK", "README", "INTERCOM"]) {
    members[label] = label;
  }
})(Frameworks || (Frameworks = {}));
|
|
9
|
+
/**
 * Inspects a page's HTML and reports which documentation framework produced it.
 *
 * Probes are tried in this order: Docusaurus (via the `generator` meta tag),
 * GitBook (`.gitbook-root` element), ReadMe (`readme-deploy` meta tag) and
 * Intercom (`intercom:trackingEvent` meta tag).
 *
 * @param {string} html - Markup handed to `cheerio.load`.
 * @returns {{ framework: string, version?: string } | undefined} The detected
 *   framework (with a `version` of "1", "2" or "3" for Docusaurus), or
 *   `undefined` when no probe matches.
 */
export function detectFramework(html) {
  const $ = cheerio.load(html);

  // A generator tag can exist without a `content` attribute, in which case
  // `.attr()` yields undefined. Fall back to "" so the probes below never
  // call `.includes` on undefined.
  const generator = $('meta[name="generator"]').attr("content") ?? "";
  if (generator.includes("Docusaurus")) {
    if (generator.includes("v3")) {
      return { framework: Frameworks.DOCUSAURUS, version: "3" };
    }
    if (generator.includes("v2")) {
      return { framework: Frameworks.DOCUSAURUS, version: "2" };
    }
    if (generator.includes("v1")) {
      console.warn("WARNING: We detected Docusaurus version 1 but we only support scraping versions 2 and 3.");
      return { framework: Frameworks.DOCUSAURUS, version: "1" };
    }
    // Docusaurus with an unrecognised version string: keep probing below.
  }

  if ($(".gitbook-root").length > 0) {
    return { framework: Frameworks.GITBOOK };
  }

  if ($('meta[name="readme-deploy"]').length > 0) {
    return { framework: Frameworks.README };
  }

  if ($("meta[name='intercom:trackingEvent']").length > 0) {
    return { framework: Frameworks.INTERCOM };
  }

  return undefined;
}
|
|
39
|
+
//# sourceMappingURL=detectFramework.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"detectFramework.js","sourceRoot":"","sources":["../../src/scraping/detectFramework.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,MAAM,CAAN,IAAY,UAKX;AALD,WAAY,UAAU;IACpB,uCAAyB,CAAA;IACzB,iCAAmB,CAAA;IACnB,+BAAiB,CAAA;IACjB,mCAAqB,CAAA;AACvB,CAAC,EALW,UAAU,KAAV,UAAU,QAKrB;AAED,MAAM,UAAU,eAAe,CAAC,IAAI;IAClC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,cAAc,GAAG,CAAC,CAAC,wBAAwB,CAAC,CAAC;IAEnD,IACE,cAAc,CAAC,MAAM,GAAG,CAAC;QACzB,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC,EACrD;QACA,IAAI,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE;YACjD,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,UAAU,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC;SAC3D;QACD,IAAI,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE;YACjD,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,UAAU,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC;SAC3D;aAAM,IAAI,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE;YACxD,OAAO,CAAC,IAAI,CACV,0FAA0F,CAC3F,CAAC;YACF,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,UAAU,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC;SAC3D;KACF;IAED,MAAM,SAAS,GAAG,CAAC,CAAC,eAAe,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAChD,IAAI,SAAS,EAAE;QACb,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,OAAO,EAAE,CAAC;KAC1C;IAED,MAAM,QAAQ,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAC5D,IAAI,QAAQ,EAAE;QACZ,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC;KACzC;IAED,MAAM,UAAU,GAAG,CAAC,CAAC,qCAAqC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IACvE,IAAI,UAAU,EAAE;QACd,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,QAAQ,EAAE,CAAC;KAC3C;IAED,OAAO,SAAS,CAAC;AACnB,CAAC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import path from "path";
|
|
2
|
+
import downloadImage, { cleanImageSrc, isValidImageSrc, removeMetadataFromImageSrc, } from "../downloadImage.js";
|
|
3
|
+
// To Do: Use CheerioElement instead of any when we bump the cheerio version
|
|
4
|
+
/**
 * Downloads every distinct image referenced by `<img src>` inside `content`
 * and reports where each one was written.
 *
 * @param $ - Cheerio instance used to wrap each matched image element.
 * @param content - Cheerio selection that is searched for `img[src]`.
 * @param origin - Passed to `cleanImageSrc` together with each image source.
 * @param baseDir - Directory images are written into; when falsy nothing is downloaded.
 * @param overwrite - Forwarded to `downloadImage`.
 * @param modifyFileName - Optional callback that maps the computed file name to the one used on disk.
 * @param skipValidateImageExtension - Forwarded to `isValidImageSrc`.
 * @returns An object mapping each original `src` to its write path, or
 *   `undefined` when `baseDir` is falsy.
 */
export default async function downloadAllImages($, content, origin, baseDir, overwrite, modifyFileName, skipValidateImageExtension) {
  if (!baseDir) {
    console.debug("Skipping image downloading");
    return;
  }

  // Some frameworks emit the same <img> twice (for example a zoomed copy shown
  // on click), so collapse repeated sources before downloading.
  const allSources = content
    .find("img[src]")
    .map((i, image) => $(image).attr("src"))
    .toArray();
  const uniqueSources = Array.from(new Set(allSources));

  // Downloads one image; resolves to a single-entry mapping, or undefined
  // when the source fails validation.
  const fetchOne = async (imageSrc) => {
    if (!isValidImageSrc(imageSrc, skipValidateImageExtension)) {
      return undefined;
    }
    const imageHref = cleanImageSrc(imageSrc, origin);
    const derivedName = removeMetadataFromImageSrc(path.basename(imageHref));
    const fileName = modifyFileName ? modifyFileName(derivedName) : derivedName;
    const writePath = path.join(baseDir, fileName);
    await downloadImage(imageHref, writePath, overwrite);
    return { [imageSrc]: writePath };
  };

  // All downloads are started together and awaited before continuing.
  const mappings = await Promise.all(uniqueSources.map((src) => fetchOne(src)));

  const origToNew = {};
  for (const mapping of mappings) {
    Object.assign(origToNew, mapping);
  }
  return origToNew;
}
|
|
33
|
+
//# sourceMappingURL=downloadAllImages.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"downloadAllImages.js","sourceRoot":"","sources":["../../src/scraping/downloadAllImages.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,aAAa,EAAE,EACpB,aAAa,EACb,eAAe,EACf,0BAA0B,GAC3B,MAAM,qBAAqB,CAAC;AAE7B,4EAA4E;AAC5E,MAAM,CAAC,OAAO,CAAC,KAAK,UAAU,iBAAiB,CAC7C,CAAM,EACN,OAAY,EACZ,MAAc,EACd,OAAe,EACf,SAAkB,EAClB,cAAoB,EACpB,0BAAoC;IAEpC,IAAI,CAAC,OAAO,EAAE;QACZ,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAC5C,OAAO;KACR;IAED,kEAAkE;IAClE,4CAA4C;IAC5C,MAAM,SAAS,GAAG;QAChB,GAAG,IAAI,GAAG,CACR,OAAO;aACJ,IAAI,CAAC,UAAU,CAAC;aAChB,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;aACvC,OAAO,EAAE,CACb;KACF,CAAC;IAEF,mDAAmD;IACnD,MAAM,cAAc,GAAG,MAAM,OAAO,CAAC,GAAG,CACtC,SAAS,CAAC,GAAG,CAAC,KAAK,EAAE,QAAgB,EAAE,EAAE;QACvC,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,0BAA0B,CAAC,EAAE;YAC1D,OAAO;SACR;QAED,MAAM,SAAS,GAAG,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QAElD,IAAI,QAAQ,GAAG,0BAA0B,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;QACpE,IAAI,cAAc,EAAE;YAClB,QAAQ,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;SACrC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;QAE/C,MAAM,aAAa,CAAC,SAAS,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;QAErD,OAAO,EAAE,CAAC,QAAQ,CAAC,EAAE,SAAS,EAAE,CAAC;IACnC,CAAC,CAAC,CACH,CAAC;IAEF,OAAO,cAAc,CAAC,MAAM,CAC1B,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EACnD,EAAE,CACH,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import path from "path";
|
|
2
|
+
import downloadImage, { cleanImageSrc, isValidImageSrc, removeMetadataFromImageSrc, } from "../downloadImage.js";
|
|
3
|
+
import { getFileExtension } from "../util.js";
|
|
4
|
+
// To Do: Use CheerioElement instead of any when we bump the cheerio version
|
|
5
|
+
/**
 * Downloads a site's logo to `<imageBaseDir>/logo/logo-light-mode.<ext>`.
 *
 * @param imageSrc - Logo source as found on the page; also used to derive the file extension.
 * @param imageBaseDir - Directory under which the `logo` folder is placed.
 * @param origin - Passed to `cleanImageSrc` together with `imageSrc`.
 * @param overwrite - Forwarded to `downloadImage`.
 * @param skipValidateImageExtension - Forwarded to `isValidImageSrc`.
 * @returns A promise resolving to `undefined`; nothing is downloaded when
 *   `imageSrc` fails validation.
 */
export default async function downloadLogoImage(imageSrc, imageBaseDir, origin, overwrite, skipValidateImageExtension) {
  const usable = isValidImageSrc(imageSrc, skipValidateImageExtension);
  if (!usable) {
    return;
  }

  const imageHref = cleanImageSrc(imageSrc, origin);
  const extension = getFileExtension(removeMetadataFromImageSrc(imageSrc));
  const logoFileName = "logo-light-mode." + extension;
  const destination = path.join(imageBaseDir, "logo", logoFileName);

  await downloadImage(imageHref, destination, overwrite);
}
|
|
13
|
+
//# sourceMappingURL=downloadLogoImage.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"downloadLogoImage.js","sourceRoot":"","sources":["../../src/scraping/downloadLogoImage.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,aAAa,EAAE,EACpB,aAAa,EACb,eAAe,EACf,0BAA0B,GAC3B,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAE9C,4EAA4E;AAC5E,MAAM,CAAC,OAAO,CAAC,KAAK,UAAU,iBAAiB,CAC7C,QAAgB,EAChB,YAAoB,EACpB,MAAc,EACd,SAAkB,EAClB,0BAAoC;IAEpC,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,0BAA0B,CAAC;QAAE,OAAO;IAEnE,MAAM,SAAS,GAAG,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAElD,MAAM,GAAG,GAAG,gBAAgB,CAAC,0BAA0B,CAAC,QAAQ,CAAC,CAAC,CAAC;IACnE,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,EAAE,kBAAkB,GAAG,GAAG,CAAC,CAAC;IAE5E,MAAM,aAAa,CAAC,SAAS,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;AACvD,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import axios from "axios";
|
|
2
|
+
// Not in use.
|
|
3
|
+
// Gets all links in a sitemap.
|
|
4
|
+
/**
 * Fetches a sitemap and extracts every link that belongs to the sitemap's own host.
 *
 * Links are found by matching `http(s)://<hostname>…` up to the next `</loc>`.
 *
 * @param {URL} url - Location of the sitemap; `hostname` and `href` are read from it.
 * @returns {Promise<string[]>} The matched links, or an empty array when there
 *   are none or when fetching/matching fails (the error is logged).
 */
export const getSitemapLinks = async (url) => {
  // Escape every regex metacharacter. Escaping only the first "." would leave
  // the remaining dots as wildcards (e.g. "docs.example.com" would also match
  // "docs.exampleXcom").
  const hostname = url.hostname.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const regex = new RegExp(`https?://${hostname}.+?(?=</loc>)`, "gmi");
  try {
    const indexData = (await axios.get(url.href)).data;
    const array = indexData.match(regex);
    return array || [];
  } catch (err) {
    // Best effort: a missing or unreadable sitemap is reported, not fatal.
    console.error(err);
    console.log("Skipping sitemap links because we encountered an error.");
    return [];
  }
};
|
|
18
|
+
//# sourceMappingURL=getSitemapLinks.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"getSitemapLinks.js","sourceRoot":"","sources":["../../src/scraping/getSitemapLinks.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,cAAc;AACd,+BAA+B;AAC/B,MAAM,CAAC,MAAM,eAAe,GAAG,KAAK,EAAE,GAAQ,EAAE,EAAE;IAChD,MAAM,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAClD,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,cAAc,QAAQ,gBAAgB,EAAE,KAAK,CAAC,CAAC;IAExE,IAAI;QACF,MAAM,SAAS,GAAG,CAAC,MAAM,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,IAAc,CAAC;QAC7D,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,KAAK,CAAoB,CAAC;QACxD,OAAO,KAAK,IAAI,EAAE,CAAC;KACpB;IAAC,OAAO,GAAG,EAAE;QACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACnB,OAAO,CAAC,GAAG,CAAC,yDAAyD,CAAC,CAAC;QACvE,OAAO,EAAE,CAAC;KACX;AACH,CAAC,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
 * Rewrites image references in `markdown` so they point at downloaded copies.
 *
 * @param {Record<string, string> | null | undefined} origToWritePath - Maps each
 *   original image reference to the path it was written to. When null or
 *   undefined the markdown is returned untouched.
 * @param {string} cliDir - Prefix that is stripped from write paths starting
 *   with it, so the resulting paths are relative to that folder.
 * @param {string} markdown - Text in which the references are replaced.
 * @returns {string} The markdown with every occurrence of each original
 *   reference replaced.
 */
export default function replaceImagePaths(origToWritePath, cliDir, markdown) {
  if (origToWritePath == null) {
    return markdown;
  }

  let rewritten = markdown;
  for (const [origHref, writePath] of Object.entries(origToWritePath)) {
    // Use relative paths within the folder we are in.
    const replacement = writePath.startsWith(cliDir)
      ? writePath.slice(cliDir.length)
      : writePath;
    // A replacer function inserts the path verbatim. A plain string would have
    // "$&", "$$", "$1", ... expanded as replacement patterns, corrupting file
    // names that contain "$".
    rewritten = rewritten.replaceAll(origHref, () => replacement);
  }
  return rewritten;
}
|
|
17
|
+
//# sourceMappingURL=replaceImagePaths.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"replaceImagePaths.js","sourceRoot":"","sources":["../../src/scraping/replaceImagePaths.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,OAAO,UAAU,iBAAiB,CACvC,eAAuB,EACvB,MAAc,EACd,QAAgB;IAEhB,IAAI,eAAe,IAAI,IAAI,EAAE;QAC3B,OAAO,QAAQ,CAAC;KACjB;IAED,qDAAqD;IACrD,KAAK,MAAM,CAAC,QAAQ,EAAE,SAAS,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,EAAE;QACnE,iDAAiD;QACjD,IAAI,SAAS,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE;YAChC,QAAQ,GAAG,QAAQ,CAAC,UAAU,CAAC,QAAQ,EAAE,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;SAC1E;aAAM;YACL,QAAQ,GAAG,QAAQ,CAAC,UAAU,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;SACrD;KACF;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import path from "path";
|
|
2
|
+
import axios from "axios";
|
|
3
|
+
import { getHtmlWithPuppeteer } from "../browser.js";
|
|
4
|
+
import { createPage } from "../util.js";
|
|
5
|
+
/**
 * Scrapes one page and writes it to a file whose location mirrors the page's URL path.
 *
 * @param {string} pathname - Path of the page relative to `origin`. Absolute
 *   `http://` / `https://` links are treated as external and returned untouched.
 * @param {string} cliDir - Root directory that pages and images are written under.
 * @param {string} origin - Base used to resolve `pathname` into a full URL.
 * @param {boolean} overwrite - Forwarded to `scrapePageFunc` and `createPage`.
 * @param {Function} scrapePageFunc - Called as
 *   `(html, origin, cliDir, imageBaseDir, overwrite, version)` and expected to
 *   resolve to `{ title, description, markdown }`.
 * @param {boolean} [puppeteer=false] - Fetch the HTML with `getHtmlWithPuppeteer`
 *   instead of `axios.get`.
 * @param {string} [version] - Forwarded to `scrapePageFunc`.
 * @param {string} [baseToRemove] - Prefix removed from the folder part of the path.
 * @returns {Promise<string | undefined>} The page path without a leading slash,
 *   the original link for external URLs, or `undefined` when the page had
 *   neither title nor markdown.
 */
export async function scrapeFileGettingFileNameFromUrl(pathname, cliDir, origin, overwrite, scrapePageFunc, puppeteer = false, version, baseToRemove) {
  // Skip scraping external links
  if (pathname.startsWith("https://") || pathname.startsWith("http://")) {
    return pathname;
  }

  // Everything before the final segment is the folder part.
  const splitSubpath = pathname.split("/");
  let folders = splitSubpath.slice(0, -1).join("/");

  // Remove base dir if passed in
  if (baseToRemove && folders.startsWith(baseToRemove)) {
    folders = folders.slice(baseToRemove.length);
  }

  // TO DO: Improve this by putting each page's images in a separate
  // folder named after the title of the page.
  const imageBaseDir = path.join(cliDir, "images", folders);

  // Scrape each page separately
  const href = new URL(pathname, origin).href;
  let html;
  if (puppeteer) {
    html = await getHtmlWithPuppeteer(href);
  } else {
    const res = await axios.get(href);
    html = res.data;
  }

  const { title, description, markdown } = await scrapePageFunc(html, origin, cliDir, imageBaseDir, overwrite, version);

  // Check if page didn't have content
  if (!title && !markdown) {
    return undefined;
  }

  const newFileLocation = folders ? path.join(cliDir, folders) : cliDir;

  // Default to "introduction" when the path ends in a slash (no file name).
  const fileName = splitSubpath[splitSubpath.length - 1] || "introduction";

  // Will create subfolders as needed
  createPage(title, description, markdown, overwrite, newFileLocation, fileName);

  if (!folders) {
    return fileName;
  }
  // Mintlify doesn't need the leading slash. Strip it only when it is actually
  // there: the folder part has none for relative pathnames or when
  // `baseToRemove` ends in "/", and blindly dropping the first character would
  // eat a real letter.
  return path.join(folders, fileName).replace(/^[\\/]/, "");
}
|
|
43
|
+
//# sourceMappingURL=scrapeFileGettingFileNameFromUrl.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scrapeFileGettingFileNameFromUrl.js","sourceRoot":"","sources":["../../src/scraping/scrapeFileGettingFileNameFromUrl.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,CAAC,KAAK,UAAU,gCAAgC,CACpD,QAAgB,EAChB,MAAc,EACd,MAAc,EACd,SAAkB,EAClB,cAWE,EACF,SAAS,GAAG,KAAK,EACjB,OAA2B,EAC3B,YAAqB;IAErB,+BAA+B;IAC/B,IAAI,QAAQ,CAAC,UAAU,CAAC,UAAU,CAAC,IAAI,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE;QACrE,OAAO,QAAQ,CAAC;KACjB;IAED,iCAAiC;IACjC,MAAM,YAAY,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACzC,IAAI,OAAO,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAEvE,+BAA+B;IAC/B,IAAI,YAAY,IAAI,OAAO,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE;QACpD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;KAC7C;IAED,kEAAkE;IAClE,4CAA4C;IAC5C,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;IAE1D,8BAA8B;IAC9B,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC;IAC5C,IAAI,IAAY,CAAC;IACjB,IAAI,SAAS,EAAE;QACb,IAAI,GAAG,MAAM,oBAAoB,CAAC,IAAI,CAAC,CAAC;KACzC;SAAM;QACL,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAClC,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;KACjB;IAED,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,GAAG,MAAM,cAAc,CAC3D,IAAI,EACJ,MAAM,EACN,MAAM,EACN,YAAY,EACZ,SAAS,EACT,OAAO,CACR,CAAC;IAEF,oCAAoC;IACpC,IAAI,CAAC,KAAK,IAAI,CAAC,QAAQ,EAAE;QACvB,OAAO,SAAS,CAAC;KAClB;IAED,MAAM,eAAe,GAAG,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAEtE,2DAA2D;IAC3D,MAAM,QAAQ,GAAG,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,IAAI,cAAc,CAAC;IAEzE,mCAAmC;IACnC,UAAU,CACR,KAAK,EACL,WAAW,EACX,QAAQ,EACR,SAAS,EACT,eAAe,EACf,QAAQ,CACT,CAAC;IAEF,sEAAsE;IACtE,OAAO,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;AACxE,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { scrapeFileGettingFileNameFromUrl } from "./scrapeFileGettingFileNameFromUrl.js";
|
|
2
|
+
/**
 * Walks a navigation entry and scrapes every page it contains.
 *
 * A string entry is scraped directly through `scrapeFileGettingFileNameFromUrl`.
 * Any other entry is treated as a group: each item of its `pages` is processed
 * recursively, one after another, and `pages` is then replaced with the results.
 *
 * @param navEntry - A page path, or a group object with a `pages` list (mutated in place).
 * @param cliDir - Forwarded unchanged to the page scraper.
 * @param origin - Forwarded unchanged to the page scraper.
 * @param overwrite - Forwarded unchanged to the page scraper.
 * @param scrapePageFunc - Forwarded unchanged to the page scraper.
 * @param puppeteer - Forwarded unchanged to the page scraper (defaults to false).
 * @param version - Forwarded unchanged to the page scraper.
 * @param baseToRemove - Forwarded unchanged to the page scraper.
 * @returns The page scraper's result for a string entry, otherwise the same
 *   `navEntry` object with its `pages` rewritten.
 */
export async function scrapeGettingFileNameFromUrl(navEntry, cliDir, origin, overwrite, scrapePageFunc, puppeteer = false, version, baseToRemove) {
  const forwarded = [cliDir, origin, overwrite, scrapePageFunc, puppeteer, version, baseToRemove];

  // Leaf entry: a single page path.
  if (typeof navEntry === "string") {
    return await scrapeFileGettingFileNameFromUrl(navEntry, ...forwarded);
  }

  // Group entry: scrape children sequentially, preserving their order.
  const scrapedPages = [];
  for (const child of navEntry.pages) {
    const scraped = await scrapeGettingFileNameFromUrl(child, ...forwarded);
    scrapedPages.push(scraped);
  }
  navEntry.pages = scrapedPages;
  return navEntry;
}
|
|
13
|
+
//# sourceMappingURL=scrapeGettingFileNameFromUrl.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scrapeGettingFileNameFromUrl.js","sourceRoot":"","sources":["../../src/scraping/scrapeGettingFileNameFromUrl.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gCAAgC,EAAE,MAAM,uCAAuC,CAAC;AAEzF,MAAM,CAAC,KAAK,UAAU,4BAA4B,CAChD,QAA6B,EAC7B,MAAc,EACd,MAAc,EACd,SAAkB,EAClB,cAWE,EACF,SAAS,GAAG,KAAK,EACjB,OAA2B,EAC3B,YAAqB;IAErB,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE;QAChC,MAAM,QAAQ,GAAG,EAAE,CAAC;QACpB,KAAK,MAAM,cAAc,IAAI,QAAQ,CAAC,KAAK,EAAE;YAC3C,QAAQ,CAAC,IAAI,CACX,MAAM,4BAA4B,CAChC,cAAc,EACd,MAAM,EACN,MAAM,EACN,SAAS,EACT,cAAc,EACd,SAAS,EACT,OAAO,EACP,YAAY,CACb,CACF,CAAC;SACH;QACD,QAAQ,CAAC,KAAK,GAAG,QAAQ,CAAC;QAC1B,OAAO,QAAQ,CAAC;KACjB;IAED,OAAO,MAAM,gCAAgC,CAC3C,QAAQ,EACR,MAAM,EACN,MAAM,EACN,SAAS,EACT,cAAc,EACd,SAAS,EACT,OAAO,EACP,YAAY,CACb,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import path from "path";
|
|
2
|
+
import { createPage, getOrigin } from "../util.js";
|
|
3
|
+
/**
 * Scrapes a single page into the current working directory.
 *
 * @param scrapeFunc - Called as `(html, origin, cwd, imageBaseDir, overwrite, version)`;
 *   expected to resolve to `{ title, description, markdown }`.
 * @param href - URL of the page; its origin is derived with `getOrigin`.
 * @param html - Markup of the page, forwarded to `scrapeFunc`.
 * @param overwrite - Forwarded to `scrapeFunc` and `createPage`.
 * @param version - Forwarded to `scrapeFunc`.
 */
export async function scrapePage(scrapeFunc, href, html, overwrite, version) {
  const origin = getOrigin(href);
  const workingDir = process.cwd();
  // Images go into an "images" folder next to the generated page.
  const imagesDir = path.join(workingDir, "images");

  const scraped = await scrapeFunc(html, origin, workingDir, imagesDir, overwrite, version);
  const { title, description, markdown } = scraped;

  createPage(title, description, markdown, overwrite, process.cwd());
}
|
|
10
|
+
//# sourceMappingURL=scrapePage.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scrapePage.js","sourceRoot":"","sources":["../../src/scraping/scrapePage.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAEnD,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,UAAwB,EACxB,IAAY,EACZ,IAAY,EACZ,SAAkB,EAClB,OAA2B;IAE3B,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IAC1B,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IAE9C,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,GAAG,MAAM,UAAU,CACvD,IAAI,EACJ,MAAM,EACN,GAAG,EACH,YAAY,EACZ,SAAS,EACT,OAAO,CACR,CAAC;IACF,UAAU,CAAC,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,SAAS,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;AACrE,CAAC"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import axios from "axios";
|
|
2
|
+
import { scrapePage } from "./scrapePage.js";
|
|
3
|
+
import { scrapeDocusaurusPage } from "./site-scrapers/scrapeDocusaurusPage.js";
|
|
4
|
+
import { scrapeGitBookPage } from "./site-scrapers/scrapeGitBookPage.js";
|
|
5
|
+
import { scrapeReadMePage } from "./site-scrapers/scrapeReadMePage.js";
|
|
6
|
+
import { detectFramework, Frameworks } from "./detectFramework.js";
|
|
7
|
+
import { getHrefFromArgs } from "../util.js";
|
|
8
|
+
import { getHtmlWithPuppeteer } from "../browser.js";
|
|
9
|
+
import { scrapeIntercomPage } from "./site-scrapers/Intercom/scrapeIntercomPage.js";
|
|
10
|
+
/**
 * Stops the process with exit code 1 when no framework was detected, after
 * listing the framework-specific commands the user can run instead.
 * Does nothing when `framework` is truthy.
 *
 * @param framework - The detected framework, or a falsy value when detection failed.
 */
function validateFramework(framework) {
  if (framework) {
    return;
  }

  const guidance = [
    "Could not detect the framework automatically. Please use one of:",
    "scrape-page-docusaurus",
    "scrape-page-gitbook",
    "scrape-page-readme",
    "scrape-page-intercom",
  ];
  for (const line of guidance) {
    console.log(line);
  }
  return process.exit(1);
}
|
|
20
|
+
/**
 * Command entry point: fetches the page named by the CLI arguments, scrapes it
 * with `scrapeFunc`, then exits the process with code 0.
 *
 * @param argv - Parsed CLI arguments; the URL is extracted with `getHrefFromArgs`
 *   and `argv.overwrite` is coerced to a boolean.
 * @param scrapeFunc - Framework-specific page scraper handed to `scrapePage`.
 * @param options - Optional `{ puppeteer, version }`. When `puppeteer` is truthy
 *   the HTML is fetched with `getHtmlWithPuppeteer` instead of `axios.get`;
 *   `version` is forwarded to `scrapePage`.
 */
export async function scrapePageWrapper(argv, scrapeFunc, options) {
  const href = getHrefFromArgs(argv);

  const html = options?.puppeteer
    ? await getHtmlWithPuppeteer(href)
    : (await axios.get(href)).data;

  const overwrite = Boolean(argv.overwrite);
  await scrapePage(scrapeFunc, href, html, overwrite, options?.version);
  process.exit(0);
}
|
|
33
|
+
/**
 * Command entry point: fetches the page named by the CLI arguments, detects
 * which documentation framework produced it and dispatches to the matching
 * page scraper.
 *
 * @param argv - Parsed CLI arguments; the URL is extracted with `getHrefFromArgs`.
 */
export async function scrapePageAutomatically(argv) {
  const href = getHrefFromArgs(argv);
  const res = await axios.get(href);
  const html = res.data;

  // detectFramework returns undefined when nothing matches. Default to {} so
  // validateFramework gets a chance to print its guidance; destructuring
  // undefined directly would throw a TypeError first.
  const { framework, version } = detectFramework(html) ?? {};

  validateFramework(framework);
  console.log("Detected framework: " + framework);

  switch (framework) {
    case Frameworks.DOCUSAURUS:
      await scrapePageWrapper(argv, scrapeDocusaurusPage, { version });
      break;
    case Frameworks.GITBOOK:
      // GitBook pages are fetched through Puppeteer rather than axios.
      await scrapePageWrapper(argv, scrapeGitBookPage, { puppeteer: true });
      break;
    case Frameworks.README:
      await scrapePageWrapper(argv, scrapeReadMePage);
      break;
    case Frameworks.INTERCOM:
      await scrapePageWrapper(argv, scrapeIntercomPage);
      break;
  }
}
|
|
55
|
+
//# sourceMappingURL=scrapePageCommands.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scrapePageCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapePageCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,oBAAoB,EAAE,MAAM,yCAAyC,CAAC;AAC/E,OAAO,EAAE,iBAAiB,EAAE,MAAM,sCAAsC,CAAC;AACzE,OAAO,EAAE,gBAAgB,EAAE,MAAM,qCAAqC,CAAC;AACvE,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AAErD,OAAO,EAAE,kBAAkB,EAAE,MAAM,gDAAgD,CAAC;AAEpF,SAAS,iBAAiB,CAAC,SAAS;IAClC,IAAI,CAAC,SAAS,EAAE;QACd,OAAO,CAAC,GAAG,CACT,kEAAkE,CACnE,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;QAClC,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAC;QACpC,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;KACxB;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,IAAwB,EACxB,UAAwB,EACxB,OAAmD;IAEnD,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,IAAI,IAAY,CAAC;IACjB,IAAI,OAAO,EAAE,SAAS,EAAE;QACtB,IAAI,GAAG,MAAM,oBAAoB,CAAC,IAAI,CAAC,CAAC;KACzC;SAAM;QACL,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAClC,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;KACjB;IACD,MAAM,UAAU,CAAC,UAAU,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IAC7E,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAAC,IAAS;IACrD,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IAErD,iBAAiB,CAAC,SAAS,CAAC,CAAC;IAE7B,OAAO,CAAC,GAAG,CAAC,sBAAsB,GAAG,SAAS,CAAC,CAAC;IAEhD,QAAQ,SAAS,EAAE;QACjB,KAAK,UAAU,CAAC,UAAU;YACxB,MAAM,iBAAiB,CAAC,IAAI,EAAE,oBAAoB,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;YACjE,MAAM;QACR,KAAK,UAAU,CAAC,OAAO;YACrB,MAAM,iBAAiB,CAAC,IAAI,EAAE,iBAAiB,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACtE,MAAM;QACR,KAAK,UAAU,CAAC,MAAM;YACpB,MAAM,iBAAiB,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA
AC;YAChD,MAAM;QACR,KAAK,UAAU,CAAC,QAAQ;YACtB,MAAM,iBAAiB,CAAC,IAAI,EAAE,kBAAkB,CAAC,CAAC;YAClD,MAAM;KACT;AACH,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import path from "path";
|
|
2
|
+
import { objToReadableString } from "../util.js";
|
|
3
|
+
/**
 * Scrapes a whole documentation section into the current working directory
 * and prints the resulting navigation for the user to paste into mint.json.
 *
 * @param scrapeFunc - Called as `(html, origin, cwd, imageBaseDir, overwrite, version)`;
 *   its resolved value is printed through `objToReadableString`.
 * @param html - Markup of the section's page, forwarded to `scrapeFunc`.
 * @param origin - Forwarded to `scrapeFunc`.
 * @param overwrite - Forwarded to `scrapeFunc`; also mentioned in the start message when truthy.
 * @param version - Forwarded to `scrapeFunc`.
 */
export async function scrapeSection(scrapeFunc, html, origin, overwrite, version) {
  const modeNote = overwrite ? ", overwrite mode is on" : "";
  console.log(`Started scraping${modeNote}...`);

  const workingDir = process.cwd();
  const imagesDir = path.join(workingDir, "images");
  const navigationGroups = await scrapeFunc(html, origin, workingDir, imagesDir, overwrite, version);

  console.log("Finished scraping.");
  console.log("Add the following to your navigation in mint.json:");
  console.log(objToReadableString(navigationGroups));
}
|
|
12
|
+
//# sourceMappingURL=scrapeSection.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scrapeSection.js","sourceRoot":"","sources":["../../src/scraping/scrapeSection.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC;AAEjD,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,UAA2B,EAC3B,IAAY,EACZ,MAAc,EACd,SAAkB,EAClB,OAA2B;IAE3B,OAAO,CAAC,GAAG,CACT,mBAAmB,SAAS,CAAC,CAAC,CAAC,wBAAwB,CAAC,CAAC,CAAC,EAAE,KAAK,CAClE,CAAC;IACF,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IAC1B,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IAE9C,MAAM,YAAY,GAAG,MAAM,UAAU,CACnC,IAAI,EACJ,MAAM,EACN,GAAG,EACH,YAAY,EACZ,SAAS,EACT,OAAO,CACR,CAAC;IACF,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;IAClC,OAAO,CAAC,GAAG,CAAC,oDAAoD,CAAC,CAAC;IAClE,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,YAAY,CAAC,CAAC,CAAC;AACjD,CAAC"}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import axios from "axios";
|
|
2
|
+
import { detectFramework, Frameworks } from "./detectFramework.js";
|
|
3
|
+
import { getHrefFromArgs, getOrigin } from "../util.js";
|
|
4
|
+
import { scrapeSection } from "./scrapeSection.js";
|
|
5
|
+
import { scrapeDocusaurusSection } from "./site-scrapers/scrapeDocusaurusSection.js";
|
|
6
|
+
import openNestedDocusaurusMenus from "./site-scrapers/openNestedDocusaurusMenus.js";
|
|
7
|
+
import { scrapeGitBookSection } from "./site-scrapers/scrapeGitBookSection.js";
|
|
8
|
+
import openNestedGitbookMenus from "./site-scrapers/openNestedGitbookMenus.js";
|
|
9
|
+
import { scrapeReadMeSection } from "./site-scrapers/scrapeReadMeSection.js";
|
|
10
|
+
import { startBrowser } from "../browser.js";
|
|
11
|
+
import { scrapeIntercomSection } from "./site-scrapers/Intercom/scrapeIntercomSection.js";
|
|
12
|
+
/**
 * Fetches the page named by the CLI arguments with axios, runs `scrapeFunc`
 * on the response body through `scrapeSection`, then terminates the process
 * with exit code 0.
 *
 * @param {object} argv - Parsed CLI arguments; `argv.overwrite` is coerced to a boolean.
 * @param {Function} scrapeFunc - Section scraper handed to `scrapeSection`.
 * @returns {Promise<void>} Does not return normally: the process exits first.
 */
export async function scrapeSectionAxiosWrapper(argv, scrapeFunc) {
  const targetUrl = getHrefFromArgs(argv);
  const { data: pageHtml } = await axios.get(targetUrl);

  // No framework version is known on this code path, so `undefined` is passed.
  await scrapeSection(
    scrapeFunc,
    pageHtml,
    getOrigin(targetUrl),
    Boolean(argv.overwrite),
    undefined,
  );

  process.exit(0);
}
|
|
19
|
+
/**
 * Scrapes a Docusaurus section: expands the nested sidebar menus in a browser
 * and then runs the Docusaurus section scraper on the resulting HTML.
 *
 * @param {object} argv - Parsed CLI arguments.
 * @param {string} version - Docusaurus major version: "1" | "2" | "3".
 * @returns {Promise<void>}
 */
export async function scrapeDocusaurusSectionCommand(argv, version) {
  const expandMenus = openNestedDocusaurusMenus;
  const sectionScraper = scrapeDocusaurusSection;
  await scrapeSectionOpeningAllNested(argv, expandMenus, sectionScraper, version);
}
|
|
23
|
+
/**
 * Scrapes a GitBook section: expands the nested menus in a browser and then
 * runs the GitBook section scraper on the resulting HTML. No framework
 * version is forwarded.
 *
 * @param {object} argv - Parsed CLI arguments.
 * @returns {Promise<void>}
 */
export async function scrapeGitbookSectionCommand(argv) {
  const expandMenus = openNestedGitbookMenus;
  const sectionScraper = scrapeGitBookSection;
  await scrapeSectionOpeningAllNested(argv, expandMenus, sectionScraper);
}
|
|
26
|
+
/**
 * Loads the target page in a browser, lets `openLinks` expand the nested
 * navigation, scrapes the resulting HTML and exits the process with code 0.
 *
 * The browser is closed in a `finally` block and the close is awaited, so a
 * failed navigation or menu expansion no longer leaves the browser running
 * and no promise is left floating.
 *
 * @param {object} argv - Parsed CLI arguments; `argv.overwrite` is coerced to a boolean.
 * @param {Function} openLinks - Called with the loaded page; its awaited
 *   result is used as the HTML to scrape.
 * @param {Function} scrapeFunc - Section scraper handed to `scrapeSection`.
 * @param {string} [version] - Framework version forwarded to `scrapeSection`.
 * @returns {Promise<void>} Does not return normally on success: the process exits first.
 * @throws Re-throws any error raised while starting, navigating or scraping.
 */
async function scrapeSectionOpeningAllNested(argv, openLinks, scrapeFunc, version) {
  const href = getHrefFromArgs(argv);
  const browser = await startBrowser();

  let html;
  try {
    const page = await browser.newPage();
    await page.goto(href, {
      waitUntil: "networkidle2",
    });
    html = await openLinks(page);
  } finally {
    // Always release the browser, even when navigation or expansion throws.
    await browser.close();
  }

  await scrapeSection(scrapeFunc, html, getOrigin(href), !!argv.overwrite, version);
  process.exit(0);
}
|
|
38
|
+
/**
 * Detects which documentation framework serves the target page and hands off
 * to the matching section scraper command.
 *
 * The page is fetched here only for detection; each command re-reads the
 * target from `argv` itself. A framework that matches none of the known
 * values falls through without scraping.
 *
 * @param {object} argv - Parsed CLI arguments.
 * @returns {Promise<void>}
 */
export async function scrapeSectionAutomatically(argv) {
  const targetUrl = getHrefFromArgs(argv);
  const response = await axios.get(targetUrl);
  const detection = detectFramework(response.data);
  const framework = detection.framework;
  const version = detection.version;

  validateFramework(framework);
  console.log("Detected framework: " + framework);

  if (framework === Frameworks.DOCUSAURUS) {
    await scrapeDocusaurusSectionCommand(argv, version);
  } else if (framework === Frameworks.GITBOOK) {
    await scrapeGitbookSectionCommand(argv);
  } else if (framework === Frameworks.README) {
    await scrapeSectionAxiosWrapper(argv, scrapeReadMeSection);
  } else if (framework === Frameworks.INTERCOM) {
    await scrapeSectionAxiosWrapper(argv, scrapeIntercomSection);
  }
}
|
|
60
|
+
/**
 * Aborts the CLI when no documentation framework could be detected.
 *
 * Prints the list of supported frameworks and exits with status 1 so that
 * shells and scripts can tell the run failed. Does nothing when `framework`
 * is truthy.
 *
 * @param {string|undefined} framework - Detected framework, if any.
 * @returns {void}
 */
function validateFramework(framework) {
  if (framework) {
    return;
  }
  // Intercom is listed because the automatic section scraper dispatches to it.
  console.log(
    "Could not detect the framework automatically. We only support Docusaurus (V2 and V3), GitBook, ReadMe, and Intercom.",
  );
  // A non-zero code signals failure; a bare process.exit() would report success.
  process.exit(1);
}
|
|
66
|
+
//# sourceMappingURL=scrapeSectionCommands.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scrapeSectionCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapeSectionCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACxD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,uBAAuB,EAAE,MAAM,4CAA4C,CAAC;AACrF,OAAO,yBAAyB,MAAM,8CAA8C,CAAC;AACrF,OAAO,EAAE,oBAAoB,EAAE,MAAM,yCAAyC,CAAC;AAC/E,OAAO,sBAAsB,MAAM,2CAA2C,CAAC;AAC/E,OAAO,EAAE,mBAAmB,EAAE,MAAM,wCAAwC,CAAC;AAC7E,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAE7C,OAAO,EAAE,qBAAqB,EAAE,MAAM,mDAAmD,CAAC;AAE1F,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAC7C,IAAwB,EACxB,UAA2B;IAE3B,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,MAAM,aAAa,CACjB,UAAU,EACV,IAAI,EACJ,SAAS,CAAC,IAAI,CAAC,EACf,CAAC,CAAC,IAAI,CAAC,SAAS,EAChB,SAAS,CACV,CAAC;IACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,8BAA8B,CAClD,IAAS,EACT,OAAe,CAAC,kBAAkB;;IAElC,MAAM,6BAA6B,CACjC,IAAI,EACJ,yBAAyB,EACzB,uBAAuB,EACvB,OAAO,CACR,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,IAAS;IACzD,MAAM,6BAA6B,CACjC,IAAI,EACJ,sBAAsB,EACtB,oBAAoB,CACrB,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,6BAA6B,CAC1C,IAAS,EACT,SAAc,EACd,UAA2B,EAC3B,OAAgB;IAEhB,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IAEnC,MAAM,OAAO,GAAG,MAAM,YAAY,EAAE,CAAC;IACrC,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;IACrC,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;QACpB,SAAS,EAAE,cAAc;KAC1B,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,CAAC;IACnC,OAAO,CAAC,KAAK,EAAE,CAAC;IAChB,MAAM,aAAa,CACjB,UAAU,EACV,IAAI,EACJ,SAAS,CAAC,IAAI,CAAC,EACf,CAAC,CAAC,IAAI,CAAC,SAAS,EAChB,OAAO,CACR,CAAC;IACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAAC,IAAS;IACxD,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,MAAM,EAAE,SAAS,EAAE,OAAO,EAA
E,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IAErD,iBAAiB,CAAC,SAAS,CAAC,CAAC;IAE7B,OAAO,CAAC,GAAG,CAAC,sBAAsB,GAAG,SAAS,CAAC,CAAC;IAEhD,QAAQ,SAAS,EAAE;QACjB,KAAK,UAAU,CAAC,UAAU;YACxB,MAAM,8BAA8B,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YACpD,MAAM;QACR,KAAK,UAAU,CAAC,OAAO;YACrB,MAAM,2BAA2B,CAAC,IAAI,CAAC,CAAC;YACxC,MAAM;QACR,KAAK,UAAU,CAAC,MAAM;YACpB,MAAM,yBAAyB,CAAC,IAAI,EAAE,mBAAmB,CAAC,CAAC;YAC3D,MAAM;QACR,KAAK,UAAU,CAAC,QAAQ;YACtB,MAAM,yBAAyB,CAAC,IAAI,EAAE,qBAAqB,CAAC,CAAC;YAC7D,MAAM;KACT;AACH,CAAC;AAED,SAAS,iBAAiB,CAAC,SAAiC;IAC1D,IAAI,CAAC,SAAS,EAAE;QACd,OAAO,CAAC,GAAG,CACT,4GAA4G,CAC7G,CAAC;QACF,OAAO,CAAC,IAAI,EAAE,CAAC;KAChB;AACH,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import cheerio from "cheerio";
|
|
2
|
+
import { NodeHtmlMarkdown } from "node-html-markdown";
|
|
3
|
+
import downloadAllImages from "../../downloadAllImages.js";
|
|
4
|
+
import replaceImagePaths from "../../replaceImagePaths.js";
|
|
5
|
+
/**
 * Converts one Intercom article page into a title, a description and
 * Markdown content.
 *
 * The title is read from `.t__h1`, the description from `.article__desc`
 * within the title's parent, and the body from the first `<article>`. Images
 * in the body are downloaded, and their paths in the generated Markdown are
 * rewritten by `replaceImagePaths`.
 *
 * @param {string} html - Article page HTML.
 * @param {string} origin - Forwarded to `downloadAllImages`.
 * @param {string} cliDir - Forwarded to `replaceImagePaths`.
 * @param {string} imageBaseDir - Forwarded to `downloadAllImages`.
 * @param {boolean} overwrite - Forwarded to `downloadAllImages`.
 * @param {*} _ - Framework version; unused, kept so every page scraper shares
 *   the same signature.
 * @returns {Promise<{title: string, description: string, markdown: string}>}
 */
export async function scrapeIntercomPage(html, origin, cliDir, imageBaseDir, overwrite, _) {
  const $ = cheerio.load(html);

  const titleComponent = $(".t__h1").first();
  const title = titleComponent.text().trim();
  const description = $(".article__desc", titleComponent.parent()).text().trim();

  const content = $("article").first();
  // Serialize before downloading images, exactly as the original order did.
  const contentHtml = $.html(content);

  const origToWritePath = await downloadAllImages($, content, origin, imageBaseDir, overwrite, undefined, true);

  const nhm = new NodeHtmlMarkdown({ useInlineLinks: false });
  let markdown = nhm.translate(contentHtml);

  // Keep headers on one line
  markdown = markdown.replace(/# \n\n/g, "# ");

  // Remove unnecessary zero-width space characters
  markdown = markdown.replace(/\u200b/g, "");

  // Collapse any run of three or more newlines into a single blank line.
  // Matching exactly three left longer runs only partly reduced.
  markdown = markdown.replace(/\n{3,}/g, "\n\n");

  // Mintlify doesn't support bolded headers, remove the asterisks
  markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, "$1 $2\n");

  markdown = replaceImagePaths(origToWritePath, cliDir, markdown);

  return { title, description, markdown };
}
|
|
27
|
+
//# sourceMappingURL=scrapeIntercomPage.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scrapeIntercomPage.js","sourceRoot":"","sources":["../../../../src/scraping/site-scrapers/Intercom/scrapeIntercomPage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,iBAAiB,MAAM,4BAA4B,CAAC;AAC3D,OAAO,iBAAiB,MAAM,4BAA4B,CAAC;AAE3D,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,CAAqB,CAAC,UAAU;;IAEhC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,cAAc,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,KAAK,EAAE,CAAC;IAC3C,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAC3C,IAAI,WAAW,GAAG,CAAC,CAAC,gBAAgB,EAAE,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAE7E,IAAI,OAAO,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC;IACnC,MAAM,WAAW,GAAG,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAEpC,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,OAAO,EACP,MAAM,EACN,YAAY,EACZ,SAAS,EACT,SAAS,EACT,IAAI,CACL,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,gBAAgB,CAAC,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;IAC5D,IAAI,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAE1C,2BAA2B;IAC3B,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IAE7C,qDAAqD;IACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAE3C,iCAAiC;IACjC,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,gEAAgE;IAChE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;IAEjE,QAAQ,GAAG,iBAAiB,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAEhE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import cheerio from "cheerio";
|
|
2
|
+
import { scrapeIntercomPage } from "./scrapeIntercomPage.js";
|
|
3
|
+
import { scrapeGettingFileNameFromUrl } from "../../scrapeGettingFileNameFromUrl.js";
|
|
4
|
+
import downloadLogoImage from "../../downloadLogoImage.js";
|
|
5
|
+
import axios from "axios";
|
|
6
|
+
/**
 * Scrapes an Intercom help-center landing page: downloads the logo, visits
 * every collection linked from the page, and scrapes each collection's
 * articles.
 *
 * Compared with the previous implementation:
 * - each collection page is parsed into its own local cheerio instance
 *   instead of reassigning one shared `$` from concurrent callbacks;
 * - the logo download is awaited, so it is not abandoned mid-flight and its
 *   failure surfaces as a rejection of this function;
 * - collection hrefs are resolved against `origin`, so absolute hrefs work
 *   as well as relative ones.
 *
 * @param {string} html - HTML of the help-center landing page.
 * @param {string} origin - Base URL that collection links are resolved
 *   against (assumed to be an absolute URL origin such as
 *   "https://help.example.com").
 * @param {string} cliDir - Forwarded to `scrapeGettingFileNameFromUrl`.
 * @param {string} imageBaseDir - Forwarded to `downloadLogoImage`.
 * @param {boolean} overwrite - Forwarded to both helpers.
 * @param {string} [version] - Forwarded to `scrapeGettingFileNameFromUrl`.
 * @returns {Promise<Array>} One `scrapeGettingFileNameFromUrl` result per collection.
 */
export async function scrapeIntercomSection(html, origin, cliDir, imageBaseDir, overwrite, version) {
  const $ = cheerio.load(html);

  const logoSrc = $(".header__logo img").first().attr("src");
  const logoDownload = downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite);

  const collectionLinks = $(".section .g__space a").toArray();
  const [collections] = await Promise.all([
    Promise.all(
      collectionLinks.map((link) => fetchIntercomCollection($(link).attr("href"), origin)),
    ),
    logoDownload,
  ]);

  return Promise.all(
    collections.map((entry) =>
      scrapeGettingFileNameFromUrl(entry, cliDir, origin, overwrite, scrapeIntercomPage, false, version),
    ),
  );
}

/**
 * Fetches one collection page and returns its title with the hrefs of the
 * articles it links to.
 *
 * @param {string} href - Collection link as found in the landing page.
 * @param {string} origin - Base URL used to resolve `href`.
 * @returns {Promise<{group: string, pages: Array<string|undefined>}>}
 */
async function fetchIntercomCollection(href, origin) {
  const res = await axios.get(new URL(href, origin).href);
  const $collection = cheerio.load(res.data);

  const group = $collection(".collection h1").first().text().trim();
  const pages = $collection(".section .g__space a")
    .toArray()
    .map((link) => $collection(link).attr("href"));

  return { group, pages };
}
|
|
32
|
+
//# sourceMappingURL=scrapeIntercomSection.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scrapeIntercomSection.js","sourceRoot":"","sources":["../../../../src/scraping/site-scrapers/Intercom/scrapeIntercomSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,kBAAkB,EAAE,MAAM,yBAAyB,CAAC;AAC7D,OAAO,EAAE,4BAA4B,EAAE,MAAM,uCAAuC,CAAC;AACrF,OAAO,iBAAiB,MAAM,4BAA4B,CAAC;AAC3D,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,OAA2B;IAE3B,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE3B,MAAM,OAAO,GAAG,CAAC,CAAC,mBAAmB,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC3D,iBAAiB,CAAC,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;IAE5D,MAAM,eAAe,GAAG,CAAC,CAAC,sBAAsB,CAAC,CAAC;IAClD,MAAM,cAAc,GAAG,eAAe;SACnC,OAAO,EAAE;SACT,GAAG,CAAC,KAAK,EAAE,CAAkB,EAAE,EAAE;QAChC,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC/B,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,GAAG,MAAM,GAAG,IAAI,EAAE,CAAC,CAAC;QAChD,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;QACtB,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvB,MAAM,YAAY,GAAG,CAAC,CAAC,gBAAgB,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QAC/D,MAAM,YAAY,GAAG,CAAC,CAAC,sBAAsB,CAAC;aAC3C,OAAO,EAAE;aACT,GAAG,CAAC,CAAC,CAAkB,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;QAClD,OAAO;YACL,KAAK,EAAE,YAAY;YACnB,KAAK,EAAE,YAAY;SACpB,CAAC;IACJ,CAAC,CAAC,CAAC;IAEL,MAAM,WAAW,GAAqB,MAAM,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;IAExE,OAAO,MAAM,OAAO,CAAC,GAAG,CACtB,WAAW,CAAC,GAAG,CAAC,KAAK,EAAE,KAA0B,EAAE,EAAE;QACnD,OAAO,MAAM,4BAA4B,CACvC,KAAK,EACL,MAAM,EACN,MAAM,EACN,SAAS,EACT,kBAAkB,EAClB,KAAK,EACL,OAAO,CACR,CAAC;IACJ,CAAC,CAAC,CACH,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
 * Picks a fallback title for a navigation group.
 *
 * Only nested navigation menus that actually contain pages get a title; an
 * empty string is returned otherwise so that untitled groups can be merged
 * into one section.
 *
 * @param {{text: Function}|null|undefined} firstLink - Link whose text is used as the title.
 * @param {Array} pages - Pages collected for the group.
 * @returns {string|undefined} `firstLink.text()` when there are pages
 *   (`undefined` if `firstLink` is nullish), otherwise "".
 */
export default function alternateGroupTitle(firstLink, pages) {
  const hasPages = pages.length > 0;
  return hasPages ? firstLink?.text() : "";
}
|
|
9
|
+
//# sourceMappingURL=alternateGroupTitle.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"alternateGroupTitle.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/alternateGroupTitle.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,OAAO,UAAU,mBAAmB,CAAC,SAAS,EAAE,KAAK;IAC1D,mEAAmE;IACnE,wEAAwE;IACxE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE;QACpB,OAAO,SAAS,EAAE,IAAI,EAAE,CAAC;KAC1B;IACD,OAAO,EAAE,CAAC;AACZ,CAAC"}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import alternateGroupTitle from "../alternateGroupTitle.js";
|
|
2
|
+
import getLinksRecursively from "./getLinksRecursively.js";
|
|
3
|
+
/**
 * Extracts navigation groups from a Docusaurus sidebar.
 *
 * Only versions "2" and "3" are handled; any other version yields an empty
 * list.
 *
 * @param {object} navigationSections - Collection of sidebar section elements.
 * @param {Function} $ - Cheerio-style selector function.
 * @param {string|undefined} version - Docusaurus major version.
 * @returns {Array} Navigation groups, or `[]` for unsupported versions.
 */
export function getDocusaurusLinksPerGroup(navigationSections, $, version) {
  const isSupportedVersion = version === "2" || version === "3";
  if (!isSupportedVersion) {
    return [];
  }
  return getDocusaurusLinksPerGroupLoop(navigationSections, $);
}
|
|
9
|
+
/**
 * Maps every top-level Docusaurus sidebar item to a `{ group, pages }` entry.
 *
 * - Items with the `theme-doc-sidebar-item-link` class are plain links and
 *   become an untitled group holding that single href.
 * - Any other item is treated as a category: its collapsible header link
 *   supplies the title and first page, and the links under its second child
 *   are collected recursively.
 *
 * @param {object} navigationSections - Collection exposing `toArray()`.
 * @param {Function} $ - Cheerio-style selector function.
 * @returns {Array<{group: string, pages: Array}>}
 */
function getDocusaurusLinksPerGroupLoop(navigationSections, $) {
  const groups = [];

  for (const element of navigationSections.toArray()) {
    const item = $(element);

    // Links without a group
    if (item.hasClass("theme-doc-sidebar-item-link")) {
      const href = item.find("a[href]").first().attr("href");
      groups.push({ group: "", pages: [href] });
      continue;
    }

    const headerLink = item.find(".menu__list-item-collapsible").first().find("a[href]");
    const title = headerLink.text();
    const headerHref = headerLink.attr("href");

    const nestedItems = item.children().eq(1).children();
    const nestedPages = getLinksRecursively(nestedItems, $);

    groups.push({
      group: title || alternateGroupTitle(headerLink, nestedPages),
      pages: headerHref ? [headerHref, ...nestedPages] : nestedPages,
    });
  }

  return groups;
}
|
|
34
|
+
//# sourceMappingURL=getDocusaurusLinksPerGroup.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"getDocusaurusLinksPerGroup.js","sourceRoot":"","sources":["../../../../src/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.ts"],"names":[],"mappings":"AAAA,OAAO,mBAAmB,MAAM,2BAA2B,CAAC;AAC5D,OAAO,mBAAmB,MAAM,0BAA0B,CAAC;AAE3D,MAAM,UAAU,0BAA0B,CACxC,kBAAuB,EACvB,CAAM,EACN,OAA2B;IAE3B,IAAI,OAAO,KAAK,GAAG,IAAI,OAAO,KAAK,GAAG,EAAE;QACtC,OAAO,8BAA8B,CAAC,kBAAkB,EAAE,CAAC,CAAC,CAAC;KAC9D;IACD,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,SAAS,8BAA8B,CAAC,kBAAuB,EAAE,CAAM;IACrE,OAAO,kBAAkB,CAAC,OAAO,EAAE,CAAC,GAAG,CAAC,CAAC,CAAS,EAAE,EAAE;QACpD,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAErB,wBAAwB;QACxB,IAAI,OAAO,CAAC,QAAQ,CAAC,6BAA6B,CAAC,EAAE;YACnD,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC9D,OAAO;gBACL,KAAK,EAAE,EAAE;gBACT,KAAK,EAAE,CAAC,QAAQ,CAAC;aAClB,CAAC;SACH;QAED,MAAM,SAAS,GAAG,OAAO;aACtB,IAAI,CAAC,8BAA8B,CAAC;aACpC,KAAK,EAAE;aACP,IAAI,CAAC,SAAS,CAAC,CAAC;QAEnB,MAAM,YAAY,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;QACtC,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACzC,MAAM,YAAY,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;QAEzD,MAAM,KAAK,GAAG,mBAAmB,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC;QAEnD,OAAO;YACL,KAAK,EAAE,YAAY,IAAI,mBAAmB,CAAC,SAAS,EAAE,KAAK,CAAC;YAC5D,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK;SACjD,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
// Used by Docusaurus and ReadMe section scrapers
|
|
2
|
+
/**
 * Walks a list of navigation items and returns their links, nesting
 * sub-menus as `{ group, pages }` objects.
 *
 * An item's link is its first child, or the first `a[href]` inside that
 * child when the child itself has no href. Sub-items are the children of the
 * item's second child.
 *
 * @param {object|null|undefined} linkSections - Cheerio-style collection of items.
 * @param {Function} $ - Cheerio-style selector function.
 * @returns {Array<string|{group: string, pages: Array}>} Hrefs and nested
 *   groups in document order; `[]` for a missing or empty collection.
 */
export default function getLinksRecursively(linkSections, $) {
  const nothingToScan = linkSections == null || linkSections.length === 0;
  if (nothingToScan) {
    return [];
  }

  // Items with no usable link are skipped: missing hrefs (for example the
  // empty divs GitBook uses to draw a line beside the nav), bare "#"
  // anchors, and external links until Mintlify supports them.
  const shouldSkip = (href) =>
    !href || href === "#" || ["https://", "http://"].some((scheme) => href.startsWith(scheme));

  const toNavEntry = (_position, element) => {
    const item = $(element);
    const firstChild = item.children().first();
    // Docusaurus nests the <a> inside a <div>
    const anchor = firstChild.attr("href") ? firstChild : firstChild.find("a[href]").first();
    const href = anchor.attr("href");

    if (shouldSkip(href)) {
      return undefined;
    }

    const nestedItems = item.children().eq(1).children();
    // The section link goes into the list of pages. Update this once a
    // section itself can be a link.
    return nestedItems.length > 0
      ? { group: anchor.text(), pages: [href, ...getLinksRecursively(nestedItems, $)] }
      : href;
  };

  return linkSections.map(toNavEntry).toArray().filter(Boolean);
}
|
|
38
|
+
//# sourceMappingURL=getLinksRecursively.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"getLinksRecursively.js","sourceRoot":"","sources":["../../../../src/scraping/site-scrapers/links-per-group/getLinksRecursively.ts"],"names":[],"mappings":"AAAA,iDAAiD;AACjD,MAAM,CAAC,OAAO,UAAU,mBAAmB,CAAC,YAAiB,EAAE,CAAM;IACnE,IAAI,YAAY,IAAI,IAAI,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE;QACrD,OAAO,EAAE,CAAC;KACX;IAED,OAAO,YAAY;SAChB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACZ,MAAM,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACxB,IAAI,IAAI,GAAG,UAAU,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,CAAC;QAEzC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE;YACtB,0CAA0C;YAC1C,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC;SACrC;QACD,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEnC,gDAAgD;QAChD,yDAAyD;QACzD,mDAAmD;QACnD,IACE,CAAC,QAAQ;YACT,QAAQ,KAAK,GAAG;YAChB,QAAQ,CAAC,UAAU,CAAC,UAAU,CAAC;YAC/B,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,EAC9B;YACA,OAAO,SAAS,CAAC;SAClB;QAED,MAAM,UAAU,GAAG,UAAU,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;QAE1D,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE;YACzB,6CAA6C;YAC7C,wEAAwE;YACxE,OAAO;gBACL,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE;gBAClB,KAAK,EAAE,CAAC,QAAQ,EAAE,GAAG,mBAAmB,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;aACzD,CAAC;SACH;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC,CAAC;SACD,OAAO,EAAE;SACT,MAAM,CAAC,OAAO,CAAC,CAAC;AACrB,CAAC"}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
// Used by GitBook section scraper
|
|
2
|
+
/**
 * GitBook variant of the recursive navigation walker: returns the links of
 * the given items, nesting sub-menus as `{ group, pages }` objects.
 *
 * An item with exactly one child is unwrapped to that child first. The link
 * is then the item's first child, and sub-items are the children of its
 * second child.
 *
 * @param {object|null|undefined} linkSections - Cheerio-style collection of items.
 * @param {Function} $ - Cheerio-style selector function.
 * @returns {Array<string|{group: string, pages: Array}>} Hrefs and nested
 *   groups in document order; `[]` for a missing or empty collection.
 */
export default function getLinksRecursivelyGitBook(linkSections, $) {
  const nothingToScan = linkSections == null || linkSections.length === 0;
  if (nothingToScan) {
    return [];
  }

  // Items with no usable link are skipped: missing hrefs (for example the
  // empty divs GitBook uses to draw a line beside the nav), bare "#"
  // anchors, and external links until Mintlify supports them.
  const shouldSkip = (href) =>
    !href || href === "#" || ["https://", "http://"].some((scheme) => href.startsWith(scheme));

  const toNavEntry = (_position, element) => {
    const wrapper = $(element);
    // GitBook has an extra div when more than one layer deep
    const item = wrapper.children().length === 1 ? wrapper.children().first() : wrapper;

    const anchor = item.children().first();
    const href = anchor.attr("href");
    if (shouldSkip(href)) {
      return undefined;
    }

    const nestedItems = item.children().eq(1).children();
    // The section link goes into the list of pages. Update this once a
    // section itself can be a link.
    return nestedItems.length > 0
      ? { group: anchor.text(), pages: [href, ...getLinksRecursivelyGitBook(nestedItems, $)] }
      : href;
  };

  return linkSections.map(toNavEntry).toArray().filter(Boolean);
}
|
|
38
|
+
//# sourceMappingURL=getLinksRecursivelyGitBook.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"getLinksRecursivelyGitBook.js","sourceRoot":"","sources":["../../../../src/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.ts"],"names":[],"mappings":"AAAA,kCAAkC;AAClC,MAAM,CAAC,OAAO,UAAU,0BAA0B,CAAC,YAAiB,EAAE,CAAM;IAC1E,IAAI,YAAY,IAAI,IAAI,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE;QACrD,OAAO,EAAE,CAAC;KACX;IAED,OAAO,YAAY;SAChB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACZ,IAAI,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAEtB,yDAAyD;QACzD,IAAI,UAAU,CAAC,QAAQ,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE;YACtC,UAAU,GAAG,UAAU,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,CAAC;SAC5C;QAED,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,CAAC;QAC3C,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEnC,gDAAgD;QAChD,yDAAyD;QACzD,mDAAmD;QACnD,IACE,CAAC,QAAQ;YACT,QAAQ,KAAK,GAAG;YAChB,QAAQ,CAAC,UAAU,CAAC,UAAU,CAAC;YAC/B,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,EAC9B;YACA,OAAO,SAAS,CAAC;SAClB;QAED,MAAM,UAAU,GAAG,UAAU,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;QAE1D,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE;YACzB,6CAA6C;YAC7C,wEAAwE;YACxE,OAAO;gBACL,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE;gBAClB,KAAK,EAAE,CAAC,QAAQ,EAAE,GAAG,0BAA0B,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;aAChE,CAAC;SACH;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC,CAAC;SACD,OAAO,EAAE;SACT,MAAM,CAAC,OAAO,CAAC,CAAC;AACrB,CAAC"}
|