mintlify 2.0.4 → 2.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/constants.js +17 -0
- package/bin/constants.js.map +1 -1
- package/bin/downloadImage.js +47 -2
- package/bin/downloadImage.js.map +1 -1
- package/bin/index.js +5 -3
- package/bin/index.js.map +1 -1
- package/bin/local-preview/index.js +2 -1
- package/bin/local-preview/index.js.map +1 -1
- package/bin/local-preview/listener/{categorizeFiles.js → categorize.js} +53 -3
- package/bin/local-preview/listener/categorize.js.map +1 -0
- package/bin/local-preview/listener/generate.js +1 -1
- package/bin/local-preview/listener/generate.js.map +1 -1
- package/bin/local-preview/listener/index.js +145 -108
- package/bin/local-preview/listener/index.js.map +1 -1
- package/bin/local-preview/listener/update.js +1 -1
- package/bin/local-preview/listener/update.js.map +1 -1
- package/bin/scraping/combineNavWithEmptyGroupTitles.js.map +1 -1
- package/bin/scraping/downloadAllImages.js +9 -38
- package/bin/scraping/downloadAllImages.js.map +1 -1
- package/bin/scraping/downloadLogoImage.js +13 -0
- package/bin/scraping/downloadLogoImage.js.map +1 -0
- package/bin/scraping/scrapeFileGettingFileNameFromUrl.js +1 -1
- package/bin/scraping/scrapeFileGettingFileNameFromUrl.js.map +1 -1
- package/bin/scraping/scrapeGettingFileNameFromUrl.js +1 -2
- package/bin/scraping/scrapeGettingFileNameFromUrl.js.map +1 -1
- package/bin/scraping/scrapePage.js +3 -2
- package/bin/scraping/scrapePage.js.map +1 -1
- package/bin/scraping/scrapePageCommands.js +1 -1
- package/bin/scraping/scrapePageCommands.js.map +1 -1
- package/bin/scraping/scrapeSection.js +4 -1
- package/bin/scraping/scrapeSection.js.map +1 -1
- package/bin/scraping/scrapeSectionCommands.js +2 -2
- package/bin/scraping/scrapeSectionCommands.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js +2 -2
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js +5 -1
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeGitBookPage.js +2 -2
- package/bin/scraping/site-scrapers/scrapeGitBookPage.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js +7 -1
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeReadMePage.js +2 -2
- package/bin/scraping/site-scrapers/scrapeReadMePage.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeReadMeSection.js +9 -3
- package/bin/scraping/site-scrapers/scrapeReadMeSection.js.map +1 -1
- package/bin/util.js +4 -2
- package/bin/util.js.map +1 -1
- package/package.json +1 -1
- package/src/constants.ts +18 -0
- package/src/downloadImage.ts +61 -3
- package/src/index.ts +10 -3
- package/src/local-preview/index.ts +2 -1
- package/src/local-preview/listener/{categorizeFiles.ts → categorize.ts} +56 -2
- package/src/local-preview/listener/generate.ts +1 -1
- package/src/local-preview/listener/index.ts +163 -143
- package/src/local-preview/listener/update.ts +1 -1
- package/src/local-preview/listener/utils/types.ts +15 -0
- package/src/scraping/combineNavWithEmptyGroupTitles.ts +4 -4
- package/src/scraping/downloadAllImages.ts +12 -39
- package/src/scraping/downloadLogoImage.ts +24 -0
- package/src/scraping/scrapeFileGettingFileNameFromUrl.ts +3 -1
- package/src/scraping/scrapeGettingFileNameFromUrl.ts +4 -4
- package/src/scraping/scrapePage.ts +6 -9
- package/src/scraping/scrapePageCommands.ts +2 -2
- package/src/scraping/scrapeSection.ts +7 -2
- package/src/scraping/scrapeSectionCommands.ts +7 -4
- package/src/scraping/site-scrapers/scrapeDocusaurusPage.ts +3 -1
- package/src/scraping/site-scrapers/scrapeDocusaurusSection.ts +9 -4
- package/src/scraping/site-scrapers/scrapeGitBookPage.ts +2 -0
- package/src/scraping/site-scrapers/scrapeGitBookSection.ts +12 -4
- package/src/scraping/site-scrapers/scrapeReadMePage.ts +3 -1
- package/src/scraping/site-scrapers/scrapeReadMeSection.ts +14 -6
- package/src/types.d.ts +29 -0
- package/src/util.ts +6 -5
- package/bin/local-preview/listener/categorizeFiles.js.map +0 -1
- package/bin/local-preview/listener/utils/fileIsMdxOrMd.js +0 -12
- package/bin/local-preview/listener/utils/fileIsMdxOrMd.js.map +0 -1
- package/bin/local-preview/utils/categorizeFiles.js +0 -63
- package/bin/local-preview/utils/categorizeFiles.js.map +0 -1
- package/bin/local-preview/utils/getOpenApiContext.js +0 -58
- package/bin/local-preview/utils/getOpenApiContext.js.map +0 -1
- package/bin/local-preview/utils/injectFavicons.js +0 -72
- package/bin/local-preview/utils/injectFavicons.js.map +0 -1
- package/bin/local-preview/utils/listener.js +0 -116
- package/bin/local-preview/utils/listener.js.map +0 -1
- package/bin/local-preview/utils/metadata.js +0 -118
- package/bin/local-preview/utils/metadata.js.map +0 -1
- package/bin/local-preview/utils/mintConfigFile.js +0 -43
- package/bin/local-preview/utils/mintConfigFile.js.map +0 -1
- package/bin/local-preview/utils/openApiCheck.js +0 -15
- package/bin/local-preview/utils/openApiCheck.js.map +0 -1
- package/bin/local-preview/utils/slugToTitle.js +0 -8
- package/bin/local-preview/utils/slugToTitle.js.map +0 -1
- package/bin/navigation.js +0 -4
- package/bin/navigation.js.map +0 -1
- package/bin/pageTemplate.js +0 -30
- package/bin/pageTemplate.js.map +0 -1
- package/src/local-preview/listener/utils/fileIsMdxOrMd.ts +0 -11
- package/src/navigation.ts +0 -12
- package/src/pageTemplate.ts +0 -32
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import path from "path";
|
|
2
|
-
import downloadImage from "../downloadImage.js";
|
|
2
|
+
import downloadImage, { cleanImageSrc, isValidImageSrc, removeMetadataFromImageSrc, } from "../downloadImage.js";
|
|
3
3
|
// To Do: Use CheerioElement instead of any when we bump the cheerio version
|
|
4
|
-
export default async function downloadAllImages($, content, origin, baseDir, modifyFileName) {
|
|
4
|
+
export default async function downloadAllImages($, content, origin, baseDir, overwrite, modifyFileName) {
|
|
5
5
|
if (!baseDir) {
|
|
6
6
|
console.debug("Skipping image downloading");
|
|
7
7
|
return;
|
|
@@ -15,47 +15,18 @@ export default async function downloadAllImages($, content, origin, baseDir, mod
|
|
|
15
15
|
.toArray()),
|
|
16
16
|
];
|
|
17
17
|
// Wait to all images to download before continuing
|
|
18
|
-
const origToNewArray = await Promise.all(imageSrcs.map(async (
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
// Add origin if the image tags are using relative sources
|
|
24
|
-
const imageHref = origImageSrc.startsWith("http")
|
|
25
|
-
? origImageSrc
|
|
26
|
-
: new URL(origImageSrc, origin).href;
|
|
27
|
-
let fileName = removeMetadataFromExtension(path.basename(imageHref));
|
|
18
|
+
const origToNewArray = await Promise.all(imageSrcs.map(async (imageSrc) => {
|
|
19
|
+
if (!isValidImageSrc(imageSrc))
|
|
20
|
+
return;
|
|
21
|
+
const imageHref = cleanImageSrc(imageSrc, origin);
|
|
22
|
+
let fileName = removeMetadataFromImageSrc(path.basename(imageHref));
|
|
28
23
|
if (modifyFileName) {
|
|
29
24
|
fileName = modifyFileName(fileName);
|
|
30
25
|
}
|
|
31
|
-
if (!fileName) {
|
|
32
|
-
console.error("Invalid image path " + imageHref);
|
|
33
|
-
return;
|
|
34
|
-
}
|
|
35
26
|
const writePath = path.join(baseDir, fileName);
|
|
36
|
-
await downloadImage(imageHref, writePath)
|
|
37
|
-
|
|
38
|
-
console.log("🖼️ - " + writePath);
|
|
39
|
-
})
|
|
40
|
-
.catch((e) => {
|
|
41
|
-
if (e.code === "EEXIST") {
|
|
42
|
-
console.log(`❌ Skipping existing image ${writePath}`);
|
|
43
|
-
}
|
|
44
|
-
else {
|
|
45
|
-
console.error(e);
|
|
46
|
-
}
|
|
47
|
-
});
|
|
48
|
-
return { [origImageSrc]: writePath };
|
|
27
|
+
await downloadImage(imageHref, writePath, overwrite);
|
|
28
|
+
return { [imageSrc]: writePath };
|
|
49
29
|
}));
|
|
50
30
|
return origToNewArray.reduce((result, current) => Object.assign(result, current), {});
|
|
51
31
|
}
|
|
52
|
-
function removeMetadataFromExtension(src) {
|
|
53
|
-
// Part of the URL standard
|
|
54
|
-
const metadataSymbols = ["?", "#"];
|
|
55
|
-
metadataSymbols.forEach((dividerSymbol) => {
|
|
56
|
-
// Some frameworks add metadata after the file extension, we need to remove that.
|
|
57
|
-
src = src.split(dividerSymbol)[0];
|
|
58
|
-
});
|
|
59
|
-
return src;
|
|
60
|
-
}
|
|
61
32
|
//# sourceMappingURL=downloadAllImages.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"downloadAllImages.js","sourceRoot":"","sources":["../../src/scraping/downloadAllImages.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,aAAa,MAAM,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"downloadAllImages.js","sourceRoot":"","sources":["../../src/scraping/downloadAllImages.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,aAAa,EAAE,EACpB,aAAa,EACb,eAAe,EACf,0BAA0B,GAC3B,MAAM,qBAAqB,CAAC;AAE7B,4EAA4E;AAC5E,MAAM,CAAC,OAAO,CAAC,KAAK,UAAU,iBAAiB,CAC7C,CAAM,EACN,OAAY,EACZ,MAAc,EACd,OAAe,EACf,SAAkB,EAClB,cAAoB;IAEpB,IAAI,CAAC,OAAO,EAAE;QACZ,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAC5C,OAAO;KACR;IAED,kEAAkE;IAClE,4CAA4C;IAC5C,MAAM,SAAS,GAAG;QAChB,GAAG,IAAI,GAAG,CACR,OAAO;aACJ,IAAI,CAAC,UAAU,CAAC;aAChB,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;aACvC,OAAO,EAAE,CACb;KACF,CAAC;IAEF,mDAAmD;IACnD,MAAM,cAAc,GAAG,MAAM,OAAO,CAAC,GAAG,CACtC,SAAS,CAAC,GAAG,CAAC,KAAK,EAAE,QAAgB,EAAE,EAAE;QACvC,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC;YAAE,OAAO;QAEvC,MAAM,SAAS,GAAG,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QAElD,IAAI,QAAQ,GAAG,0BAA0B,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;QACpE,IAAI,cAAc,EAAE;YAClB,QAAQ,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;SACrC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;QAE/C,MAAM,aAAa,CAAC,SAAS,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;QAErD,OAAO,EAAE,CAAC,QAAQ,CAAC,EAAE,SAAS,EAAE,CAAC;IACnC,CAAC,CAAC,CACH,CAAC;IAEF,OAAO,cAAc,CAAC,MAAM,CAC1B,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EACnD,EAAE,CACH,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import path from "path";
|
|
2
|
+
import downloadImage, { cleanImageSrc, isValidImageSrc, removeMetadataFromImageSrc, } from "../downloadImage.js";
|
|
3
|
+
import { getFileExtension } from "../util.js";
|
|
4
|
+
// To Do: Use CheerioElement instead of any when we bump the cheerio version
|
|
5
|
+
export default async function downloadLogoImage(imageSrc, imageBaseDir, origin, overwrite) {
|
|
6
|
+
if (!isValidImageSrc(imageSrc))
|
|
7
|
+
return;
|
|
8
|
+
const imageHref = cleanImageSrc(imageSrc, origin);
|
|
9
|
+
const ext = getFileExtension(removeMetadataFromImageSrc(imageSrc));
|
|
10
|
+
const imagePath = path.join(imageBaseDir, "logo", "logo-light-mode." + ext);
|
|
11
|
+
await downloadImage(imageHref, imagePath, overwrite);
|
|
12
|
+
}
|
|
13
|
+
//# sourceMappingURL=downloadLogoImage.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"downloadLogoImage.js","sourceRoot":"","sources":["../../src/scraping/downloadLogoImage.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,aAAa,EAAE,EACpB,aAAa,EACb,eAAe,EACf,0BAA0B,GAC3B,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAE9C,4EAA4E;AAC5E,MAAM,CAAC,OAAO,CAAC,KAAK,UAAU,iBAAiB,CAC7C,QAAgB,EAChB,YAAoB,EACpB,MAAc,EACd,SAAkB;IAElB,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC;QAAE,OAAO;IAEvC,MAAM,SAAS,GAAG,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAElD,MAAM,GAAG,GAAG,gBAAgB,CAAC,0BAA0B,CAAC,QAAQ,CAAC,CAAC,CAAC;IACnE,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,EAAE,kBAAkB,GAAG,GAAG,CAAC,CAAC;IAE5E,MAAM,aAAa,CAAC,SAAS,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;AACvD,CAAC"}
|
|
@@ -27,7 +27,7 @@ export async function scrapeFileGettingFileNameFromUrl(pathname, cliDir, origin,
|
|
|
27
27
|
const res = await axios.get(href);
|
|
28
28
|
html = res.data;
|
|
29
29
|
}
|
|
30
|
-
const { title, description, markdown } = await scrapePageFunc(html, origin, cliDir, imageBaseDir, version);
|
|
30
|
+
const { title, description, markdown } = await scrapePageFunc(html, origin, cliDir, imageBaseDir, overwrite, version);
|
|
31
31
|
// Check if page didn't have content
|
|
32
32
|
if (!title && !markdown) {
|
|
33
33
|
return undefined;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeFileGettingFileNameFromUrl.js","sourceRoot":"","sources":["../../src/scraping/scrapeFileGettingFileNameFromUrl.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,CAAC,KAAK,UAAU,gCAAgC,CACpD,QAAgB,EAChB,MAAc,EACd,MAAc,EACd,SAAkB,EAClB,
|
|
1
|
+
{"version":3,"file":"scrapeFileGettingFileNameFromUrl.js","sourceRoot":"","sources":["../../src/scraping/scrapeFileGettingFileNameFromUrl.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,CAAC,KAAK,UAAU,gCAAgC,CACpD,QAAgB,EAChB,MAAc,EACd,MAAc,EACd,SAAkB,EAClB,cAWE,EACF,SAAS,GAAG,KAAK,EACjB,OAA2B,EAC3B,YAAqB;IAErB,+BAA+B;IAC/B,IAAI,QAAQ,CAAC,UAAU,CAAC,UAAU,CAAC,IAAI,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE;QACrE,OAAO,QAAQ,CAAC;KACjB;IAED,iCAAiC;IACjC,MAAM,YAAY,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACzC,IAAI,OAAO,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAEvE,+BAA+B;IAC/B,IAAI,YAAY,IAAI,OAAO,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE;QACpD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;KAC7C;IAED,kEAAkE;IAClE,4CAA4C;IAC5C,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;IAE1D,8BAA8B;IAC9B,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC;IAC5C,IAAI,IAAY,CAAC;IACjB,IAAI,SAAS,EAAE;QACb,IAAI,GAAG,MAAM,oBAAoB,CAAC,IAAI,CAAC,CAAC;KACzC;SAAM;QACL,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAClC,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;KACjB;IAED,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,GAAG,MAAM,cAAc,CAC3D,IAAI,EACJ,MAAM,EACN,MAAM,EACN,YAAY,EACZ,SAAS,EACT,OAAO,CACR,CAAC;IAEF,oCAAoC;IACpC,IAAI,CAAC,KAAK,IAAI,CAAC,QAAQ,EAAE;QACvB,OAAO,SAAS,CAAC;KAClB;IAED,MAAM,eAAe,GAAG,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAEtE,2DAA2D;IAC3D,MAAM,QAAQ,GAAG,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,IAAI,cAAc,CAAC;IAEzE,mCAAmC;IACnC,UAAU,CACR,KAAK,EACL,WAAW,EACX,QAAQ,EACR,SAAS,EACT,eAAe,EACf,QAAQ,CACT,CAAC;IAEF,sEAAsE;IACtE,OAAO,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;AACxE,CAAC"}
|
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
import { isNavigation } from "../navigation.js";
|
|
2
1
|
import { scrapeFileGettingFileNameFromUrl } from "./scrapeFileGettingFileNameFromUrl.js";
|
|
3
2
|
export async function scrapeGettingFileNameFromUrl(navEntry, cliDir, origin, overwrite, scrapePageFunc, puppeteer = false, version, baseToRemove) {
|
|
4
|
-
if (
|
|
3
|
+
if (typeof navEntry !== "string") {
|
|
5
4
|
const newPages = [];
|
|
6
5
|
for (const nestedNavEntry of navEntry.pages) {
|
|
7
6
|
newPages.push(await scrapeGettingFileNameFromUrl(nestedNavEntry, cliDir, origin, overwrite, scrapePageFunc, puppeteer, version, baseToRemove));
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeGettingFileNameFromUrl.js","sourceRoot":"","sources":["../../src/scraping/scrapeGettingFileNameFromUrl.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"scrapeGettingFileNameFromUrl.js","sourceRoot":"","sources":["../../src/scraping/scrapeGettingFileNameFromUrl.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gCAAgC,EAAE,MAAM,uCAAuC,CAAC;AAEzF,MAAM,CAAC,KAAK,UAAU,4BAA4B,CAChD,QAA6B,EAC7B,MAAc,EACd,MAAc,EACd,SAAkB,EAClB,cAWE,EACF,SAAS,GAAG,KAAK,EACjB,OAA2B,EAC3B,YAAqB;IAErB,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE;QAChC,MAAM,QAAQ,GAAG,EAAE,CAAC;QACpB,KAAK,MAAM,cAAc,IAAI,QAAQ,CAAC,KAAK,EAAE;YAC3C,QAAQ,CAAC,IAAI,CACX,MAAM,4BAA4B,CAChC,cAAc,EACd,MAAM,EACN,MAAM,EACN,SAAS,EACT,cAAc,EACd,SAAS,EACT,OAAO,EACP,YAAY,CACb,CACF,CAAC;SACH;QACD,QAAQ,CAAC,KAAK,GAAG,QAAQ,CAAC;QAC1B,OAAO,QAAQ,CAAC;KACjB;IAED,OAAO,MAAM,gCAAgC,CAC3C,QAAQ,EACR,MAAM,EACN,MAAM,EACN,SAAS,EACT,cAAc,EACd,SAAS,EACT,OAAO,EACP,YAAY,CACb,CAAC;AACJ,CAAC"}
|
|
@@ -2,8 +2,9 @@ import path from "path";
|
|
|
2
2
|
import { createPage, getOrigin } from "../util.js";
|
|
3
3
|
export async function scrapePage(scrapeFunc, href, html, overwrite, version) {
|
|
4
4
|
const origin = getOrigin(href);
|
|
5
|
-
const
|
|
6
|
-
const
|
|
5
|
+
const cwd = process.cwd();
|
|
6
|
+
const imageBaseDir = path.join(cwd, "images");
|
|
7
|
+
const { title, description, markdown } = await scrapeFunc(html, origin, cwd, imageBaseDir, overwrite, version);
|
|
7
8
|
createPage(title, description, markdown, overwrite, process.cwd());
|
|
8
9
|
}
|
|
9
10
|
//# sourceMappingURL=scrapePage.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapePage.js","sourceRoot":"","sources":["../../src/scraping/scrapePage.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAEnD,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,
|
|
1
|
+
{"version":3,"file":"scrapePage.js","sourceRoot":"","sources":["../../src/scraping/scrapePage.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAEnD,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,UAAwB,EACxB,IAAY,EACZ,IAAY,EACZ,SAAkB,EAClB,OAA2B;IAE3B,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IAC1B,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IAE9C,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,GAAG,MAAM,UAAU,CACvD,IAAI,EACJ,MAAM,EACN,GAAG,EACH,YAAY,EACZ,SAAS,EACT,OAAO,CACR,CAAC;IACF,UAAU,CAAC,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,SAAS,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;AACrE,CAAC"}
|
|
@@ -25,7 +25,7 @@ export async function scrapePageWrapper(argv, scrapeFunc, options) {
|
|
|
25
25
|
const res = await axios.get(href);
|
|
26
26
|
html = res.data;
|
|
27
27
|
}
|
|
28
|
-
await scrapePage(scrapeFunc, href, html, argv.overwrite, options?.version);
|
|
28
|
+
await scrapePage(scrapeFunc, href, html, !!argv.overwrite, options?.version);
|
|
29
29
|
process.exit(0);
|
|
30
30
|
}
|
|
31
31
|
export async function scrapePageAutomatically(argv) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapePageCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapePageCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,oBAAoB,EAAE,MAAM,yCAAyC,CAAC;AAC/E,OAAO,EAAE,iBAAiB,EAAE,MAAM,sCAAsC,CAAC;AACzE,OAAO,EAAE,gBAAgB,EAAE,MAAM,qCAAqC,CAAC;AACvE,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AAErD,SAAS,iBAAiB,CAAC,SAAS;IAClC,IAAI,CAAC,SAAS,EAAE;QACd,OAAO,CAAC,GAAG,CACT,kEAAkE,CACnE,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;QAClC,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;KACxB;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,IAAS,EACT,
|
|
1
|
+
{"version":3,"file":"scrapePageCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapePageCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,oBAAoB,EAAE,MAAM,yCAAyC,CAAC;AAC/E,OAAO,EAAE,iBAAiB,EAAE,MAAM,sCAAsC,CAAC;AACzE,OAAO,EAAE,gBAAgB,EAAE,MAAM,qCAAqC,CAAC;AACvE,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AAErD,SAAS,iBAAiB,CAAC,SAAS;IAClC,IAAI,CAAC,SAAS,EAAE;QACd,OAAO,CAAC,GAAG,CACT,kEAAkE,CACnE,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;QAClC,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;KACxB;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,IAAS,EACT,UAAwB,EACxB,OAAmD;IAEnD,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,IAAI,IAAY,CAAC;IACjB,IAAI,OAAO,EAAE,SAAS,EAAE;QACtB,IAAI,GAAG,MAAM,oBAAoB,CAAC,IAAI,CAAC,CAAC;KACzC;SAAM;QACL,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAClC,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;KACjB;IACD,MAAM,UAAU,CAAC,UAAU,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IAC7E,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAAC,IAAS;IACrD,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IAErD,iBAAiB,CAAC,SAAS,CAAC,CAAC;IAE7B,OAAO,CAAC,GAAG,CAAC,sBAAsB,GAAG,SAAS,CAAC,CAAC;IAEhD,IAAI,SAAS,KAAK,UAAU,CAAC,UAAU,EAAE;QACvC,MAAM,iBAAiB,CAAC,IAAI,EAAE,oBAAoB,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;KAClE;SAAM,IAAI,SAAS,KAAK,UAAU,CAAC,OAAO,EAAE;QAC3C,MAAM,iBAAiB,CAAC,IAAI,EAAE,iBAAiB,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;KACvE;SAAM,IAAI,SAAS,KAAK,UAAU,CAAC,MAAM,EAAE;QAC1C,MAAM,iBAAiB,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAAC;KACjD;AACH,CAAC"}
|
|
@@ -1,7 +1,10 @@
|
|
|
1
|
+
import path from "path";
|
|
1
2
|
import { objToReadableString } from "../util.js";
|
|
2
3
|
export async function scrapeSection(scrapeFunc, html, origin, overwrite, version) {
|
|
3
4
|
console.log(`Started scraping${overwrite ? ", overwrite mode is on" : ""}...`);
|
|
4
|
-
const
|
|
5
|
+
const cwd = process.cwd();
|
|
6
|
+
const imageBaseDir = path.join(cwd, "images");
|
|
7
|
+
const groupsConfig = await scrapeFunc(html, origin, cwd, imageBaseDir, overwrite, version);
|
|
5
8
|
console.log("Finished scraping.");
|
|
6
9
|
console.log("Add the following to your navigation in mint.json:");
|
|
7
10
|
console.log(objToReadableString(groupsConfig));
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeSection.js","sourceRoot":"","sources":["../../src/scraping/scrapeSection.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC;AAEjD,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,
|
|
1
|
+
{"version":3,"file":"scrapeSection.js","sourceRoot":"","sources":["../../src/scraping/scrapeSection.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC;AAEjD,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,UAA2B,EAC3B,IAAY,EACZ,MAAc,EACd,SAAkB,EAClB,OAA2B;IAE3B,OAAO,CAAC,GAAG,CACT,mBAAmB,SAAS,CAAC,CAAC,CAAC,wBAAwB,CAAC,CAAC,CAAC,EAAE,KAAK,CAClE,CAAC;IACF,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IAC1B,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IAE9C,MAAM,YAAY,GAAG,MAAM,UAAU,CACnC,IAAI,EACJ,MAAM,EACN,GAAG,EACH,YAAY,EACZ,SAAS,EACT,OAAO,CACR,CAAC;IACF,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;IAClC,OAAO,CAAC,GAAG,CAAC,oDAAoD,CAAC,CAAC;IAClE,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,YAAY,CAAC,CAAC,CAAC;AACjD,CAAC"}
|
|
@@ -12,7 +12,7 @@ export async function scrapeSectionAxiosWrapper(argv, scrapeFunc) {
|
|
|
12
12
|
const href = getHrefFromArgs(argv);
|
|
13
13
|
const res = await axios.get(href);
|
|
14
14
|
const html = res.data;
|
|
15
|
-
await scrapeSection(scrapeFunc, html, getOrigin(href), argv.overwrite, undefined);
|
|
15
|
+
await scrapeSection(scrapeFunc, html, getOrigin(href), !!argv.overwrite, undefined);
|
|
16
16
|
process.exit(0);
|
|
17
17
|
}
|
|
18
18
|
export async function scrapeDocusaurusSectionCommand(argv, version // "1" | "2" | "3"
|
|
@@ -31,7 +31,7 @@ async function scrapeSectionOpeningAllNested(argv, openLinks, scrapeFunc, versio
|
|
|
31
31
|
});
|
|
32
32
|
const html = await openLinks(page);
|
|
33
33
|
browser.close();
|
|
34
|
-
await scrapeSection(scrapeFunc, html, getOrigin(href), argv.overwrite, version);
|
|
34
|
+
await scrapeSection(scrapeFunc, html, getOrigin(href), !!argv.overwrite, version);
|
|
35
35
|
process.exit(0);
|
|
36
36
|
}
|
|
37
37
|
export async function scrapeSectionAutomatically(argv) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeSectionCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapeSectionCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACxD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,uBAAuB,EAAE,MAAM,4CAA4C,CAAC;AACrF,OAAO,yBAAyB,MAAM,8CAA8C,CAAC;AACrF,OAAO,EAAE,oBAAoB,EAAE,MAAM,yCAAyC,CAAC;AAC/E,OAAO,sBAAsB,MAAM,2CAA2C,CAAC;AAC/E,OAAO,EAAE,mBAAmB,EAAE,MAAM,wCAAwC,CAAC;AAC7E,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAE7C,MAAM,CAAC,KAAK,UAAU,yBAAyB,
|
|
1
|
+
{"version":3,"file":"scrapeSectionCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapeSectionCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACxD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,uBAAuB,EAAE,MAAM,4CAA4C,CAAC;AACrF,OAAO,yBAAyB,MAAM,8CAA8C,CAAC;AACrF,OAAO,EAAE,oBAAoB,EAAE,MAAM,yCAAyC,CAAC;AAC/E,OAAO,sBAAsB,MAAM,2CAA2C,CAAC;AAC/E,OAAO,EAAE,mBAAmB,EAAE,MAAM,wCAAwC,CAAC;AAC7E,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAE7C,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAC7C,IAAS,EACT,UAA2B;IAE3B,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,MAAM,aAAa,CACjB,UAAU,EACV,IAAI,EACJ,SAAS,CAAC,IAAI,CAAC,EACf,CAAC,CAAC,IAAI,CAAC,SAAS,EAChB,SAAS,CACV,CAAC;IACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,8BAA8B,CAClD,IAAS,EACT,OAAe,CAAC,kBAAkB;;IAElC,MAAM,6BAA6B,CACjC,IAAI,EACJ,yBAAyB,EACzB,uBAAuB,EACvB,OAAO,CACR,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,IAAS;IACzD,MAAM,6BAA6B,CACjC,IAAI,EACJ,sBAAsB,EACtB,oBAAoB,CACrB,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,6BAA6B,CAC1C,IAAS,EACT,SAAc,EACd,UAA2B,EAC3B,OAAgB;IAEhB,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IAEnC,MAAM,OAAO,GAAG,MAAM,YAAY,EAAE,CAAC;IACrC,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;IACrC,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;QACpB,SAAS,EAAE,cAAc;KAC1B,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,CAAC;IACnC,OAAO,CAAC,KAAK,EAAE,CAAC;IAChB,MAAM,aAAa,CACjB,UAAU,EACV,IAAI,EACJ,SAAS,CAAC,IAAI,CAAC,EACf,CAAC,CAAC,IAAI,CAAC,SAAS,EAChB,OAAO,CACR,CAAC;IACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAAC,IAAS;IACxD,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IAErD,iBAAiB,CAAC,SAAS,CAAC,CAAC;IAE7B,OAAO,CAAC,GAAG,CAAC,sBAAsB,GAAG,SAAS,CAAC,CAAC;IAEhD,IAAI,SAAS,KAAK,UAAU,CAAC,UAAU,EAAE;QACvC,MAAM,8BAA8B,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;KACrD;SAAM,IAAI,SAAS,KAAK,UAAU,CAAC,OAAO,EAAE;QAC3C,MAAM,2BAA2B,CAAC,IAAI,CAAC,CAAC;KACzC;SAAM,IAAI,SAAS,KAAK,UAAU,CAAC,MAAM,EAAE;QAC1C,MAAM,yBAAyB,CAAC,IAAI,EAAE,mBAAmB,CAAC,CAAC;KAC5D;AACH,CAAC;AAED,SAAS,iBAAiB,CAAC,SAAiC;IAC1D,IAAI,CAAC,SAAS,EAAE;QACd,OAAO,CAAC,GAAG,CACT,4GAA4G,CAC7G,CAAC;QACF,OAAO,CAAC,IAAI,EAAE,CAAC;KAChB;AACH,CAAC"}
|
|
@@ -2,7 +2,7 @@ import cheerio from "cheerio";
|
|
|
2
2
|
import { NodeHtmlMarkdown } from "node-html-markdown";
|
|
3
3
|
import downloadAllImages from "../downloadAllImages.js";
|
|
4
4
|
import replaceImagePaths from "../replaceImagePaths.js";
|
|
5
|
-
export async function scrapeDocusaurusPage(html, origin, cliDir, imageBaseDir, version // expects "2", or "3". Have not written support for "1" yet
|
|
5
|
+
export async function scrapeDocusaurusPage(html, origin, cliDir, imageBaseDir, overwrite, version // expects "2", or "3". Have not written support for "1" yet
|
|
6
6
|
) {
|
|
7
7
|
const $ = cheerio.load(html);
|
|
8
8
|
const article = version === "3" ? $(".theme-doc-markdown").first() : $("article").first();
|
|
@@ -15,7 +15,7 @@ export async function scrapeDocusaurusPage(html, origin, cliDir, imageBaseDir, v
|
|
|
15
15
|
// Do not include title in the content when we insert it in our metadata
|
|
16
16
|
titleComponent.remove();
|
|
17
17
|
const markdownContent = version === "3" ? article : article.find(".markdown").first();
|
|
18
|
-
const origToWritePath = await downloadAllImages($, markdownContent, origin, imageBaseDir);
|
|
18
|
+
const origToWritePath = await downloadAllImages($, markdownContent, origin, imageBaseDir, overwrite);
|
|
19
19
|
const markdownHtml = markdownContent.html();
|
|
20
20
|
const nhm = new NodeHtmlMarkdown({ useInlineLinks: false });
|
|
21
21
|
let markdown = nhm.translate(markdownHtml);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeDocusaurusPage.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeDocusaurusPage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,OAA2B,CAAC,4DAA4D;;IAExF,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,OAAO,GACX,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC;IAE5E,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE;QACxB,oEAAoE;QACpE,OAAO,EAAE,CAAC;KACX;IAED,MAAM,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAE3C,wEAAwE;IACxE,cAAc,CAAC,MAAM,EAAE,CAAC;IAExB,MAAM,eAAe,GACnB,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,KAAK,EAAE,CAAC;IAEhE,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,eAAe,EACf,MAAM,EACN,YAAY,
|
|
1
|
+
{"version":3,"file":"scrapeDocusaurusPage.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeDocusaurusPage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,OAA2B,CAAC,4DAA4D;;IAExF,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,OAAO,GACX,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC;IAE5E,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE;QACxB,oEAAoE;QACpE,OAAO,EAAE,CAAC;KACX;IAED,MAAM,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAE3C,wEAAwE;IACxE,cAAc,CAAC,MAAM,EAAE,CAAC;IAExB,MAAM,eAAe,GACnB,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,KAAK,EAAE,CAAC;IAEhE,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,eAAe,EACf,MAAM,EACN,YAAY,EACZ,SAAS,CACV,CAAC;IAEF,MAAM,YAAY,GAAG,eAAe,CAAC,IAAI,EAAE,CAAC;IAE5C,MAAM,GAAG,GAAG,IAAI,gBAAgB,CAAC,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;IAC5D,IAAI,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;IAE3C,IAAI,QAAQ,IAAI,IAAI,EAAE;QACpB,OAAO,CAAC,KAAK,CACX,6DAA6D,CAC9D,CAAC;QACF,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;KACnD;IAED,mGAAmG;IACnG,qFAAqF;IACrF,yFAAyF;IACzF,kFAAkF;IAClF,MAAM,WAAW,GAAG,IAAI,CAAC;IACzB,0EAA0E;IAC1E,0CAA0C;IAC1C,wBAAwB;IACxB,IAAI;IAEJ,uCAAuC;IACvC,wDAAwD;IACxD,sCAAsC;IACtC,6BAA6B;IAC7B,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,qBAAqB,EAAE,IAAI,CAAC,CAAC;IAEzD,qDAAqD;IACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAE3C,iCAAiC;IACjC,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,gEAAgE;IAChE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;IAEjE,QAAQ,GAAG,iBAAiB,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAEhE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC"}
|
|
@@ -3,8 +3,12 @@ import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js
|
|
|
3
3
|
import combineNavWithEmptyGroupTitles from "../combineNavWithEmptyGroupTitles.js";
|
|
4
4
|
import { scrapeDocusaurusPage } from "./scrapeDocusaurusPage.js";
|
|
5
5
|
import { getDocusaurusLinksPerGroup } from "./links-per-group/getDocusaurusLinksPerGroup.js";
|
|
6
|
-
|
|
6
|
+
import downloadLogoImage from "../downloadLogoImage.js";
|
|
7
|
+
export async function scrapeDocusaurusSection(html, origin, cliDir, imageBaseDir, overwrite, version) {
|
|
7
8
|
const $ = cheerio.load(html);
|
|
9
|
+
// Download the logo
|
|
10
|
+
const logoSrc = $(".navbar__logo img").attr("src");
|
|
11
|
+
downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite);
|
|
8
12
|
// Get all the navigation sections
|
|
9
13
|
const navigationSections = $(".theme-doc-sidebar-menu").first().children();
|
|
10
14
|
// Get all links per group
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeDocusaurusSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeDocusaurusSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"scrapeDocusaurusSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeDocusaurusSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,8BAA8B,MAAM,sCAAsC,CAAC;AAClF,OAAO,EAAE,oBAAoB,EAAE,MAAM,2BAA2B,CAAC;AACjE,OAAO,EAAE,0BAA0B,EAAE,MAAM,iDAAiD,CAAC;AAC7F,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAC3C,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,OAAe;IAEf,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,oBAAoB;IACpB,MAAM,OAAO,GAAG,CAAC,CAAC,mBAAmB,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACnD,iBAAiB,CAAC,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;IAE5D,kCAAkC;IAClC,MAAM,kBAAkB,GAAG,CAAC,CAAC,yBAAyB,CAAC,CAAC,KAAK,EAAE,CAAC,QAAQ,EAAE,CAAC;IAE3E,0BAA0B;IAC1B,MAAM,YAAY,GAAqB,0BAA0B,CAC/D,kBAAkB,EAClB,CAAC,EACD,OAAO,CACR,CAAC;IAEF,0CAA0C;IAC1C,MAAM,mBAAmB,GAAG,8BAA8B,CAAC,YAAY,CAAC,CAAC;IAEzE,sCAAsC;IACtC,MAAM,sBAAsB,GAAG,MAAM,OAAO,CAAC,GAAG,CAC9C,mBAAmB,CAAC,GAAG,CAAC,KAAK,EAAE,WAAW,EAAE,EAAE;QAC5C,WAAW,CAAC,KAAK,GAAG,CAClB,MAAM,OAAO,CAAC,GAAG,CACf,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,QAA6B,EAAE,EAAE;QAC5D,sEAAsE;QACtE,mDAAmD;QACnD,4BAA4B,CAC1B,QAAQ,EACR,MAAM,EACN,MAAM,EACN,SAAS,EACT,oBAAoB,EACpB,KAAK,EACL,OAAO,EACP,OAAO,CACR,CACF,CACF,CACF;YACC,6EAA6E;aAC5E,MAAM,CAAC,OAAO,CAAC,CAAC;QACnB,OAAO,WAAW,CAAC;IACrB,CAAC,CAAC,CACH,CAAC;IAEF,OAAO,sBAAsB,CAAC;AAChC,CAAC"}
|
|
@@ -2,7 +2,7 @@ import cheerio from "cheerio";
|
|
|
2
2
|
import { NodeHtmlMarkdown } from "node-html-markdown";
|
|
3
3
|
import downloadAllImages from "../downloadAllImages.js";
|
|
4
4
|
import replaceImagePaths from "../replaceImagePaths.js";
|
|
5
|
-
export async function scrapeGitBookPage(html, origin, cliDir, imageBaseDir, _ // version
|
|
5
|
+
export async function scrapeGitBookPage(html, origin, cliDir, imageBaseDir, overwrite, _ // version
|
|
6
6
|
) {
|
|
7
7
|
const $ = cheerio.load(html);
|
|
8
8
|
const titleComponent = $('[data-testid="page.title"]').first();
|
|
@@ -17,7 +17,7 @@ export async function scrapeGitBookPage(html, origin, cliDir, imageBaseDir, _ //
|
|
|
17
17
|
// Remove GitBook metadata from the start
|
|
18
18
|
// The first four %2F split metadata fields. Remaining ones are part of the file name.
|
|
19
19
|
fileName.split("%2F").slice(4).join("%2F");
|
|
20
|
-
const origToWritePath = await downloadAllImages($, content, origin, imageBaseDir, modifyFileName);
|
|
20
|
+
const origToWritePath = await downloadAllImages($, content, origin, imageBaseDir, overwrite, modifyFileName);
|
|
21
21
|
const nhm = new NodeHtmlMarkdown({ useInlineLinks: false });
|
|
22
22
|
let markdown = nhm.translate(contentHtml);
|
|
23
23
|
// Keep headers on one line and increase their depth by one
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeGitBookPage.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeGitBookPage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,CAAqB,CAAC,UAAU;;IAEhC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,cAAc,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,KAAK,EAAE,CAAC;IAC/D,MAAM,mBAAmB,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC;IAE7E,MAAM,WAAW,GAAG,mBAAmB;SACpC,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC;SAClC,IAAI,EAAE,CAAC;IACV,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAE3C,MAAM,OAAO,GAAG,CAAC,CAAC,oCAAoC,CAAC,CAAC,KAAK,EAAE,CAAC;IAChE,MAAM,WAAW,GAAG,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAEpC,MAAM,cAAc,GAAG,CAAC,QAAQ,EAAE,EAAE;IAClC,yCAAyC;IACzC,sFAAsF;IACtF,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAE7C,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,OAAO,EACP,MAAM,EACN,YAAY,EACZ,cAAc,CACf,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,gBAAgB,CAAC,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;IAC5D,IAAI,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAE1C,2DAA2D;IAC3D,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IAE9C,qDAAqD;IACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAE3C,iCAAiC;IACjC,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,gEAAgE;IAChE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;IAEjE,QAAQ,GAAG,iBAAiB,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAEhE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC"}
|
|
1
|
+
{"version":3,"file":"scrapeGitBookPage.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeGitBookPage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,CAAqB,CAAC,UAAU;;IAEhC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,cAAc,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,KAAK,EAAE,CAAC;IAC/D,MAAM,mBAAmB,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC;IAE7E,MAAM,WAAW,GAAG,mBAAmB;SACpC,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC;SAClC,IAAI,EAAE,CAAC;IACV,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAE3C,MAAM,OAAO,GAAG,CAAC,CAAC,oCAAoC,CAAC,CAAC,KAAK,EAAE,CAAC;IAChE,MAAM,WAAW,GAAG,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAEpC,MAAM,cAAc,GAAG,CAAC,QAAQ,EAAE,EAAE;IAClC,yCAAyC;IACzC,sFAAsF;IACtF,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAE7C,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,OAAO,EACP,MAAM,EACN,YAAY,EACZ,SAAS,EACT,cAAc,CACf,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,gBAAgB,CAAC,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;IAC5D,IAAI,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAE1C,2DAA2D;IAC3D,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IAE9C,qDAAqD;IACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAE3C,iCAAiC;IACjC,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,gEAAgE;IAChE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;IAEjE,QAAQ,GAAG,iBAAiB,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAEhE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC"}
|
|
@@ -4,8 +4,14 @@ import { scrapeGitBookPage } from "./scrapeGitBookPage.js";
|
|
|
4
4
|
import combineNavWithEmptyGroupTitles from "../combineNavWithEmptyGroupTitles.js";
|
|
5
5
|
import getLinksRecursivelyGitBook from "./links-per-group/getLinksRecursivelyGitBook.js";
|
|
6
6
|
import alternateGroupTitle from "./alternateGroupTitle.js";
|
|
7
|
-
|
|
7
|
+
import downloadLogoImage from "../downloadLogoImage.js";
|
|
8
|
+
export async function scrapeGitBookSection(html, origin, cliDir, imageBaseDir, overwrite, version) {
|
|
8
9
|
const $ = cheerio.load(html);
|
|
10
|
+
// Download the logo
|
|
11
|
+
const logoSrc = $('a[data-testid="public.headerHomeLink"] img')
|
|
12
|
+
.first()
|
|
13
|
+
.attr("src");
|
|
14
|
+
downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite);
|
|
9
15
|
// Get all the navigation sections
|
|
10
16
|
// Some variants of the GitBook UI show the logo and search base in the side navigation bar,
|
|
11
17
|
// but the navigation sections are always the last value.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeGitBookSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeGitBookSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAC3D,OAAO,8BAA8B,MAAM,sCAAsC,CAAC;AAClF,OAAO,0BAA0B,MAAM,iDAAiD,CAAC;AACzF,OAAO,mBAAmB,MAAM,0BAA0B,CAAC;
|
|
1
|
+
{"version":3,"file":"scrapeGitBookSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeGitBookSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAC3D,OAAO,8BAA8B,MAAM,sCAAsC,CAAC;AAClF,OAAO,0BAA0B,MAAM,iDAAiD,CAAC;AACzF,OAAO,mBAAmB,MAAM,0BAA0B,CAAC;AAC3D,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,OAA2B;IAE3B,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,oBAAoB;IACpB,MAAM,OAAO,GAAG,CAAC,CAAC,4CAA4C,CAAC;SAC5D,KAAK,EAAE;SACP,IAAI,CAAC,KAAK,CAAC,CAAC;IACf,iBAAiB,CAAC,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;IAE5D,kCAAkC;IAClC,4FAA4F;IAC5F,yDAAyD;IACzD,MAAM,kBAAkB,GAAG,CAAC,CAC1B,wEAAwE,CACzE;SACE,QAAQ,EAAE;SACV,EAAE,CAAC,CAAC,CAAC,CAAC;SACN,QAAQ,EAAE;SACV,KAAK,EAAE;SACP,QAAQ,EAAE,CAAC;IAEd,0BAA0B;IAC1B,MAAM,YAAY,GAAqB,kBAAkB;SACtD,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACZ,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,YAAY,GAAG,CAAC,CAAC,OAAO,CAAC;aAC5B,IAAI,CAAC,uBAAuB,CAAC;aAC7B,KAAK,EAAE;aACP,IAAI,EAAE,CAAC;QAEV,0DAA0D;QAC1D,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC3C,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEzC,MAAM,YAAY,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;QACzD,MAAM,KAAK,GAAG,0BAA0B,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC;QAE1D,OAAO;YACL,KAAK,EAAE,YAAY,IAAI,mBAAmB,CAAC,SAAS,EAAE,KAAK,CAAC;YAC5D,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK;SACjD,CAAC;IACJ,CAAC,CAAC;SACD,OAAO,EAAE;SACT,MAAM,CAAC,OAAO,CAAC,CAAC;IAEnB,0CAA0C;IAC1C,MAAM,mBAAmB,GAAG,8BAA8B,CAAC,YAAY,CAAC,CAAC;IAEzE,sCAAsC;IACtC,MAAM,sBAAsB,GAAG,MAAM,OAAO,CAAC,GAAG,CAC9C,mBAAmB,CAAC,GAAG,CAAC,KAAK,EAAE,QAA6B,EAAE,EAAE;QAC9D,OAAO,MAAM,4BAA4B,CACvC,QAAQ,EACR,MAAM,EACN,MAAM,EACN,SAAS,EACT,iBAAiB,EACjB,IAAI,EACJ,OAAO,CACR,CAAC;IACJ,CAAC,CAAC,CACH,CAAC;IAEF,OAAO,sBAAsB,CAAC;AAChC,CAAC"}
|
|
@@ -2,7 +2,7 @@ import cheerio from "cheerio";
|
|
|
2
2
|
import { NodeHtmlMarkdown } from "node-html-markdown";
|
|
3
3
|
import downloadAllImages from "../downloadAllImages.js";
|
|
4
4
|
import replaceImagePaths from "../replaceImagePaths.js";
|
|
5
|
-
export async function scrapeReadMePage(html, origin, cliDir, imageBaseDir, _ // version
|
|
5
|
+
export async function scrapeReadMePage(html, origin, cliDir, imageBaseDir, overwrite, _ // version
|
|
6
6
|
) {
|
|
7
7
|
const $ = cheerio.load(html);
|
|
8
8
|
const titleComponent = $("h1").first();
|
|
@@ -17,7 +17,7 @@ export async function scrapeReadMePage(html, origin, cliDir, imageBaseDir, _ //
|
|
|
17
17
|
}
|
|
18
18
|
// API Pages don't have a markdown body in the same position so there's no HTML
|
|
19
19
|
let contentHtml = content.html() || "";
|
|
20
|
-
const origToWritePath = await downloadAllImages($, content, origin, imageBaseDir);
|
|
20
|
+
const origToWritePath = await downloadAllImages($, content, origin, imageBaseDir, overwrite);
|
|
21
21
|
const nhm = new NodeHtmlMarkdown({ useInlineLinks: false });
|
|
22
22
|
let markdown = nhm.translate(contentHtml);
|
|
23
23
|
// Keep headers on one line and increase their depth by one
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeReadMePage.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeReadMePage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,CAAqB,CAAC,UAAU;;IAEhC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,cAAc,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;IACvC,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAC3C,IAAI,WAAW,GAAG,CAAC,CAAC,gBAAgB,EAAE,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAC7E,IAAI,CAAC,WAAW,EAAE;QAChB,WAAW,GAAG,CAAC,CAAC,wBAAwB,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;KACzD;IAED,IAAI,OAAO,GAAG,CAAC,CAAC,8BAA8B,CAAC,CAAC,KAAK,EAAE,CAAC;IACxD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE;QACxB,OAAO,GAAG,CAAC,CAAC,8BAA8B,CAAC,CAAC;KAC7C;IAED,+EAA+E;IAC/E,IAAI,WAAW,GAAG,OAAO,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC;IAEvC,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,OAAO,EACP,MAAM,EACN,YAAY,
|
|
1
|
+
{"version":3,"file":"scrapeReadMePage.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeReadMePage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,CAAqB,CAAC,UAAU;;IAEhC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,cAAc,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;IACvC,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAC3C,IAAI,WAAW,GAAG,CAAC,CAAC,gBAAgB,EAAE,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAC7E,IAAI,CAAC,WAAW,EAAE;QAChB,WAAW,GAAG,CAAC,CAAC,wBAAwB,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;KACzD;IAED,IAAI,OAAO,GAAG,CAAC,CAAC,8BAA8B,CAAC,CAAC,KAAK,EAAE,CAAC;IACxD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE;QACxB,OAAO,GAAG,CAAC,CAAC,8BAA8B,CAAC,CAAC;KAC7C;IAED,+EAA+E;IAC/E,IAAI,WAAW,GAAG,OAAO,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC;IAEvC,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,OAAO,EACP,MAAM,EACN,YAAY,EACZ,SAAS,CACV,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,gBAAgB,CAAC,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;IAC5D,IAAI,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAE1C,2DAA2D;IAC3D,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IAE9C,qDAAqD;IACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAE3C,6BAA6B;IAC7B,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,kBAAkB,EAAE,IAAI,CAAC,CAAC;IAEtD,iCAAiC;IACjC,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,gEAAgE;IAChE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;IAEjE,QAAQ,GAAG,iBAAiB,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAEhE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC"}
|
|
@@ -2,8 +2,12 @@ import cheerio from "cheerio";
|
|
|
2
2
|
import { scrapeReadMePage } from "./scrapeReadMePage.js";
|
|
3
3
|
import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js";
|
|
4
4
|
import getLinksRecursively from "./links-per-group/getLinksRecursively.js";
|
|
5
|
-
|
|
5
|
+
import downloadLogoImage from "../downloadLogoImage.js";
|
|
6
|
+
export async function scrapeReadMeSection(html, origin, cliDir, imageBaseDir, overwrite, version) {
|
|
6
7
|
const $ = cheerio.load(html);
|
|
8
|
+
// Download the logo
|
|
9
|
+
const logoSrc = $(".rm-Logo-img").first().attr("src");
|
|
10
|
+
downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite);
|
|
7
11
|
// Get all the navigation sections, but only from the first
|
|
8
12
|
// sidebar found. There are multiple in the HTML for mobile
|
|
9
13
|
// responsiveness but they all have the same links.
|
|
@@ -28,11 +32,13 @@ export async function scrapeReadMeSection(html, origin, cliDir, overwrite, versi
|
|
|
28
32
|
};
|
|
29
33
|
})
|
|
30
34
|
.toArray();
|
|
31
|
-
|
|
35
|
+
// Scrape each link in the navigation.
|
|
36
|
+
const groupsConfigCleanPaths = await Promise.all(groupsConfig.map(async (navEntry) => {
|
|
32
37
|
return await scrapeGettingFileNameFromUrl(
|
|
33
|
-
// ReadMe requires a directory on all sections
|
|
38
|
+
// ReadMe requires a directory on all sections whereas we use root.
|
|
34
39
|
// /docs is their default directory so we remove it
|
|
35
40
|
navEntry, cliDir, origin, overwrite, scrapeReadMePage, false, version, "/docs");
|
|
36
41
|
}));
|
|
42
|
+
return groupsConfigCleanPaths;
|
|
37
43
|
}
|
|
38
44
|
//# sourceMappingURL=scrapeReadMeSection.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeReadMeSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeReadMeSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,mBAAmB,MAAM,0CAA0C,CAAC;
|
|
1
|
+
{"version":3,"file":"scrapeReadMeSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeReadMeSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,mBAAmB,MAAM,0CAA0C,CAAC;AAC3E,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,OAA2B;IAE3B,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,oBAAoB;IACpB,MAAM,OAAO,GAAG,CAAC,CAAC,cAAc,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACtD,iBAAiB,CAAC,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;IAE5D,2DAA2D;IAC3D,2DAA2D;IAC3D,mDAAmD;IACnD,MAAM,kBAAkB,GAAG,CAAC,CAAC,aAAa,CAAC;SACxC,KAAK,EAAE;SACP,IAAI,CAAC,qBAAqB,CAAC,CAAC;IAE/B,MAAM,YAAY,GAAqB,kBAAkB;SACtD,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACZ,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC;QAEvD,uDAAuD;QACvD,yDAAyD;QACzD,+BAA+B;QAC/B,iDAAiD;QACjD,iCAAiC;QACjC,MAAM,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC,KAAK,EAAE,CAAC,QAAQ,EAAE,CAAC;QACzE,MAAM,KAAK,GAAG,mBAAmB,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,MAAM,CACvD,CAAC,KAAa,EAAE,KAAa,EAAE,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,KAAK,CACtE,CAAC;QAEF,0CAA0C;QAC1C,OAAO;YACL,KAAK,EAAE,YAAY;YACnB,KAAK,EAAE,KAAK;SACb,CAAC;IACJ,CAAC,CAAC;SACD,OAAO,EAAE,CAAC;IAEb,sCAAsC;IACtC,MAAM,sBAAsB,GAAG,MAAM,OAAO,CAAC,GAAG,CAC9C,YAAY,CAAC,GAAG,CAAC,KAAK,EAAE,QAA6B,EAAE,EAAE;QACvD,OAAO,MAAM,4BAA4B;QACvC,mEAAmE;QACnE,mDAAmD;QACnD,QAAQ,EACR,MAAM,EACN,MAAM,EACN,SAAS,EACT,gBAAgB,EAChB,KAAK,EACL,OAAO,EACP,OAAO,CACR,CAAC;IACJ,CAAC,CAAC,CACH,CAAC;IAEF,OAAO,sBAAsB,CAAC;AAChC,CAAC"}
|
package/bin/util.js
CHANGED
|
@@ -105,8 +105,10 @@ export const buildLogger = (startText = "") => {
|
|
|
105
105
|
return logger;
|
|
106
106
|
};
|
|
107
107
|
export const getFileExtension = (filename) => {
|
|
108
|
-
|
|
109
|
-
|
|
108
|
+
const ext = filename.substring(filename.lastIndexOf(".") + 1, filename.length);
|
|
109
|
+
if (filename === ext)
|
|
110
|
+
return undefined;
|
|
111
|
+
return ext;
|
|
110
112
|
};
|
|
111
113
|
export const fileBelongsInPagesFolder = (filename) => {
|
|
112
114
|
const extension = getFileExtension(filename);
|
package/bin/util.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAC9C,OAAO,GAAG,MAAM,KAAK,CAAC;AACtB,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,SAAS,CAAC;AAC5B,OAAO,iBAAiB,MAAM,mCAAmC,CAAC;AAElE,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,IAAY,EACZ,KAAa,EACb,OAAe,EACf,MAAc,EACd,QAAgB,EAChB,EAAE;IACF,OAAO;QACL,IAAI;QACJ,IAAI,EAAE,EAAE;QACR,OAAO,EAAE,EAAE;QACX,MAAM,EAAE;YACN,OAAO,EAAE,KAAK;SACf;QACD,WAAW,EAAE,EAAE;QACf,eAAe,EAAE;YACf,IAAI,EAAE,OAAO;YACb,GAAG,EAAE,MAAM;SACZ;QACD,OAAO,EAAE,EAAE;QACX,UAAU,EAAE;YACV;gBACE,KAAK,EAAE,MAAM;gBACb,KAAK,EAAE,CAAC,QAAQ,CAAC;aAClB;SACF;QACD,6DAA6D;KAC9D,CAAC;AACJ,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,IAAI,GAAG,CAClB,KAAa,EACb,WAAoB,EACpB,QAAiB,EACjB,EAAE;IACF,uDAAuD;IACvD,yDAAyD;IACzD,wBAAwB;IACxB,MAAM,eAAe,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;IAC9C,MAAM,aAAa,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;IAC5C,IAAI,CAAC,eAAe,EAAE;QACpB,KAAK,GAAG,GAAG,GAAG,KAAK,CAAC;KACrB;IACD,IAAI,CAAC,aAAa,EAAE;QAClB,KAAK,GAAG,KAAK,GAAG,GAAG,CAAC;KACrB;IAED,MAAM,mBAAmB,GAAG,WAAW;QACrC,CAAC,CAAC,mBAAmB,WAAW,GAAG;QACnC,CAAC,CAAC,EAAE,CAAC;IACP,OAAO,eAAe,KAAK,GAAG,mBAAmB,YAAY,QAAQ,EAAE,CAAC;AAC1E,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAC9C,OAAO,GAAG,MAAM,KAAK,CAAC;AACtB,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,SAAS,CAAC;AAC5B,OAAO,iBAAiB,MAAM,mCAAmC,CAAC;AAElE,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,IAAY,EACZ,KAAa,EACb,OAAe,EACf,MAAc,EACd,QAAgB,EAChB,EAAE;IACF,OAAO;QACL,IAAI;QACJ,IAAI,EAAE,EAAE;QACR,OAAO,EAAE,EAAE;QACX,MAAM,EAAE;YACN,OAAO,EAAE,KAAK;SACf;QACD,WAAW,EAAE,EAAE;QACf,eAAe,EAAE;YACf,IAAI,EAAE,OAAO;YACb,GAAG,EAAE,MAAM;SACZ;QACD,OAAO,EAAE,EAAE;QACX,UAAU,EAAE;YACV;gBACE,KAAK,EAAE,MAAM;gBACb,KAAK,EAAE,CAAC,QAAQ,CAAC;aAClB;SACF;QACD,6DAA6D;KAC9D,CAAC;AACJ,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,IAAI,GAAG,CAClB,KAAa,EACb,WAAoB,EACpB,QAAiB,EACjB,EAAE;IACF,uDAAuD;IACvD,yDAAyD;IACzD,wBAAwB;IACxB,MAAM,eAAe,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;IAC9C,MAAM,aAAa,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;IAC5C,IAAI,CAAC,eAAe,EAAE;QACpB,KAAK,GAAG,GAAG,GAAG,KAAK,CAAC;KACrB;IACD,IAAI,CAAC,aAAa,EAAE;QAClB,KAAK,GAAG,KAAK,GAAG,GAAG,CAAC;KACrB;IAED,MAAM,mBAAmB,GAAG,WAAW;QACrC,CAAC,CAAC,mBAAmB,WAAW,GAAG;QACnC,CAAC,CAAC,EAAE,CAAC;IACP,OAAO,eAAe,KAAK,GAAG,mBAAmB,YAAY,QAAQ,EAAE,CAAC;AAC1E,CAAC,CAAC;AAEF,MAAM,UAAU,SAAS,CAAC,GAAW;IACnC,+CAA+C;IAC/C,gDAAgD;IAChD,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;AAC7B,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,IAA2B;IAC7D,4BAA4B;IAC5B,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AACrE,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAAG,CAAC,KAAa,EAAE,EAAE;IAC1C,sDAAsD;IACtD,uDAAuD;IACvD,OAAO,KAAK;SACT,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC;SAC3B,IAAI,EAAE;SACN,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC;SAClB,WAAW,EAAE,CAAC;AACnB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,MAAM,GAAG,CAAC,QAAgB,EAAE,EAAE;IACzC,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE;QAC7B,OAAO,QAAQ,CAAC;KACjB;IACD,OAAO,QAAQ,GAAG,MAAM,CAAC;AAC3B,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,KAAa,EACb,WAAoB,EACpB,QAAiB,EACjB,YAAqB,KAAK,EAC1B,UAAkB,EAAE,EACpB,QAAiB,EACjB,EAAE;IACF,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,QAAQ,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAE5E,+CAA+C;IAC/C,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAExC,2BAA2B;IAC3B,IAAI,SAAS,EAAE;QACb,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,KAAK,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC;QAC7D,OAAO,CAAC,GAAG,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC;KAClC;SAAM;QACL,IAAI;YACF,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,KAAK,EAAE,WAAW,EAAE,QAAQ,CAAC,EAAE;gBAC3D,IAAI,EAAE,IAAI;aACX,CAAC,CAAC;YACH,OAAO,CAAC,GAAG,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC;SAClC;QAAC,OAAO,CAAC,EAAE;YACV,yEAAyE;YACzE,sDAAsD;YACtD,IAAI,CAAC,CAAC,IAAI,KAAK,QAAQ,EAAE;gBACvB,OAAO,CAAC,GAAG,CAAC,4BAA4B,SAAS,EAAE,CAAC,CAAC;aACtD;iBAAM;gBACL,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;aAClB;SACF;KACF;AACH,CAAC,CAAC;AAEF,MAAM,UAAU,eAAe,CAAC,IAAS;IACvC,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC;IACtB,iBAAiB,CAAC,IAAI,CAAC,CAAC;IACxB,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,YAAoB,EAAE,EAAE,EAAE;IACpD,MAAM,MAAM,GAAG,GAAG,EAAE,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IACtC,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,QAAgB,EAAE,EAAE;IACnD,MAAM,GAAG,GAAG,QAAQ,CAAC,SAAS,CAC5B,QAAQ,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,EAC7B,QAAQ,CAAC,MAAM,CAChB,CAAC;IACF,IAAI,QAAQ,KAAK,GAAG;QAAE,OAAO,SAAS,CAAC;IACvC,OAAO,GAAG,CAAC;AACb,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,wBAAwB,GAAG,CAAC,QAAgB,EAAE,EAAE;IAC3D,MAAM,SAAS,GAAG,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IAC7C,OAAO,CACL,SAAS;QACT,CAAC,SAAS,KAAK,KAAK,IAAI,SAAS,KAAK,IAAI,IAAI,SAAS,KAAK,KAAK,CAAC,CACnE,CAAC;AACJ,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,UAAU,GAAG,CAAC,MAAW,EAAE,EAAE;IACxC,MAAM,aAAa,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC1C,IAAI,CAAC,aAAa,EAAE;QAClB,MAAM,CAAC,IAAI,CAAC;;;;KAIX,CAAC,CAAC;QACH,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;KACjB;AACH,CAAC,CAAC"}
|
package/package.json
CHANGED
package/src/constants.ts
CHANGED
|
@@ -13,3 +13,21 @@ export const CLIENT_PATH = path.join(DOT_MINTLIFY, "mint", "client");
|
|
|
13
13
|
|
|
14
14
|
// command execution location
|
|
15
15
|
export const CMD_EXEC_PATH = process.cwd();
|
|
16
|
+
|
|
17
|
+
export const SUPPORTED_MEDIA_EXTENSIONS = [
|
|
18
|
+
"jpeg",
|
|
19
|
+
"jpg",
|
|
20
|
+
"jfif",
|
|
21
|
+
"pjpeg",
|
|
22
|
+
"pjp",
|
|
23
|
+
"png",
|
|
24
|
+
"svg",
|
|
25
|
+
"svgz",
|
|
26
|
+
"ico",
|
|
27
|
+
"webp",
|
|
28
|
+
"gif",
|
|
29
|
+
"apng",
|
|
30
|
+
"avif",
|
|
31
|
+
"bmp",
|
|
32
|
+
"mp4",
|
|
33
|
+
];
|
package/src/downloadImage.ts
CHANGED
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
import { existsSync, mkdirSync, createWriteStream } from "fs";
|
|
2
2
|
import path from "path";
|
|
3
3
|
import axios from "axios";
|
|
4
|
+
import { getFileExtension } from "./util.js";
|
|
5
|
+
import { SUPPORTED_MEDIA_EXTENSIONS } from "./constants.js";
|
|
4
6
|
|
|
5
|
-
|
|
7
|
+
async function writeImageToFile(
|
|
6
8
|
imageSrc: string,
|
|
7
|
-
writePath: string
|
|
9
|
+
writePath: string,
|
|
10
|
+
overwrite: boolean
|
|
8
11
|
) {
|
|
9
12
|
// Avoid unnecessary downloads
|
|
10
|
-
if (existsSync(writePath)) {
|
|
13
|
+
if (existsSync(writePath) && !overwrite) {
|
|
11
14
|
return Promise.reject({
|
|
12
15
|
code: "EEXIST",
|
|
13
16
|
});
|
|
@@ -33,3 +36,58 @@ export default async function downloadImage(
|
|
|
33
36
|
writer.on("error", reject);
|
|
34
37
|
});
|
|
35
38
|
}
|
|
39
|
+
|
|
40
|
+
export function isValidImageSrc(src: string) {
|
|
41
|
+
if (!src) {
|
|
42
|
+
return false;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// We do not support downloading base64 in-line images.
|
|
46
|
+
if (src.startsWith("data:")) {
|
|
47
|
+
return false;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
const imageHref = removeMetadataFromImageSrc(src);
|
|
51
|
+
const ext = getFileExtension(imageHref);
|
|
52
|
+
if (!SUPPORTED_MEDIA_EXTENSIONS.includes(ext)) {
|
|
53
|
+
console.error("🚨 We do not support the file extension: " + ext);
|
|
54
|
+
return false;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
return true;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
export function removeMetadataFromImageSrc(src: string) {
|
|
61
|
+
// Part of the URL standard
|
|
62
|
+
const metadataSymbols = ["?", "#"];
|
|
63
|
+
|
|
64
|
+
metadataSymbols.forEach((dividerSymbol) => {
|
|
65
|
+
// Some frameworks add metadata after the file extension, we need to remove that.
|
|
66
|
+
src = src.split(dividerSymbol)[0];
|
|
67
|
+
});
|
|
68
|
+
|
|
69
|
+
return src;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
export function cleanImageSrc(src: string, origin: string) {
|
|
73
|
+
// Add origin if the image tags are using relative sources
|
|
74
|
+
return src.startsWith("http") ? src : new URL(src, origin).href;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
export default async function downloadImage(
|
|
78
|
+
imageSrc: string,
|
|
79
|
+
writePath: string,
|
|
80
|
+
overwrite: boolean = false
|
|
81
|
+
) {
|
|
82
|
+
await writeImageToFile(imageSrc, writePath, overwrite)
|
|
83
|
+
.then(() => {
|
|
84
|
+
console.log("🖼️ - " + writePath);
|
|
85
|
+
})
|
|
86
|
+
.catch((e) => {
|
|
87
|
+
if (e.code === "EEXIST") {
|
|
88
|
+
console.log(`❌ Skipping existing image ${writePath}`);
|
|
89
|
+
} else {
|
|
90
|
+
console.error(e);
|
|
91
|
+
}
|
|
92
|
+
});
|
|
93
|
+
}
|
package/src/index.ts
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
import yargs from "yargs";
|
|
4
4
|
import { hideBin } from "yargs/helpers";
|
|
5
|
-
import generatePageTemplate from "./pageTemplate.js";
|
|
6
5
|
import {
|
|
7
6
|
scrapePageAutomatically,
|
|
8
7
|
scrapePageWrapper,
|
|
@@ -18,7 +17,6 @@ import { scrapeReadMeSection } from "./scraping/site-scrapers/scrapeReadMeSectio
|
|
|
18
17
|
import dev from "./local-preview/index.js";
|
|
19
18
|
import installDepsCommand from "./local-preview/helper-commands/installDepsCommand.js";
|
|
20
19
|
|
|
21
|
-
// TODO - add descriptions to the command options https://github.com/yargs/yargs/blob/HEAD/docs/api.md#commandmodule
|
|
22
20
|
yargs(hideBin(process.argv))
|
|
23
21
|
.command(
|
|
24
22
|
"dev",
|
|
@@ -34,7 +32,6 @@ yargs(hideBin(process.argv))
|
|
|
34
32
|
() => {},
|
|
35
33
|
installDepsCommand
|
|
36
34
|
)
|
|
37
|
-
.command("page", "Generate a new page", () => {}, generatePageTemplate)
|
|
38
35
|
.command(
|
|
39
36
|
"scrape-page [url]",
|
|
40
37
|
"Scrapes a page",
|
|
@@ -84,4 +81,14 @@ yargs(hideBin(process.argv))
|
|
|
84
81
|
}
|
|
85
82
|
)
|
|
86
83
|
|
|
84
|
+
// Print the help menu when the user enters an invalid command.
|
|
85
|
+
.demandCommand(
|
|
86
|
+
1,
|
|
87
|
+
"Unknown command. See above for the list of supported commands."
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
// Alias option flags --help = -h, --version = -v
|
|
91
|
+
.alias("h", "help")
|
|
92
|
+
.alias("v", "version")
|
|
93
|
+
|
|
87
94
|
.parse();
|