mintlify 2.0.17 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/constants.js +1 -1
- package/bin/downloadImage.js +26 -13
- package/bin/downloadImage.js.map +1 -1
- package/bin/index.js +8 -0
- package/bin/index.js.map +1 -1
- package/bin/scraping/detectFramework.js +5 -0
- package/bin/scraping/detectFramework.js.map +1 -1
- package/bin/scraping/downloadAllImages.js +3 -2
- package/bin/scraping/downloadAllImages.js.map +1 -1
- package/bin/scraping/downloadLogoImage.js +2 -2
- package/bin/scraping/downloadLogoImage.js.map +1 -1
- package/bin/scraping/scrapePageCommands.js +15 -8
- package/bin/scraping/scrapePageCommands.js.map +1 -1
- package/bin/scraping/scrapeSectionCommands.js +14 -8
- package/bin/scraping/scrapeSectionCommands.js.map +1 -1
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js +27 -0
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js.map +1 -0
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js +32 -0
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js.map +1 -0
- package/package.json +1 -1
- package/scraper.md +1 -0
- package/src/constants.ts +1 -1
- package/src/downloadImage.ts +31 -14
- package/src/index.ts +18 -1
- package/src/scraping/detectFramework.ts +6 -0
- package/src/scraping/downloadAllImages.ts +5 -2
- package/src/scraping/downloadLogoImage.ts +3 -2
- package/src/scraping/scrapePageCommands.ts +15 -6
- package/src/scraping/scrapeSectionCommands.ts +14 -6
- package/src/scraping/site-scrapers/Intercom/scrapeIntercomPage.ts +51 -0
- package/src/scraping/site-scrapers/Intercom/scrapeIntercomSection.ts +53 -0
package/bin/constants.js
CHANGED
|
@@ -2,7 +2,7 @@ import path from "path";
|
|
|
2
2
|
import * as url from "url";
|
|
3
3
|
import os from "os";
|
|
4
4
|
// Change this to bump to a newer version of mint's client
|
|
5
|
-
export const TARGET_MINT_VERSION = "v0.0.
|
|
5
|
+
export const TARGET_MINT_VERSION = "v0.0.9";
|
|
6
6
|
// package installation location
|
|
7
7
|
export const INSTALL_PATH = url.fileURLToPath(new URL(".", import.meta.url));
|
|
8
8
|
export const HOME_DIR = os.homedir();
|
package/bin/downloadImage.js
CHANGED
|
@@ -13,20 +13,27 @@ async function writeImageToFile(imageSrc, writePath, overwrite) {
|
|
|
13
13
|
// Create the folders needed if they're missing
|
|
14
14
|
mkdirSync(path.dirname(writePath), { recursive: true });
|
|
15
15
|
const writer = createWriteStream(writePath);
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
16
|
+
try {
|
|
17
|
+
const response = await axios.get(imageSrc, {
|
|
18
|
+
responseType: "stream",
|
|
19
|
+
});
|
|
20
|
+
// wx prevents overwriting an image with the exact same name
|
|
21
|
+
// being created in the time we were downloading
|
|
22
|
+
response.data.pipe(writer, {
|
|
23
|
+
flag: "wx",
|
|
24
|
+
});
|
|
25
|
+
return new Promise((resolve, reject) => {
|
|
26
|
+
writer.on("finish", resolve);
|
|
27
|
+
writer.on("error", reject);
|
|
28
|
+
});
|
|
29
|
+
}
|
|
30
|
+
catch (e) {
|
|
31
|
+
return Promise.reject({
|
|
32
|
+
code: "ENOTFOUND",
|
|
33
|
+
});
|
|
34
|
+
}
|
|
28
35
|
}
|
|
29
|
-
export function isValidImageSrc(src) {
|
|
36
|
+
export function isValidImageSrc(src, skipValidateImageExtension) {
|
|
30
37
|
if (!src) {
|
|
31
38
|
return false;
|
|
32
39
|
}
|
|
@@ -34,6 +41,9 @@ export function isValidImageSrc(src) {
|
|
|
34
41
|
if (src.startsWith("data:")) {
|
|
35
42
|
return false;
|
|
36
43
|
}
|
|
44
|
+
if (skipValidateImageExtension) {
|
|
45
|
+
return true;
|
|
46
|
+
}
|
|
37
47
|
const imageHref = removeMetadataFromImageSrc(src);
|
|
38
48
|
const ext = getFileExtension(imageHref);
|
|
39
49
|
if (!SUPPORTED_MEDIA_EXTENSIONS.includes(ext)) {
|
|
@@ -64,6 +74,9 @@ export default async function downloadImage(imageSrc, writePath, overwrite = fal
|
|
|
64
74
|
if (e.code === "EEXIST") {
|
|
65
75
|
console.log(`❌ Skipping existing image ${writePath}`);
|
|
66
76
|
}
|
|
77
|
+
else if (e.code === "ENOTFOUND") {
|
|
78
|
+
console.error(`🚨 Cannot download the image, address not found ${imageSrc}`);
|
|
79
|
+
}
|
|
67
80
|
else {
|
|
68
81
|
console.error(e);
|
|
69
82
|
}
|
package/bin/downloadImage.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"downloadImage.js","sourceRoot":"","sources":["../src/downloadImage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,iBAAiB,EAAE,MAAM,IAAI,CAAC;AAC9D,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,gBAAgB,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,EAAE,0BAA0B,EAAE,MAAM,gBAAgB,CAAC;AAE5D,KAAK,UAAU,gBAAgB,CAC7B,QAAgB,EAChB,SAAiB,EACjB,SAAkB;IAElB,8BAA8B;IAC9B,IAAI,UAAU,CAAC,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE;QACvC,OAAO,OAAO,CAAC,MAAM,CAAC;YACpB,IAAI,EAAE,QAAQ;SACf,CAAC,CAAC;KACJ;IAED,+CAA+C;IAC/C,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAExD,MAAM,MAAM,GAAG,iBAAiB,CAAC,SAAS,CAAC,CAAC;IAE5C,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE;
|
|
1
|
+
{"version":3,"file":"downloadImage.js","sourceRoot":"","sources":["../src/downloadImage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,iBAAiB,EAAE,MAAM,IAAI,CAAC;AAC9D,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,gBAAgB,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,EAAE,0BAA0B,EAAE,MAAM,gBAAgB,CAAC;AAE5D,KAAK,UAAU,gBAAgB,CAC7B,QAAgB,EAChB,SAAiB,EACjB,SAAkB;IAElB,8BAA8B;IAC9B,IAAI,UAAU,CAAC,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE;QACvC,OAAO,OAAO,CAAC,MAAM,CAAC;YACpB,IAAI,EAAE,QAAQ;SACf,CAAC,CAAC;KACJ;IAED,+CAA+C;IAC/C,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAExD,MAAM,MAAM,GAAG,iBAAiB,CAAC,SAAS,CAAC,CAAC;IAE5C,IAAI;QACF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE;YACzC,YAAY,EAAE,QAAQ;SACvB,CAAC,CAAC;QACH,4DAA4D;QAC5D,gDAAgD;QAChD,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YACzB,IAAI,EAAE,IAAI;SACX,CAAC,CAAC;QAEH,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YACrC,MAAM,CAAC,EAAE,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YAC7B,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC7B,CAAC,CAAC,CAAC;KACJ;IAAC,OAAO,CAAC,EAAE;QACV,OAAO,OAAO,CAAC,MAAM,CAAC;YACpB,IAAI,EAAE,WAAW;SAClB,CAAC,CAAC;KACJ;AACH,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,GAAW,EACX,0BAAoC;IAEpC,IAAI,CAAC,GAAG,EAAE;QACR,OAAO,KAAK,CAAC;KACd;IAED,uDAAuD;IACvD,IAAI,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE;QAC3B,OAAO,KAAK,CAAC;KACd;IAED,IAAI,0BAA0B,EAAE;QAC9B,OAAO,IAAI,CAAC;KACb;IAED,MAAM,SAAS,GAAG,0BAA0B,CAAC,GAAG,CAAC,CAAC;IAClD,MAAM,GAAG,GAAG,gBAAgB,CAAC,SAAS,CAAC,CAAC;IAExC,IAAI,CAAC,0BAA0B,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE;QAC7C,OAAO,CAAC,KAAK,CAAC,2CAA2C,GAAG,GAAG,CAAC,CAAC;QACjE,OAAO,KAAK,CAAC;KACd;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,0BAA0B,CAAC,GAAW;IACpD,2BAA2B;IAC3B,MAAM,eAAe,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IAEnC,eAAe,CAAC,OAAO,CAAC,CAAC,aAAa,EAAE,EAAE;QACxC,iFAAiF;QACjF,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,GAAW,EAAE,MAAc;IACvD,0DAA0D;IAC1D,OAAO,GAAG,CAAC,UAAU,CA
AC,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC;AAClE,CAAC;AAED,MAAM,CAAC,OAAO,CAAC,KAAK,UAAU,aAAa,CACzC,QAAgB,EAChB,SAAiB,EACjB,YAAqB,KAAK;IAE1B,MAAM,gBAAgB,CAAC,QAAQ,EAAE,SAAS,EAAE,SAAS,CAAC;SACnD,IAAI,CAAC,GAAG,EAAE;QACT,OAAO,CAAC,GAAG,CAAC,QAAQ,GAAG,SAAS,CAAC,CAAC;IACpC,CAAC,CAAC;SACD,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;QACX,IAAI,CAAC,CAAC,IAAI,KAAK,QAAQ,EAAE;YACvB,OAAO,CAAC,GAAG,CAAC,6BAA6B,SAAS,EAAE,CAAC,CAAC;SACvD;aAAM,IAAI,CAAC,CAAC,IAAI,KAAK,WAAW,EAAE;YACjC,OAAO,CAAC,KAAK,CACX,mDAAmD,QAAQ,EAAE,CAC9D,CAAC;SACH;aAAM;YACL,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;SAClB;IACH,CAAC,CAAC,CAAC;AACP,CAAC"}
|
package/bin/index.js
CHANGED
|
@@ -8,6 +8,8 @@ import { scrapeSectionAutomatically, scrapeSectionAxiosWrapper, scrapeGitbookSec
|
|
|
8
8
|
import { scrapeReadMeSection } from "./scraping/site-scrapers/scrapeReadMeSection.js";
|
|
9
9
|
import dev from "./local-preview/index.js";
|
|
10
10
|
import installDepsCommand from "./local-preview/helper-commands/installDepsCommand.js";
|
|
11
|
+
import { scrapeIntercomPage } from "./scraping/site-scrapers/Intercom/scrapeIntercomPage.js";
|
|
12
|
+
import { scrapeIntercomSection } from "./scraping/site-scrapers/Intercom/scrapeIntercomSection.js";
|
|
11
13
|
yargs(hideBin(process.argv))
|
|
12
14
|
.command("dev", "Runs Mintlify locally (Must run in directory with mint.json)", () => { }, async (argv) => {
|
|
13
15
|
await dev(argv);
|
|
@@ -21,6 +23,9 @@ yargs(hideBin(process.argv))
|
|
|
21
23
|
})
|
|
22
24
|
.command("scrape-readme-page [url]", "Scrapes a ReadMe page", () => { }, async (argv) => {
|
|
23
25
|
await scrapePageWrapper(argv, scrapeReadMePage);
|
|
26
|
+
})
|
|
27
|
+
.command("scrape-intercom-page [url]", "Scrapes a Intercom page", () => { }, async (argv) => {
|
|
28
|
+
await scrapePageWrapper(argv, scrapeIntercomPage);
|
|
24
29
|
})
|
|
25
30
|
.command("scrape-section [url]", "Scrapes the docs in the section", () => { }, async (argv) => {
|
|
26
31
|
await scrapeSectionAutomatically(argv);
|
|
@@ -30,6 +35,9 @@ yargs(hideBin(process.argv))
|
|
|
30
35
|
})
|
|
31
36
|
.command("scrape-readme-section [url]", "Scrapes the ReadMe section", () => { }, async (argv) => {
|
|
32
37
|
await scrapeSectionAxiosWrapper(argv, scrapeReadMeSection);
|
|
38
|
+
})
|
|
39
|
+
.command("scrape-intercom-section [url]", "Scrapes the Intercom section", () => { }, async (argv) => {
|
|
40
|
+
await scrapeSectionAxiosWrapper(argv, scrapeIntercomSection);
|
|
33
41
|
})
|
|
34
42
|
// Print the help menu when the user enters an invalid command.
|
|
35
43
|
.strictCommands()
|
package/bin/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AACxC,OAAO,EACL,uBAAuB,EACvB,iBAAiB,GAClB,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAAE,iBAAiB,EAAE,MAAM,+CAA+C,CAAC;AAClF,OAAO,EAAE,gBAAgB,EAAE,MAAM,8CAA8C,CAAC;AAChF,OAAO,EACL,0BAA0B,EAC1B,yBAAyB,EACzB,2BAA2B,GAC5B,MAAM,qCAAqC,CAAC;AAC7C,OAAO,EAAE,mBAAmB,EAAE,MAAM,iDAAiD,CAAC;AACtF,OAAO,GAAG,MAAM,0BAA0B,CAAC;AAC3C,OAAO,kBAAkB,MAAM,uDAAuD,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AACxC,OAAO,EACL,uBAAuB,EACvB,iBAAiB,GAClB,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAAE,iBAAiB,EAAE,MAAM,+CAA+C,CAAC;AAClF,OAAO,EAAE,gBAAgB,EAAE,MAAM,8CAA8C,CAAC;AAChF,OAAO,EACL,0BAA0B,EAC1B,yBAAyB,EACzB,2BAA2B,GAC5B,MAAM,qCAAqC,CAAC;AAC7C,OAAO,EAAE,mBAAmB,EAAE,MAAM,iDAAiD,CAAC;AACtF,OAAO,GAAG,MAAM,0BAA0B,CAAC;AAC3C,OAAO,kBAAkB,MAAM,uDAAuD,CAAC;AACvF,OAAO,EAAE,kBAAkB,EAAE,MAAM,yDAAyD,CAAC;AAC7F,OAAO,EAAE,qBAAqB,EAAE,MAAM,4DAA4D,CAAC;AAEnG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;KACzB,OAAO,CACN,KAAK,EACL,8DAA8D,EAC9D,GAAG,EAAE,GAAE,CAAC,EACR,KAAK,EAAE,IAAI,EAAE,EAAE;IACb,MAAM,GAAG,CAAC,IAAI,CAAC,CAAC;AAClB,CAAC,CACF;KACA,OAAO,CACN,SAAS,EACT,yCAAyC,EACzC,GAAG,EAAE,GAAE,CAAC,EACR,kBAAkB,CACnB;KACA,OAAO,CACN,mBAAmB,EACnB,gBAAgB,EAChB,GAAG,EAAE,GAAE,CAAC,EACR,KAAK,EAAE,IAAI,EAAE,EAAE;IACb,MAAM,uBAAuB,CAAC,IAAI,CAAC,CAAC;AACtC,CAAC,CACF;KACA,OAAO,CACN,2BAA2B,EAC3B,wBAAwB,EACxB,GAAG,EAAE,GAAE,CAAC,EACR,KAAK,EAAE,IAAI,EAAE,EAAE;IACb,MAAM,iBAAiB,CAAC,IAAI,EAAE,iBAAiB,CAAC,CAAC;AACnD,CAAC,CACF;KACA,OAAO,CACN,0BAA0B,EAC1B,uBAAuB,EACvB,GAAG,EAAE,GAAE,CAAC,EACR,KAAK,EAAE,IAAI,EAAE,EAAE;IACb,MAAM,iBAAiB,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAAC;AAClD,CAAC,CACF;KACA,OAAO,CACN,4BAA4B,EAC5B,yBAAyB,EACzB,GAAG,EAAE,GAAE,CAAC,EACR,KAAK,EAAE,IAAI,EAAE,EAAE;IACb,MAAM,iBAAiB,CAAC,IAAI,EAAE,kBAAkB,CAAC,CAAC;AACpD,CAAC,CACF;KACA,OAAO,CACN,sBAAsB,EACtB,iCAAiC,EACjC,GAAG,EAAE,GAAE,CAAC,EACR,KAAK,EAAE,IAAI,EAAE,EAAE;IACb,MAAM,0BAA0B,CAAC,IAAI,CAAC,CAAC;AACzC,CAAC,CACF;KACA,OAAO,CACN,8BAA8B,EAC9B,6BAA6B,EAC7B,GAAG,EAAE,GAAE,CAAC,EACR,KAAK,EAAE,IAAI,EAAE,EAAE;IACb,MAAM,2BAA2B,CAAC,IAAI,CAAC,CAAC;AAC1C,CAAC,CACF;KACA,OAAO,CACN,6BAA6B,EAC7B,4BAA4B,EAC5B,GAAG,EAAE,GAAE,CAAC,EACR,KAAK,EAAE,IAAI,EAAE,EAAE;IACb,MAAM,yBAAyB,CAAC,IAAI,EAAE,mBAAmB,CAAC,CAAC;AAC7D,CAAC,CACF;KACA,OAAO,CACN,+BAA+B,EAC/B,8BAA8B,EAC9B,GAAG,EAAE,GAAE,CAAC,EACR,KAAK,EAAE,IAAI,
EAAE,EAAE;IACb,MAAM,yBAAyB,CAAC,IAAI,EAAE,qBAAqB,CAAC,CAAC;AAC/D,CAAC,CACF;IACD,+DAA+D;KAC9D,cAAc,EAAE;KAChB,aAAa,CACZ,CAAC,EACD,gEAAgE,CACjE;IAED,iDAAiD;KAChD,KAAK,CAAC,GAAG,EAAE,MAAM,CAAC;KAClB,KAAK,CAAC,GAAG,EAAE,SAAS,CAAC;KAErB,KAAK,EAAE,CAAC"}
|
|
@@ -4,6 +4,7 @@ export var Frameworks;
|
|
|
4
4
|
Frameworks["DOCUSAURUS"] = "DOCUSAURUS";
|
|
5
5
|
Frameworks["GITBOOK"] = "GITBOOK";
|
|
6
6
|
Frameworks["README"] = "README";
|
|
7
|
+
Frameworks["INTERCOM"] = "INTERCOM";
|
|
7
8
|
})(Frameworks || (Frameworks = {}));
|
|
8
9
|
export function detectFramework(html) {
|
|
9
10
|
const $ = cheerio.load(html);
|
|
@@ -29,6 +30,10 @@ export function detectFramework(html) {
|
|
|
29
30
|
if (isReadMe) {
|
|
30
31
|
return { framework: Frameworks.README };
|
|
31
32
|
}
|
|
33
|
+
const isIntercom = $("meta[name='intercom:trackingEvent']").length > 0;
|
|
34
|
+
if (isIntercom) {
|
|
35
|
+
return { framework: Frameworks.INTERCOM };
|
|
36
|
+
}
|
|
32
37
|
return undefined;
|
|
33
38
|
}
|
|
34
39
|
//# sourceMappingURL=detectFramework.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"detectFramework.js","sourceRoot":"","sources":["../../src/scraping/detectFramework.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,MAAM,CAAN,IAAY,
|
|
1
|
+
{"version":3,"file":"detectFramework.js","sourceRoot":"","sources":["../../src/scraping/detectFramework.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,MAAM,CAAN,IAAY,UAKX;AALD,WAAY,UAAU;IACpB,uCAAyB,CAAA;IACzB,iCAAmB,CAAA;IACnB,+BAAiB,CAAA;IACjB,mCAAqB,CAAA;AACvB,CAAC,EALW,UAAU,KAAV,UAAU,QAKrB;AAED,MAAM,UAAU,eAAe,CAAC,IAAI;IAClC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,cAAc,GAAG,CAAC,CAAC,wBAAwB,CAAC,CAAC;IAEnD,IACE,cAAc,CAAC,MAAM,GAAG,CAAC;QACzB,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC,EACrD;QACA,IAAI,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE;YACjD,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,UAAU,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC;SAC3D;QACD,IAAI,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE;YACjD,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,UAAU,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC;SAC3D;aAAM,IAAI,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE;YACxD,OAAO,CAAC,IAAI,CACV,0FAA0F,CAC3F,CAAC;YACF,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,UAAU,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC;SAC3D;KACF;IAED,MAAM,SAAS,GAAG,CAAC,CAAC,eAAe,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAChD,IAAI,SAAS,EAAE;QACb,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,OAAO,EAAE,CAAC;KAC1C;IAED,MAAM,QAAQ,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAC5D,IAAI,QAAQ,EAAE;QACZ,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC;KACzC;IAED,MAAM,UAAU,GAAG,CAAC,CAAC,qCAAqC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IACvE,IAAI,UAAU,EAAE;QACd,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,QAAQ,EAAE,CAAC;KAC3C;IAED,OAAO,SAAS,CAAC;AACnB,CAAC"}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import path from "path";
|
|
2
2
|
import downloadImage, { cleanImageSrc, isValidImageSrc, removeMetadataFromImageSrc, } from "../downloadImage.js";
|
|
3
3
|
// To Do: Use CheerioElement instead of any when we bump the cheerio version
|
|
4
|
-
export default async function downloadAllImages($, content, origin, baseDir, overwrite, modifyFileName) {
|
|
4
|
+
export default async function downloadAllImages($, content, origin, baseDir, overwrite, modifyFileName, skipValidateImageExtension) {
|
|
5
5
|
if (!baseDir) {
|
|
6
6
|
console.debug("Skipping image downloading");
|
|
7
7
|
return;
|
|
@@ -16,8 +16,9 @@ export default async function downloadAllImages($, content, origin, baseDir, ove
|
|
|
16
16
|
];
|
|
17
17
|
// Wait to all images to download before continuing
|
|
18
18
|
const origToNewArray = await Promise.all(imageSrcs.map(async (imageSrc) => {
|
|
19
|
-
if (!isValidImageSrc(imageSrc))
|
|
19
|
+
if (!isValidImageSrc(imageSrc, skipValidateImageExtension)) {
|
|
20
20
|
return;
|
|
21
|
+
}
|
|
21
22
|
const imageHref = cleanImageSrc(imageSrc, origin);
|
|
22
23
|
let fileName = removeMetadataFromImageSrc(path.basename(imageHref));
|
|
23
24
|
if (modifyFileName) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"downloadAllImages.js","sourceRoot":"","sources":["../../src/scraping/downloadAllImages.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,aAAa,EAAE,EACpB,aAAa,EACb,eAAe,EACf,0BAA0B,GAC3B,MAAM,qBAAqB,CAAC;AAE7B,4EAA4E;AAC5E,MAAM,CAAC,OAAO,CAAC,KAAK,UAAU,iBAAiB,CAC7C,CAAM,EACN,OAAY,EACZ,MAAc,EACd,OAAe,EACf,SAAkB,EAClB,cAAoB;
|
|
1
|
+
{"version":3,"file":"downloadAllImages.js","sourceRoot":"","sources":["../../src/scraping/downloadAllImages.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,aAAa,EAAE,EACpB,aAAa,EACb,eAAe,EACf,0BAA0B,GAC3B,MAAM,qBAAqB,CAAC;AAE7B,4EAA4E;AAC5E,MAAM,CAAC,OAAO,CAAC,KAAK,UAAU,iBAAiB,CAC7C,CAAM,EACN,OAAY,EACZ,MAAc,EACd,OAAe,EACf,SAAkB,EAClB,cAAoB,EACpB,0BAAoC;IAEpC,IAAI,CAAC,OAAO,EAAE;QACZ,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAC5C,OAAO;KACR;IAED,kEAAkE;IAClE,4CAA4C;IAC5C,MAAM,SAAS,GAAG;QAChB,GAAG,IAAI,GAAG,CACR,OAAO;aACJ,IAAI,CAAC,UAAU,CAAC;aAChB,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;aACvC,OAAO,EAAE,CACb;KACF,CAAC;IAEF,mDAAmD;IACnD,MAAM,cAAc,GAAG,MAAM,OAAO,CAAC,GAAG,CACtC,SAAS,CAAC,GAAG,CAAC,KAAK,EAAE,QAAgB,EAAE,EAAE;QACvC,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,0BAA0B,CAAC,EAAE;YAC1D,OAAO;SACR;QAED,MAAM,SAAS,GAAG,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QAElD,IAAI,QAAQ,GAAG,0BAA0B,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;QACpE,IAAI,cAAc,EAAE;YAClB,QAAQ,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;SACrC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;QAE/C,MAAM,aAAa,CAAC,SAAS,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;QAErD,OAAO,EAAE,CAAC,QAAQ,CAAC,EAAE,SAAS,EAAE,CAAC;IACnC,CAAC,CAAC,CACH,CAAC;IAEF,OAAO,cAAc,CAAC,MAAM,CAC1B,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EACnD,EAAE,CACH,CAAC;AACJ,CAAC"}
|
|
@@ -2,8 +2,8 @@ import path from "path";
|
|
|
2
2
|
import downloadImage, { cleanImageSrc, isValidImageSrc, removeMetadataFromImageSrc, } from "../downloadImage.js";
|
|
3
3
|
import { getFileExtension } from "../util.js";
|
|
4
4
|
// To Do: Use CheerioElement instead of any when we bump the cheerio version
|
|
5
|
-
export default async function downloadLogoImage(imageSrc, imageBaseDir, origin, overwrite) {
|
|
6
|
-
if (!isValidImageSrc(imageSrc))
|
|
5
|
+
export default async function downloadLogoImage(imageSrc, imageBaseDir, origin, overwrite, skipValidateImageExtension) {
|
|
6
|
+
if (!isValidImageSrc(imageSrc, skipValidateImageExtension))
|
|
7
7
|
return;
|
|
8
8
|
const imageHref = cleanImageSrc(imageSrc, origin);
|
|
9
9
|
const ext = getFileExtension(removeMetadataFromImageSrc(imageSrc));
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"downloadLogoImage.js","sourceRoot":"","sources":["../../src/scraping/downloadLogoImage.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,aAAa,EAAE,EACpB,aAAa,EACb,eAAe,EACf,0BAA0B,GAC3B,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAE9C,4EAA4E;AAC5E,MAAM,CAAC,OAAO,CAAC,KAAK,UAAU,iBAAiB,CAC7C,QAAgB,EAChB,YAAoB,EACpB,MAAc,EACd,SAAkB;
|
|
1
|
+
{"version":3,"file":"downloadLogoImage.js","sourceRoot":"","sources":["../../src/scraping/downloadLogoImage.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,aAAa,EAAE,EACpB,aAAa,EACb,eAAe,EACf,0BAA0B,GAC3B,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAE9C,4EAA4E;AAC5E,MAAM,CAAC,OAAO,CAAC,KAAK,UAAU,iBAAiB,CAC7C,QAAgB,EAChB,YAAoB,EACpB,MAAc,EACd,SAAkB,EAClB,0BAAoC;IAEpC,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,0BAA0B,CAAC;QAAE,OAAO;IAEnE,MAAM,SAAS,GAAG,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAElD,MAAM,GAAG,GAAG,gBAAgB,CAAC,0BAA0B,CAAC,QAAQ,CAAC,CAAC,CAAC;IACnE,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,EAAE,kBAAkB,GAAG,GAAG,CAAC,CAAC;IAE5E,MAAM,aAAa,CAAC,SAAS,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;AACvD,CAAC"}
|
|
@@ -6,12 +6,14 @@ import { scrapeReadMePage } from "./site-scrapers/scrapeReadMePage.js";
|
|
|
6
6
|
import { detectFramework, Frameworks } from "./detectFramework.js";
|
|
7
7
|
import { getHrefFromArgs } from "../util.js";
|
|
8
8
|
import { getHtmlWithPuppeteer } from "../browser.js";
|
|
9
|
+
import { scrapeIntercomPage } from "./site-scrapers/Intercom/scrapeIntercomPage.js";
|
|
9
10
|
function validateFramework(framework) {
|
|
10
11
|
if (!framework) {
|
|
11
12
|
console.log("Could not detect the framework automatically. Please use one of:");
|
|
12
13
|
console.log("scrape-page-docusaurus");
|
|
13
14
|
console.log("scrape-page-gitbook");
|
|
14
15
|
console.log("scrape-page-readme");
|
|
16
|
+
console.log("scrape-page-intercom");
|
|
15
17
|
return process.exit(1);
|
|
16
18
|
}
|
|
17
19
|
}
|
|
@@ -35,14 +37,19 @@ export async function scrapePageAutomatically(argv) {
|
|
|
35
37
|
const { framework, version } = detectFramework(html);
|
|
36
38
|
validateFramework(framework);
|
|
37
39
|
console.log("Detected framework: " + framework);
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
40
|
+
switch (framework) {
|
|
41
|
+
case Frameworks.DOCUSAURUS:
|
|
42
|
+
await scrapePageWrapper(argv, scrapeDocusaurusPage, { version });
|
|
43
|
+
break;
|
|
44
|
+
case Frameworks.GITBOOK:
|
|
45
|
+
await scrapePageWrapper(argv, scrapeGitBookPage, { puppeteer: true });
|
|
46
|
+
break;
|
|
47
|
+
case Frameworks.README:
|
|
48
|
+
await scrapePageWrapper(argv, scrapeReadMePage);
|
|
49
|
+
break;
|
|
50
|
+
case Frameworks.INTERCOM:
|
|
51
|
+
await scrapePageWrapper(argv, scrapeIntercomPage);
|
|
52
|
+
break;
|
|
46
53
|
}
|
|
47
54
|
}
|
|
48
55
|
//# sourceMappingURL=scrapePageCommands.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapePageCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapePageCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,oBAAoB,EAAE,MAAM,yCAAyC,CAAC;AAC/E,OAAO,EAAE,iBAAiB,EAAE,MAAM,sCAAsC,CAAC;AACzE,OAAO,EAAE,gBAAgB,EAAE,MAAM,qCAAqC,CAAC;AACvE,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;
|
|
1
|
+
{"version":3,"file":"scrapePageCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapePageCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,oBAAoB,EAAE,MAAM,yCAAyC,CAAC;AAC/E,OAAO,EAAE,iBAAiB,EAAE,MAAM,sCAAsC,CAAC;AACzE,OAAO,EAAE,gBAAgB,EAAE,MAAM,qCAAqC,CAAC;AACvE,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AAErD,OAAO,EAAE,kBAAkB,EAAE,MAAM,gDAAgD,CAAC;AAEpF,SAAS,iBAAiB,CAAC,SAAS;IAClC,IAAI,CAAC,SAAS,EAAE;QACd,OAAO,CAAC,GAAG,CACT,kEAAkE,CACnE,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;QAClC,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAC;QACpC,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;KACxB;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,IAAwB,EACxB,UAAwB,EACxB,OAAmD;IAEnD,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,IAAI,IAAY,CAAC;IACjB,IAAI,OAAO,EAAE,SAAS,EAAE;QACtB,IAAI,GAAG,MAAM,oBAAoB,CAAC,IAAI,CAAC,CAAC;KACzC;SAAM;QACL,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAClC,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;KACjB;IACD,MAAM,UAAU,CAAC,UAAU,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IAC7E,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAAC,IAAS;IACrD,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IAErD,iBAAiB,CAAC,SAAS,CAAC,CAAC;IAE7B,OAAO,CAAC,GAAG,CAAC,sBAAsB,GAAG,SAAS,CAAC,CAAC;IAEhD,QAAQ,SAAS,EAAE;QACjB,KAAK,UAAU,CAAC,UAAU;YACxB,MAAM,iBAAiB,CAAC,IAAI,EAAE,oBAAoB,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;YACjE,MAAM;QACR,KAAK,UAAU,CAAC,OAAO;YACrB,MAAM,iBAAiB,CAAC,IAAI,EAAE,iBAAiB,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACtE,MAAM;QACR,KAAK,UAAU,CAAC,MAAM;YACpB,MAAM,iBAAiB,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA
AC;YAChD,MAAM;QACR,KAAK,UAAU,CAAC,QAAQ;YACtB,MAAM,iBAAiB,CAAC,IAAI,EAAE,kBAAkB,CAAC,CAAC;YAClD,MAAM;KACT;AACH,CAAC"}
|
|
@@ -8,6 +8,7 @@ import { scrapeGitBookSection } from "./site-scrapers/scrapeGitBookSection.js";
|
|
|
8
8
|
import openNestedGitbookMenus from "./site-scrapers/openNestedGitbookMenus.js";
|
|
9
9
|
import { scrapeReadMeSection } from "./site-scrapers/scrapeReadMeSection.js";
|
|
10
10
|
import { startBrowser } from "../browser.js";
|
|
11
|
+
import { scrapeIntercomSection } from "./site-scrapers/Intercom/scrapeIntercomSection.js";
|
|
11
12
|
export async function scrapeSectionAxiosWrapper(argv, scrapeFunc) {
|
|
12
13
|
const href = getHrefFromArgs(argv);
|
|
13
14
|
const res = await axios.get(href);
|
|
@@ -41,14 +42,19 @@ export async function scrapeSectionAutomatically(argv) {
|
|
|
41
42
|
const { framework, version } = detectFramework(html);
|
|
42
43
|
validateFramework(framework);
|
|
43
44
|
console.log("Detected framework: " + framework);
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
45
|
+
switch (framework) {
|
|
46
|
+
case Frameworks.DOCUSAURUS:
|
|
47
|
+
await scrapeDocusaurusSectionCommand(argv, version);
|
|
48
|
+
break;
|
|
49
|
+
case Frameworks.GITBOOK:
|
|
50
|
+
await scrapeGitbookSectionCommand(argv);
|
|
51
|
+
break;
|
|
52
|
+
case Frameworks.README:
|
|
53
|
+
await scrapeSectionAxiosWrapper(argv, scrapeReadMeSection);
|
|
54
|
+
break;
|
|
55
|
+
case Frameworks.INTERCOM:
|
|
56
|
+
await scrapeSectionAxiosWrapper(argv, scrapeIntercomSection);
|
|
57
|
+
break;
|
|
52
58
|
}
|
|
53
59
|
}
|
|
54
60
|
function validateFramework(framework) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeSectionCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapeSectionCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACxD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,uBAAuB,EAAE,MAAM,4CAA4C,CAAC;AACrF,OAAO,yBAAyB,MAAM,8CAA8C,CAAC;AACrF,OAAO,EAAE,oBAAoB,EAAE,MAAM,yCAAyC,CAAC;AAC/E,OAAO,sBAAsB,MAAM,2CAA2C,CAAC;AAC/E,OAAO,EAAE,mBAAmB,EAAE,MAAM,wCAAwC,CAAC;AAC7E,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;
|
|
1
|
+
{"version":3,"file":"scrapeSectionCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapeSectionCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACxD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,uBAAuB,EAAE,MAAM,4CAA4C,CAAC;AACrF,OAAO,yBAAyB,MAAM,8CAA8C,CAAC;AACrF,OAAO,EAAE,oBAAoB,EAAE,MAAM,yCAAyC,CAAC;AAC/E,OAAO,sBAAsB,MAAM,2CAA2C,CAAC;AAC/E,OAAO,EAAE,mBAAmB,EAAE,MAAM,wCAAwC,CAAC;AAC7E,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAE7C,OAAO,EAAE,qBAAqB,EAAE,MAAM,mDAAmD,CAAC;AAE1F,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAC7C,IAAwB,EACxB,UAA2B;IAE3B,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,MAAM,aAAa,CACjB,UAAU,EACV,IAAI,EACJ,SAAS,CAAC,IAAI,CAAC,EACf,CAAC,CAAC,IAAI,CAAC,SAAS,EAChB,SAAS,CACV,CAAC;IACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,8BAA8B,CAClD,IAAS,EACT,OAAe,CAAC,kBAAkB;;IAElC,MAAM,6BAA6B,CACjC,IAAI,EACJ,yBAAyB,EACzB,uBAAuB,EACvB,OAAO,CACR,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,IAAS;IACzD,MAAM,6BAA6B,CACjC,IAAI,EACJ,sBAAsB,EACtB,oBAAoB,CACrB,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,6BAA6B,CAC1C,IAAS,EACT,SAAc,EACd,UAA2B,EAC3B,OAAgB;IAEhB,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IAEnC,MAAM,OAAO,GAAG,MAAM,YAAY,EAAE,CAAC;IACrC,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;IACrC,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;QACpB,SAAS,EAAE,cAAc;KAC1B,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,CAAC;IACnC,OAAO,CAAC,KAAK,EAAE,CAAC;IAChB,MAAM,aAAa,CACjB,UAAU,EACV,IAAI,EACJ,SAAS,CAAC,IAAI,CAAC,EACf,CAAC,CAAC,IAAI,CAAC,SAAS,EAChB,OAAO,CACR,CAAC;IACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAAC,IAAS;IACxD,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,MAAM,EAAE,SAAS,EAAE,OAAO,EAA
E,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IAErD,iBAAiB,CAAC,SAAS,CAAC,CAAC;IAE7B,OAAO,CAAC,GAAG,CAAC,sBAAsB,GAAG,SAAS,CAAC,CAAC;IAEhD,QAAQ,SAAS,EAAE;QACjB,KAAK,UAAU,CAAC,UAAU;YACxB,MAAM,8BAA8B,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YACpD,MAAM;QACR,KAAK,UAAU,CAAC,OAAO;YACrB,MAAM,2BAA2B,CAAC,IAAI,CAAC,CAAC;YACxC,MAAM;QACR,KAAK,UAAU,CAAC,MAAM;YACpB,MAAM,yBAAyB,CAAC,IAAI,EAAE,mBAAmB,CAAC,CAAC;YAC3D,MAAM;QACR,KAAK,UAAU,CAAC,QAAQ;YACtB,MAAM,yBAAyB,CAAC,IAAI,EAAE,qBAAqB,CAAC,CAAC;YAC7D,MAAM;KACT;AACH,CAAC;AAED,SAAS,iBAAiB,CAAC,SAAiC;IAC1D,IAAI,CAAC,SAAS,EAAE;QACd,OAAO,CAAC,GAAG,CACT,4GAA4G,CAC7G,CAAC;QACF,OAAO,CAAC,IAAI,EAAE,CAAC;KAChB;AACH,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import cheerio from "cheerio";
|
|
2
|
+
import { NodeHtmlMarkdown } from "node-html-markdown";
|
|
3
|
+
import downloadAllImages from "../../downloadAllImages.js";
|
|
4
|
+
import replaceImagePaths from "../../replaceImagePaths.js";
|
|
5
|
+
/**
 * Scrapes a single Intercom article page into Mintlify page data.
 *
 * Reads the title from the first `.t__h1` element and the description from
 * `.article__desc` under the title's parent, downloads the images found in
 * the first `<article>` element, converts that element to Markdown, tidies
 * the Markdown, and rewrites image references via `replaceImagePaths`.
 *
 * @param html Raw HTML of the article page.
 * @param origin Forwarded to `downloadAllImages` for resolving image sources.
 * @param cliDir Forwarded to `replaceImagePaths` with the downloaded-image map.
 * @param imageBaseDir Forwarded to `downloadAllImages` as the download directory.
 * @param overwrite Forwarded to `downloadAllImages`.
 * @param _ Version; not used by this scraper.
 * @returns An object `{ title, description, markdown }`.
 */
export async function scrapeIntercomPage(html, origin, cliDir, imageBaseDir, overwrite, _ // version
) {
    const $ = cheerio.load(html);

    const titleComponent = $(".t__h1").first();
    const title = titleComponent.text().trim();
    const description = $(".article__desc", titleComponent.parent()).text().trim();

    const content = $("article").first();
    const contentHtml = $.html(content);

    // The trailing `true` is skipValidateImageExtension: image sources are
    // accepted without checking their file extension.
    const origToWritePath = await downloadAllImages($, content, origin, imageBaseDir, overwrite, undefined, true);

    const nhm = new NodeHtmlMarkdown({ useInlineLinks: false });
    let markdown = nhm.translate(contentHtml);

    // Keep headers on one line
    markdown = markdown.replace(/# \n\n/g, "# ");

    // Remove unnecessary zero-width space characters
    markdown = markdown.replace(/\u200b/g, "");

    // Collapse every run of three or more newlines to a single blank line.
    // A fixed three-newline pattern only makes one pass, so longer runs
    // (e.g. those left behind by the zero-width-space removal above) would
    // still contain extra blank lines.
    markdown = markdown.replace(/\n{3,}/g, "\n\n");

    // Mintlify doesn't support bolded headers, remove the asterisks
    markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, "$1 $2\n");

    markdown = replaceImagePaths(origToWritePath, cliDir, markdown);

    return { title, description, markdown };
}
|
|
27
|
+
//# sourceMappingURL=scrapeIntercomPage.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scrapeIntercomPage.js","sourceRoot":"","sources":["../../../../src/scraping/site-scrapers/Intercom/scrapeIntercomPage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,iBAAiB,MAAM,4BAA4B,CAAC;AAC3D,OAAO,iBAAiB,MAAM,4BAA4B,CAAC;AAE3D,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,CAAqB,CAAC,UAAU;;IAEhC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,cAAc,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,KAAK,EAAE,CAAC;IAC3C,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAC3C,IAAI,WAAW,GAAG,CAAC,CAAC,gBAAgB,EAAE,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAE7E,IAAI,OAAO,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC;IACnC,MAAM,WAAW,GAAG,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAEpC,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,OAAO,EACP,MAAM,EACN,YAAY,EACZ,SAAS,EACT,SAAS,EACT,IAAI,CACL,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,gBAAgB,CAAC,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;IAC5D,IAAI,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAE1C,2BAA2B;IAC3B,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IAE7C,qDAAqD;IACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAE3C,iCAAiC;IACjC,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,gEAAgE;IAChE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;IAEjE,QAAQ,GAAG,iBAAiB,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAEhE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import cheerio from "cheerio";
|
|
2
|
+
import { scrapeIntercomPage } from "./scrapeIntercomPage.js";
|
|
3
|
+
import { scrapeGettingFileNameFromUrl } from "../../scrapeGettingFileNameFromUrl.js";
|
|
4
|
+
import downloadLogoImage from "../../downloadLogoImage.js";
|
|
5
|
+
import axios from "axios";
|
|
6
|
+
export async function scrapeIntercomSection(html, origin, cliDir, imageBaseDir, overwrite, version) {
|
|
7
|
+
let $ = cheerio.load(html);
|
|
8
|
+
const logoSrc = $(".header__logo img").first().attr("src");
|
|
9
|
+
downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite);
|
|
10
|
+
const collectionsLink = $(".section .g__space a");
|
|
11
|
+
const collectionsMap = collectionsLink
|
|
12
|
+
.toArray()
|
|
13
|
+
.map(async (s) => {
|
|
14
|
+
const href = $(s).attr("href");
|
|
15
|
+
const res = await axios.get(`${origin}${href}`);
|
|
16
|
+
const html = res.data;
|
|
17
|
+
$ = cheerio.load(html);
|
|
18
|
+
const sectionTitle = $(".collection h1").first().text().trim();
|
|
19
|
+
const sectionPages = $(".section .g__space a")
|
|
20
|
+
.toArray()
|
|
21
|
+
.map((s) => $(s).attr("href"));
|
|
22
|
+
return {
|
|
23
|
+
group: sectionTitle,
|
|
24
|
+
pages: sectionPages,
|
|
25
|
+
};
|
|
26
|
+
});
|
|
27
|
+
const collections = await Promise.all(collectionsMap);
|
|
28
|
+
return await Promise.all(collections.map(async (entry) => {
|
|
29
|
+
return await scrapeGettingFileNameFromUrl(entry, cliDir, origin, overwrite, scrapeIntercomPage, false, version);
|
|
30
|
+
}));
|
|
31
|
+
}
|
|
32
|
+
//# sourceMappingURL=scrapeIntercomSection.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scrapeIntercomSection.js","sourceRoot":"","sources":["../../../../src/scraping/site-scrapers/Intercom/scrapeIntercomSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,kBAAkB,EAAE,MAAM,yBAAyB,CAAC;AAC7D,OAAO,EAAE,4BAA4B,EAAE,MAAM,uCAAuC,CAAC;AACrF,OAAO,iBAAiB,MAAM,4BAA4B,CAAC;AAC3D,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,OAA2B;IAE3B,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE3B,MAAM,OAAO,GAAG,CAAC,CAAC,mBAAmB,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC3D,iBAAiB,CAAC,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;IAE5D,MAAM,eAAe,GAAG,CAAC,CAAC,sBAAsB,CAAC,CAAC;IAClD,MAAM,cAAc,GAAG,eAAe;SACnC,OAAO,EAAE;SACT,GAAG,CAAC,KAAK,EAAE,CAAkB,EAAE,EAAE;QAChC,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC/B,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,GAAG,MAAM,GAAG,IAAI,EAAE,CAAC,CAAC;QAChD,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;QACtB,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvB,MAAM,YAAY,GAAG,CAAC,CAAC,gBAAgB,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QAC/D,MAAM,YAAY,GAAG,CAAC,CAAC,sBAAsB,CAAC;aAC3C,OAAO,EAAE;aACT,GAAG,CAAC,CAAC,CAAkB,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;QAClD,OAAO;YACL,KAAK,EAAE,YAAY;YACnB,KAAK,EAAE,YAAY;SACpB,CAAC;IACJ,CAAC,CAAC,CAAC;IAEL,MAAM,WAAW,GAAqB,MAAM,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;IAExE,OAAO,MAAM,OAAO,CAAC,GAAG,CACtB,WAAW,CAAC,GAAG,CAAC,KAAK,EAAE,KAA0B,EAAE,EAAE;QACnD,OAAO,MAAM,4BAA4B,CACvC,KAAK,EACL,MAAM,EACN,MAAM,EACN,SAAS,EACT,kBAAkB,EAClB,KAAK,EACL,OAAO,CACR,CAAC;IACJ,CAAC,CAAC,CACH,CAAC;AACJ,CAAC"}
|
package/package.json
CHANGED
package/scraper.md
CHANGED
package/src/constants.ts
CHANGED
|
@@ -3,7 +3,7 @@ import * as url from "url";
|
|
|
3
3
|
import os from "os";
|
|
4
4
|
|
|
5
5
|
// Change this to bump to a newer version of mint's client
|
|
6
|
-
export const TARGET_MINT_VERSION = "v0.0.
|
|
6
|
+
export const TARGET_MINT_VERSION = "v0.0.9";
|
|
7
7
|
|
|
8
8
|
// package installation location
|
|
9
9
|
export const INSTALL_PATH = url.fileURLToPath(new URL(".", import.meta.url));
|
package/src/downloadImage.ts
CHANGED
|
@@ -21,23 +21,31 @@ async function writeImageToFile(
|
|
|
21
21
|
|
|
22
22
|
const writer = createWriteStream(writePath);
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
24
|
+
try {
|
|
25
|
+
const response = await axios.get(imageSrc, {
|
|
26
|
+
responseType: "stream",
|
|
27
|
+
});
|
|
28
|
+
// wx prevents overwriting an image with the exact same name
|
|
29
|
+
// being created in the time we were downloading
|
|
30
|
+
response.data.pipe(writer, {
|
|
31
|
+
flag: "wx",
|
|
32
|
+
});
|
|
33
33
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
34
|
+
return new Promise((resolve, reject) => {
|
|
35
|
+
writer.on("finish", resolve);
|
|
36
|
+
writer.on("error", reject);
|
|
37
|
+
});
|
|
38
|
+
} catch (e) {
|
|
39
|
+
return Promise.reject({
|
|
40
|
+
code: "ENOTFOUND",
|
|
41
|
+
});
|
|
42
|
+
}
|
|
38
43
|
}
|
|
39
44
|
|
|
40
|
-
export function isValidImageSrc(
|
|
45
|
+
export function isValidImageSrc(
|
|
46
|
+
src: string,
|
|
47
|
+
skipValidateImageExtension?: boolean
|
|
48
|
+
) {
|
|
41
49
|
if (!src) {
|
|
42
50
|
return false;
|
|
43
51
|
}
|
|
@@ -47,8 +55,13 @@ export function isValidImageSrc(src: string) {
|
|
|
47
55
|
return false;
|
|
48
56
|
}
|
|
49
57
|
|
|
58
|
+
if (skipValidateImageExtension) {
|
|
59
|
+
return true;
|
|
60
|
+
}
|
|
61
|
+
|
|
50
62
|
const imageHref = removeMetadataFromImageSrc(src);
|
|
51
63
|
const ext = getFileExtension(imageHref);
|
|
64
|
+
|
|
52
65
|
if (!SUPPORTED_MEDIA_EXTENSIONS.includes(ext)) {
|
|
53
66
|
console.error("🚨 We do not support the file extension: " + ext);
|
|
54
67
|
return false;
|
|
@@ -86,6 +99,10 @@ export default async function downloadImage(
|
|
|
86
99
|
.catch((e) => {
|
|
87
100
|
if (e.code === "EEXIST") {
|
|
88
101
|
console.log(`❌ Skipping existing image ${writePath}`);
|
|
102
|
+
} else if (e.code === "ENOTFOUND") {
|
|
103
|
+
console.error(
|
|
104
|
+
`🚨 Cannot download the image, address not found ${imageSrc}`
|
|
105
|
+
);
|
|
89
106
|
} else {
|
|
90
107
|
console.error(e);
|
|
91
108
|
}
|
package/src/index.ts
CHANGED
|
@@ -16,6 +16,8 @@ import {
|
|
|
16
16
|
import { scrapeReadMeSection } from "./scraping/site-scrapers/scrapeReadMeSection.js";
|
|
17
17
|
import dev from "./local-preview/index.js";
|
|
18
18
|
import installDepsCommand from "./local-preview/helper-commands/installDepsCommand.js";
|
|
19
|
+
import { scrapeIntercomPage } from "./scraping/site-scrapers/Intercom/scrapeIntercomPage.js";
|
|
20
|
+
import { scrapeIntercomSection } from "./scraping/site-scrapers/Intercom/scrapeIntercomSection.js";
|
|
19
21
|
|
|
20
22
|
yargs(hideBin(process.argv))
|
|
21
23
|
.command(
|
|
@@ -56,6 +58,14 @@ yargs(hideBin(process.argv))
|
|
|
56
58
|
await scrapePageWrapper(argv, scrapeReadMePage);
|
|
57
59
|
}
|
|
58
60
|
)
|
|
61
|
+
.command(
|
|
62
|
+
"scrape-intercom-page [url]",
|
|
63
|
+
"Scrapes a Intercom page",
|
|
64
|
+
() => {},
|
|
65
|
+
async (argv) => {
|
|
66
|
+
await scrapePageWrapper(argv, scrapeIntercomPage);
|
|
67
|
+
}
|
|
68
|
+
)
|
|
59
69
|
.command(
|
|
60
70
|
"scrape-section [url]",
|
|
61
71
|
"Scrapes the docs in the section",
|
|
@@ -80,7 +90,14 @@ yargs(hideBin(process.argv))
|
|
|
80
90
|
await scrapeSectionAxiosWrapper(argv, scrapeReadMeSection);
|
|
81
91
|
}
|
|
82
92
|
)
|
|
83
|
-
|
|
93
|
+
.command(
|
|
94
|
+
"scrape-intercom-section [url]",
|
|
95
|
+
"Scrapes the Intercom section",
|
|
96
|
+
() => {},
|
|
97
|
+
async (argv) => {
|
|
98
|
+
await scrapeSectionAxiosWrapper(argv, scrapeIntercomSection);
|
|
99
|
+
}
|
|
100
|
+
)
|
|
84
101
|
// Print the help menu when the user enters an invalid command.
|
|
85
102
|
.strictCommands()
|
|
86
103
|
.demandCommand(
|
|
@@ -4,6 +4,7 @@ export enum Frameworks {
|
|
|
4
4
|
DOCUSAURUS = "DOCUSAURUS",
|
|
5
5
|
GITBOOK = "GITBOOK",
|
|
6
6
|
README = "README",
|
|
7
|
+
INTERCOM = "INTERCOM",
|
|
7
8
|
}
|
|
8
9
|
|
|
9
10
|
export function detectFramework(html) {
|
|
@@ -37,5 +38,10 @@ export function detectFramework(html) {
|
|
|
37
38
|
return { framework: Frameworks.README };
|
|
38
39
|
}
|
|
39
40
|
|
|
41
|
+
const isIntercom = $("meta[name='intercom:trackingEvent']").length > 0;
|
|
42
|
+
if (isIntercom) {
|
|
43
|
+
return { framework: Frameworks.INTERCOM };
|
|
44
|
+
}
|
|
45
|
+
|
|
40
46
|
return undefined;
|
|
41
47
|
}
|
|
@@ -12,7 +12,8 @@ export default async function downloadAllImages(
|
|
|
12
12
|
origin: string,
|
|
13
13
|
baseDir: string,
|
|
14
14
|
overwrite: boolean,
|
|
15
|
-
modifyFileName?: any
|
|
15
|
+
modifyFileName?: any,
|
|
16
|
+
skipValidateImageExtension?: boolean
|
|
16
17
|
) {
|
|
17
18
|
if (!baseDir) {
|
|
18
19
|
console.debug("Skipping image downloading");
|
|
@@ -33,7 +34,9 @@ export default async function downloadAllImages(
|
|
|
33
34
|
// Wait to all images to download before continuing
|
|
34
35
|
const origToNewArray = await Promise.all(
|
|
35
36
|
imageSrcs.map(async (imageSrc: string) => {
|
|
36
|
-
if (!isValidImageSrc(imageSrc))
|
|
37
|
+
if (!isValidImageSrc(imageSrc, skipValidateImageExtension)) {
|
|
38
|
+
return;
|
|
39
|
+
}
|
|
37
40
|
|
|
38
41
|
const imageHref = cleanImageSrc(imageSrc, origin);
|
|
39
42
|
|
|
@@ -11,9 +11,10 @@ export default async function downloadLogoImage(
|
|
|
11
11
|
imageSrc: string,
|
|
12
12
|
imageBaseDir: string,
|
|
13
13
|
origin: string,
|
|
14
|
-
overwrite: boolean
|
|
14
|
+
overwrite: boolean,
|
|
15
|
+
skipValidateImageExtension?: boolean
|
|
15
16
|
) {
|
|
16
|
-
if (!isValidImageSrc(imageSrc)) return;
|
|
17
|
+
if (!isValidImageSrc(imageSrc, skipValidateImageExtension)) return;
|
|
17
18
|
|
|
18
19
|
const imageHref = cleanImageSrc(imageSrc, origin);
|
|
19
20
|
|
|
@@ -7,6 +7,7 @@ import { detectFramework, Frameworks } from "./detectFramework.js";
|
|
|
7
7
|
import { getHrefFromArgs } from "../util.js";
|
|
8
8
|
import { getHtmlWithPuppeteer } from "../browser.js";
|
|
9
9
|
import { ArgumentsCamelCase } from "yargs";
|
|
10
|
+
import { scrapeIntercomPage } from "./site-scrapers/Intercom/scrapeIntercomPage.js";
|
|
10
11
|
|
|
11
12
|
function validateFramework(framework) {
|
|
12
13
|
if (!framework) {
|
|
@@ -16,6 +17,7 @@ function validateFramework(framework) {
|
|
|
16
17
|
console.log("scrape-page-docusaurus");
|
|
17
18
|
console.log("scrape-page-gitbook");
|
|
18
19
|
console.log("scrape-page-readme");
|
|
20
|
+
console.log("scrape-page-intercom");
|
|
19
21
|
return process.exit(1);
|
|
20
22
|
}
|
|
21
23
|
}
|
|
@@ -47,11 +49,18 @@ export async function scrapePageAutomatically(argv: any) {
|
|
|
47
49
|
|
|
48
50
|
console.log("Detected framework: " + framework);
|
|
49
51
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
52
|
+
switch (framework) {
|
|
53
|
+
case Frameworks.DOCUSAURUS:
|
|
54
|
+
await scrapePageWrapper(argv, scrapeDocusaurusPage, { version });
|
|
55
|
+
break;
|
|
56
|
+
case Frameworks.GITBOOK:
|
|
57
|
+
await scrapePageWrapper(argv, scrapeGitBookPage, { puppeteer: true });
|
|
58
|
+
break;
|
|
59
|
+
case Frameworks.README:
|
|
60
|
+
await scrapePageWrapper(argv, scrapeReadMePage);
|
|
61
|
+
break;
|
|
62
|
+
case Frameworks.INTERCOM:
|
|
63
|
+
await scrapePageWrapper(argv, scrapeIntercomPage);
|
|
64
|
+
break;
|
|
56
65
|
}
|
|
57
66
|
}
|
|
@@ -9,6 +9,7 @@ import openNestedGitbookMenus from "./site-scrapers/openNestedGitbookMenus.js";
|
|
|
9
9
|
import { scrapeReadMeSection } from "./site-scrapers/scrapeReadMeSection.js";
|
|
10
10
|
import { startBrowser } from "../browser.js";
|
|
11
11
|
import { ArgumentsCamelCase } from "yargs";
|
|
12
|
+
import { scrapeIntercomSection } from "./site-scrapers/Intercom/scrapeIntercomSection.js";
|
|
12
13
|
|
|
13
14
|
export async function scrapeSectionAxiosWrapper(
|
|
14
15
|
argv: ArgumentsCamelCase,
|
|
@@ -83,12 +84,19 @@ export async function scrapeSectionAutomatically(argv: any) {
|
|
|
83
84
|
|
|
84
85
|
console.log("Detected framework: " + framework);
|
|
85
86
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
87
|
+
switch (framework) {
|
|
88
|
+
case Frameworks.DOCUSAURUS:
|
|
89
|
+
await scrapeDocusaurusSectionCommand(argv, version);
|
|
90
|
+
break;
|
|
91
|
+
case Frameworks.GITBOOK:
|
|
92
|
+
await scrapeGitbookSectionCommand(argv);
|
|
93
|
+
break;
|
|
94
|
+
case Frameworks.README:
|
|
95
|
+
await scrapeSectionAxiosWrapper(argv, scrapeReadMeSection);
|
|
96
|
+
break;
|
|
97
|
+
case Frameworks.INTERCOM:
|
|
98
|
+
await scrapeSectionAxiosWrapper(argv, scrapeIntercomSection);
|
|
99
|
+
break;
|
|
92
100
|
}
|
|
93
101
|
}
|
|
94
102
|
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import cheerio from "cheerio";
|
|
2
|
+
import { NodeHtmlMarkdown } from "node-html-markdown";
|
|
3
|
+
import downloadAllImages from "../../downloadAllImages.js";
|
|
4
|
+
import replaceImagePaths from "../../replaceImagePaths.js";
|
|
5
|
+
|
|
6
|
+
export async function scrapeIntercomPage(
|
|
7
|
+
html: string,
|
|
8
|
+
origin: string,
|
|
9
|
+
cliDir: string,
|
|
10
|
+
imageBaseDir: string,
|
|
11
|
+
overwrite: boolean,
|
|
12
|
+
_: string | undefined // version
|
|
13
|
+
) {
|
|
14
|
+
const $ = cheerio.load(html);
|
|
15
|
+
|
|
16
|
+
const titleComponent = $(".t__h1").first();
|
|
17
|
+
const title = titleComponent.text().trim();
|
|
18
|
+
let description = $(".article__desc", titleComponent.parent()).text().trim();
|
|
19
|
+
|
|
20
|
+
let content = $("article").first();
|
|
21
|
+
const contentHtml = $.html(content);
|
|
22
|
+
|
|
23
|
+
const origToWritePath = await downloadAllImages(
|
|
24
|
+
$,
|
|
25
|
+
content,
|
|
26
|
+
origin,
|
|
27
|
+
imageBaseDir,
|
|
28
|
+
overwrite,
|
|
29
|
+
undefined,
|
|
30
|
+
true
|
|
31
|
+
);
|
|
32
|
+
|
|
33
|
+
const nhm = new NodeHtmlMarkdown({ useInlineLinks: false });
|
|
34
|
+
let markdown = nhm.translate(contentHtml);
|
|
35
|
+
|
|
36
|
+
// Keep headers on one line
|
|
37
|
+
markdown = markdown.replace(/# \n\n/g, "# ");
|
|
38
|
+
|
|
39
|
+
// Remove unnecessary nonwidth blank space characters
|
|
40
|
+
markdown = markdown.replace(/\u200b/g, "");
|
|
41
|
+
|
|
42
|
+
// Reduce unnecessary blank lines
|
|
43
|
+
markdown = markdown.replace(/\n\n\n/g, "\n\n");
|
|
44
|
+
|
|
45
|
+
// Mintlify doesn't support bolded headers, remove the asterisks
|
|
46
|
+
markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, "$1 $2\n");
|
|
47
|
+
|
|
48
|
+
markdown = replaceImagePaths(origToWritePath, cliDir, markdown);
|
|
49
|
+
|
|
50
|
+
return { title, description, markdown };
|
|
51
|
+
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import cheerio from "cheerio";
|
|
2
|
+
import { scrapeIntercomPage } from "./scrapeIntercomPage.js";
|
|
3
|
+
import { scrapeGettingFileNameFromUrl } from "../../scrapeGettingFileNameFromUrl.js";
|
|
4
|
+
import downloadLogoImage from "../../downloadLogoImage.js";
|
|
5
|
+
import axios from "axios";
|
|
6
|
+
|
|
7
|
+
export async function scrapeIntercomSection(
|
|
8
|
+
html: string,
|
|
9
|
+
origin: string,
|
|
10
|
+
cliDir: string,
|
|
11
|
+
imageBaseDir: string,
|
|
12
|
+
overwrite: boolean,
|
|
13
|
+
version: string | undefined
|
|
14
|
+
): Promise<MintNavigationEntry[]> {
|
|
15
|
+
let $ = cheerio.load(html);
|
|
16
|
+
|
|
17
|
+
const logoSrc = $(".header__logo img").first().attr("src");
|
|
18
|
+
downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite);
|
|
19
|
+
|
|
20
|
+
const collectionsLink = $(".section .g__space a");
|
|
21
|
+
const collectionsMap = collectionsLink
|
|
22
|
+
.toArray()
|
|
23
|
+
.map(async (s: cheerio.Element) => {
|
|
24
|
+
const href = $(s).attr("href");
|
|
25
|
+
const res = await axios.get(`${origin}${href}`);
|
|
26
|
+
const html = res.data;
|
|
27
|
+
$ = cheerio.load(html);
|
|
28
|
+
const sectionTitle = $(".collection h1").first().text().trim();
|
|
29
|
+
const sectionPages = $(".section .g__space a")
|
|
30
|
+
.toArray()
|
|
31
|
+
.map((s: cheerio.Element) => $(s).attr("href"));
|
|
32
|
+
return {
|
|
33
|
+
group: sectionTitle,
|
|
34
|
+
pages: sectionPages,
|
|
35
|
+
};
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
const collections: MintNavigation[] = await Promise.all(collectionsMap);
|
|
39
|
+
|
|
40
|
+
return await Promise.all(
|
|
41
|
+
collections.map(async (entry: MintNavigationEntry) => {
|
|
42
|
+
return await scrapeGettingFileNameFromUrl(
|
|
43
|
+
entry,
|
|
44
|
+
cliDir,
|
|
45
|
+
origin,
|
|
46
|
+
overwrite,
|
|
47
|
+
scrapeIntercomPage,
|
|
48
|
+
false,
|
|
49
|
+
version
|
|
50
|
+
);
|
|
51
|
+
})
|
|
52
|
+
);
|
|
53
|
+
}
|