@mintlify/scraping 3.0.13 → 3.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.prettierrc +1 -0
- package/bin/browser.js +3 -3
- package/bin/constants.js +23 -23
- package/bin/constants.js.map +1 -1
- package/bin/downloadImage.js +18 -18
- package/bin/downloadImage.js.map +1 -1
- package/bin/scraping/detectFramework.js +13 -13
- package/bin/scraping/detectFramework.js.map +1 -1
- package/bin/scraping/downloadAllImages.js +5 -5
- package/bin/scraping/downloadAllImages.js.map +1 -1
- package/bin/scraping/downloadLogoImage.js +4 -4
- package/bin/scraping/downloadLogoImage.js.map +1 -1
- package/bin/scraping/getSitemapLinks.js +4 -4
- package/bin/scraping/scrapeFileGettingFileNameFromUrl.js +10 -10
- package/bin/scraping/scrapeFileGettingFileNameFromUrl.js.map +1 -1
- package/bin/scraping/scrapeGettingFileNameFromUrl.js +2 -2
- package/bin/scraping/scrapeGettingFileNameFromUrl.js.map +1 -1
- package/bin/scraping/scrapePage.js +3 -3
- package/bin/scraping/scrapePage.js.map +1 -1
- package/bin/scraping/scrapePageCommands.d.ts +1 -1
- package/bin/scraping/scrapePageCommands.js +15 -15
- package/bin/scraping/scrapePageCommands.js.map +1 -1
- package/bin/scraping/scrapeSection.js +6 -6
- package/bin/scraping/scrapeSection.js.map +1 -1
- package/bin/scraping/scrapeSectionCommands.d.ts +1 -1
- package/bin/scraping/scrapeSectionCommands.js +14 -14
- package/bin/scraping/scrapeSectionCommands.js.map +1 -1
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js +11 -11
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js.map +1 -1
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js +12 -14
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js.map +1 -1
- package/bin/scraping/site-scrapers/alternateGroupTitle.js +1 -1
- package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js +8 -11
- package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js.map +1 -1
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js +6 -6
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js +4 -4
- package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.d.ts +1 -1
- package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js +4 -4
- package/bin/scraping/site-scrapers/openNestedGitbookMenus.d.ts +1 -1
- package/bin/scraping/site-scrapers/openNestedGitbookMenus.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js +14 -14
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js +9 -9
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeGitBookPage.js +12 -14
- package/bin/scraping/site-scrapers/scrapeGitBookPage.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js +10 -15
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeReadMePage.js +15 -15
- package/bin/scraping/site-scrapers/scrapeReadMePage.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeReadMeSection.js +11 -15
- package/bin/scraping/site-scrapers/scrapeReadMeSection.js.map +1 -1
- package/bin/tsconfig.tsbuildinfo +1 -1
- package/bin/util.d.ts +1 -1
- package/bin/util.js +23 -26
- package/bin/util.js.map +1 -1
- package/bin/validation/stopIfInvalidLink.js +3 -3
- package/package.json +10 -3
- package/src/browser.ts +3 -3
- package/src/constants.ts +23 -23
- package/src/downloadImage.ts +21 -26
- package/src/scraping/detectFramework.ts +18 -18
- package/src/scraping/downloadAllImages.ts +7 -9
- package/src/scraping/downloadLogoImage.ts +5 -4
- package/src/scraping/getSitemapLinks.ts +4 -4
- package/src/scraping/scrapeFileGettingFileNameFromUrl.ts +12 -18
- package/src/scraping/scrapeGettingFileNameFromUrl.ts +7 -5
- package/src/scraping/scrapePage.ts +4 -3
- package/src/scraping/scrapePageCommands.ts +17 -18
- package/src/scraping/scrapeSection.ts +8 -16
- package/src/scraping/scrapeSectionCommands.ts +19 -34
- package/src/scraping/site-scrapers/Intercom/scrapeIntercomPage.ts +12 -11
- package/src/scraping/site-scrapers/Intercom/scrapeIntercomSection.ts +23 -24
- package/src/scraping/site-scrapers/alternateGroupTitle.ts +1 -1
- package/src/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.ts +8 -11
- package/src/scraping/site-scrapers/links-per-group/getLinksRecursively.ts +6 -6
- package/src/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.ts +4 -4
- package/src/scraping/site-scrapers/openNestedDocusaurusMenus.ts +5 -5
- package/src/scraping/site-scrapers/openNestedGitbookMenus.ts +2 -4
- package/src/scraping/site-scrapers/scrapeDocusaurusPage.ts +15 -18
- package/src/scraping/site-scrapers/scrapeDocusaurusSection.ts +11 -14
- package/src/scraping/site-scrapers/scrapeGitBookPage.ts +13 -14
- package/src/scraping/site-scrapers/scrapeGitBookSection.ts +11 -15
- package/src/scraping/site-scrapers/scrapeReadMePage.ts +17 -22
- package/src/scraping/site-scrapers/scrapeReadMeSection.ts +27 -31
- package/src/util.ts +25 -36
- package/src/validation/stopIfInvalidLink.ts +3 -3
package/bin/util.d.ts
CHANGED
package/bin/util.js
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
import { mkdirSync, writeFileSync } from
|
|
2
|
-
import Ora from
|
|
3
|
-
import path from
|
|
4
|
-
import shell from
|
|
5
|
-
import stopIfInvalidLink from
|
|
1
|
+
import { mkdirSync, writeFileSync } from 'fs';
|
|
2
|
+
import Ora from 'ora';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import shell from 'shelljs';
|
|
5
|
+
import stopIfInvalidLink from './validation/stopIfInvalidLink.js';
|
|
6
6
|
export const MintConfig = (name, color, ctaName, ctaUrl, filename) => {
|
|
7
7
|
return {
|
|
8
8
|
name,
|
|
9
|
-
logo:
|
|
10
|
-
favicon:
|
|
9
|
+
logo: '',
|
|
10
|
+
favicon: '',
|
|
11
11
|
colors: {
|
|
12
12
|
primary: color,
|
|
13
13
|
},
|
|
@@ -19,7 +19,7 @@ export const MintConfig = (name, color, ctaName, ctaUrl, filename) => {
|
|
|
19
19
|
anchors: [],
|
|
20
20
|
navigation: [
|
|
21
21
|
{
|
|
22
|
-
group:
|
|
22
|
+
group: 'Home',
|
|
23
23
|
pages: [filename],
|
|
24
24
|
},
|
|
25
25
|
],
|
|
@@ -38,9 +38,7 @@ export const Page = (title, description, markdown) => {
|
|
|
38
38
|
if (!endsWithQuote) {
|
|
39
39
|
title = title + '"';
|
|
40
40
|
}
|
|
41
|
-
const optionalDescription = description
|
|
42
|
-
? `\ndescription: "${description}"`
|
|
43
|
-
: "";
|
|
41
|
+
const optionalDescription = description ? `\ndescription: "${description}"` : '';
|
|
44
42
|
return `---\ntitle: ${title}${optionalDescription}\n---\n\n${markdown}`;
|
|
45
43
|
};
|
|
46
44
|
export function getOrigin(url) {
|
|
@@ -50,43 +48,43 @@ export function getOrigin(url) {
|
|
|
50
48
|
}
|
|
51
49
|
export function objToReadableString(objs) {
|
|
52
50
|
// Two spaces as indentation
|
|
53
|
-
return objs.map((obj) => JSON.stringify(obj, null, 2)).join(
|
|
51
|
+
return objs.map((obj) => JSON.stringify(obj, null, 2)).join(',\n');
|
|
54
52
|
}
|
|
55
53
|
export const toFilename = (title) => {
|
|
56
54
|
// Gets rid of special characters at the start and end
|
|
57
55
|
// of the name by converting to spaces then using trim.
|
|
58
56
|
return title
|
|
59
|
-
.replace(/[^a-z0-9]/gi,
|
|
57
|
+
.replace(/[^a-z0-9]/gi, ' ')
|
|
60
58
|
.trim()
|
|
61
|
-
.replace(/ /g,
|
|
59
|
+
.replace(/ /g, '-')
|
|
62
60
|
.toLowerCase();
|
|
63
61
|
};
|
|
64
62
|
export const addMdx = (fileName) => {
|
|
65
|
-
if (fileName.endsWith(
|
|
63
|
+
if (fileName.endsWith('.mdx')) {
|
|
66
64
|
return fileName;
|
|
67
65
|
}
|
|
68
|
-
return fileName +
|
|
66
|
+
return fileName + '.mdx';
|
|
69
67
|
};
|
|
70
|
-
export const createPage = (title, description, markdown, overwrite = false, rootDir =
|
|
68
|
+
export const createPage = (title, description, markdown, overwrite = false, rootDir = '', fileName) => {
|
|
71
69
|
const writePath = path.join(rootDir, addMdx(fileName || toFilename(title)));
|
|
72
70
|
// Create the folders needed if they're missing
|
|
73
71
|
mkdirSync(rootDir, { recursive: true });
|
|
74
72
|
// Write the page to memory
|
|
75
73
|
if (overwrite) {
|
|
76
74
|
writeFileSync(writePath, Page(title, description, markdown));
|
|
77
|
-
console.log(
|
|
75
|
+
console.log('✏️ - ' + writePath);
|
|
78
76
|
}
|
|
79
77
|
else {
|
|
80
78
|
try {
|
|
81
79
|
writeFileSync(writePath, Page(title, description, markdown), {
|
|
82
|
-
flag:
|
|
80
|
+
flag: 'wx',
|
|
83
81
|
});
|
|
84
|
-
console.log(
|
|
82
|
+
console.log('✏️ - ' + writePath);
|
|
85
83
|
}
|
|
86
84
|
catch (e) {
|
|
87
85
|
// We do a try-catch instead of an if-statement to avoid a race condition
|
|
88
86
|
// of the file being created after we started writing.
|
|
89
|
-
if (e?.code ===
|
|
87
|
+
if (e?.code === 'EEXIST') {
|
|
90
88
|
console.log(`❌ Skipping existing file ${writePath}`);
|
|
91
89
|
}
|
|
92
90
|
else {
|
|
@@ -100,23 +98,22 @@ export function getHrefFromArgs(argv) {
|
|
|
100
98
|
stopIfInvalidLink(href);
|
|
101
99
|
return href;
|
|
102
100
|
}
|
|
103
|
-
export const buildLogger = (startText =
|
|
101
|
+
export const buildLogger = (startText = '') => {
|
|
104
102
|
const logger = Ora().start(startText);
|
|
105
103
|
return logger;
|
|
106
104
|
};
|
|
107
105
|
export const getFileExtension = (filename) => {
|
|
108
|
-
const ext = filename.substring(filename.lastIndexOf(
|
|
106
|
+
const ext = filename.substring(filename.lastIndexOf('.') + 1, filename.length);
|
|
109
107
|
if (filename === ext)
|
|
110
108
|
return undefined;
|
|
111
109
|
return ext.toLowerCase();
|
|
112
110
|
};
|
|
113
111
|
export const fileBelongsInPagesFolder = (filename) => {
|
|
114
112
|
const extension = getFileExtension(filename);
|
|
115
|
-
return (extension
|
|
116
|
-
(extension === "mdx" || extension === "md" || extension === "tsx"));
|
|
113
|
+
return extension && (extension === 'mdx' || extension === 'md' || extension === 'tsx');
|
|
117
114
|
};
|
|
118
115
|
export const ensureYarn = (logger) => {
|
|
119
|
-
const yarnInstalled = shell.which(
|
|
116
|
+
const yarnInstalled = shell.which('yarn');
|
|
120
117
|
if (!yarnInstalled) {
|
|
121
118
|
logger.fail(`yarn must be installed, run
|
|
122
119
|
|
package/bin/util.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAC9C,OAAO,GAAuB,MAAM,KAAK,CAAC;AAC1C,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAC9C,OAAO,GAAuB,MAAM,KAAK,CAAC;AAC1C,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,SAAS,CAAC;AAE5B,OAAO,iBAAiB,MAAM,mCAAmC,CAAC;AAElE,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,IAAY,EACZ,KAAa,EACb,OAAe,EACf,MAAc,EACd,QAAgB,EAChB,EAAE;IACF,OAAO;QACL,IAAI;QACJ,IAAI,EAAE,EAAE;QACR,OAAO,EAAE,EAAE;QACX,MAAM,EAAE;YACN,OAAO,EAAE,KAAK;SACf;QACD,WAAW,EAAE,EAAE;QACf,eAAe,EAAE;YACf,IAAI,EAAE,OAAO;YACb,GAAG,EAAE,MAAM;SACZ;QACD,OAAO,EAAE,EAAE;QACX,UAAU,EAAE;YACV;gBACE,KAAK,EAAE,MAAM;gBACb,KAAK,EAAE,CAAC,QAAQ,CAAC;aAClB;SACF;QACD,6DAA6D;KAC9D,CAAC;AACJ,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,IAAI,GAAG,CAAC,KAAa,EAAE,WAAoB,EAAE,QAAiB,EAAE,EAAE;IAC7E,uDAAuD;IACvD,yDAAyD;IACzD,wBAAwB;IACxB,MAAM,eAAe,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;IAC9C,MAAM,aAAa,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;IAC5C,IAAI,CAAC,eAAe,EAAE;QACpB,KAAK,GAAG,GAAG,GAAG,KAAK,CAAC;KACrB;IACD,IAAI,CAAC,aAAa,EAAE;QAClB,KAAK,GAAG,KAAK,GAAG,GAAG,CAAC;KACrB;IAED,MAAM,mBAAmB,GAAG,WAAW,CAAC,CAAC,CAAC,mBAAmB,WAAW,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IACjF,OAAO,eAAe,KAAK,GAAG,mBAAmB,YAAY,QAAQ,EAAE,CAAC;AAC1E,CAAC,CAAC;AAEF,MAAM,UAAU,SAAS,CAAC,GAAW;IACnC,+CAA+C;IAC/C,gDAAgD;IAChD,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;AAC7B,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,IAA2B;IAC7D,4BAA4B;IAC5B,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AACrE,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAAG,CAAC,KAAa,EAAE,EAAE;IAC1C,sDAAsD;IACtD,uDAAuD;IACvD,OAAO,KAAK;SACT,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC;SAC3B,IAAI,EAAE;SACN,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC;SAClB,WAAW,EAAE,CAAC;AACnB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,MAAM,GAAG,CAAC,QAAgB,EAAE,EAAE;IACzC,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE;QAC7B,OAAO,QAAQ,CAAC;KACjB;IACD,OAAO,QAAQ,GAAG,MAAM,CAAC;AAC3B,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,KAAa,EACb,WAAoB,EACpB,QAAiB,EACjB,SAAS,GAAG,KAAK,EACjB,OAAO,GAAG,EAAE,EACZ,QAAiB,EACjB,EAAE;IACF,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,QAAQ,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAE5E,+CAA+C;IAC/C,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAExC,2BAA2B;IAC3B,IAAI,SAAS,EAAE;QACb,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,KAAK,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC;QAC7D,OAAO,CAAC,GAAG,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC;KAClC;SAAM;QACL,IAAI;YACF,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,KAAK,EAAE,WAAW,EAAE,QAAQ,CAAC,EAAE;gBAC3D,IAAI,EAAE,IAAI;aACX,CAAC,CAAC;YACH,OAAO,CAAC,GAAG,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC;SAClC;QAAC,OAAO,CAAC,EAAE;YACV,yEAAyE;YACzE,sDAAsD;YACtD,IAAK,CAAsB,EAAE,IAAI,KAAK,QAAQ,EAAE;gBAC9C,OAAO,CAAC,GAAG,CAAC,4BAA4B,SAAS,EAAE,CAAC,CAAC;aACtD;iBAAM;gBACL,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;aAClB;SACF;KACF;AACH,CAAC,CAAC;AAEF,MAAM,UAAU,eAAe,CAAC,IAAS;IACvC,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC;IACtB,iBAAiB,CAAC,IAAI,CAAC,CAAC;IACxB,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,SAAS,GAAG,EAAE,EAAW,EAAE;IACrD,MAAM,MAAM,GAAG,GAAG,EAAE,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IACtC,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,QAAgB,EAAE,EAAE;IACnD,MAAM,GAAG,GAAG,QAAQ,CAAC,SAAS,CAAC,QAAQ,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;IAC/E,IAAI,QAAQ,KAAK,GAAG;QAAE,OAAO,SAAS,CAAC;IACvC,OAAO,GAAG,CAAC,WAAW,EAAE,CAAC;AAC3B,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,wBAAwB,GAAG,CAAC,QAAgB,EAAE,EAAE;IAC3D,MAAM,SAAS,GAAG,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IAC7C,OAAO,SAAS,IAAI,CAAC,SAAS,KAAK,KAAK,IAAI,SAAS,KAAK,IAAI,IAAI,SAAS,KAAK,KAAK,CAAC,CAAC;AACzF,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,UAAU,GAAG,CAAC,MAAe,EAAE,EAAE;IAC5C,MAAM,aAAa,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC1C,IAAI,CAAC,aAAa,EAAE;QAClB,MAAM,CAAC,IAAI,CAAC;;;;KAIX,CAAC,CAAC;QACH,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;KACjB;AACH,CAAC,CAAC"}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import isValidLink from
|
|
1
|
+
import isValidLink from './isValidLink.js';
|
|
2
2
|
export default function stopIfInvalidLink(href) {
|
|
3
3
|
if (!isValidLink(href)) {
|
|
4
|
-
console.log(
|
|
5
|
-
console.log(
|
|
4
|
+
console.log('Invalid link: ' + href);
|
|
5
|
+
console.log('Make sure the link starts with http:// or https://');
|
|
6
6
|
process.exit(1);
|
|
7
7
|
}
|
|
8
8
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mintlify/scraping",
|
|
3
|
-
"version": "3.0.
|
|
3
|
+
"version": "3.0.15",
|
|
4
4
|
"description": "Scrape documentation frameworks to Mintlify docs",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=18.0.0"
|
|
@@ -34,13 +34,16 @@
|
|
|
34
34
|
"prepare": "npm run build",
|
|
35
35
|
"build": "tsc",
|
|
36
36
|
"watch": "tsc --watch",
|
|
37
|
-
"lint": "eslint . --cache"
|
|
37
|
+
"lint": "eslint . --cache",
|
|
38
|
+
"format": "prettier \"./src/**/*.ts\" --write",
|
|
39
|
+
"format:check": "prettier \"./src/**/*.ts\" --check"
|
|
38
40
|
},
|
|
39
41
|
"dependencies": {
|
|
40
42
|
"@apidevtools/swagger-parser": "^10.1.0",
|
|
41
43
|
"axios": "^1.2.2",
|
|
42
44
|
"cheerio": "^0.22.0",
|
|
43
45
|
"favicons": "^7.0.1",
|
|
46
|
+
"fs-extra": "^11.1.1",
|
|
44
47
|
"node-html-markdown": "^1.3.0",
|
|
45
48
|
"ora": "^6.1.2",
|
|
46
49
|
"puppeteer": "^19.4.0",
|
|
@@ -50,7 +53,9 @@
|
|
|
50
53
|
"devDependencies": {
|
|
51
54
|
"@mintlify/eslint-config": "1.0.3",
|
|
52
55
|
"@mintlify/eslint-config-typescript": "1.0.7",
|
|
56
|
+
"@mintlify/prettier-config": "1.0.1",
|
|
53
57
|
"@mintlify/ts-config": "1.0.7",
|
|
58
|
+
"@trivago/prettier-plugin-sort-imports": "3.x",
|
|
54
59
|
"@tsconfig/recommended": "1.x",
|
|
55
60
|
"@types/cheerio": "^0.22.31",
|
|
56
61
|
"@types/node": "^18.7.13",
|
|
@@ -61,8 +66,10 @@
|
|
|
61
66
|
"eslint": "8.x",
|
|
62
67
|
"eslint-config-prettier": "8.x",
|
|
63
68
|
"eslint-plugin-unused-imports": "2.x",
|
|
69
|
+
"jest": "^29.3.1",
|
|
70
|
+
"openapi-types": "^12.1.3",
|
|
64
71
|
"prettier": "2.x",
|
|
65
72
|
"typescript": "^4.8.2"
|
|
66
73
|
},
|
|
67
|
-
"gitHead": "
|
|
74
|
+
"gitHead": "cf3d6884a0cc69eece8129eacc2cc74b751a38e3"
|
|
68
75
|
}
|
package/src/browser.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { launch } from
|
|
1
|
+
import { launch } from 'puppeteer';
|
|
2
2
|
|
|
3
3
|
export async function startBrowser() {
|
|
4
4
|
try {
|
|
@@ -7,7 +7,7 @@ export async function startBrowser() {
|
|
|
7
7
|
ignoreHTTPSErrors: true,
|
|
8
8
|
});
|
|
9
9
|
} catch (err) {
|
|
10
|
-
console.log(
|
|
10
|
+
console.log('Could not create a browser instance: ', err);
|
|
11
11
|
process.exit(1);
|
|
12
12
|
}
|
|
13
13
|
}
|
|
@@ -16,7 +16,7 @@ export async function getHtmlWithPuppeteer(href: string) {
|
|
|
16
16
|
const browser = await startBrowser();
|
|
17
17
|
const page = await browser.newPage();
|
|
18
18
|
await page.goto(href, {
|
|
19
|
-
waitUntil:
|
|
19
|
+
waitUntil: 'networkidle2',
|
|
20
20
|
});
|
|
21
21
|
const html = await page.content();
|
|
22
22
|
browser.close();
|
package/src/constants.ts
CHANGED
|
@@ -1,37 +1,37 @@
|
|
|
1
|
-
import
|
|
2
|
-
import
|
|
3
|
-
import
|
|
1
|
+
import os from 'os';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import * as url from 'url';
|
|
4
4
|
|
|
5
5
|
// package installation location
|
|
6
|
-
export const INSTALL_PATH = url.fileURLToPath(new URL(
|
|
6
|
+
export const INSTALL_PATH = url.fileURLToPath(new URL('.', import.meta.url));
|
|
7
7
|
|
|
8
8
|
export const HOME_DIR = os.homedir();
|
|
9
9
|
|
|
10
|
-
export const DOT_MINTLIFY = path.join(HOME_DIR,
|
|
10
|
+
export const DOT_MINTLIFY = path.join(HOME_DIR, '.mintlify');
|
|
11
11
|
|
|
12
|
-
export const VERSION_PATH = path.join(DOT_MINTLIFY,
|
|
12
|
+
export const VERSION_PATH = path.join(DOT_MINTLIFY, 'mint', 'mint-version.txt');
|
|
13
13
|
|
|
14
|
-
export const CLIENT_PATH = path.join(DOT_MINTLIFY,
|
|
14
|
+
export const CLIENT_PATH = path.join(DOT_MINTLIFY, 'mint', 'client');
|
|
15
15
|
|
|
16
|
-
export const MINT_PATH = path.join(DOT_MINTLIFY,
|
|
16
|
+
export const MINT_PATH = path.join(DOT_MINTLIFY, 'mint');
|
|
17
17
|
|
|
18
18
|
// command execution location
|
|
19
19
|
export const CMD_EXEC_PATH = process.cwd();
|
|
20
20
|
|
|
21
21
|
export const SUPPORTED_MEDIA_EXTENSIONS = [
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
22
|
+
'jpeg',
|
|
23
|
+
'jpg',
|
|
24
|
+
'jfif',
|
|
25
|
+
'pjpeg',
|
|
26
|
+
'pjp',
|
|
27
|
+
'png',
|
|
28
|
+
'svg',
|
|
29
|
+
'svgz',
|
|
30
|
+
'ico',
|
|
31
|
+
'webp',
|
|
32
|
+
'gif',
|
|
33
|
+
'apng',
|
|
34
|
+
'avif',
|
|
35
|
+
'bmp',
|
|
36
|
+
'mp4',
|
|
37
37
|
];
|
package/src/downloadImage.ts
CHANGED
|
@@ -1,18 +1,15 @@
|
|
|
1
|
-
import
|
|
2
|
-
import
|
|
3
|
-
import
|
|
4
|
-
import { getFileExtension } from "./util.js";
|
|
5
|
-
import { SUPPORTED_MEDIA_EXTENSIONS } from "./constants.js";
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import { existsSync, mkdirSync, createWriteStream } from 'fs';
|
|
3
|
+
import path from 'path';
|
|
6
4
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
) {
|
|
5
|
+
import { SUPPORTED_MEDIA_EXTENSIONS } from './constants.js';
|
|
6
|
+
import { getFileExtension } from './util.js';
|
|
7
|
+
|
|
8
|
+
async function writeImageToFile(imageSrc: string, writePath: string, overwrite: boolean) {
|
|
12
9
|
// Avoid unnecessary downloads
|
|
13
10
|
if (existsSync(writePath) && !overwrite) {
|
|
14
11
|
return Promise.reject({
|
|
15
|
-
code:
|
|
12
|
+
code: 'EEXIST',
|
|
16
13
|
});
|
|
17
14
|
}
|
|
18
15
|
|
|
@@ -23,21 +20,21 @@ async function writeImageToFile(
|
|
|
23
20
|
|
|
24
21
|
try {
|
|
25
22
|
const response = await axios.get(imageSrc, {
|
|
26
|
-
responseType:
|
|
23
|
+
responseType: 'stream',
|
|
27
24
|
});
|
|
28
25
|
// wx prevents overwriting an image with the exact same name
|
|
29
26
|
// being created in the time we were downloading
|
|
30
27
|
response.data.pipe(writer, {
|
|
31
|
-
flag:
|
|
28
|
+
flag: 'wx',
|
|
32
29
|
});
|
|
33
30
|
|
|
34
31
|
return new Promise((resolve, reject) => {
|
|
35
|
-
writer.on(
|
|
36
|
-
writer.on(
|
|
32
|
+
writer.on('finish', resolve);
|
|
33
|
+
writer.on('error', reject);
|
|
37
34
|
});
|
|
38
35
|
} catch (e) {
|
|
39
36
|
return Promise.reject({
|
|
40
|
-
code:
|
|
37
|
+
code: 'ENOTFOUND',
|
|
41
38
|
});
|
|
42
39
|
}
|
|
43
40
|
}
|
|
@@ -47,7 +44,7 @@ export function isValidImageSrc(src: string) {
|
|
|
47
44
|
return false;
|
|
48
45
|
}
|
|
49
46
|
// We do not support downloading base64 in-line images.
|
|
50
|
-
if (src.startsWith(
|
|
47
|
+
if (src.startsWith('data:')) {
|
|
51
48
|
return false;
|
|
52
49
|
}
|
|
53
50
|
|
|
@@ -55,7 +52,7 @@ export function isValidImageSrc(src: string) {
|
|
|
55
52
|
const ext = getFileExtension(imageHref);
|
|
56
53
|
|
|
57
54
|
if (ext && !SUPPORTED_MEDIA_EXTENSIONS.includes(ext)) {
|
|
58
|
-
console.error(
|
|
55
|
+
console.error('🚨 We do not support the file extension: ' + ext);
|
|
59
56
|
return false;
|
|
60
57
|
}
|
|
61
58
|
|
|
@@ -64,7 +61,7 @@ export function isValidImageSrc(src: string) {
|
|
|
64
61
|
|
|
65
62
|
export function removeMetadataFromImageSrc(src: string) {
|
|
66
63
|
// Part of the URL standard
|
|
67
|
-
const metadataSymbols = [
|
|
64
|
+
const metadataSymbols = ['?', '#'];
|
|
68
65
|
|
|
69
66
|
metadataSymbols.forEach((dividerSymbol) => {
|
|
70
67
|
// Some frameworks add metadata after the file extension, we need to remove that.
|
|
@@ -76,7 +73,7 @@ export function removeMetadataFromImageSrc(src: string) {
|
|
|
76
73
|
|
|
77
74
|
export function cleanImageSrc(src: string, origin: string) {
|
|
78
75
|
// Add origin if the image tags are using relative sources
|
|
79
|
-
return src.startsWith(
|
|
76
|
+
return src.startsWith('http') ? src : new URL(src, origin).href;
|
|
80
77
|
}
|
|
81
78
|
|
|
82
79
|
export default async function downloadImage(
|
|
@@ -86,15 +83,13 @@ export default async function downloadImage(
|
|
|
86
83
|
) {
|
|
87
84
|
await writeImageToFile(imageSrc, writePath, overwrite)
|
|
88
85
|
.then(() => {
|
|
89
|
-
console.log(
|
|
86
|
+
console.log('🖼️ - ' + writePath);
|
|
90
87
|
})
|
|
91
88
|
.catch((e) => {
|
|
92
|
-
if (e.code ===
|
|
89
|
+
if (e.code === 'EEXIST') {
|
|
93
90
|
console.log(`❌ Skipping existing image ${writePath}`);
|
|
94
|
-
} else if (e.code ===
|
|
95
|
-
console.error(
|
|
96
|
-
`🚨 Cannot download the image, address not found ${imageSrc}`
|
|
97
|
-
);
|
|
91
|
+
} else if (e.code === 'ENOTFOUND') {
|
|
92
|
+
console.error(`🚨 Cannot download the image, address not found ${imageSrc}`);
|
|
98
93
|
} else {
|
|
99
94
|
console.error(e);
|
|
100
95
|
}
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import cheerio from
|
|
1
|
+
import cheerio from 'cheerio';
|
|
2
2
|
|
|
3
3
|
export enum Frameworks {
|
|
4
|
-
DOCUSAURUS =
|
|
5
|
-
GITBOOK =
|
|
6
|
-
README =
|
|
7
|
-
INTERCOM =
|
|
4
|
+
DOCUSAURUS = 'DOCUSAURUS',
|
|
5
|
+
GITBOOK = 'GITBOOK',
|
|
6
|
+
README = 'README',
|
|
7
|
+
INTERCOM = 'INTERCOM',
|
|
8
8
|
}
|
|
9
9
|
|
|
10
10
|
export function detectFramework(html) {
|
|
@@ -13,25 +13,25 @@ export function detectFramework(html) {
|
|
|
13
13
|
|
|
14
14
|
if (
|
|
15
15
|
docusaurusMeta.length > 0 &&
|
|
16
|
-
docusaurusMeta.attr(
|
|
17
|
-
typeof docusaurusMeta.attr(
|
|
18
|
-
(docusaurusMeta.attr(
|
|
16
|
+
docusaurusMeta.attr('content') &&
|
|
17
|
+
typeof docusaurusMeta.attr('content') === 'string' &&
|
|
18
|
+
(docusaurusMeta.attr('content') as string).includes('Docusaurus')
|
|
19
19
|
) {
|
|
20
|
-
const metaAttrString = docusaurusMeta.attr(
|
|
21
|
-
if (metaAttrString.includes(
|
|
22
|
-
return { framework: Frameworks.DOCUSAURUS, version:
|
|
20
|
+
const metaAttrString = docusaurusMeta.attr('content') as string;
|
|
21
|
+
if (metaAttrString.includes('v3')) {
|
|
22
|
+
return { framework: Frameworks.DOCUSAURUS, version: '3' };
|
|
23
23
|
}
|
|
24
|
-
if (metaAttrString.includes(
|
|
25
|
-
return { framework: Frameworks.DOCUSAURUS, version:
|
|
26
|
-
} else if (metaAttrString.includes(
|
|
24
|
+
if (metaAttrString.includes('v2')) {
|
|
25
|
+
return { framework: Frameworks.DOCUSAURUS, version: '2' };
|
|
26
|
+
} else if (metaAttrString.includes('v1')) {
|
|
27
27
|
console.warn(
|
|
28
|
-
|
|
28
|
+
'WARNING: We detected Docusaurus version 1 but we only support scraping versions 2 and 3.'
|
|
29
29
|
);
|
|
30
|
-
return { framework: Frameworks.DOCUSAURUS, version:
|
|
30
|
+
return { framework: Frameworks.DOCUSAURUS, version: '1' };
|
|
31
31
|
}
|
|
32
32
|
}
|
|
33
33
|
|
|
34
|
-
const isGitBook = $(
|
|
34
|
+
const isGitBook = $('.gitbook-root').length > 0;
|
|
35
35
|
if (isGitBook) {
|
|
36
36
|
return { framework: Frameworks.GITBOOK };
|
|
37
37
|
}
|
|
@@ -46,5 +46,5 @@ export function detectFramework(html) {
|
|
|
46
46
|
return { framework: Frameworks.INTERCOM };
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
-
return { framework: undefined};
|
|
49
|
+
return { framework: undefined };
|
|
50
50
|
}
|
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
import path from
|
|
1
|
+
import path from 'path';
|
|
2
|
+
|
|
2
3
|
import downloadImage, {
|
|
3
4
|
cleanImageSrc,
|
|
4
5
|
isValidImageSrc,
|
|
5
6
|
removeMetadataFromImageSrc,
|
|
6
|
-
} from
|
|
7
|
+
} from '../downloadImage.js';
|
|
7
8
|
|
|
8
9
|
// To Do: Use CheerioElement instead of any when we bump the cheerio version
|
|
9
10
|
export default async function downloadAllImages(
|
|
@@ -15,7 +16,7 @@ export default async function downloadAllImages(
|
|
|
15
16
|
modifyFileName?: any
|
|
16
17
|
) {
|
|
17
18
|
if (!baseDir) {
|
|
18
|
-
console.debug(
|
|
19
|
+
console.debug('Skipping image downloading');
|
|
19
20
|
return;
|
|
20
21
|
}
|
|
21
22
|
|
|
@@ -24,8 +25,8 @@ export default async function downloadAllImages(
|
|
|
24
25
|
const imageSrcs = [
|
|
25
26
|
...new Set<string>(
|
|
26
27
|
content
|
|
27
|
-
.find(
|
|
28
|
-
.map((i, image) => $(image).attr(
|
|
28
|
+
.find('img[src]')
|
|
29
|
+
.map((i, image) => $(image).attr('src'))
|
|
29
30
|
.toArray()
|
|
30
31
|
),
|
|
31
32
|
];
|
|
@@ -52,8 +53,5 @@ export default async function downloadAllImages(
|
|
|
52
53
|
})
|
|
53
54
|
);
|
|
54
55
|
|
|
55
|
-
return origToNewArray.reduce(
|
|
56
|
-
(result, current) => Object.assign(result, current),
|
|
57
|
-
{}
|
|
58
|
-
);
|
|
56
|
+
return origToNewArray.reduce((result, current) => Object.assign(result, current), {});
|
|
59
57
|
}
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
import path from
|
|
1
|
+
import path from 'path';
|
|
2
|
+
|
|
2
3
|
import downloadImage, {
|
|
3
4
|
cleanImageSrc,
|
|
4
5
|
isValidImageSrc,
|
|
5
6
|
removeMetadataFromImageSrc,
|
|
6
|
-
} from
|
|
7
|
-
import { getFileExtension } from
|
|
7
|
+
} from '../downloadImage.js';
|
|
8
|
+
import { getFileExtension } from '../util.js';
|
|
8
9
|
|
|
9
10
|
// To Do: Use CheerioElement instead of any when we bump the cheerio version
|
|
10
11
|
export default async function downloadLogoImage(
|
|
@@ -18,7 +19,7 @@ export default async function downloadLogoImage(
|
|
|
18
19
|
const imageHref = cleanImageSrc(imageSrc, origin);
|
|
19
20
|
|
|
20
21
|
const ext = getFileExtension(removeMetadataFromImageSrc(imageSrc));
|
|
21
|
-
const imagePath = path.join(imageBaseDir,
|
|
22
|
+
const imagePath = path.join(imageBaseDir, 'logo', 'logo-light-mode.' + ext);
|
|
22
23
|
|
|
23
24
|
await downloadImage(imageHref, imagePath, overwrite);
|
|
24
25
|
}
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import axios from
|
|
1
|
+
import axios from 'axios';
|
|
2
2
|
|
|
3
3
|
// Not in use.
|
|
4
4
|
// Gets all links in a sitemap.
|
|
5
5
|
export const getSitemapLinks = async (url: URL) => {
|
|
6
|
-
const hostname = url.hostname.replace(
|
|
7
|
-
const regex = new RegExp(`https?:\/\/${hostname}.+?(?=<\/loc>)`,
|
|
6
|
+
const hostname = url.hostname.replace('.', '\\.');
|
|
7
|
+
const regex = new RegExp(`https?:\/\/${hostname}.+?(?=<\/loc>)`, 'gmi');
|
|
8
8
|
|
|
9
9
|
try {
|
|
10
10
|
const indexData = (await axios.get(url.href)).data as string;
|
|
@@ -12,7 +12,7 @@ export const getSitemapLinks = async (url: URL) => {
|
|
|
12
12
|
return array || [];
|
|
13
13
|
} catch (err) {
|
|
14
14
|
console.error(err);
|
|
15
|
-
console.log(
|
|
15
|
+
console.log('Skipping sitemap links because we encountered an error.');
|
|
16
16
|
return [];
|
|
17
17
|
}
|
|
18
18
|
};
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import
|
|
2
|
-
import
|
|
3
|
-
|
|
4
|
-
import {
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
|
|
4
|
+
import { getHtmlWithPuppeteer } from '../browser.js';
|
|
5
|
+
import { createPage } from '../util.js';
|
|
5
6
|
|
|
6
7
|
export async function scrapeFileGettingFileNameFromUrl(
|
|
7
8
|
pathname: string,
|
|
@@ -25,22 +26,22 @@ export async function scrapeFileGettingFileNameFromUrl(
|
|
|
25
26
|
baseToRemove?: string
|
|
26
27
|
): Promise<MintNavigationEntry> {
|
|
27
28
|
// Skip scraping external links
|
|
28
|
-
if (pathname.startsWith(
|
|
29
|
+
if (pathname.startsWith('https://') || pathname.startsWith('http://')) {
|
|
29
30
|
return pathname;
|
|
30
31
|
}
|
|
31
32
|
|
|
32
33
|
// Removes file name from the end
|
|
33
|
-
const splitSubpath = pathname.split(
|
|
34
|
-
let folders = splitSubpath.slice(0, splitSubpath.length - 1).join(
|
|
34
|
+
const splitSubpath = pathname.split('/');
|
|
35
|
+
let folders = splitSubpath.slice(0, splitSubpath.length - 1).join('/');
|
|
35
36
|
|
|
36
37
|
// Remove base dir if passed in
|
|
37
38
|
if (baseToRemove && folders.startsWith(baseToRemove)) {
|
|
38
|
-
folders = folders.replace(baseToRemove,
|
|
39
|
+
folders = folders.replace(baseToRemove, '');
|
|
39
40
|
}
|
|
40
41
|
|
|
41
42
|
// TO DO: Improve this by putting each page's images in a separate
|
|
42
43
|
// folder named after the title of the page.
|
|
43
|
-
const imageBaseDir = path.join(cliDir,
|
|
44
|
+
const imageBaseDir = path.join(cliDir, 'images', folders);
|
|
44
45
|
|
|
45
46
|
// Scrape each page separately
|
|
46
47
|
const href = new URL(pathname, origin).href;
|
|
@@ -69,17 +70,10 @@ export async function scrapeFileGettingFileNameFromUrl(
|
|
|
69
70
|
const newFileLocation = folders ? path.join(cliDir, folders) : cliDir;
|
|
70
71
|
|
|
71
72
|
// Default to introduction.mdx if we encountered index.html
|
|
72
|
-
const fileName = splitSubpath[splitSubpath.length - 1] ||
|
|
73
|
+
const fileName = splitSubpath[splitSubpath.length - 1] || 'introduction';
|
|
73
74
|
|
|
74
75
|
// Will create subfolders as needed
|
|
75
|
-
createPage(
|
|
76
|
-
title ?? '',
|
|
77
|
-
description,
|
|
78
|
-
markdown,
|
|
79
|
-
overwrite,
|
|
80
|
-
newFileLocation,
|
|
81
|
-
fileName
|
|
82
|
-
);
|
|
76
|
+
createPage(title ?? '', description, markdown, overwrite, newFileLocation, fileName);
|
|
83
77
|
|
|
84
78
|
// Removes first slash if we are in a folder, Mintlify doesn't need it
|
|
85
79
|
return folders ? path.join(folders, fileName).substring(1) : fileName;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { scrapeFileGettingFileNameFromUrl } from
|
|
1
|
+
import { scrapeFileGettingFileNameFromUrl } from './scrapeFileGettingFileNameFromUrl.js';
|
|
2
2
|
|
|
3
3
|
export async function scrapeGettingFileNameFromUrl(
|
|
4
4
|
navEntry: MintNavigationEntry,
|
|
@@ -21,10 +21,11 @@ export async function scrapeGettingFileNameFromUrl(
|
|
|
21
21
|
version: string | undefined,
|
|
22
22
|
baseToRemove?: string
|
|
23
23
|
): Promise<MintNavigationEntry> {
|
|
24
|
-
if (typeof navEntry !==
|
|
25
|
-
const newPages:
|
|
24
|
+
if (typeof navEntry !== 'string') {
|
|
25
|
+
const newPages: MintNavigationEntry[] = [];
|
|
26
26
|
for (const nestedNavEntry of navEntry.pages) {
|
|
27
|
-
newPages.push(
|
|
27
|
+
newPages.push(
|
|
28
|
+
await scrapeGettingFileNameFromUrl(
|
|
28
29
|
nestedNavEntry,
|
|
29
30
|
cliDir,
|
|
30
31
|
origin,
|
|
@@ -33,7 +34,8 @@ export async function scrapeGettingFileNameFromUrl(
|
|
|
33
34
|
puppeteer,
|
|
34
35
|
version,
|
|
35
36
|
baseToRemove
|
|
36
|
-
|
|
37
|
+
)
|
|
38
|
+
);
|
|
37
39
|
}
|
|
38
40
|
navEntry.pages = newPages;
|
|
39
41
|
return navEntry;
|