@mintlify/previewing 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintignore +1 -0
- package/.eslintrc.json +3 -0
- package/CONTRIBUTING.md +17 -0
- package/README.md +78 -0
- package/bin/constants.js +32 -0
- package/bin/constants.js.map +1 -0
- package/bin/downloadImage.js +82 -0
- package/bin/downloadImage.js.map +1 -0
- package/bin/index.js +19 -0
- package/bin/index.js.map +1 -0
- package/bin/local-preview/helper-commands/installDepsCommand.js +12 -0
- package/bin/local-preview/helper-commands/installDepsCommand.js.map +1 -0
- package/bin/local-preview/index.js +154 -0
- package/bin/local-preview/index.js.map +1 -0
- package/bin/local-preview/listener/categorize.js +95 -0
- package/bin/local-preview/listener/categorize.js.map +1 -0
- package/bin/local-preview/listener/generate.js +74 -0
- package/bin/local-preview/listener/generate.js.map +1 -0
- package/bin/local-preview/listener/index.js +200 -0
- package/bin/local-preview/listener/index.js.map +1 -0
- package/bin/local-preview/listener/update.js +24 -0
- package/bin/local-preview/listener/update.js.map +1 -0
- package/bin/local-preview/listener/utils/createPage.js +163 -0
- package/bin/local-preview/listener/utils/createPage.js.map +1 -0
- package/bin/local-preview/listener/utils/getOpenApiContext.js +57 -0
- package/bin/local-preview/listener/utils/getOpenApiContext.js.map +1 -0
- package/bin/local-preview/listener/utils/mintConfigFile.js +22 -0
- package/bin/local-preview/listener/utils/mintConfigFile.js.map +1 -0
- package/bin/local-preview/listener/utils/toTitleCase.js +36 -0
- package/bin/local-preview/listener/utils/toTitleCase.js.map +1 -0
- package/bin/local-preview/listener/utils/types.js +2 -0
- package/bin/local-preview/listener/utils/types.js.map +1 -0
- package/bin/local-preview/listener/utils.js +68 -0
- package/bin/local-preview/listener/utils.js.map +1 -0
- package/bin/util.js +123 -0
- package/bin/util.js.map +1 -0
- package/package.json +77 -0
- package/scraper.md +121 -0
- package/src/constants.ts +40 -0
- package/src/downloadImage.ts +102 -0
- package/src/index.ts +35 -0
- package/src/local-preview/helper-commands/installDepsCommand.ts +13 -0
- package/src/local-preview/index.ts +196 -0
- package/src/local-preview/listener/categorize.ts +107 -0
- package/src/local-preview/listener/generate.ts +121 -0
- package/src/local-preview/listener/index.ts +228 -0
- package/src/local-preview/listener/update.ts +27 -0
- package/src/local-preview/listener/utils/createPage.ts +211 -0
- package/src/local-preview/listener/utils/getOpenApiContext.ts +77 -0
- package/src/local-preview/listener/utils/mintConfigFile.ts +28 -0
- package/src/local-preview/listener/utils/toTitleCase.ts +40 -0
- package/src/local-preview/listener/utils/types.ts +14 -0
- package/src/local-preview/listener/utils.ts +87 -0
- package/src/types.d.ts +35 -0
- package/src/util.ts +154 -0
- package/tsconfig.json +19 -0
package/bin/util.js
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import { mkdirSync, writeFileSync } from "fs";
|
|
2
|
+
import Ora from "ora";
|
|
3
|
+
import path from "path";
|
|
4
|
+
import shell from "shelljs";
|
|
5
|
+
// Builds the default mint.json configuration object for a freshly
// scaffolded docs site. Only the fields the user supplied are filled in;
// logo/favicon/links start empty for them to customize later.
export const MintConfig = (name, color, ctaName, ctaUrl, filename) => ({
  name,
  logo: "",
  favicon: "",
  colors: {
    primary: color,
  },
  topbarLinks: [],
  topbarCtaButton: {
    name: ctaName,
    url: ctaUrl,
  },
  anchors: [],
  navigation: [
    {
      group: "Home",
      pages: [filename],
    },
  ],
  // footerSocials: {}, // support object type for footer types
});
|
|
28
|
+
// Assembles the full MDX page text: YAML frontmatter (double-quoted title
// plus an optional description) followed by the markdown body.
export const Page = (title, description, markdown) => {
  // If we are an empty String we want to add two quotes,
  // if we added as we went we would detect the first quote
  // as the closing quote.
  const startsWithQuote = title.startsWith('"');
  // BUG FIX: this previously used startsWith, so a title like `"Foo` never
  // received its closing quote (and `Foo"` received a spurious extra one),
  // producing broken frontmatter.
  const endsWithQuote = title.endsWith('"');
  if (!startsWithQuote) {
    title = '"' + title;
  }
  if (!endsWithQuote) {
    title = title + '"';
  }
  const optionalDescription = description
    ? `\ndescription: "${description}"`
    : "";
  return `---\ntitle: ${title}${optionalDescription}\n---\n\n${markdown}`;
};
|
|
45
|
+
// Returns the origin (scheme + host + port) of a URL string.
// e.g. https://google.com       -> https://google.com
//      https://google.com/page  -> https://google.com
export function getOrigin(url) {
  const parsed = new URL(url);
  return parsed.origin;
}
|
|
50
|
+
// Pretty-prints an array of objects: each object is JSON-stringified with
// two-space indentation and the results are joined by ",\n" so the output
// can be pasted into a JSON array (e.g. a mint.json navigation snippet).
export function objToReadableString(objs) {
  const rendered = [];
  for (const obj of objs) {
    rendered.push(JSON.stringify(obj, null, 2));
  }
  return rendered.join(",\n");
}
|
|
54
|
+
// Converts a page title into a url-safe, lowercase, hyphenated filename.
// Special characters become spaces first so trim() strips any that sit at
// the start or end of the name; interior spaces then become hyphens.
export const toFilename = (title) => {
  const sanitized = title.replace(/[^a-z0-9]/gi, " ").trim();
  return sanitized.replaceAll(" ", "-").toLowerCase();
};
|
|
63
|
+
// Appends the ".mdx" extension unless the file name already ends with it.
export const addMdx = (fileName) =>
  fileName.endsWith(".mdx") ? fileName : `${fileName}.mdx`;
|
|
69
|
+
// Writes an MDX page (frontmatter + markdown) under rootDir and logs the
// result. The file name defaults to a slug of the title. When overwrite is
// false, an existing file is skipped with an ❌ log line instead of being
// clobbered.
export const createPage = (title, description, markdown, overwrite = false, rootDir = "", fileName) => {
  const targetPath = path.join(rootDir, addMdx(fileName || toFilename(title)));
  const contents = Page(title, description, markdown);

  // Create any missing parent folders.
  mkdirSync(rootDir, { recursive: true });

  if (overwrite) {
    writeFileSync(targetPath, contents);
    console.log("✏️ - " + targetPath);
    return;
  }

  try {
    // "wx" makes the write fail with EEXIST when the file already exists.
    // We do a try-catch instead of an if-statement to avoid a race condition
    // of the file being created after we started writing.
    writeFileSync(targetPath, contents, {
      flag: "wx",
    });
    console.log("✏️ - " + targetPath);
  }
  catch (e) {
    if (e.code === "EEXIST") {
      console.log(`❌ Skipping existing file ${targetPath}`);
    }
    else {
      console.error(e);
    }
  }
};
|
|
97
|
+
// Creates and starts an Ora terminal spinner showing startText.
export const buildLogger = (startText = "") => Ora().start(startText);
|
|
101
|
+
// Returns the text after the last "." in filename, or undefined when there
// is no dot at all. Note: "a.tar.gz" -> "gz", "trailing." -> "".
export const getFileExtension = (filename) => {
  const ext = filename.slice(filename.lastIndexOf(".") + 1);
  // With no dot, lastIndexOf is -1 and slice(0) returns the whole name.
  return filename === ext ? undefined : ext;
};
|
|
107
|
+
// True when the file extension marks a page source file (mdx, md, or tsx).
// Falls through falsy (undefined/"") when the name has no usable extension.
export const fileBelongsInPagesFolder = (filename) => {
  const extension = getFileExtension(filename);
  return extension && ["mdx", "md", "tsx"].includes(extension);
};
|
|
112
|
+
// Verifies that yarn is available on PATH. If it is not, fails the given
// Ora spinner with install instructions and terminates the process with
// exit code 1 — callers can assume yarn exists after this returns.
export const ensureYarn = (logger) => {
  const yarnInstalled = shell.which("yarn");
  if (!yarnInstalled) {
    logger.fail(`yarn must be installed, run

npm install --global yarn

`);
    process.exit(1);
  }
};
|
|
123
|
+
//# sourceMappingURL=util.js.map
|
package/bin/util.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAC9C,OAAO,GAAuB,MAAM,KAAK,CAAC;AAC1C,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,SAAS,CAAC;AAE5B,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,IAAY,EACZ,KAAa,EACb,OAAe,EACf,MAAc,EACd,QAAgB,EAChB,EAAE;IACF,OAAO;QACL,IAAI;QACJ,IAAI,EAAE,EAAE;QACR,OAAO,EAAE,EAAE;QACX,MAAM,EAAE;YACN,OAAO,EAAE,KAAK;SACf;QACD,WAAW,EAAE,EAAE;QACf,eAAe,EAAE;YACf,IAAI,EAAE,OAAO;YACb,GAAG,EAAE,MAAM;SACZ;QACD,OAAO,EAAE,EAAE;QACX,UAAU,EAAE;YACV;gBACE,KAAK,EAAE,MAAM;gBACb,KAAK,EAAE,CAAC,QAAQ,CAAC;aAClB;SACF;QACD,6DAA6D;KAC9D,CAAC;AACJ,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,IAAI,GAAG,CAClB,KAAa,EACb,WAAoB,EACpB,QAAiB,EACjB,EAAE;IACF,uDAAuD;IACvD,yDAAyD;IACzD,wBAAwB;IACxB,MAAM,eAAe,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;IAC9C,MAAM,aAAa,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;IAC5C,IAAI,CAAC,eAAe,EAAE;QACpB,KAAK,GAAG,GAAG,GAAG,KAAK,CAAC;KACrB;IACD,IAAI,CAAC,aAAa,EAAE;QAClB,KAAK,GAAG,KAAK,GAAG,GAAG,CAAC;KACrB;IAED,MAAM,mBAAmB,GAAG,WAAW;QACrC,CAAC,CAAC,mBAAmB,WAAW,GAAG;QACnC,CAAC,CAAC,EAAE,CAAC;IACP,OAAO,eAAe,KAAK,GAAG,mBAAmB,YAAY,QAAQ,EAAE,CAAC;AAC1E,CAAC,CAAC;AAEF,MAAM,UAAU,SAAS,CAAC,GAAW;IACnC,+CAA+C;IAC/C,gDAAgD;IAChD,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;AAC7B,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,IAA2B;IAC7D,4BAA4B;IAC5B,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AACrE,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAAG,CAAC,KAAa,EAAE,EAAE;IAC1C,sDAAsD;IACtD,uDAAuD;IACvD,OAAO,KAAK;SACT,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC;SAC3B,IAAI,EAAE;SACN,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC;SAClB,WAAW,EAAE,CAAC;AACnB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,MAAM,GAAG,CAAC,QAAgB,EAAE,EAAE;IACzC,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE;QAC7B,OAAO,QAAQ,CAAC;KACjB;IACD,OAAO,QAAQ,GAAG,MAAM,CAAC;AAC3B,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,KAAa,EACb,WAAoB,EACpB,QAAiB,EACjB,SAAS,GAAG,KAAK,EACjB,OAAO,GAAG,EAAE,EACZ,QAAiB,EACjB,EAAE;
IACF,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,QAAQ,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAE5E,+CAA+C;IAC/C,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAExC,2BAA2B;IAC3B,IAAI,SAAS,EAAE;QACb,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,KAAK,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC;QAC7D,OAAO,CAAC,GAAG,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC;KAClC;SAAM;QACL,IAAI;YACF,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,KAAK,EAAE,WAAW,EAAE,QAAQ,CAAC,EAAE;gBAC3D,IAAI,EAAE,IAAI;aACX,CAAC,CAAC;YACH,OAAO,CAAC,GAAG,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC;SAClC;QAAC,OAAO,CAAC,EAAE;YACV,yEAAyE;YACzE,sDAAsD;YACtD,IAAI,CAAC,CAAC,IAAI,KAAK,QAAQ,EAAE;gBACvB,OAAO,CAAC,GAAG,CAAC,4BAA4B,SAAS,EAAE,CAAC,CAAC;aACtD;iBAAM;gBACL,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;aAClB;SACF;KACF;AACH,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,SAAS,GAAG,EAAE,EAAW,EAAE;IACrD,MAAM,MAAM,GAAG,GAAG,EAAE,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IACtC,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,QAAgB,EAAE,EAAE;IACnD,MAAM,GAAG,GAAG,QAAQ,CAAC,SAAS,CAC5B,QAAQ,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,EAC7B,QAAQ,CAAC,MAAM,CAChB,CAAC;IACF,IAAI,QAAQ,KAAK,GAAG;QAAE,OAAO,SAAS,CAAC;IACvC,OAAO,GAAG,CAAC;AACb,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,wBAAwB,GAAG,CAAC,QAAgB,EAAE,EAAE;IAC3D,MAAM,SAAS,GAAG,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IAC7C,OAAO,CACL,SAAS;QACT,CAAC,SAAS,KAAK,KAAK,IAAI,SAAS,KAAK,IAAI,IAAI,SAAS,KAAK,KAAK,CAAC,CACnE,CAAC;AACJ,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,UAAU,GAAG,CAAC,MAAe,EAAE,EAAE;IAC5C,MAAM,aAAa,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC1C,IAAI,CAAC,aAAa,EAAE;QAClB,MAAM,CAAC,IAAI,CAAC;;;;KAIX,CAAC,CAAC;QACH,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;KACjB;AACH,CAAC,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@mintlify/previewing",
|
|
3
|
+
"version": "3.0.0",
|
|
4
|
+
"description": "Preview Mintlify docs locally",
|
|
5
|
+
"engines": {
|
|
6
|
+
"node": ">=18.0.0"
|
|
7
|
+
},
|
|
8
|
+
"license": "Elastic-2.0",
|
|
9
|
+
"scripts": {
|
|
10
|
+
"prepare": "npm run build",
|
|
11
|
+
"build": "tsc",
|
|
12
|
+
"watch": "tsc --watch",
|
|
13
|
+
"lint": "eslint . --cache"
|
|
14
|
+
},
|
|
15
|
+
"author": "Mintlify, Inc.",
|
|
16
|
+
"repository": {
|
|
17
|
+
"type": "git",
|
|
18
|
+
"url": "https://github.com/mintlify/mint",
|
|
19
|
+
"directory": "packages/mintlify-previewing"
|
|
20
|
+
},
|
|
21
|
+
"publishConfig": {
|
|
22
|
+
"access": "public",
|
|
23
|
+
"registry": "https://registry.npmjs.org/"
|
|
24
|
+
},
|
|
25
|
+
"exports": "./bin/index.js",
|
|
26
|
+
"bin": {
|
|
27
|
+
"mintlify-preview": "bin/index.js"
|
|
28
|
+
},
|
|
29
|
+
"type": "module",
|
|
30
|
+
"dependencies": {
|
|
31
|
+
"@apidevtools/swagger-parser": "^10.1.0",
|
|
32
|
+
"@mintlify/validation": "^0.1.9",
|
|
33
|
+
"@octokit/rest": "^19.0.5",
|
|
34
|
+
"axios": "^1.2.2",
|
|
35
|
+
"chalk": "^5.1.0",
|
|
36
|
+
"cheerio": "^0.22.0",
|
|
37
|
+
"chokidar": "^3.5.3",
|
|
38
|
+
"favicons": "^7.0.1",
|
|
39
|
+
"fs-extra": "^11.1.0",
|
|
40
|
+
"gray-matter": "^4.0.3",
|
|
41
|
+
"inquirer": "^9.1.0",
|
|
42
|
+
"is-absolute-url": "^4.0.1",
|
|
43
|
+
"is-internet-available": "^3.1.0",
|
|
44
|
+
"minimist-lite": "^2.2.1",
|
|
45
|
+
"node-html-markdown": "^1.3.0",
|
|
46
|
+
"open": "^8.4.0",
|
|
47
|
+
"openapi-types": "^12.0.2",
|
|
48
|
+
"ora": "^6.1.2",
|
|
49
|
+
"puppeteer": "^19.4.0",
|
|
50
|
+
"remark": "^14.0.2",
|
|
51
|
+
"remark-frontmatter": "^4.0.1",
|
|
52
|
+
"remark-gfm": "^3.0.1",
|
|
53
|
+
"remark-mdx": "^2.2.1",
|
|
54
|
+
"shelljs": "^0.8.5",
|
|
55
|
+
"unist-util-visit": "^4.1.1",
|
|
56
|
+
"yargs": "^17.6.0"
|
|
57
|
+
},
|
|
58
|
+
"devDependencies": {
|
|
59
|
+
"@mintlify/eslint-config": "1.0.3",
|
|
60
|
+
"@mintlify/eslint-config-typescript": "1.0.7",
|
|
61
|
+
"@mintlify/ts-config": "1.0.7",
|
|
62
|
+
"@tsconfig/recommended": "1.x",
|
|
63
|
+
"@types/cheerio": "^0.22.31",
|
|
64
|
+
"@types/fs-extra": "^9.0.13",
|
|
65
|
+
"@types/inquirer": "^9.0.1",
|
|
66
|
+
"@types/node": "^18.7.13",
|
|
67
|
+
"@types/shelljs": "^0.8.11",
|
|
68
|
+
"@types/yargs": "^17.0.13",
|
|
69
|
+
"@typescript-eslint/eslint-plugin": "5.x",
|
|
70
|
+
"@typescript-eslint/parser": "5.x",
|
|
71
|
+
"eslint": "8.x",
|
|
72
|
+
"eslint-config-prettier": "8.x",
|
|
73
|
+
"eslint-plugin-unused-imports": "2.x",
|
|
74
|
+
"prettier": "2.x",
|
|
75
|
+
"typescript": "^4.8.2"
|
|
76
|
+
}
|
|
77
|
+
}
|
package/scraper.md
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# CLI Scraping
|
|
2
|
+
|
|
3
|
+
The CLI has many commands. This doc focuses on how we coded scraping websites.
|
|
4
|
+
|
|
5
|
+
## User Interface
|
|
6
|
+
|
|
7
|
+
There are two main commands:
|
|
8
|
+
|
|
9
|
+
`mintlify scrape-page [url]`
|
|
10
|
+
|
|
11
|
+
and
|
|
12
|
+
|
|
13
|
+
`mintlify scrape-section [url]`
|
|
14
|
+
|
|
15
|
+
Scraping a page downloads a single page’s content. Scraping a section goes through the navigation and scrapes each page. The code for downloading a page’s content is shared between the two commands.
|
|
16
|
+
|
|
17
|
+
Important files: `scraping/scrapePageCommands.ts`, `scraping/scrapeSectionAutomatically.ts`
|
|
18
|
+
|
|
19
|
+
We have `scrape-gitbook-page` and similar commands for debugging. Ignore them, they just call internal functions directly. You should not need to use them unless you are debugging issues with Detecting Frameworks.
|
|
20
|
+
|
|
21
|
+
## Overwriting
|
|
22
|
+
|
|
23
|
+
The user has to add a `--overwrite` flag if they want to overwrite their current files.
|
|
24
|
+
|
|
25
|
+
## Sections vs Websites
|
|
26
|
+
|
|
27
|
+
We call the command `scrape-section` instead of `scrape-website` because we cannot scrape pages not in the navigation of the URL first passed in. For example, ReadMe has API Reference and other sections accessible through a separate top-navigation which we do not parse. We only scrape the navigation on the left: [https://docs.readme.com/main/docs](https://docs.readme.com/main/docs)
|
|
28
|
+
|
|
29
|
+
## Detecting Frameworks
|
|
30
|
+
|
|
31
|
+
The commands look in the page HTML to detect what framework scraper to use. For example, all Docusaurus sites have a metatag with the word Docusaurus in it. Sometimes, the metatag even has the Docusaurus version.
|
|
32
|
+
|
|
33
|
+
Each framework’s scrapers live in `scraping/site-scrapers/`
|
|
34
|
+
|
|
35
|
+
We currently support:
|
|
36
|
+
|
|
37
|
+
- Docusaurus
|
|
38
|
+
- GitBook
|
|
39
|
+
- ReadMe
|
|
40
|
+
- Intercom
|
|
41
|
+
|
|
42
|
+
## Terminal Output
|
|
43
|
+
|
|
44
|
+
We print a line in the terminal for every file we write. `util.ts` has a createPage function that takes care of writing the file and logging.
|
|
45
|
+
|
|
46
|
+
We use a pencil emoji when we successfully write a file. Images get a picture emoji. Likewise, we print a X emoji when we find a file that already exists and the user has not enabled overwriting files. We use emojis so you can tell what the command is doing without reading each file path.
|
|
47
|
+
|
|
48
|
+
We also print the file paths when scraping sections so the user can easily copy paste them into mint.json. Note that pages the user already added in Mintlify are not included in the printed example. We do not generate mint.json completely, we are just giving a small example to help users starting from scratch.
|
|
49
|
+
|
|
50
|
+
```jsx
|
|
51
|
+
Add the following to your navigation in mint.json:
|
|
52
|
+
|
|
53
|
+
{
|
|
54
|
+
"group": "Guides",
|
|
55
|
+
"pages": ["page-we-scraped"]
|
|
56
|
+
}
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
# Navigation Scraping
|
|
60
|
+
|
|
61
|
+
Most sites use JavaScript to open navigation menus which do not automatically include the menu buttons in the HTML. We use Puppeteer to click every nested menu so the site adds the menu buttons to the HTML. For example the original site’s HTML:
|
|
62
|
+
|
|
63
|
+
```jsx
|
|
64
|
+
<div>
|
|
65
|
+
<a id="my-nested-menu"></a>
|
|
66
|
+
</div>
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
can turn into this after opening the nested menu:
|
|
70
|
+
|
|
71
|
+
```jsx
|
|
72
|
+
<div>
|
|
73
|
+
<a id="my-nested-menu" aria-expanded=true></a>
|
|
74
|
+
<div>
|
|
75
|
+
<a href="/page"></a>
|
|
76
|
+
<a href="/other-page"></a>
|
|
77
|
+
</div>
|
|
78
|
+
</div>
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Ultimately, all section scrapers need to find an array of links to visit then call the scrape page function in a loop.
|
|
82
|
+
|
|
83
|
+
We use axios instead of Puppeteer if a site doesn’t hide links. Puppeteer is slow.
|
|
84
|
+
|
|
85
|
+
# Image File Locations
|
|
86
|
+
|
|
87
|
+
Images go in an `images/` folder because that’s what most users want. Scraping per section uses the same root-level images folder. Scraping per page downloads them to the current location. Thus, scraping a single page from a folder means the user always has to move the images themselves. That’s a trade-off we are comfortable with — trying to detect an existing images folder gets too complicated too fast.
|
|
88
|
+
|
|
89
|
+
# Cheerio
|
|
90
|
+
|
|
91
|
+
Cheerio is a library to scrape/handle the HTML after we have it in a string. Most of the work is using inspect-element to view a website and figure out where the content we want is, then writing the corresponding Cheerio code.
|
|
92
|
+
|
|
93
|
+
# HTML to MDX
|
|
94
|
+
|
|
95
|
+
We use an open-source library to convert HTML to Markdown: https://github.com/crosstype/node-html-markdown
|
|
96
|
+
|
|
97
|
+
The `util.ts` createPage function assembles the MDX metadata, we just need to return an object of the form `{ title, description, content }` from each page scraper.
|
|
98
|
+
|
|
99
|
+
## Parsing Issues
|
|
100
|
+
|
|
101
|
+
Parsing struggles when documentation websites are using non-standard HTML. For example, code blocks are supposed to use `<pre><code></code></pre>`, but GitBook just uses divs.
|
|
102
|
+
|
|
103
|
+
We can write custom translators for the library that determine how we parse certain objects.
|
|
104
|
+
|
|
105
|
+
In some cases, we will want custom translators even if parsing succeeds. For example, ReadMe callouts are using quote syntax
|
|
106
|
+
|
|
107
|
+
```jsx
|
|
108
|
+
> 💡
|
|
109
|
+
> Callout text
|
|
110
|
+
>
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
We want to convert them to:
|
|
114
|
+
|
|
115
|
+
```jsx
|
|
116
|
+
<Tip>Callout text</Tip>
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Regex
|
|
120
|
+
|
|
121
|
+
You can use regex to make small changes where translators are overkill or there’s no obvious component to modify. For example, here’s the end of `scrapeDocusaurusPage.ts`:
|
package/src/constants.ts
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import path from "path";
import * as url from "url";
import os from "os";

// Change this to bump to a newer version of mint's client
export const TARGET_MINT_VERSION = "v0.0.9";

// package installation location (the directory containing this module)
export const INSTALL_PATH = url.fileURLToPath(new URL(".", import.meta.url));

// The current user's home directory.
export const HOME_DIR = os.homedir();

// Root folder for all local Mintlify state: ~/.mintlify
export const DOT_MINTLIFY = path.join(HOME_DIR, ".mintlify");

// File recording which client version is currently checked out locally.
export const VERSION_PATH = path.join(DOT_MINTLIFY, "mint", "mint-version.txt");

// Local checkout of the Mintlify client app.
export const CLIENT_PATH = path.join(DOT_MINTLIFY, "mint", "client");

// Parent folder of the client checkout and version file.
export const MINT_PATH = path.join(DOT_MINTLIFY, "mint");

// command execution location
export const CMD_EXEC_PATH = process.cwd();

// Media file extensions the image downloader accepts (compared against the
// lowercase extension returned by getFileExtension).
export const SUPPORTED_MEDIA_EXTENSIONS = [
  "jpeg",
  "jpg",
  "jfif",
  "pjpeg",
  "pjp",
  "png",
  "svg",
  "svgz",
  "ico",
  "webp",
  "gif",
  "apng",
  "avif",
  "bmp",
  "mp4",
];
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, createWriteStream } from "fs";
|
|
2
|
+
import path from "path";
|
|
3
|
+
import axios from "axios";
|
|
4
|
+
import { getFileExtension } from "./util.js";
|
|
5
|
+
import { SUPPORTED_MEDIA_EXTENSIONS } from "./constants.js";
|
|
6
|
+
|
|
7
|
+
async function writeImageToFile(
|
|
8
|
+
imageSrc: string,
|
|
9
|
+
writePath: string,
|
|
10
|
+
overwrite: boolean
|
|
11
|
+
) {
|
|
12
|
+
// Avoid unnecessary downloads
|
|
13
|
+
if (existsSync(writePath) && !overwrite) {
|
|
14
|
+
return Promise.reject({
|
|
15
|
+
code: "EEXIST",
|
|
16
|
+
});
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
// Create the folders needed if they're missing
|
|
20
|
+
mkdirSync(path.dirname(writePath), { recursive: true });
|
|
21
|
+
|
|
22
|
+
const writer = createWriteStream(writePath);
|
|
23
|
+
|
|
24
|
+
try {
|
|
25
|
+
const response = await axios.get(imageSrc, {
|
|
26
|
+
responseType: "stream",
|
|
27
|
+
});
|
|
28
|
+
// wx prevents overwriting an image with the exact same name
|
|
29
|
+
// being created in the time we were downloading
|
|
30
|
+
response.data.pipe(writer, {
|
|
31
|
+
flag: "wx",
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
return new Promise((resolve, reject) => {
|
|
35
|
+
writer.on("finish", resolve);
|
|
36
|
+
writer.on("error", reject);
|
|
37
|
+
});
|
|
38
|
+
} catch (e) {
|
|
39
|
+
return Promise.reject({
|
|
40
|
+
code: "ENOTFOUND",
|
|
41
|
+
});
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
export function isValidImageSrc(src: string) {
|
|
46
|
+
if (!src) {
|
|
47
|
+
return false;
|
|
48
|
+
}
|
|
49
|
+
// We do not support downloading base64 in-line images.
|
|
50
|
+
if (src.startsWith("data:")) {
|
|
51
|
+
return false;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
const imageHref = removeMetadataFromImageSrc(src);
|
|
55
|
+
const ext = getFileExtension(imageHref);
|
|
56
|
+
|
|
57
|
+
if (ext && !SUPPORTED_MEDIA_EXTENSIONS.includes(ext)) {
|
|
58
|
+
console.error("🚨 We do not support the file extension: " + ext);
|
|
59
|
+
return false;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
return true;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
export function removeMetadataFromImageSrc(src: string) {
|
|
66
|
+
// Part of the URL standard
|
|
67
|
+
const metadataSymbols = ["?", "#"];
|
|
68
|
+
|
|
69
|
+
metadataSymbols.forEach((dividerSymbol) => {
|
|
70
|
+
// Some frameworks add metadata after the file extension, we need to remove that.
|
|
71
|
+
src = src.split(dividerSymbol)[0];
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
return src;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
export function cleanImageSrc(src: string, origin: string) {
|
|
78
|
+
// Add origin if the image tags are using relative sources
|
|
79
|
+
return src.startsWith("http") ? src : new URL(src, origin).href;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
export default async function downloadImage(
|
|
83
|
+
imageSrc: string,
|
|
84
|
+
writePath: string,
|
|
85
|
+
overwrite = false
|
|
86
|
+
) {
|
|
87
|
+
await writeImageToFile(imageSrc, writePath, overwrite)
|
|
88
|
+
.then(() => {
|
|
89
|
+
console.log("🖼️ - " + writePath);
|
|
90
|
+
})
|
|
91
|
+
.catch((e) => {
|
|
92
|
+
if (e.code === "EEXIST") {
|
|
93
|
+
console.log(`❌ Skipping existing image ${writePath}`);
|
|
94
|
+
} else if (e.code === "ENOTFOUND") {
|
|
95
|
+
console.error(
|
|
96
|
+
`🚨 Cannot download the image, address not found ${imageSrc}`
|
|
97
|
+
);
|
|
98
|
+
} else {
|
|
99
|
+
console.error(e);
|
|
100
|
+
}
|
|
101
|
+
});
|
|
102
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
#!/usr/bin/env node
/* eslint-disable @typescript-eslint/no-empty-function */

import yargs from "yargs";
// NOTE(review): yargs v17 documents the ESM helper specifier as
// "yargs/helpers" — confirm "yargs/helpers.js" actually resolves at runtime.
import { hideBin } from "yargs/helpers.js";
import dev from "./local-preview/index.js";
import installDepsCommand from "./local-preview/helper-commands/installDepsCommand.js";

// CLI entry point: registers the `dev` and `install` commands, then parses
// process.argv. The empty builder callbacks mean neither command takes
// extra command-line options.
yargs(hideBin(process.argv))
  .command(
    "dev",
    "Runs Mintlify locally (Must run in directory with mint.json)",
    () => {},
    async (argv) => {
      await dev(argv);
    }
  )
  .command(
    "install",
    "Install dependencies for local Mintlify",
    () => {},
    installDepsCommand
  )
  // Print the help menu when the user enters an invalid command.
  .strictCommands()
  .demandCommand(
    1,
    "Unknown command. See above for the list of supported commands."
  )

  // Alias option flags --help = -h, --version = -v
  .alias("h", "help")
  .alias("v", "version")

  .parse();
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import shell from "shelljs";
|
|
2
|
+
import { CLIENT_PATH } from "../../constants.js";
|
|
3
|
+
import { buildLogger, ensureYarn } from "../../util.js";
|
|
4
|
+
|
|
5
|
+
const installDeps = async () => {
|
|
6
|
+
const logger = buildLogger("");
|
|
7
|
+
ensureYarn(logger);
|
|
8
|
+
shell.cd(CLIENT_PATH);
|
|
9
|
+
shell.exec("yarn");
|
|
10
|
+
logger.succeed("Dependencies installed.");
|
|
11
|
+
};
|
|
12
|
+
|
|
13
|
+
export default installDeps;
|