@mintlify/cli 4.0.953 → 4.0.954
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +55 -0
- package/bin/scrape.js +108 -0
- package/bin/tsconfig.build.tsbuildinfo +1 -1
- package/package.json +6 -5
- package/src/cli.tsx +76 -0
- package/src/scrape.tsx +122 -0
package/bin/cli.js
CHANGED
|
@@ -11,6 +11,7 @@ import { jsx as _jsx, jsxs as _jsxs } from "react/jsx-runtime";
|
|
|
11
11
|
import { validate, getOpenApiDocumentFromUrl, isAllowedLocalSchemaUrl } from '@mintlify/common';
|
|
12
12
|
import { getBrokenInternalLinks, renameFilesAndUpdateLinksInContent } from '@mintlify/link-rot';
|
|
13
13
|
import { addLog, dev, validateBuild, ErrorLog, SpinnerLog, SuccessLog, Logs, clearLogs, BrokenLinksLog, WarningLog, } from '@mintlify/previewing';
|
|
14
|
+
import { checkUrl } from '@mintlify/scraping';
|
|
14
15
|
import { render, Text } from 'ink';
|
|
15
16
|
import path from 'path';
|
|
16
17
|
import yargs from 'yargs';
|
|
@@ -20,6 +21,7 @@ import { checkPort, checkForMintJson, checkNodeVersion, upgradeConfig, checkForD
|
|
|
20
21
|
import { init } from './init.js';
|
|
21
22
|
import { mdxLinter } from './mdxLinter.js';
|
|
22
23
|
import { migrateMdx } from './migrateMdx.js';
|
|
24
|
+
import { scrapeSite, scrapePage, scrapeOpenApi } from './scrape.js';
|
|
23
25
|
import { update } from './update.js';
|
|
24
26
|
export const cli = ({ packageName = 'mint' }) => {
|
|
25
27
|
render(_jsx(Logs, {}));
|
|
@@ -290,6 +292,59 @@ export const cli = ({ packageName = 'mint' }) => {
|
|
|
290
292
|
yield terminate(1);
|
|
291
293
|
}
|
|
292
294
|
}))
|
|
295
|
+
.command('scrape', 'Scrape documentation from external sites', (yargs) => yargs
|
|
296
|
+
.command(['$0 <url>', 'site <url>'], 'Scrape an entire documentation site', (yargs) => yargs
|
|
297
|
+
.positional('url', {
|
|
298
|
+
describe: 'The URL of the documentation site to scrape',
|
|
299
|
+
type: 'string',
|
|
300
|
+
demandOption: true,
|
|
301
|
+
})
|
|
302
|
+
.option('filter', {
|
|
303
|
+
describe: 'Only scrape URLs matching this path filter (e.g. /docs will match /docs and /docs/*)',
|
|
304
|
+
type: 'string',
|
|
305
|
+
alias: 'f',
|
|
306
|
+
})
|
|
307
|
+
.check(checkUrl), (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, filter }) {
|
|
308
|
+
yield scrapeSite(url, filter);
|
|
309
|
+
}))
|
|
310
|
+
.command('page <url>', 'Scrape a single documentation page', (yargs) => yargs
|
|
311
|
+
.positional('url', {
|
|
312
|
+
describe: 'The URL of the documentation page to scrape',
|
|
313
|
+
type: 'string',
|
|
314
|
+
demandOption: true,
|
|
315
|
+
})
|
|
316
|
+
.check(checkUrl), (_a) => __awaiter(void 0, [_a], void 0, function* ({ url }) {
|
|
317
|
+
yield scrapePage(url);
|
|
318
|
+
}))
|
|
319
|
+
.command('openapi <openapiLocation>', 'Generate MDX files from an OpenAPI spec', (yargs) => yargs
|
|
320
|
+
.positional('openapiLocation', {
|
|
321
|
+
describe: 'The filename or URL location of the OpenAPI spec',
|
|
322
|
+
type: 'string',
|
|
323
|
+
demandOption: true,
|
|
324
|
+
})
|
|
325
|
+
.option('writeFiles', {
|
|
326
|
+
describe: 'Whether or not to write the frontmatter files',
|
|
327
|
+
default: true,
|
|
328
|
+
type: 'boolean',
|
|
329
|
+
alias: 'w',
|
|
330
|
+
})
|
|
331
|
+
.option('outDir', {
|
|
332
|
+
describe: 'The folder in which to write any created frontmatter files',
|
|
333
|
+
type: 'string',
|
|
334
|
+
alias: 'o',
|
|
335
|
+
})
|
|
336
|
+
.option('overwrite', {
|
|
337
|
+
describe: 'Whether or not to overwrite existing files',
|
|
338
|
+
default: false,
|
|
339
|
+
type: 'boolean',
|
|
340
|
+
}), (argv) => __awaiter(void 0, void 0, void 0, function* () {
|
|
341
|
+
yield scrapeOpenApi({
|
|
342
|
+
openapiLocation: argv.openapiLocation,
|
|
343
|
+
writeFiles: argv.writeFiles,
|
|
344
|
+
outDir: argv.outDir,
|
|
345
|
+
overwrite: argv.overwrite,
|
|
346
|
+
});
|
|
347
|
+
})))
|
|
293
348
|
// Print the help menu when the user enters an invalid command.
|
|
294
349
|
.strictCommands()
|
|
295
350
|
.demandCommand(1, 'unknown command. see above for the list of supported commands.')
|
package/bin/scrape.js
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
 * Compiler-emitted helper that drives a generator function as if it were an
 * async function: every yielded value is awaited and fed back into the
 * generator, and the returned promise settles with the generator's outcome.
 *
 * @param {*} thisArg - `this` value used when invoking `generator`.
 * @param {ArrayLike<*>|undefined} _arguments - Arguments forwarded to `generator`.
 * @param {PromiseConstructor|undefined} P - Promise implementation; defaults to the global `Promise`.
 * @param {GeneratorFunction} generator - Generator function whose `yield`s act as awaits.
 * @returns {Promise<*>} Resolves with the generator's return value, or rejects
 *   with whatever it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap plain values so that `.then` can be used uniformly on every yield.
    const toPromise = (value) => (value instanceof P ? value : new P((resolve) => { resolve(value); }));
    return new (P || (P = Promise))((resolve, reject) => {
        // Settle the outer promise when the generator is finished; otherwise
        // wait for the yielded value and resume the generator with its result.
        const advance = (result) => {
            if (result.done) {
                resolve(result.value);
            }
            else {
                toPromise(result.value).then(onFulfilled, onRejected);
            }
        };
        // Resume with the awaited value; anything the generator throws rejects the outer promise.
        const onFulfilled = (value) => {
            try {
                advance(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        // Re-throw the rejection inside the generator so its own try/catch can handle it.
        const onRejected = (reason) => {
            try {
                advance(generator["throw"](reason));
            }
            catch (e) {
                reject(e);
            }
        };
        // Instantiate the generator and run it up to its first yield.
        advance((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
|
|
10
|
+
import { jsx as _jsx } from "react/jsx-runtime";
|
|
11
|
+
import { addLog, ErrorLog, SuccessLog, SpinnerLog, InfoLog } from '@mintlify/previewing';
|
|
12
|
+
import { scrapePageGroup, scrapeAllSiteTabs, htmlToHast, detectFramework, framework, fetchPageHtml, write, getErrorMessage, generateOpenApiPages, FINAL_SUCCESS_MESSAGE, } from '@mintlify/scraping';
|
|
13
|
+
import { upgradeToDocsConfig } from '@mintlify/validation';
|
|
14
|
+
import { terminate } from './helpers.js';
|
|
15
|
+
/**
 * Scrape a whole documentation site and write the resulting config to `docs.json`.
 *
 * Fetches the HTML at `url`, hands it to `scrapeAllSiteTabs` together with the
 * optional path `filter`, and on success upgrades the returned config with
 * `upgradeToDocsConfig`, sets its theme to 'aspen' and writes it out as
 * pretty-printed JSON. Progress and errors are reported through `addLog`.
 *
 * @param {string} url - Absolute URL of the site; parsed with `new URL`.
 * @param {string} [filter] - Path filter forwarded to `scrapeAllSiteTabs`.
 * @returns {Promise<void>}
 */
export function scrapeSite(url, filter) {
    return __awaiter(this, void 0, void 0, function* () {
        try {
            const target = new URL(url);
            addLog(_jsx(SpinnerLog, { message: `Fetching ${target.href}...` }));
            const html = yield fetchPageHtml(target);
            addLog(_jsx(SuccessLog, { message: `Successfully retrieved HTML from ${target.href}` }));
            addLog(_jsx(SpinnerLog, { message: 'Scraping site...' }));
            const outcome = yield scrapeAllSiteTabs(html, target, { filter });
            if (!outcome.success) {
                addLog(_jsx(ErrorLog, { message: outcome.message }));
                // NOTE(review): presumably `terminate` ends the process; if it
                // returned, control would continue to `terminate(0)` below — confirm.
                yield terminate(1);
            }
            else {
                const upgraded = upgradeToDocsConfig(outcome.data, { shouldUpgradeTheme: true });
                // The scraped config always ends up with the 'aspen' theme,
                // whatever the upgrade step selected.
                upgraded.theme = 'aspen';
                write('docs.json', JSON.stringify(upgraded, undefined, 2));
                addLog(_jsx(SuccessLog, { message: FINAL_SUCCESS_MESSAGE }));
            }
            yield terminate(0);
        }
        catch (error) {
            // Anything thrown above (invalid URL, fetch failure, ...) is logged
            // and turned into a non-zero exit.
            addLog(_jsx(ErrorLog, { message: getErrorMessage(error) }));
            yield terminate(1);
        }
    });
}
|
|
46
|
+
/**
 * Scrape a single documentation page.
 *
 * Fetches the HTML at `url`, runs `detectFramework` on its HAST so that the
 * shared `framework` object can be inspected, and then scrapes the page via
 * `scrapePageGroup`. The browser flag passed to `scrapePageGroup` is set only
 * when the detected vendor is 'gitbook'. Progress and errors are reported
 * through `addLog`.
 *
 * @param {string} url - Absolute URL of the page; parsed with `new URL`.
 * @returns {Promise<void>}
 */
export function scrapePage(url) {
    return __awaiter(this, void 0, void 0, function* () {
        try {
            const target = new URL(url);
            addLog(_jsx(SpinnerLog, { message: `Fetching ${target.href}...` }));
            const html = yield fetchPageHtml(target);
            addLog(_jsx(SuccessLog, { message: `Successfully retrieved HTML from ${target.href}` }));
            // `detectFramework` is called for its side effect; the result is
            // read from the imported `framework` object on the next line.
            detectFramework(htmlToHast(html));
            const needsBrowser = framework.vendor === 'gitbook';
            addLog(_jsx(SpinnerLog, { message: 'Scraping page...' }));
            const pages = yield scrapePageGroup([target], needsBrowser);
            let outcome = pages[0];
            if (!outcome) {
                // No entry came back for the single requested page.
                outcome = {
                    success: false,
                    message: `An unknown error occurred when scraping ${url}`,
                };
            }
            if (!outcome.success) {
                addLog(_jsx(ErrorLog, { message: outcome.message }));
                // NOTE(review): presumably `terminate` ends the process; if it
                // returned, control would continue to `terminate(0)` below — confirm.
                yield terminate(1);
            }
            else {
                // NOTE(review): `data[1]` looks like the output location of the
                // scraped page — confirm against `scrapePageGroup`.
                const destination = outcome.data ? `into ${outcome.data[1]}` : '';
                addLog(_jsx(SuccessLog, { message: `Successfully scraped ${url} ${destination}` }));
            }
            yield terminate(0);
        }
        catch (error) {
            addLog(_jsx(ErrorLog, { message: getErrorMessage(error) }));
            yield terminate(1);
        }
    });
}
|
|
78
|
+
/**
 * Generate pages from an OpenAPI spec and print configuration suggestions.
 *
 * Delegates to `generateOpenApiPages`, then logs the returned navigation
 * object as JSON. When the generator reports that the location is a URL, an
 * `openapi:` location suggestion is logged as well.
 *
 * @param {object} options
 * @param {string} options.openapiLocation - Filename or URL of the OpenAPI spec.
 * @param {boolean} [options.writeFiles] - Forwarded to `generateOpenApiPages`.
 * @param {string} [options.outDir] - Forwarded to `generateOpenApiPages`.
 * @param {boolean} [options.overwrite] - Forwarded to `generateOpenApiPages`.
 * @returns {Promise<void>}
 */
export function scrapeOpenApi(_a) {
    return __awaiter(this, arguments, void 0, function* ({ openapiLocation, writeFiles, outDir, overwrite, }) {
        // Small local shortcut for the informational log lines below.
        const info = (message) => addLog(_jsx(InfoLog, { message }));
        try {
            addLog(_jsx(SpinnerLog, { message: `Processing OpenAPI spec from ${openapiLocation}...` }));
            const { nav, isUrl } = yield generateOpenApiPages(openapiLocation, {
                openApiFilePath: undefined,
                version: undefined,
                writeFiles,
                outDir,
                overwrite,
            });
            addLog(_jsx(SuccessLog, { message: 'Successfully generated OpenAPI pages' }));
            info('Navigation object suggestion:');
            info(JSON.stringify(nav, undefined, 2));
            if (isUrl) {
                info('OpenAPI location suggestion:');
                info(`openapi: ${openapiLocation}`);
            }
            yield terminate(0);
        }
        catch (error) {
            // Non-Error throwables are stringified so that something is always logged.
            const text = error instanceof Error ? error.message : String(error);
            addLog(_jsx(ErrorLog, { message: text }));
            yield terminate(1);
        }
    });
}
|