@mintlify/cli 4.0.953 → 4.0.955

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/cli.js CHANGED
@@ -11,6 +11,7 @@ import { jsx as _jsx, jsxs as _jsxs } from "react/jsx-runtime";
11
11
  import { validate, getOpenApiDocumentFromUrl, isAllowedLocalSchemaUrl } from '@mintlify/common';
12
12
  import { getBrokenInternalLinks, renameFilesAndUpdateLinksInContent } from '@mintlify/link-rot';
13
13
  import { addLog, dev, validateBuild, ErrorLog, SpinnerLog, SuccessLog, Logs, clearLogs, BrokenLinksLog, WarningLog, } from '@mintlify/previewing';
14
+ import { checkUrl } from '@mintlify/scraping';
14
15
  import { render, Text } from 'ink';
15
16
  import path from 'path';
16
17
  import yargs from 'yargs';
@@ -20,6 +21,7 @@ import { checkPort, checkForMintJson, checkNodeVersion, upgradeConfig, checkForD
20
21
  import { init } from './init.js';
21
22
  import { mdxLinter } from './mdxLinter.js';
22
23
  import { migrateMdx } from './migrateMdx.js';
24
+ import { scrapeSite, scrapePage, scrapeOpenApi } from './scrape.js';
23
25
  import { update } from './update.js';
24
26
  export const cli = ({ packageName = 'mint' }) => {
25
27
  render(_jsx(Logs, {}));
@@ -290,6 +292,59 @@ export const cli = ({ packageName = 'mint' }) => {
290
292
  yield terminate(1);
291
293
  }
292
294
  }))
295
+ .command('scrape', 'Scrape documentation from external sites', (yargs) => yargs
296
+ .command(['$0 <url>', 'site <url>'], 'Scrape an entire documentation site', (yargs) => yargs
297
+ .positional('url', {
298
+ describe: 'The URL of the documentation site to scrape',
299
+ type: 'string',
300
+ demandOption: true,
301
+ })
302
+ .option('filter', {
303
+ describe: 'Only scrape URLs matching this path filter (e.g. /docs will match /docs and /docs/*)',
304
+ type: 'string',
305
+ alias: 'f',
306
+ })
307
+ .check(checkUrl), (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, filter }) {
308
+ yield scrapeSite(url, filter);
309
+ }))
310
+ .command('page <url>', 'Scrape a single documentation page', (yargs) => yargs
311
+ .positional('url', {
312
+ describe: 'The URL of the documentation page to scrape',
313
+ type: 'string',
314
+ demandOption: true,
315
+ })
316
+ .check(checkUrl), (_a) => __awaiter(void 0, [_a], void 0, function* ({ url }) {
317
+ yield scrapePage(url);
318
+ }))
319
+ .command('openapi <openapiLocation>', 'Generate MDX files from an OpenAPI spec', (yargs) => yargs
320
+ .positional('openapiLocation', {
321
+ describe: 'The filename or URL location of the OpenAPI spec',
322
+ type: 'string',
323
+ demandOption: true,
324
+ })
325
+ .option('writeFiles', {
326
+ describe: 'Whether or not to write the frontmatter files',
327
+ default: true,
328
+ type: 'boolean',
329
+ alias: 'w',
330
+ })
331
+ .option('outDir', {
332
+ describe: 'The folder in which to write any created frontmatter files',
333
+ type: 'string',
334
+ alias: 'o',
335
+ })
336
+ .option('overwrite', {
337
+ describe: 'Whether or not to overwrite existing files',
338
+ default: false,
339
+ type: 'boolean',
340
+ }), (argv) => __awaiter(void 0, void 0, void 0, function* () {
341
+ yield scrapeOpenApi({
342
+ openapiLocation: argv.openapiLocation,
343
+ writeFiles: argv.writeFiles,
344
+ outDir: argv.outDir,
345
+ overwrite: argv.overwrite,
346
+ });
347
+ })))
293
348
  // Print the help menu when the user enters an invalid command.
294
349
  .strictCommands()
295
350
  .demandCommand(1, 'unknown command. see above for the list of supported commands.')
package/bin/scrape.js ADDED
@@ -0,0 +1,108 @@
1
/**
 * Compiler-emitted helper that runs a generator function as an asynchronous
 * routine: each `yield`ed value is awaited and its settled result is fed back
 * into the generator.
 *
 * @param {*} thisArg - `this` binding used when the generator function is invoked.
 * @param {ArrayLike<*>|undefined} _arguments - Arguments forwarded to the generator function.
 * @param {PromiseConstructor|undefined} P - Promise constructor to use; falls back to `Promise`.
 * @param {GeneratorFunction} generator - Generator function holding the routine's body.
 * @returns {Promise<*>} Resolves with the generator's return value, rejects with anything it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Pick the promise implementation once; every wrapper below shares it.
    const PromiseCtor = P || Promise;
    // Plain values are wrapped so that `.then` can be called on every yielded value.
    const adopt = (value) => (value instanceof PromiseCtor
        ? value
        : new PromiseCtor((resolve) => { resolve(value); }));
    return new PromiseCtor((resolve, reject) => {
        const step = (result) => {
            if (result.done) {
                resolve(result.value);
            }
            else {
                adopt(result.value).then(onFulfilled, onRejected);
            }
        };
        // Resume the generator with the awaited value.
        const onFulfilled = (value) => {
            try {
                step(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        // Surface the rejection inside the generator so its own try/catch can handle it.
        const onRejected = (value) => {
            try {
                step(generator["throw"](value));
            }
            catch (e) {
                reject(e);
            }
        };
        // From here on `generator` holds the iterator, not the generator function.
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
10
+ import { jsx as _jsx } from "react/jsx-runtime";
11
+ import { addLog, ErrorLog, SuccessLog, SpinnerLog, InfoLog } from '@mintlify/previewing';
12
+ import { scrapePageGroup, scrapeAllSiteTabs, htmlToHast, detectFramework, framework, fetchPageHtml, write, getErrorMessage, generateOpenApiPages, FINAL_SUCCESS_MESSAGE, } from '@mintlify/scraping';
13
+ import { upgradeToDocsConfig } from '@mintlify/validation';
14
+ import { terminate } from './helpers.js';
15
/**
 * Scrape a whole documentation site and write the resulting config to `docs.json`.
 *
 * Fetches the HTML at `url`, hands it to `scrapeAllSiteTabs`, upgrades the
 * returned config with `upgradeToDocsConfig`, forces the `aspen` theme and
 * writes the result via `write`. Progress and errors are reported with `addLog`.
 *
 * @param {string} url - Absolute URL of the site; parsed with `new URL`, so an invalid value is reported through the catch path.
 * @param {string|undefined} filter - Optional path filter forwarded to `scrapeAllSiteTabs`.
 * @returns {Promise<void>} Settles after `terminate` has been requested with 0 (success) or 1 (failure).
 */
export function scrapeSite(url, filter) {
    return __awaiter(this, void 0, void 0, function* () {
        try {
            const urlObj = new URL(url);
            addLog(_jsx(SpinnerLog, { message: `Fetching ${urlObj.toString()}...` }));
            const html = yield fetchPageHtml(urlObj);
            addLog(_jsx(SuccessLog, { message: `Successfully retrieved HTML from ${urlObj.toString()}` }));
            addLog(_jsx(SpinnerLog, { message: "Scraping site..." }));
            const result = yield scrapeAllSiteTabs(html, urlObj, { filter });
            if (!result.success) {
                addLog(_jsx(ErrorLog, { message: result.message }));
                yield terminate(1);
                // `terminate` lives in ./helpers.js and is not visible here; presumably it
                // ends the process. Returning explicitly guarantees that a failed scrape
                // can never continue on to the success exit below if it ever resolves.
                return;
            }
            const mintConfig = result.data;
            const docsConfig = upgradeToDocsConfig(mintConfig, {
                shouldUpgradeTheme: true,
            });
            // The theme is overridden after the upgrade, regardless of what it produced.
            docsConfig.theme = 'aspen';
            write('docs.json', JSON.stringify(docsConfig, undefined, 2));
            addLog(_jsx(SuccessLog, { message: FINAL_SUCCESS_MESSAGE }));
            yield terminate(0);
        }
        catch (error) {
            const errorMessage = getErrorMessage(error);
            addLog(_jsx(ErrorLog, { message: errorMessage }));
            yield terminate(1);
        }
    });
}
46
/**
 * Scrape a single documentation page.
 *
 * Fetches the HTML at `url`, runs `detectFramework` on its HAST, and then
 * scrapes the page through `scrapePageGroup`. A browser is requested only when
 * the detected `framework.vendor` is `'gitbook'`. Progress and errors are
 * reported with `addLog`.
 *
 * @param {string} url - Absolute URL of the page; parsed with `new URL`, so an invalid value is reported through the catch path.
 * @returns {Promise<void>} Settles after `terminate` has been requested with 0 (success) or 1 (failure).
 */
export function scrapePage(url) {
    return __awaiter(this, void 0, void 0, function* () {
        try {
            const urlObj = new URL(url);
            addLog(_jsx(SpinnerLog, { message: `Fetching ${urlObj.toString()}...` }));
            const html = yield fetchPageHtml(urlObj);
            addLog(_jsx(SuccessLog, { message: `Successfully retrieved HTML from ${urlObj.toString()}` }));
            const hast = htmlToHast(html);
            // `framework` is read right after this call; presumably detectFramework
            // populates that shared object — confirm in @mintlify/scraping.
            detectFramework(hast);
            const needsBrowser = framework.vendor === 'gitbook';
            addLog(_jsx(SpinnerLog, { message: "Scraping page..." }));
            const results = yield scrapePageGroup([urlObj], needsBrowser);
            // Only one URL was submitted; a missing first entry is treated as a failure.
            const result = results[0] || {
                success: false,
                message: `An unknown error occurred when scraping ${url}`,
            };
            if (!result.success) {
                addLog(_jsx(ErrorLog, { message: result.message }));
                yield terminate(1);
                // `terminate` lives in ./helpers.js and is not visible here; presumably it
                // ends the process. Returning explicitly guarantees that a failed scrape
                // can never continue on to the success exit below if it ever resolves.
                return;
            }
            addLog(_jsx(SuccessLog, { message: `Successfully scraped ${url} ${result.data ? `into ${result.data[1]}` : ''}` }));
            yield terminate(0);
        }
        catch (error) {
            const errorMessage = getErrorMessage(error);
            addLog(_jsx(ErrorLog, { message: errorMessage }));
            yield terminate(1);
        }
    });
}
78
/**
 * Generate pages from an OpenAPI spec and log a suggested navigation object.
 *
 * Delegates to `generateOpenApiPages`, then logs the returned `nav` as JSON.
 * When the result's `isUrl` flag is truthy it also logs an `openapi:`
 * suggestion containing the given location.
 *
 * @param {object} _a - Options object.
 * @param {string} _a.openapiLocation - Filename or URL of the OpenAPI spec.
 * @param {boolean} _a.writeFiles - Forwarded to `generateOpenApiPages`.
 * @param {string|undefined} _a.outDir - Forwarded to `generateOpenApiPages`.
 * @param {boolean} _a.overwrite - Forwarded to `generateOpenApiPages`.
 * @returns {Promise<void>} Settles after `terminate` has been requested with 0 (success) or 1 (failure).
 */
export function scrapeOpenApi(_a) {
    return __awaiter(this, arguments, void 0, function* ({ openapiLocation, writeFiles, outDir, overwrite, }) {
        // Small local shorthand for emitting informational log lines.
        const logInfo = (message) => addLog(_jsx(InfoLog, { message }));
        try {
            addLog(_jsx(SpinnerLog, { message: `Processing OpenAPI spec from ${openapiLocation}...` }));
            const generationOptions = {
                openApiFilePath: undefined,
                version: undefined,
                writeFiles,
                outDir,
                overwrite,
            };
            const generated = yield generateOpenApiPages(openapiLocation, generationOptions);
            const suggestedNav = generated.nav;
            const locationIsUrl = generated.isUrl;
            addLog(_jsx(SuccessLog, { message: "Successfully generated OpenAPI pages" }));
            logInfo("Navigation object suggestion:");
            logInfo(JSON.stringify(suggestedNav, undefined, 2));
            if (locationIsUrl) {
                logInfo("OpenAPI location suggestion:");
                logInfo(`openapi: ${openapiLocation}`);
            }
            yield terminate(0);
        }
        catch (error) {
            // Non-Error throwables are stringified so something is always shown.
            const failureText = error instanceof Error ? error.message : String(error);
            addLog(_jsx(ErrorLog, { message: failureText }));
            yield terminate(1);
        }
    });
}