npm - @mintlify/cli - Versions diffs - 4.0.953 → 4.0.954 - Mend

@mintlify/cli 4.0.953 → 4.0.954

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (6)

package/bin/cli.js +55 -0
package/bin/scrape.js +108 -0
package/bin/tsconfig.build.tsbuildinfo +1 -1
package/package.json +6 -5
package/src/cli.tsx +76 -0
package/src/scrape.tsx +122 -0

package/bin/cli.js CHANGED

@@ -11,6 +11,7 @@ import { jsx as _jsx, jsxs as _jsxs } from "react/jsx-runtime";
 import { validate, getOpenApiDocumentFromUrl, isAllowedLocalSchemaUrl } from '@mintlify/common';
 import { getBrokenInternalLinks, renameFilesAndUpdateLinksInContent } from '@mintlify/link-rot';
 import { addLog, dev, validateBuild, ErrorLog, SpinnerLog, SuccessLog, Logs, clearLogs, BrokenLinksLog, WarningLog, } from '@mintlify/previewing';
+import { checkUrl } from '@mintlify/scraping';
 import { render, Text } from 'ink';
 import path from 'path';
 import yargs from 'yargs';
@@ -20,6 +21,7 @@ import { checkPort, checkForMintJson, checkNodeVersion, upgradeConfig, checkForD
 import { init } from './init.js';
 import { mdxLinter } from './mdxLinter.js';
 import { migrateMdx } from './migrateMdx.js';
+import { scrapeSite, scrapePage, scrapeOpenApi } from './scrape.js';
 import { update } from './update.js';
 export const cli = ({ packageName = 'mint' }) => {
     render(_jsx(Logs, {}));
@@ -290,6 +292,59 @@ export const cli = ({ packageName = 'mint' }) => {
             yield terminate(1);
         }
     }))
+        .command('scrape', 'Scrape documentation from external sites', (yargs) => yargs
+        .command(['$0 <url>', 'site <url>'], 'Scrape an entire documentation site', (yargs) => yargs
+        .positional('url', {
+        describe: 'The URL of the documentation site to scrape',
+        type: 'string',
+        demandOption: true,
+    })
+        .option('filter', {
+        describe: 'Only scrape URLs matching this path filter (e.g. /docs will match /docs and /docs/*)',
+        type: 'string',
+        alias: 'f',
+    })
+        .check(checkUrl), (_a) => __awaiter(void 0, [_a], void 0, function* ({ url, filter }) {
+        yield scrapeSite(url, filter);
+    }))
+        .command('page <url>', 'Scrape a single documentation page', (yargs) => yargs
+        .positional('url', {
+        describe: 'The URL of the documentation page to scrape',
+        type: 'string',
+        demandOption: true,
+    })
+        .check(checkUrl), (_a) => __awaiter(void 0, [_a], void 0, function* ({ url }) {
+        yield scrapePage(url);
+    }))
+        .command('openapi <openapiLocation>', 'Generate MDX files from an OpenAPI spec', (yargs) => yargs
+        .positional('openapiLocation', {
+        describe: 'The filename or URL location of the OpenAPI spec',
+        type: 'string',
+        demandOption: true,
+    })
+        .option('writeFiles', {
+        describe: 'Whether or not to write the frontmatter files',
+        default: true,
+        type: 'boolean',
+        alias: 'w',
+    })
+        .option('outDir', {
+        describe: 'The folder in which to write any created frontmatter files',
+        type: 'string',
+        alias: 'o',
+    })
+        .option('overwrite', {
+        describe: 'Whether or not to overwrite existing files',
+        default: false,
+        type: 'boolean',
+    }), (argv) => __awaiter(void 0, void 0, void 0, function* () {
+        yield scrapeOpenApi({
+            openapiLocation: argv.openapiLocation,
+            writeFiles: argv.writeFiles,
+            outDir: argv.outDir,
+            overwrite: argv.overwrite,
+        });
+    })))
         // Print the help menu when the user enters an invalid command.
         .strictCommands()
         .demandCommand(1, 'unknown command. see above for the list of supported commands.')

package/bin/scrape.js ADDED

@@ -0,0 +1,108 @@
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
+import { jsx as _jsx } from "react/jsx-runtime";
+import { addLog, ErrorLog, SuccessLog, SpinnerLog, InfoLog } from '@mintlify/previewing';
+import { scrapePageGroup, scrapeAllSiteTabs, htmlToHast, detectFramework, framework, fetchPageHtml, write, getErrorMessage, generateOpenApiPages, FINAL_SUCCESS_MESSAGE, } from '@mintlify/scraping';
+import { upgradeToDocsConfig } from '@mintlify/validation';
+import { terminate } from './helpers.js';
+export function scrapeSite(url, filter) {
+    return __awaiter(this, void 0, void 0, function* () {
+        try {
+            const urlObj = new URL(url);
+            addLog(_jsx(SpinnerLog, { message: `Fetching ${urlObj.toString()}...` }));
+            const html = yield fetchPageHtml(urlObj);
+            addLog(_jsx(SuccessLog, { message: `Successfully retrieved HTML from ${urlObj.toString()}` }));
+            addLog(_jsx(SpinnerLog, { message: "Scraping site..." }));
+            const result = yield scrapeAllSiteTabs(html, urlObj, { filter });
+            if (result.success) {
+                const mintConfig = result.data;
+                const docsConfig = upgradeToDocsConfig(mintConfig, {
+                    shouldUpgradeTheme: true,
+                });
+                docsConfig.theme = 'aspen';
+                write('docs.json', JSON.stringify(docsConfig, undefined, 2));
+                addLog(_jsx(SuccessLog, { message: FINAL_SUCCESS_MESSAGE }));
+            }
+            else {
+                addLog(_jsx(ErrorLog, { message: result.message }));
+                yield terminate(1);
+            }
+            yield terminate(0);
+        }
+        catch (error) {
+            const errorMessage = getErrorMessage(error);
+            addLog(_jsx(ErrorLog, { message: errorMessage }));
+            yield terminate(1);
+        }
+    });
+}
+export function scrapePage(url) {
+    return __awaiter(this, void 0, void 0, function* () {
+        try {
+            const urlObj = new URL(url);
+            addLog(_jsx(SpinnerLog, { message: `Fetching ${urlObj.toString()}...` }));
+            const html = yield fetchPageHtml(urlObj);
+            addLog(_jsx(SuccessLog, { message: `Successfully retrieved HTML from ${urlObj.toString()}` }));
+            const hast = htmlToHast(html);
+            detectFramework(hast);
+            const needsBrowser = framework.vendor === 'gitbook';
+            addLog(_jsx(SpinnerLog, { message: "Scraping page..." }));
+            const results = yield scrapePageGroup([urlObj], needsBrowser);
+            const result = results[0] || {
+                success: false,
+                message: `An unknown error occurred when scraping ${url}`,
+            };
+            if (result.success) {
+                addLog(_jsx(SuccessLog, { message: `Successfully scraped ${url} ${result.data ? `into ${result.data[1]}` : ''}` }));
+            }
+            else {
+                addLog(_jsx(ErrorLog, { message: result.message }));
+                yield terminate(1);
+            }
+            yield terminate(0);
+        }
+        catch (error) {
+            const errorMessage = getErrorMessage(error);
+            addLog(_jsx(ErrorLog, { message: errorMessage }));
+            yield terminate(1);
+        }
+    });
+}
+export function scrapeOpenApi(_a) {
+    return __awaiter(this, arguments, void 0, function* ({ openapiLocation, writeFiles, outDir, overwrite, }) {
+        try {
+            addLog(_jsx(SpinnerLog, { message: `Processing OpenAPI spec from ${openapiLocation}...` }));
+            const { nav, isUrl } = yield generateOpenApiPages(openapiLocation, {
+                openApiFilePath: undefined,
+                version: undefined,
+                writeFiles,
+                outDir,
+                overwrite,
+            });
+            addLog(_jsx(SuccessLog, { message: "Successfully generated OpenAPI pages" }));
+            addLog(_jsx(InfoLog, { message: "Navigation object suggestion:" }));
+            addLog(_jsx(InfoLog, { message: JSON.stringify(nav, undefined, 2) }));
+            if (isUrl) {
+                addLog(_jsx(InfoLog, { message: "OpenAPI location suggestion:" }));
+                addLog(_jsx(InfoLog, { message: `openapi: ${openapiLocation}` }));
+            }
+            yield terminate(0);
+        }
+        catch (error) {
+            if (error instanceof Error) {
+                addLog(_jsx(ErrorLog, { message: error.message }));
+            }
+            else {
+                addLog(_jsx(ErrorLog, { message: String(error) }));
+            }
+            yield terminate(1);
+        }
+    });
+}