mintlify 1.0.6 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -3
- package/bin/browser.js +24 -0
- package/bin/browser.js.map +1 -0
- package/bin/constants.js +8 -0
- package/bin/constants.js.map +1 -0
- package/bin/dev/getOpenApiContext.js +46 -0
- package/bin/dev/getOpenApiContext.js.map +1 -0
- package/bin/dev/index.js +164 -0
- package/bin/dev/index.js.map +1 -0
- package/bin/dev/injectNav.js +97 -0
- package/bin/dev/injectNav.js.map +1 -0
- package/bin/dev/slugToTitle.js +8 -0
- package/bin/dev/slugToTitle.js.map +1 -0
- package/bin/downloadImage.js +27 -0
- package/bin/downloadImage.js.map +1 -0
- package/bin/index.js +49 -106
- package/bin/index.js.map +1 -1
- package/bin/init-command/index.js +51 -0
- package/bin/init-command/index.js.map +1 -0
- package/bin/init-command/templates.js +41 -0
- package/bin/init-command/templates.js.map +1 -0
- package/bin/local-preview/categorizeFiles.js +56 -0
- package/bin/local-preview/categorizeFiles.js.map +1 -0
- package/bin/local-preview/getOpenApiContext.js +46 -0
- package/bin/local-preview/getOpenApiContext.js.map +1 -0
- package/bin/local-preview/index.js +138 -0
- package/bin/local-preview/index.js.map +1 -0
- package/bin/local-preview/injectFavicons.js +72 -0
- package/bin/local-preview/injectFavicons.js.map +1 -0
- package/bin/local-preview/listener.js +112 -0
- package/bin/local-preview/listener.js.map +1 -0
- package/bin/local-preview/metadata.js +121 -0
- package/bin/local-preview/metadata.js.map +1 -0
- package/bin/local-preview/mintConfigFile.js +43 -0
- package/bin/local-preview/mintConfigFile.js.map +1 -0
- package/bin/local-preview/openApiCheck.js +16 -0
- package/bin/local-preview/openApiCheck.js.map +1 -0
- package/bin/local-preview/slugToTitle.js +8 -0
- package/bin/local-preview/slugToTitle.js.map +1 -0
- package/bin/mint/client/.babel-plugin-macrosrc.json +5 -0
- package/bin/mint/client/.babelrc +4 -0
- package/bin/mint/client/.editorconfig +12 -0
- package/bin/mint/client/.eslintrc.json +7 -0
- package/bin/mint/client/.prettierignore +4 -0
- package/bin/mint/client/.prettierrc +14 -0
- package/bin/mint/client/.vscode/launch.json +28 -0
- package/bin/mint/client/README.md +46 -0
- package/bin/mint/client/jest.config.ts +195 -0
- package/bin/mint/client/next-env.d.ts +4 -0
- package/bin/mint/client/next.config.js +152 -0
- package/bin/mint/client/package.json +140 -0
- package/bin/mint/client/postcss.config.cjs +9 -0
- package/bin/mint/client/prebuild/faviconConfig.js +35 -0
- package/bin/mint/client/prebuild/getOpenApiContext.js +53 -0
- package/bin/mint/client/prebuild/index.js +117 -0
- package/bin/mint/client/prebuild/injectNav.js +115 -0
- package/bin/mint/client/prebuild/slugToTitle.js +7 -0
- package/bin/mint/client/rehype/withApiComponents.js +60 -0
- package/bin/mint/client/rehype/withCodeBlocks.js +54 -0
- package/bin/mint/client/rehype/withLayouts.js +113 -0
- package/bin/mint/client/rehype/withLinkRoles.js +13 -0
- package/bin/mint/client/rehype/withRawComponents.js +13 -0
- package/bin/mint/client/rehype/withStaticProps.js +25 -0
- package/bin/mint/client/rehype/withSyntaxHighlighting.js +60 -0
- package/bin/mint/client/remark/utils.js +369 -0
- package/bin/mint/client/remark/withFrames.js +55 -0
- package/bin/mint/client/remark/withImportsInjected.js +36 -0
- package/bin/mint/client/remark/withNextLinks.js +37 -0
- package/bin/mint/client/remark/withTableOfContents.js +71 -0
- package/bin/mint/client/scripts/local-to-docs.js +72 -0
- package/bin/mint/client/scripts/local.js +177 -0
- package/bin/mint/client/sentry.client.config.js +15 -0
- package/bin/mint/client/sentry.properties +4 -0
- package/bin/mint/client/sentry.server.config.js +15 -0
- package/bin/mint/client/src/analytics/AbstractAnalyticsImplementation.ts +50 -0
- package/bin/mint/client/src/analytics/AnalyticsContext.ts +5 -0
- package/bin/mint/client/src/analytics/AnalyticsMediator.ts +101 -0
- package/bin/mint/client/src/analytics/FakeAnalyticsMediator.ts +9 -0
- package/bin/mint/client/src/analytics/GA4Script.tsx +33 -0
- package/bin/mint/client/src/analytics/implementations/amplitude.ts +26 -0
- package/bin/mint/client/src/analytics/implementations/fathom.ts +38 -0
- package/bin/mint/client/src/analytics/implementations/ga4.ts +33 -0
- package/bin/mint/client/src/analytics/implementations/hotjar.ts +53 -0
- package/bin/mint/client/src/analytics/implementations/mixpanel-browser.d.ts +1 -0
- package/bin/mint/client/src/analytics/implementations/mixpanel.ts +52 -0
- package/bin/mint/client/src/analytics/implementations/posthog.ts +37 -0
- package/bin/mint/client/src/components/Accordion/Accordion.tsx +43 -0
- package/bin/mint/client/src/components/Accordion/index.ts +4 -0
- package/bin/mint/client/src/components/ApiExample.tsx +9 -0
- package/bin/mint/client/src/components/Card.tsx +51 -0
- package/bin/mint/client/src/components/CodeGroup.tsx +132 -0
- package/bin/mint/client/src/components/Editor.tsx +12 -0
- package/bin/mint/client/src/components/Expandable.tsx +40 -0
- package/bin/mint/client/src/components/Heading.tsx +84 -0
- package/bin/mint/client/src/components/Param.tsx +56 -0
- package/bin/mint/client/src/components/Request.tsx +19 -0
- package/bin/mint/client/src/components/ResponseField.tsx +33 -0
- package/bin/mint/client/src/components/TabBar.tsx +61 -0
- package/bin/mint/client/src/config.ts +115 -0
- package/bin/mint/client/src/css/bar-of-progress.css +10 -0
- package/bin/mint/client/src/css/base.css +29 -0
- package/bin/mint/client/src/css/font-awesome.css +7 -0
- package/bin/mint/client/src/css/fonts.css +44 -0
- package/bin/mint/client/src/css/main.css +11 -0
- package/bin/mint/client/src/css/prism.css +270 -0
- package/bin/mint/client/src/css/utilities.css +43 -0
- package/bin/mint/client/src/enums/components.ts +8 -0
- package/bin/mint/client/src/fonts/FiraCode-VF.woff +0 -0
- package/bin/mint/client/src/fonts/FiraCode-VF.woff2 +0 -0
- package/bin/mint/client/src/fonts/IBMPlexMono-Regular.ttf +0 -0
- package/bin/mint/client/src/fonts/IBMPlexMono-SemiBold.ttf +0 -0
- package/bin/mint/client/src/fonts/Inter-italic-latin.var.woff2 +0 -0
- package/bin/mint/client/src/fonts/Inter-roman-latin.var.woff2 +0 -0
- package/bin/mint/client/src/fonts/Pally-Variable.ttf +0 -0
- package/bin/mint/client/src/fonts/SourceSansPro-Regular.otf +0 -0
- package/bin/mint/client/src/fonts/SourceSerifPro-Regular.ttf +0 -0
- package/bin/mint/client/src/fonts/Synonym-Variable.ttf +0 -0
- package/bin/mint/client/src/fonts/Ubuntu-Mono-bold.woff2 +0 -0
- package/bin/mint/client/src/fonts/generated/IBMPlexMono-Regular-subset.woff2 +0 -0
- package/bin/mint/client/src/fonts/generated/IBMPlexMono-Regular-subset.zopfli.woff +0 -0
- package/bin/mint/client/src/fonts/generated/IBMPlexMono-Regular.module.css +11 -0
- package/bin/mint/client/src/fonts/generated/IBMPlexMono-SemiBold-subset.woff2 +0 -0
- package/bin/mint/client/src/fonts/generated/IBMPlexMono-SemiBold-subset.zopfli.woff +0 -0
- package/bin/mint/client/src/fonts/generated/IBMPlexMono-SemiBold.module.css +11 -0
- package/bin/mint/client/src/fonts/generated/Pally-Variable-subset.woff2 +0 -0
- package/bin/mint/client/src/fonts/generated/Pally-Variable-subset.zopfli.woff +0 -0
- package/bin/mint/client/src/fonts/generated/Pally-Variable.module.css +11 -0
- package/bin/mint/client/src/fonts/generated/SourceSerifPro-Regular-subset.woff2 +0 -0
- package/bin/mint/client/src/fonts/generated/SourceSerifPro-Regular-subset.zopfli.woff +0 -0
- package/bin/mint/client/src/fonts/generated/SourceSerifPro-Regular.module.css +11 -0
- package/bin/mint/client/src/fonts/generated/Synonym-Variable-subset.woff2 +0 -0
- package/bin/mint/client/src/fonts/generated/Synonym-Variable-subset.zopfli.woff +0 -0
- package/bin/mint/client/src/fonts/generated/Synonym-Variable.module.css +11 -0
- package/bin/mint/client/src/fonts/generated/TenorSans-Regular-subset.woff2 +0 -0
- package/bin/mint/client/src/fonts/generated/TenorSans-Regular-subset.zopfli.woff +0 -0
- package/bin/mint/client/src/fonts/generated/TenorSans-Regular.module.css +11 -0
- package/bin/mint/client/src/hooks/useActionKey.ts +20 -0
- package/bin/mint/client/src/hooks/useIsomorphicLayoutEffect.ts +3 -0
- package/bin/mint/client/src/hooks/useMedia.ts +27 -0
- package/bin/mint/client/src/hooks/usePrevNext.ts +34 -0
- package/bin/mint/client/src/hooks/useTop.ts +15 -0
- package/bin/mint/client/src/icons/CopyToClipboard.tsx +33 -0
- package/bin/mint/client/src/index.d.ts +1 -0
- package/bin/mint/client/src/layouts/ApiSupplemental.tsx +173 -0
- package/bin/mint/client/src/layouts/ContentsLayout.tsx +256 -0
- package/bin/mint/client/src/layouts/DocumentationLayout.tsx +44 -0
- package/bin/mint/client/src/layouts/OpenApiContent.tsx +301 -0
- package/bin/mint/client/src/layouts/SidebarLayout.tsx +412 -0
- package/bin/mint/client/src/layouts/UserFeedback.tsx +73 -0
- package/bin/mint/client/src/layouts/getGroupsInDivision.ts +25 -0
- package/bin/mint/client/src/layouts/isPathInGroupPages.ts +10 -0
- package/bin/mint/client/src/metadata.ts +58 -0
- package/bin/mint/client/src/openapi.ts +3 -0
- package/bin/mint/client/src/pages/404.tsx +73 -0
- package/bin/mint/client/src/pages/_app.tsx +138 -0
- package/bin/mint/client/src/pages/_document.tsx +57 -0
- package/bin/mint/client/src/pages/api/issue.ts +10 -0
- package/bin/mint/client/src/pages/api/name.ts +8 -0
- package/bin/mint/client/src/pages/api/request.ts +31 -0
- package/bin/mint/client/src/pages/api/suggest.ts +10 -0
- package/bin/mint/client/src/pages/api/syntax-highlighted-json.ts +13 -0
- package/bin/mint/client/src/pages/api/utils.ts +6 -0
- package/bin/mint/client/src/pages/index.tsx +31 -0
- package/bin/mint/client/src/ui/Api.tsx +359 -0
- package/bin/mint/client/src/ui/Footer.tsx +124 -0
- package/bin/mint/client/src/ui/Header.tsx +370 -0
- package/bin/mint/client/src/ui/Logo.tsx +55 -0
- package/bin/mint/client/src/ui/PageHeader.tsx +51 -0
- package/bin/mint/client/src/ui/Search.tsx +386 -0
- package/bin/mint/client/src/ui/ThemeToggle.tsx +285 -0
- package/bin/mint/client/src/ui/Title.tsx +22 -0
- package/bin/mint/client/src/ui/TopLevelLink.tsx +122 -0
- package/bin/mint/client/src/utils/api.ts +252 -0
- package/bin/mint/client/src/utils/brands.ts +217 -0
- package/bin/mint/client/src/utils/castArray.ts +3 -0
- package/bin/mint/client/src/utils/childrenArray.ts +3 -0
- package/bin/mint/client/src/utils/fit.ts +27 -0
- package/bin/mint/client/src/utils/fontAwesome.ts +577 -0
- package/bin/mint/client/src/utils/getAnalyticsConfig.ts +14 -0
- package/bin/mint/client/src/utils/getLogoHref.ts +9 -0
- package/bin/mint/client/src/utils/getOpenApiContext.ts +26 -0
- package/bin/mint/client/src/utils/importAll.ts +6 -0
- package/bin/mint/client/src/utils/isObject.ts +3 -0
- package/bin/mint/client/src/utils/kebabToTitleCase.ts +3 -0
- package/bin/mint/client/src/utils/loadImage.ts +8 -0
- package/bin/mint/client/src/utils/slugToTitle.ts +7 -0
- package/bin/mint/client/src/utils/wait.ts +5 -0
- package/bin/mint/client/tailwind.config.cjs +323 -0
- package/bin/mint/client/test/test.test.ts +5 -0
- package/bin/mint/client/tsconfig.json +36 -0
- package/bin/mint/client/yarn.lock +9702 -0
- package/bin/navigation.js +4 -0
- package/bin/navigation.js.map +1 -0
- package/bin/pageTemplate.js +30 -0
- package/bin/pageTemplate.js.map +1 -0
- package/bin/scraping/combineNavWithEmptyGroupTitles.js +20 -0
- package/bin/scraping/combineNavWithEmptyGroupTitles.js.map +1 -0
- package/bin/scraping/detectFramework.js +25 -0
- package/bin/scraping/detectFramework.js.map +1 -0
- package/bin/scraping/downloadAllImages.js +57 -0
- package/bin/scraping/downloadAllImages.js.map +1 -0
- package/bin/scraping/getSitemapLinks.js +18 -0
- package/bin/scraping/getSitemapLinks.js.map +1 -0
- package/bin/scraping/replaceImagePaths.js +17 -0
- package/bin/scraping/replaceImagePaths.js.map +1 -0
- package/bin/scraping/scrapeFileGettingFileNameFromUrl.js +43 -0
- package/bin/scraping/scrapeFileGettingFileNameFromUrl.js.map +1 -0
- package/bin/scraping/scrapeGettingFileNameFromUrl.js +14 -0
- package/bin/scraping/scrapeGettingFileNameFromUrl.js.map +1 -0
- package/bin/scraping/scrapePage.js +9 -0
- package/bin/scraping/scrapePage.js.map +1 -0
- package/bin/scraping/scrapePageCommands.js +48 -0
- package/bin/scraping/scrapePageCommands.js.map +1 -0
- package/bin/scraping/scrapeSection.js +9 -0
- package/bin/scraping/scrapeSection.js.map +1 -0
- package/bin/scraping/scrapeSectionCommands.js +90 -0
- package/bin/scraping/scrapeSectionCommands.js.map +1 -0
- package/bin/scraping/site-scrapers/getLinksRecursively.js +33 -0
- package/bin/scraping/site-scrapers/getLinksRecursively.js.map +1 -0
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js +43 -0
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js.map +1 -0
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js +52 -0
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js.map +1 -0
- package/bin/{scrapeGitBookPage.js → scraping/site-scrapers/scrapeGitBookPage.js} +10 -5
- package/bin/scraping/site-scrapers/scrapeGitBookPage.js.map +1 -0
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js +51 -0
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js.map +1 -0
- package/bin/scraping/site-scrapers/scrapeReadMePage.js +35 -0
- package/bin/scraping/site-scrapers/scrapeReadMePage.js.map +1 -0
- package/bin/scraping/site-scrapers/scrapeReadMeSection.js +38 -0
- package/bin/scraping/site-scrapers/scrapeReadMeSection.js.map +1 -0
- package/bin/util.js +47 -8
- package/bin/util.js.map +1 -1
- package/bin/validation/isValidLink.js +11 -0
- package/bin/validation/isValidLink.js.map +1 -0
- package/bin/validation/stopIfInvalidLink.js +9 -0
- package/bin/validation/stopIfInvalidLink.js.map +1 -0
- package/package.json +21 -4
- package/src/browser.ts +24 -0
- package/src/constants.ts +10 -0
- package/src/downloadImage.ts +35 -0
- package/src/index.ts +111 -122
- package/src/init-command/index.ts +59 -0
- package/src/{templates.ts → init-command/templates.ts} +0 -0
- package/src/local-preview/categorizeFiles.ts +74 -0
- package/src/local-preview/getOpenApiContext.ts +61 -0
- package/src/local-preview/index.ts +164 -0
- package/src/local-preview/injectFavicons.ts +76 -0
- package/src/local-preview/listener.ts +116 -0
- package/src/local-preview/metadata.ts +154 -0
- package/src/local-preview/mintConfigFile.ts +48 -0
- package/src/local-preview/openApiCheck.ts +19 -0
- package/src/local-preview/slugToTitle.ts +7 -0
- package/src/navigation.ts +12 -0
- package/src/pageTemplate.ts +32 -0
- package/src/scraping/combineNavWithEmptyGroupTitles.ts +21 -0
- package/src/scraping/detectFramework.ts +31 -0
- package/src/scraping/downloadAllImages.ts +79 -0
- package/src/scraping/getSitemapLinks.ts +18 -0
- package/src/scraping/replaceImagePaths.ts +21 -0
- package/src/scraping/scrapeFileGettingFileNameFromUrl.ts +81 -0
- package/src/scraping/scrapeGettingFileNameFromUrl.ts +50 -0
- package/src/scraping/scrapePage.ts +24 -0
- package/src/scraping/scrapePageCommands.ts +52 -0
- package/src/scraping/scrapeSection.ts +16 -0
- package/src/scraping/scrapeSectionCommands.ts +110 -0
- package/src/scraping/site-scrapers/getLinksRecursively.ts +40 -0
- package/src/scraping/site-scrapers/scrapeDocusaurusPage.ts +67 -0
- package/src/scraping/site-scrapers/scrapeDocusaurusSection.ts +80 -0
- package/src/{scrapeGitBookPage.ts → scraping/site-scrapers/scrapeGitBookPage.ts} +25 -5
- package/src/scraping/site-scrapers/scrapeGitBookSection.ts +77 -0
- package/src/scraping/site-scrapers/scrapeReadMePage.ts +57 -0
- package/src/scraping/site-scrapers/scrapeReadMeSection.ts +60 -0
- package/src/util.ts +53 -7
- package/src/validation/isValidLink.ts +9 -0
- package/src/validation/stopIfInvalidLink.ts +9 -0
- package/tsconfig.json +1 -1
- package/bin/scrapeGitBook.js +0 -28
- package/bin/scrapeGitBook.js.map +0 -1
- package/bin/scrapeGitBookPage.js.map +0 -1
- package/bin/scrapeReadMe.js +0 -60
- package/bin/scrapeReadMe.js.map +0 -1
- package/bin/scrapeReadMePage.js +0 -28
- package/bin/scrapeReadMePage.js.map +0 -1
- package/src/scrapeReadMe.ts +0 -79
- package/src/scrapeReadMePage.ts +0 -37
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import axios from "axios";
|
|
2
|
+
import { detectFramework, Frameworks } from "./detectFramework.js";
|
|
3
|
+
import { getHrefFromArgs, getOrigin } from "../util.js";
|
|
4
|
+
import { scrapeSection } from "./scrapeSection.js";
|
|
5
|
+
import { scrapeDocusaurusSection } from "./site-scrapers/scrapeDocusaurusSection.js";
|
|
6
|
+
import { scrapeGitBookSection } from "./site-scrapers/scrapeGitBookSection.js";
|
|
7
|
+
import { scrapeReadMeSection } from "./site-scrapers/scrapeReadMeSection.js";
|
|
8
|
+
import { startBrowser } from "../browser.js";
|
|
9
|
+
|
|
10
|
+
/**
 * Ensures a documentation framework was detected before scraping continues.
 *
 * When `framework` is falsy, prints a hint listing framework-specific commands
 * and terminates the process with exit code 1. Otherwise it returns normally;
 * the assertion signature lets the compiler narrow `framework` to `Frameworks`
 * at the call site.
 *
 * @param framework - Result of framework detection, `undefined` when nothing matched.
 */
function validateFramework(
  framework: Frameworks | undefined
): asserts framework is Frameworks {
  if (framework) {
    return;
  }

  console.log(
    "Could not detect the framework automatically. Please use one of:"
  );
  // NOTE(review): these are page-level command names although this helper lives
  // with the section-scraping commands — confirm they are the intended suggestions.
  console.log("scrape-page-docusaurus");
  console.log("scrape-page-gitbook");
  console.log("scrape-page-readme");
  process.exit(1);
}
|
|
21
|
+
|
|
22
|
+
/**
 * Fetches the page named by the CLI arguments with a plain HTTP GET, hands the
 * HTML to `scrapeSection` together with the given site scraper, and then exits
 * the process with code 0.
 *
 * @param argv - Parsed CLI arguments; the URL is read through `getHrefFromArgs`
 *   and `argv.overwrite` is forwarded to `scrapeSection`.
 * @param scrapeFunc - Framework-specific section scraper forwarded to `scrapeSection`.
 */
export async function scrapeSectionAxiosWrapper(argv: any, scrapeFunc: any) {
  const targetUrl = getHrefFromArgs(argv);
  const { data: pageHtml } = await axios.default.get(targetUrl);
  const siteOrigin = getOrigin(targetUrl);

  await scrapeSection(scrapeFunc, pageHtml, siteOrigin, argv.overwrite);

  // The command is finished once the section has been scraped.
  process.exit(0);
}
|
|
29
|
+
|
|
30
|
+
/**
 * Command handler for scraping a GitBook section: delegates to the
 * browser-based wrapper with the GitBook section scraper.
 *
 * @param argv - Parsed CLI arguments, passed through unchanged.
 */
export async function scrapeGitbookSectionCommand(argv: any) {
  const sectionScraper = scrapeGitBookSection;
  await scrapeSectionGitBookWrapper(argv, sectionScraper);
}
|
|
33
|
+
|
|
34
|
+
/**
 * Opens the target page in a browser, repeatedly clicks the chevron icons in
 * the side navigation so nested links get rendered, then scrapes the resulting
 * HTML with `scrapeSection` and exits the process with code 0.
 *
 * The browser is always closed (and the close is awaited) before scraping
 * starts, including when navigation or in-page evaluation throws.
 *
 * @param argv - Parsed CLI arguments; the URL is read through `getHrefFromArgs`
 *   and `argv.overwrite` is forwarded to `scrapeSection`.
 * @param scrapeFunc - Section scraper forwarded to `scrapeSection`.
 */
async function scrapeSectionGitBookWrapper(argv: any, scrapeFunc: any) {
  const href = getHrefFromArgs(argv);

  const browser = await startBrowser();
  let html: string;

  try {
    const page = await browser.newPage();
    await page.goto(href, {
      waitUntil: "networkidle2",
    });

    let prevEncountered: string[] = [];
    // Placeholder entry: it is never in the (empty) previous list, so the loop
    // body is guaranteed to run at least once.
    let encounteredHref = ["fake"];

    // Loop until a pass finds no link that the previous pass had not already found
    while (!encounteredHref.every((seen) => prevEncountered.includes(seen))) {
      prevEncountered = encounteredHref;
      encounteredHref = await page.evaluate(
        (encounteredHref) => {
          const icons: HTMLElement[] = Array.from(
            document.querySelectorAll('path[d="M9 18l6-6-6-6"]')
          );

          const linksFound: string[] = [];
          for (const icon of icons) {
            const toClick = icon?.parentElement?.parentElement;
            const link = toClick?.parentElement?.parentElement;

            // Skip icons not in the side navigation
            if (!link?.hasAttribute("href")) {
              continue;
            }

            const href = link.getAttribute("href");

            // Should never occur but we keep it as a fail-safe
            if (href?.startsWith("https://") || href?.startsWith("http://")) {
              continue;
            }

            // Click any links we haven't seen before
            if (href && !encounteredHref.includes(href)) {
              toClick?.click();
            }
            if (href) {
              linksFound.push(href);
            }
          }

          return linksFound;
        },
        encounteredHref // Need to pass array into the browser
      );
    }

    html = await page.content();
  } finally {
    // Release the browser even on failure, and wait for it to shut down.
    await browser.close();
  }

  await scrapeSection(scrapeFunc, html, getOrigin(href), argv.overwrite);
  process.exit(0);
}
|
|
92
|
+
|
|
93
|
+
/**
 * Downloads the page named by the CLI arguments, detects which documentation
 * framework produced it, and dispatches to the matching section scraper.
 *
 * `validateFramework` is called on the detection result before dispatching,
 * and the detected framework is logged.
 *
 * @param argv - Parsed CLI arguments, forwarded to the selected scraper.
 */
export async function scrapeSectionAutomatically(argv: any) {
  const targetUrl = getHrefFromArgs(argv);
  const response = await axios.default.get(targetUrl);
  const framework = detectFramework(response.data);

  validateFramework(framework);

  console.log("Detected framework: " + framework);

  switch (framework) {
    case Frameworks.DOCUSAURUS:
      await scrapeSectionAxiosWrapper(argv, scrapeDocusaurusSection);
      break;
    case Frameworks.GITBOOK:
      await scrapeGitbookSectionCommand(argv);
      break;
    case Frameworks.README:
      await scrapeSectionAxiosWrapper(argv, scrapeReadMeSection);
      break;
    default:
      break;
  }
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
// Used by GitBook and ReadMe section scrapers
|
|
2
|
+
/**
 * Converts a collection of navigation nodes into a list of page links,
 * recursing into nested navigation.
 *
 * For each node, the first child is treated as the link and the children of
 * the second child as nested entries. A node with nested entries becomes
 * `{ group, pages }` (the node's own link is the first page); a node without
 * them becomes its href string. Nodes without an href, or with an absolute
 * http(s) href, are dropped.
 *
 * @param linkSections - Collection of navigation nodes (expected to offer
 *   `length`, `map` and `toArray`, as the Cheerio selections passed by callers do).
 * @param $ - Function used to wrap each raw node.
 * @returns Array of href strings and nested `{ group, pages }` objects; empty
 *   when `linkSections` is null/undefined or has no elements.
 */
export default function getLinksRecursively(linkSections: any, $: any) {
  const hasSections = linkSections != null && linkSections.length !== 0;
  if (!hasSections) {
    return [];
  }

  const toNavigationEntry = (_index: number, node: any) => {
    const subsection = $(node);
    const link = subsection.children().first();
    const linkHref = link.attr("href");

    // Nodes without a link are skipped (GitBook, for example, uses empty divs
    // to draw a line beside the nav).
    if (!linkHref) {
      return undefined;
    }

    // External links are skipped until Mintlify supports them.
    const isExternal = ["https://", "http://"].some((prefix) =>
      linkHref.startsWith(prefix)
    );
    if (isExternal) {
      return undefined;
    }

    const childLinks = subsection.children().eq(1).children();
    if (childLinks.length <= 0) {
      return linkHref;
    }

    // The section's own link goes first in the list of pages. Update this
    // once the section itself can be a link.
    return {
      group: link.text(),
      pages: [linkHref, ...getLinksRecursively(childLinks, $)],
    };
  };

  const mapped = linkSections.map(toNavigationEntry);
  return mapped.toArray().filter(Boolean);
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import cheerio from "cheerio";
|
|
2
|
+
import { NodeHtmlMarkdown } from "node-html-markdown";
|
|
3
|
+
import downloadAllImages from "../downloadAllImages.js";
|
|
4
|
+
import replaceImagePaths from "../replaceImagePaths.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Converts the HTML of a Docusaurus page into a title plus cleaned-up markdown.
 *
 * The content is taken from the first `.theme-doc-markdown` element. Its `h1`
 * text becomes the title and is removed from the content, images are handled
 * through `downloadAllImages` / `replaceImagePaths`, and the remaining HTML is
 * translated to markdown and post-processed.
 *
 * @param html - Full HTML of the page.
 * @param origin - Forwarded to `downloadAllImages` (presumably the site origin
 *   used to resolve image URLs — confirm against that helper).
 * @param cliDir - Forwarded to `replaceImagePaths`.
 * @param imageBaseDir - Forwarded to `downloadAllImages`.
 * @returns `{ title, description, markdown }`, with `description` always
 *   `null`; or `{}` when the page has no `.theme-doc-markdown` element.
 */
export async function scrapeDocusaurusPage(
  html: string,
  origin: string,
  cliDir: string,
  imageBaseDir: string
) {
  const $ = cheerio.load(html);

  const content = $(".theme-doc-markdown").first();

  // Index pages with no additional text don't have the markdown class
  if (content.length === 0) {
    return {};
  }

  const titleComponent = content.find("h1");
  const title = titleComponent.text().trim();

  // Do not include title in the content when we insert it in our metadata
  titleComponent.remove();

  const origToWritePath = await downloadAllImages(
    $,
    content,
    origin,
    imageBaseDir
  );

  // `html()` can return null; translate an empty string instead of null.
  const contentHtml = content.html() ?? "";

  const nhm = new NodeHtmlMarkdown();
  let markdown = nhm.translate(contentHtml);

  // Description only exists in meta tags. The code is commented out because it's prone to incorrectly
  // including a description if the first line of text had markdown annotations like `.
  // The commented out alternative is to ignore description if it's the first line of text,
  // this means it was not set in the metadata and Docusaurus defaulted to the text.
  const description = null;
  // let description = $('meta[property="og:description"]').attr("content");
  // if (markdown.startsWith(description)) {
  //   description = null;
  // }

  // Remove Docusaurus links from headers
  // When we parse their HTML the parser adds things like:
  // [](#setup "Direct link to heading")
  // to the end of each header.
  markdown = markdown.replace(/\[\]\(#.+ ".+"\)\n/g, "\n");

  // Remove unnecessary zero-width space characters
  markdown = markdown.replace(/\u200b/g, "");

  // Collapse every run of three or more newlines to a single blank line.
  // A fixed three-newline pattern only shortens longer runs by one per match.
  markdown = markdown.replace(/\n{3,}/g, "\n\n");

  // Mintlify doesn't support bolded headers, remove the asterisks
  markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, "$1 $2\n");

  markdown = replaceImagePaths(origToWritePath, cliDir, markdown);

  return { title, description, markdown };
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import cheerio from "cheerio";
|
|
2
|
+
import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js";
|
|
3
|
+
import { scrapeDocusaurusPage } from "./scrapeDocusaurusPage.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Reads the Docusaurus sidebar from a page, builds one navigation group per
 * top-level sidebar item, and scrapes every linked page.
 *
 * @param html - Full HTML of a page containing `.theme-doc-sidebar-menu`.
 * @param origin - Forwarded to `scrapeGettingFileNameFromUrl`.
 * @param cliDir - Forwarded to `scrapeGettingFileNameFromUrl`.
 * @param overwrite - Forwarded to `scrapeGettingFileNameFromUrl`.
 * @returns The groups (`{ group, pages }`, the same structure as mint.json)
 *   with `pages` replaced by the truthy results of scraping each link.
 */
export async function scrapeDocusaurusSection(
  html: string,
  origin: string,
  cliDir: string,
  overwrite: boolean
) {
  const $ = cheerio.load(html);

  // Top-level entries of the sidebar
  const sidebarItems = $(".theme-doc-sidebar-menu").first().children();

  // One { group, pages } object per sidebar entry
  const groupsConfig = sidebarItems
    .map((i, item) => {
      const $item = $(item);

      // A plain link that does not belong to any group
      if ($item.hasClass("theme-doc-sidebar-item-link")) {
        const onlyHref = $item.find("a[href]").first().attr("href");
        return { group: "", pages: [onlyHref] };
      }

      const group = $item.find(".menu__list-item-collapsible").first().text();

      // Search from the whole item rather than the nested menu__list because
      // the category title can itself be a page.
      const anchors = $item.find("a[href]");
      const pages = anchors
        .map((i, anchor) => $(anchor).attr("href"))
        .filter((i, target) => target !== "#")
        .toArray();

      return { group, pages };
    })
    .toArray();

  // Docusaurus requires a directory on all sections whereas we use root.
  // /docs is their default directory so we remove it
  const scrapeOne = async (pathname: string) =>
    scrapeGettingFileNameFromUrl(
      pathname,
      cliDir,
      origin,
      overwrite,
      scrapeDocusaurusPage,
      false,
      "/docs"
    );

  // Scrape each link in the navigation.
  const groupsConfigCleanPaths = await Promise.all(
    groupsConfig.map(async (groupConfig) => {
      const scraped = await Promise.all(groupConfig.pages.map(scrapeOne));
      // Skipped index pages come back as undefined; drop them.
      groupConfig.pages = scraped.filter(Boolean);
      return groupConfig;
    })
  );

  return groupsConfigCleanPaths;
}
|
|
@@ -1,14 +1,19 @@
|
|
|
1
|
-
import axios from "axios";
|
|
2
1
|
import cheerio from "cheerio";
|
|
3
2
|
import { NodeHtmlMarkdown } from "node-html-markdown";
|
|
3
|
+
import downloadAllImages from "../downloadAllImages.js";
|
|
4
|
+
import replaceImagePaths from "../replaceImagePaths.js";
|
|
4
5
|
|
|
5
|
-
export async function scrapeGitBookPage(
|
|
6
|
-
|
|
7
|
-
|
|
6
|
+
export async function scrapeGitBookPage(
|
|
7
|
+
html: string,
|
|
8
|
+
origin: string,
|
|
9
|
+
cliDir: string,
|
|
10
|
+
imageBaseDir: string
|
|
11
|
+
) {
|
|
12
|
+
const $ = cheerio.load(html);
|
|
8
13
|
|
|
9
14
|
const titleComponent = $('[data-testid="page.title"]').first();
|
|
10
15
|
const titleAndDescription = titleComponent.parent().parent().parent().text();
|
|
11
|
-
|
|
16
|
+
|
|
12
17
|
const description = titleAndDescription
|
|
13
18
|
.replace(titleComponent.text(), "")
|
|
14
19
|
.trim();
|
|
@@ -17,6 +22,19 @@ export async function scrapeGitBookPage(url: string) {
|
|
|
17
22
|
const content = $('[data-testid="page.contentEditor"]').first();
|
|
18
23
|
const contentHtml = $.html(content);
|
|
19
24
|
|
|
25
|
+
const modifyFileName = (fileName) =>
|
|
26
|
+
// Remove GitBook metadata from the start
|
|
27
|
+
// The first four %2F split metadata fields. Remaining ones are part of the file name.
|
|
28
|
+
fileName.split("%2F").slice(4).join("%2F");
|
|
29
|
+
|
|
30
|
+
const origToWritePath = await downloadAllImages(
|
|
31
|
+
$,
|
|
32
|
+
content,
|
|
33
|
+
origin,
|
|
34
|
+
imageBaseDir,
|
|
35
|
+
modifyFileName
|
|
36
|
+
);
|
|
37
|
+
|
|
20
38
|
const nhm = new NodeHtmlMarkdown();
|
|
21
39
|
let markdown = nhm.translate(contentHtml);
|
|
22
40
|
|
|
@@ -32,5 +50,7 @@ export async function scrapeGitBookPage(url: string) {
|
|
|
32
50
|
// Mintlify doesn't support bolded headers, remove the asterisks
|
|
33
51
|
markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, "$1 $2\n");
|
|
34
52
|
|
|
53
|
+
markdown = replaceImagePaths(origToWritePath, cliDir, markdown);
|
|
54
|
+
|
|
35
55
|
return { title, description, markdown };
|
|
36
56
|
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import cheerio from "cheerio";
|
|
2
|
+
import { NavigationEntry } from "../../navigation.js";
|
|
3
|
+
import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js";
|
|
4
|
+
import { scrapeGitBookPage } from "./scrapeGitBookPage.js";
|
|
5
|
+
import combineNavWithEmptyGroupTitles from "../combineNavWithEmptyGroupTitles.js";
|
|
6
|
+
import getLinksRecursively from "./getLinksRecursively.js";
|
|
7
|
+
|
|
8
|
+
/**
 * Reads the GitBook desktop table of contents from a page, builds navigation
 * groups from it, merges groups with empty titles, and scrapes every entry.
 *
 * @param html - Full HTML of a page containing the
 *   `page.desktopTableOfContents` element.
 * @param origin - Forwarded to `scrapeGettingFileNameFromUrl`.
 * @param cliDir - Forwarded to `scrapeGettingFileNameFromUrl`.
 * @param overwrite - Forwarded to `scrapeGettingFileNameFromUrl`.
 * @returns The results of `scrapeGettingFileNameFromUrl` for each merged
 *   navigation entry, in order.
 */
export async function scrapeGitBookSection(
  html: string,
  origin: string,
  cliDir: string,
  overwrite: boolean
) {
  const $ = cheerio.load(html);

  // Drill down from the table of contents to the list of navigation sections
  const tocRoot = $(
    'div[data-testid="page.desktopTableOfContents"] > div > div:first-child'
  );
  const navigationSections = tocRoot
    .children()
    .first()
    .children()
    .first()
    .children();

  // Build a { group, pages } object for every section
  const groupsConfig = navigationSections
    .map((i, node) => {
      const section = $(node);
      const sectionTitle = section.find('div > div[dir="auto"]').first().text();

      // Only has an href when the nested navigation is not in a group
      const firstLink = section.children().eq(0);
      const firstHref = firstLink.attr("href");

      const nestedSections = section.children().eq(1).children();
      const pages = getLinksRecursively(nestedSections, $);

      const group = sectionTitle || alternateTitle(firstLink, pages);
      return {
        group,
        pages: firstHref ? [firstHref, ...pages] : pages,
      };
    })
    .toArray()
    .filter(Boolean);

  // Groups with empty titles are merged together
  const reducedGroupsConfig = combineNavWithEmptyGroupTitles(groupsConfig);

  // Scrape each entry in the navigation.
  const scrapeEntry = async (navEntry: NavigationEntry) =>
    scrapeGettingFileNameFromUrl(
      navEntry,
      cliDir,
      origin,
      overwrite,
      scrapeGitBookPage,
      true
    );
  const groupsConfigCleanPaths = await Promise.all(
    reducedGroupsConfig.map(scrapeEntry)
  );

  return groupsConfigCleanPaths;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Picks a fallback group title for a navigation section without a heading.
 *
 * Only nested navigation menus (sections that have child pages) get a title,
 * taken from the text of their first link. Sections without child pages get
 * an empty title so they can later be merged into a single group.
 *
 * @param firstLink - Object exposing the first link's text; may be missing.
 * @param pages - Child pages found for the section.
 * @returns The first link's text when there are child pages, otherwise "".
 */
function alternateTitle(
  firstLink: { text(): string } | null | undefined,
  pages: readonly unknown[]
): string {
  if (pages.length === 0) {
    return "";
  }
  // Fall back to "" so the result is always a string, even without a link.
  return firstLink?.text() ?? "";
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import cheerio from "cheerio";
|
|
2
|
+
import { NodeHtmlMarkdown } from "node-html-markdown";
|
|
3
|
+
import downloadAllImages from "../downloadAllImages.js";
|
|
4
|
+
import replaceImagePaths from "../replaceImagePaths.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Converts the HTML of a ReadMe documentation page into page data.
 *
 * @param html - Full HTML of the ReadMe page.
 * @param origin - Forwarded to `downloadAllImages`.
 * @param cliDir - Forwarded to `replaceImagePaths`.
 * @param imageBaseDir - Forwarded to `downloadAllImages`.
 * @returns The page title, its description, and the body as markdown.
 */
export async function scrapeReadMePage(
  html: string,
  origin: string,
  cliDir: string,
  imageBaseDir: string
) {
  const $ = cheerio.load(html);

  const titleComponent = $("h1").first();
  const title = titleComponent.text().trim();

  // Look for the description beside the title first, then in the article header.
  let description = $(".markdown-body", titleComponent.parent()).text().trim();
  if (!description) {
    description = $(".rm-Article > header p").text().trim();
  }

  let content = $(".content-body .markdown-body").first();
  if (content.length === 0) {
    content = $(".rm-Article > .markdown-body");
  }

  // API Pages don't have a markdown body in the same position so there's no HTML
  const contentHtml = content.html() || "";

  const origToWritePath = await downloadAllImages(
    $,
    content,
    origin,
    imageBaseDir
  );

  const nhm = new NodeHtmlMarkdown();
  let markdown = nhm.translate(contentHtml);

  // Keep headers on one line and increase their depth by one
  markdown = markdown.replace(/# \n\n/g, "## ");

  // Remove unnecessary zero-width space characters
  markdown = markdown.replace(/\u200b/g, "");

  // Remove ReadMe anchor links
  markdown = markdown.replace(/\n\[\]\(#.+\)\n/g, "\n");

  // Collapse any run of three or more newlines into a single blank line.
  // Matching the whole run is required: replacing exactly three newlines at
  // a time leaves extra blank lines behind when the run is longer.
  markdown = markdown.replace(/\n{3,}/g, "\n\n");

  // Mintlify doesn't support bolded headers, remove the asterisks
  markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, "$1 $2\n");

  markdown = replaceImagePaths(origToWritePath, cliDir, markdown);

  return { title, description, markdown };
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import cheerio from "cheerio";
|
|
2
|
+
import { scrapeReadMePage } from "./scrapeReadMePage.js";
|
|
3
|
+
import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js";
|
|
4
|
+
import getLinksRecursively from "./getLinksRecursively.js";
|
|
5
|
+
import { NavigationEntry } from "../../navigation.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Scrapes every page linked from a ReadMe sidebar.
 *
 * Builds one navigation group per sidebar section and then scrapes each
 * linked page through `scrapeGettingFileNameFromUrl` using
 * `scrapeReadMePage`.
 *
 * @param html - HTML of a ReadMe page that contains the sidebar.
 * @param origin - Forwarded to `scrapeGettingFileNameFromUrl`.
 * @param cliDir - Forwarded to `scrapeGettingFileNameFromUrl`.
 * @param overwrite - Forwarded to `scrapeGettingFileNameFromUrl`.
 * @returns The results of `scrapeGettingFileNameFromUrl`, one per group.
 */
export async function scrapeReadMeSection(
  html: string,
  origin: string,
  cliDir: string,
  overwrite: boolean
) {
  const $ = cheerio.load(html);

  // Get all the navigation sections, but only from the first
  // sidebar found. There are multiple in the HTML for mobile
  // responsiveness but they all have the same links.
  const navigationSections = $(".rm-Sidebar")
    .first()
    .find(".rm-Sidebar-section");

  const groupsConfig = navigationSections
    .map((_index, element) => {
      const section = $(element);
      const sectionTitle = section.find("h3").first().text();

      // Get all links, then remove duplicates while keeping first-seen order.
      // There are duplicates because of nested navigation, eg:
      // subgroupTitle -> /first-page
      // -- First Page -> /first-page ** DUPLICATE **
      // -- Second Page -> /second-page
      const linkSections = section.find(".rm-Sidebar-list").first().children();
      const pages = Array.from(new Set(getLinksRecursively(linkSections, $)));

      // Follows the same structure as mint.json
      return {
        group: sectionTitle,
        pages,
      };
    })
    .toArray();

  return Promise.all(
    groupsConfig.map(async (navEntry: NavigationEntry) =>
      scrapeGettingFileNameFromUrl(
        navEntry,
        cliDir,
        origin,
        overwrite,
        scrapeReadMePage,
        false,
        // ReadMe requires a directory on all sections whereas we use root.
        // /docs is their default directory so we remove it
        "/docs"
      )
    )
  );
}
|
package/src/util.ts
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
import { mkdirSync, writeFileSync } from "fs";
|
|
2
|
-
import
|
|
2
|
+
import Ora from "ora";
|
|
3
3
|
import path from "path";
|
|
4
|
+
import { Page } from "./init-command/templates.js";
|
|
5
|
+
import stopIfInvalidLink from "./validation/stopIfInvalidLink.js";
|
|
4
6
|
|
|
5
|
-
export function
|
|
6
|
-
// Gets the website
|
|
7
|
+
/**
 * Returns the origin of a URL.
 *
 * eg. https://google.com -> https://google.com
 *     https://google.com/page -> https://google.com
 *
 * @param url - Absolute URL; `new URL` throws when it cannot be parsed.
 */
export function getOrigin(url: string) {
  const { origin } = new URL(url);
  return origin;
}
|
|
11
12
|
|
|
12
13
|
export function objToReadableString(objs: Object[]) {
|
|
@@ -35,14 +36,59 @@ export const createPage = (
|
|
|
35
36
|
title: string,
|
|
36
37
|
description?: string,
|
|
37
38
|
markdown?: string,
|
|
39
|
+
overwrite: boolean = false,
|
|
38
40
|
rootDir: string = "",
|
|
39
41
|
fileName?: string
|
|
40
42
|
) => {
|
|
43
|
+
const writePath = path.join(rootDir, addMdx(fileName || toFilename(title)));
|
|
44
|
+
|
|
41
45
|
// Create the folders needed if they're missing
|
|
42
46
|
mkdirSync(rootDir, { recursive: true });
|
|
43
47
|
|
|
44
48
|
// Write the page to memory
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
49
|
+
if (overwrite) {
|
|
50
|
+
writeFileSync(writePath, Page(title, description, markdown));
|
|
51
|
+
console.log("✏️ - " + writePath);
|
|
52
|
+
} else {
|
|
53
|
+
try {
|
|
54
|
+
writeFileSync(writePath, Page(title, description, markdown), {
|
|
55
|
+
flag: "wx",
|
|
56
|
+
});
|
|
57
|
+
console.log("✏️ - " + writePath);
|
|
58
|
+
} catch (e) {
|
|
59
|
+
// We do a try-catch instead of an if-statement to avoid a race condition
|
|
60
|
+
// of the file being created after we started writing.
|
|
61
|
+
if (e.code === "EEXIST") {
|
|
62
|
+
console.log(`❌ Skipping existing file ${writePath}`);
|
|
63
|
+
} else {
|
|
64
|
+
console.error(e);
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
};
|
|
69
|
+
|
|
70
|
+
/**
 * Reads the `url` argument and runs it through `stopIfInvalidLink`.
 *
 * @param argv - Parsed command-line arguments; only `url` is read.
 * @returns The value of `argv.url`.
 */
export function getHrefFromArgs(argv: any) {
  const { url } = argv;
  stopIfInvalidLink(url);
  return url;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Creates an Ora instance and starts it with the given text.
 *
 * @param startText - Text passed to `start`; defaults to "".
 * @returns The value returned by `Ora().start(startText)`.
 */
export const buildLogger = (startText: string = "") =>
  Ora().start(startText);
|
|
80
|
+
|
|
81
|
+
/**
 * Returns the text after the last "." in a filename.
 *
 * When nothing follows the last "." the whole filename is returned instead.
 * A filename with no "." is also returned whole.
 *
 * @param filename - Name to inspect.
 */
export const getFileExtension = (filename: string) => {
  const afterLastDot = filename.slice(filename.lastIndexOf(".") + 1);
  return afterLastDot || filename;
};
|
|
87
|
+
|
|
88
|
+
/**
 * Tells whether a file has one of the page extensions: mdx, md, or tsx.
 *
 * @param filename - Name whose extension is read with `getFileExtension`.
 * @returns `true` for an mdx, md, or tsx extension, otherwise `false`. The
 *   result is always a boolean, including for an empty filename.
 */
export const fileBelongsInPagesFolder = (filename: string): boolean => {
  const extension = getFileExtension(filename);
  return extension === "mdx" || extension === "md" || extension === "tsx";
};
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import isValidLink from "./isValidLink.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Exits the process with code 1 when `isValidLink` rejects the link.
 *
 * Two explanatory lines are logged before exiting. Nothing happens when the
 * link is accepted.
 *
 * @param href - Link to check.
 */
export default function stopIfInvalidLink(href: string) {
  if (isValidLink(href)) {
    return;
  }

  console.log("Invalid link: " + href);
  console.log("Make sure the link starts with http:// or https://");
  process.exit(1);
}
|
package/tsconfig.json
CHANGED
package/bin/scrapeGitBook.js
DELETED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
import axios from "axios";
|
|
2
|
-
import cheerio from "cheerio";
|
|
3
|
-
import { NodeHtmlMarkdown } from "node-html-markdown";
|
|
4
|
-
/**
 * Fetches a GitBook page and converts it to a title, description, and markdown.
 *
 * @param url - Address of the page to fetch.
 * @returns An object with `title`, `description`, and `markdown`.
 */
export async function scrapeGitBook(url) {
  const res = await axios.default.get(url);
  const $ = cheerio.load(res.data);

  const titleComponent = $('[data-testid="page.title"]').first();
  const title = titleComponent.text().trim();

  // The description is the text of the title's third ancestor with the
  // title text removed.
  const titleAndDescription = titleComponent.parent().parent().parent().text();
  const description = titleAndDescription
    .replace(titleComponent.text(), "")
    .trim();

  const content = $('[data-testid="page.contentEditor"]').first();
  const contentHtml = $.html(content);

  const nhm = new NodeHtmlMarkdown();
  let markdown = nhm.translate(contentHtml);

  // Keep headers on one line and increase their depth by one
  markdown = markdown.replace(/# \n\n/g, "## ");

  // Remove unnecessary zero-width space characters
  markdown = markdown.replace(/\u200b/g, "");

  // Collapse any run of three or more newlines into a single blank line
  markdown = markdown.replace(/\n{3,}/g, "\n\n");

  // Mintlify doesn't support bolded headers, remove the asterisks
  markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, "$1 $2\n");

  return { title, description, markdown };
}
|
|
28
|
-
//# sourceMappingURL=scrapeGitBook.js.map
|