mintlify 1.1.6 → 1.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +5 -0
- package/bin/index.js +1 -8
- package/bin/index.js.map +1 -1
- package/bin/local-preview/injectNav.js +94 -0
- package/bin/local-preview/injectNav.js.map +1 -0
- package/bin/mint/client/.babel-plugin-macrosrc.json +5 -0
- package/bin/mint/client/.babelrc +4 -0
- package/bin/mint/client/.editorconfig +12 -0
- package/bin/mint/client/.eslintrc.json +7 -0
- package/bin/mint/client/.prettierignore +4 -0
- package/bin/mint/client/.prettierrc +14 -0
- package/bin/mint/client/.vscode/launch.json +28 -0
- package/bin/mint/client/README.md +44 -0
- package/bin/mint/client/jest.config.ts +195 -0
- package/bin/mint/client/next-env.d.ts +4 -0
- package/bin/mint/client/next.config.js +152 -0
- package/bin/mint/client/package.json +139 -0
- package/bin/mint/client/postcss.config.cjs +9 -0
- package/bin/mint/client/prebuild/faviconConfig.js +35 -0
- package/bin/mint/client/prebuild/getOpenApiContext.js +53 -0
- package/bin/mint/client/prebuild/index.js +117 -0
- package/bin/mint/client/prebuild/injectNav.js +115 -0
- package/bin/mint/client/prebuild/slugToTitle.js +7 -0
- package/bin/mint/client/rehype/withApiComponents.js +60 -0
- package/bin/mint/client/rehype/withCodeBlocks.js +54 -0
- package/bin/mint/client/rehype/withLayouts.js +113 -0
- package/bin/mint/client/rehype/withLinkRoles.js +13 -0
- package/bin/mint/client/rehype/withRawComponents.js +13 -0
- package/bin/mint/client/rehype/withStaticProps.js +25 -0
- package/bin/mint/client/rehype/withSyntaxHighlighting.js +60 -0
- package/bin/mint/client/remark/utils.js +369 -0
- package/bin/mint/client/remark/withFrames.js +55 -0
- package/bin/mint/client/remark/withImportsInjected.js +36 -0
- package/bin/mint/client/remark/withNextLinks.js +37 -0
- package/bin/mint/client/remark/withTableOfContents.js +71 -0
- package/bin/mint/client/scripts/local.js +177 -0
- package/bin/mint/client/sentry.client.config.js +15 -0
- package/bin/mint/client/sentry.properties +4 -0
- package/bin/mint/client/sentry.server.config.js +15 -0
- package/bin/mint/client/src/analytics/AbstractAnalyticsImplementation.ts +50 -0
- package/bin/mint/client/src/analytics/AnalyticsContext.ts +5 -0
- package/bin/mint/client/src/analytics/AnalyticsMediator.ts +101 -0
- package/bin/mint/client/src/analytics/FakeAnalyticsMediator.ts +9 -0
- package/bin/mint/client/src/analytics/GA4Script.tsx +33 -0
- package/bin/mint/client/src/analytics/implementations/amplitude.ts +26 -0
- package/bin/mint/client/src/analytics/implementations/fathom.ts +38 -0
- package/bin/mint/client/src/analytics/implementations/ga4.ts +33 -0
- package/bin/mint/client/src/analytics/implementations/hotjar.ts +53 -0
- package/bin/mint/client/src/analytics/implementations/mixpanel-browser.d.ts +1 -0
- package/bin/mint/client/src/analytics/implementations/mixpanel.ts +52 -0
- package/bin/mint/client/src/analytics/implementations/posthog.ts +37 -0
- package/bin/mint/client/src/components/Accordion/Accordion.tsx +43 -0
- package/bin/mint/client/src/components/Accordion/index.ts +4 -0
- package/bin/mint/client/src/components/ApiExample.tsx +9 -0
- package/bin/mint/client/src/components/Card.tsx +51 -0
- package/bin/mint/client/src/components/CodeGroup.tsx +132 -0
- package/bin/mint/client/src/components/Editor.tsx +12 -0
- package/bin/mint/client/src/components/Expandable.tsx +40 -0
- package/bin/mint/client/src/components/Heading.tsx +84 -0
- package/bin/mint/client/src/components/Param.tsx +56 -0
- package/bin/mint/client/src/components/Request.tsx +19 -0
- package/bin/mint/client/src/components/ResponseField.tsx +33 -0
- package/bin/mint/client/src/components/TabBar.tsx +61 -0
- package/bin/mint/client/src/config.ts +115 -0
- package/bin/mint/client/src/css/bar-of-progress.css +10 -0
- package/bin/mint/client/src/css/base.css +29 -0
- package/bin/mint/client/src/css/font-awesome.css +7 -0
- package/bin/mint/client/src/css/fonts.css +44 -0
- package/bin/mint/client/src/css/main.css +11 -0
- package/bin/mint/client/src/css/prism.css +270 -0
- package/bin/mint/client/src/css/utilities.css +43 -0
- package/bin/mint/client/src/enums/components.ts +8 -0
- package/bin/mint/client/src/fonts/FiraCode-VF.woff +0 -0
- package/bin/mint/client/src/fonts/FiraCode-VF.woff2 +0 -0
- package/bin/mint/client/src/fonts/IBMPlexMono-Regular.ttf +0 -0
- package/bin/mint/client/src/fonts/IBMPlexMono-SemiBold.ttf +0 -0
- package/bin/mint/client/src/fonts/Inter-italic-latin.var.woff2 +0 -0
- package/bin/mint/client/src/fonts/Inter-roman-latin.var.woff2 +0 -0
- package/bin/mint/client/src/fonts/Pally-Variable.ttf +0 -0
- package/bin/mint/client/src/fonts/SourceSansPro-Regular.otf +0 -0
- package/bin/mint/client/src/fonts/SourceSerifPro-Regular.ttf +0 -0
- package/bin/mint/client/src/fonts/Synonym-Variable.ttf +0 -0
- package/bin/mint/client/src/fonts/Ubuntu-Mono-bold.woff2 +0 -0
- package/bin/mint/client/src/fonts/generated/IBMPlexMono-Regular-subset.woff2 +0 -0
- package/bin/mint/client/src/fonts/generated/IBMPlexMono-Regular-subset.zopfli.woff +0 -0
- package/bin/mint/client/src/fonts/generated/IBMPlexMono-Regular.module.css +11 -0
- package/bin/mint/client/src/fonts/generated/IBMPlexMono-SemiBold-subset.woff2 +0 -0
- package/bin/mint/client/src/fonts/generated/IBMPlexMono-SemiBold-subset.zopfli.woff +0 -0
- package/bin/mint/client/src/fonts/generated/IBMPlexMono-SemiBold.module.css +11 -0
- package/bin/mint/client/src/fonts/generated/Pally-Variable-subset.woff2 +0 -0
- package/bin/mint/client/src/fonts/generated/Pally-Variable-subset.zopfli.woff +0 -0
- package/bin/mint/client/src/fonts/generated/Pally-Variable.module.css +11 -0
- package/bin/mint/client/src/fonts/generated/SourceSerifPro-Regular-subset.woff2 +0 -0
- package/bin/mint/client/src/fonts/generated/SourceSerifPro-Regular-subset.zopfli.woff +0 -0
- package/bin/mint/client/src/fonts/generated/SourceSerifPro-Regular.module.css +11 -0
- package/bin/mint/client/src/fonts/generated/Synonym-Variable-subset.woff2 +0 -0
- package/bin/mint/client/src/fonts/generated/Synonym-Variable-subset.zopfli.woff +0 -0
- package/bin/mint/client/src/fonts/generated/Synonym-Variable.module.css +11 -0
- package/bin/mint/client/src/fonts/generated/TenorSans-Regular-subset.woff2 +0 -0
- package/bin/mint/client/src/fonts/generated/TenorSans-Regular-subset.zopfli.woff +0 -0
- package/bin/mint/client/src/fonts/generated/TenorSans-Regular.module.css +11 -0
- package/bin/mint/client/src/hooks/useActionKey.ts +20 -0
- package/bin/mint/client/src/hooks/useIsomorphicLayoutEffect.ts +3 -0
- package/bin/mint/client/src/hooks/useMedia.ts +27 -0
- package/bin/mint/client/src/hooks/usePrevNext.ts +34 -0
- package/bin/mint/client/src/hooks/useTop.ts +15 -0
- package/bin/mint/client/src/icons/CopyToClipboard.tsx +33 -0
- package/bin/mint/client/src/index.d.ts +1 -0
- package/bin/mint/client/src/layouts/ApiSupplemental.tsx +173 -0
- package/bin/mint/client/src/layouts/ContentsLayout.tsx +256 -0
- package/bin/mint/client/src/layouts/DocumentationLayout.tsx +44 -0
- package/bin/mint/client/src/layouts/OpenApiContent.tsx +301 -0
- package/bin/mint/client/src/layouts/SidebarLayout.tsx +412 -0
- package/bin/mint/client/src/layouts/UserFeedback.tsx +73 -0
- package/bin/mint/client/src/layouts/getGroupsInDivision.ts +25 -0
- package/bin/mint/client/src/layouts/isPathInGroupPages.ts +10 -0
- package/bin/mint/client/src/metadata.ts +58 -0
- package/bin/mint/client/src/nav.json +219 -0
- package/bin/mint/client/src/openapi.ts +3 -0
- package/bin/mint/client/src/pages/404.tsx +73 -0
- package/bin/mint/client/src/pages/_app.tsx +138 -0
- package/bin/mint/client/src/pages/_document.tsx +57 -0
- package/bin/mint/client/src/pages/api/issue.ts +10 -0
- package/bin/mint/client/src/pages/api/name.ts +8 -0
- package/bin/mint/client/src/pages/api/request.ts +31 -0
- package/bin/mint/client/src/pages/api/suggest.ts +10 -0
- package/bin/mint/client/src/pages/api/syntax-highlighted-json.ts +13 -0
- package/bin/mint/client/src/pages/api/utils.ts +6 -0
- package/bin/mint/client/src/pages/index.tsx +31 -0
- package/bin/mint/client/src/ui/Api.tsx +359 -0
- package/bin/mint/client/src/ui/Footer.tsx +124 -0
- package/bin/mint/client/src/ui/Header.tsx +370 -0
- package/bin/mint/client/src/ui/Logo.tsx +55 -0
- package/bin/mint/client/src/ui/PageHeader.tsx +51 -0
- package/bin/mint/client/src/ui/Search.tsx +386 -0
- package/bin/mint/client/src/ui/ThemeToggle.tsx +285 -0
- package/bin/mint/client/src/ui/Title.tsx +22 -0
- package/bin/mint/client/src/ui/TopLevelLink.tsx +122 -0
- package/bin/mint/client/src/utils/api.ts +252 -0
- package/bin/mint/client/src/utils/brands.ts +217 -0
- package/bin/mint/client/src/utils/castArray.ts +3 -0
- package/bin/mint/client/src/utils/childrenArray.ts +3 -0
- package/bin/mint/client/src/utils/fit.ts +27 -0
- package/bin/mint/client/src/utils/fontAwesome.ts +577 -0
- package/bin/mint/client/src/utils/getAnalyticsConfig.ts +14 -0
- package/bin/mint/client/src/utils/getLogoHref.ts +9 -0
- package/bin/mint/client/src/utils/getOpenApiContext.ts +26 -0
- package/bin/mint/client/src/utils/importAll.ts +6 -0
- package/bin/mint/client/src/utils/isObject.ts +3 -0
- package/bin/mint/client/src/utils/kebabToTitleCase.ts +3 -0
- package/bin/mint/client/src/utils/loadImage.ts +8 -0
- package/bin/mint/client/src/utils/slugToTitle.ts +7 -0
- package/bin/mint/client/src/utils/wait.ts +5 -0
- package/bin/mint/client/tailwind.config.cjs +323 -0
- package/bin/mint/client/test/test.test.ts +5 -0
- package/bin/mint/client/tsconfig.json +36 -0
- package/bin/mint/client/yarn.lock +9702 -0
- package/bin/scraping/detectFramework.js +12 -3
- package/bin/scraping/detectFramework.js.map +1 -1
- package/bin/scraping/scrapeFileGettingFileNameFromUrl.js +2 -2
- package/bin/scraping/scrapeFileGettingFileNameFromUrl.js.map +1 -1
- package/bin/scraping/scrapeGettingFileNameFromUrl.js +3 -3
- package/bin/scraping/scrapeGettingFileNameFromUrl.js.map +1 -1
- package/bin/scraping/scrapePage.js +2 -2
- package/bin/scraping/scrapePage.js.map +1 -1
- package/bin/scraping/scrapePageCommands.js +6 -6
- package/bin/scraping/scrapePageCommands.js.map +1 -1
- package/bin/scraping/scrapeSection.js +2 -2
- package/bin/scraping/scrapeSection.js.map +1 -1
- package/bin/scraping/scrapeSectionCommands.js +8 -7
- package/bin/scraping/scrapeSectionCommands.js.map +1 -1
- package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js +36 -0
- package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js.map +1 -0
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js +38 -0
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js.map +1 -0
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js +14 -8
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js +4 -29
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeGitBookPage.js +2 -1
- package/bin/scraping/site-scrapers/scrapeGitBookPage.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js +7 -5
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeReadMePage.js +2 -1
- package/bin/scraping/site-scrapers/scrapeReadMePage.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeReadMeSection.js +3 -3
- package/bin/scraping/site-scrapers/scrapeReadMeSection.js.map +1 -1
- package/package.json +1 -1
- package/src/index.ts +0 -16
- package/src/scraping/detectFramework.ts +13 -3
- package/src/scraping/scrapeFileGettingFileNameFromUrl.ts +5 -2
- package/src/scraping/scrapeGettingFileNameFromUrl.ts +5 -1
- package/src/scraping/scrapePage.ts +6 -3
- package/src/scraping/scrapePageCommands.ts +10 -6
- package/src/scraping/scrapeSection.ts +9 -2
- package/src/scraping/scrapeSectionCommands.ts +24 -7
- package/src/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.ts +46 -0
- package/src/scraping/site-scrapers/{getLinksRecursively.ts → links-per-group/getLinksRecursively.ts} +0 -0
- package/src/scraping/site-scrapers/scrapeDocusaurusPage.ts +20 -8
- package/src/scraping/site-scrapers/scrapeDocusaurusSection.ts +9 -33
- package/src/scraping/site-scrapers/scrapeGitBookPage.ts +2 -1
- package/src/scraping/site-scrapers/scrapeGitBookSection.ts +9 -5
- package/src/scraping/site-scrapers/scrapeReadMePage.ts +2 -1
- package/src/scraping/site-scrapers/scrapeReadMeSection.ts +4 -2
- package/bin/local-preview/helper-commands/cleanCommand.js +0 -8
- package/bin/local-preview/helper-commands/cleanCommand.js.map +0 -1
|
@@ -10,15 +10,24 @@ export function detectFramework(html) {
|
|
|
10
10
|
const docusaurusMeta = $('meta[name="generator"]');
|
|
11
11
|
if (docusaurusMeta.length > 0 &&
|
|
12
12
|
docusaurusMeta.attr("content").includes("Docusaurus")) {
|
|
13
|
-
|
|
13
|
+
if (docusaurusMeta.attr("content").includes("v3")) {
|
|
14
|
+
return { framework: Frameworks.DOCUSAURUS, version: "3" };
|
|
15
|
+
}
|
|
16
|
+
if (docusaurusMeta.attr("content").includes("v2")) {
|
|
17
|
+
return { framework: Frameworks.DOCUSAURUS, version: "2" };
|
|
18
|
+
}
|
|
19
|
+
else if (docusaurusMeta.attr("content").includes("v1")) {
|
|
20
|
+
console.warn("WARNING: We detected Docusaurus version 1 but we only support scraping versions 2 and 3.");
|
|
21
|
+
return { framework: Frameworks.DOCUSAURUS, version: "1" };
|
|
22
|
+
}
|
|
14
23
|
}
|
|
15
24
|
const isGitBook = $(".gitbook-root").length > 0;
|
|
16
25
|
if (isGitBook) {
|
|
17
|
-
return Frameworks.GITBOOK;
|
|
26
|
+
return { framework: Frameworks.GITBOOK };
|
|
18
27
|
}
|
|
19
28
|
const isReadMe = $('meta[name="readme-deploy"]').length > 0;
|
|
20
29
|
if (isReadMe) {
|
|
21
|
-
return Frameworks.README;
|
|
30
|
+
return { framework: Frameworks.README };
|
|
22
31
|
}
|
|
23
32
|
return undefined;
|
|
24
33
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"detectFramework.js","sourceRoot":"","sources":["../../src/scraping/detectFramework.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,MAAM,CAAN,IAAY,UAIX;AAJD,WAAY,UAAU;IACpB,uCAAyB,CAAA;IACzB,iCAAmB,CAAA;IACnB,+BAAiB,CAAA;AACnB,CAAC,EAJW,UAAU,GAAV,UAAU,KAAV,UAAU,QAIrB;AAED,MAAM,UAAU,eAAe,CAAC,IAAI;IAClC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,cAAc,GAAG,CAAC,CAAC,wBAAwB,CAAC,CAAC;IAEnD,IACE,cAAc,CAAC,MAAM,GAAG,CAAC;QACzB,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC,EACrD;QACA,OAAO,UAAU,CAAC,UAAU,CAAC;
|
|
1
|
+
{"version":3,"file":"detectFramework.js","sourceRoot":"","sources":["../../src/scraping/detectFramework.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,MAAM,CAAN,IAAY,UAIX;AAJD,WAAY,UAAU;IACpB,uCAAyB,CAAA;IACzB,iCAAmB,CAAA;IACnB,+BAAiB,CAAA;AACnB,CAAC,EAJW,UAAU,GAAV,UAAU,KAAV,UAAU,QAIrB;AAED,MAAM,UAAU,eAAe,CAAC,IAAI;IAClC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,cAAc,GAAG,CAAC,CAAC,wBAAwB,CAAC,CAAC;IAEnD,IACE,cAAc,CAAC,MAAM,GAAG,CAAC;QACzB,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC,EACrD;QACA,IAAI,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE;YACjD,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,UAAU,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC;SAC3D;QACD,IAAI,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE;YACjD,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,UAAU,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC;SAC3D;aAAM,IAAI,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE;YACxD,OAAO,CAAC,IAAI,CACV,0FAA0F,CAC3F,CAAC;YACF,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,UAAU,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC;SAC3D;KACF;IAED,MAAM,SAAS,GAAG,CAAC,CAAC,eAAe,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAChD,IAAI,SAAS,EAAE;QACb,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,OAAO,EAAE,CAAC;KAC1C;IAED,MAAM,QAAQ,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAC5D,IAAI,QAAQ,EAAE;QACZ,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC;KACzC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC"}
|
|
@@ -2,7 +2,7 @@ import path from "path";
|
|
|
2
2
|
import axios from "axios";
|
|
3
3
|
import { getHtmlWithPuppeteer } from "../browser.js";
|
|
4
4
|
import { createPage } from "../util.js";
|
|
5
|
-
export async function scrapeFileGettingFileNameFromUrl(pathname, cliDir, origin, overwrite, scrapePageFunc, puppeteer = false, baseToRemove) {
|
|
5
|
+
export async function scrapeFileGettingFileNameFromUrl(pathname, cliDir, origin, overwrite, scrapePageFunc, puppeteer = false, version, baseToRemove) {
|
|
6
6
|
// Skip scraping external links
|
|
7
7
|
if (pathname.startsWith("https://") || pathname.startsWith("http://")) {
|
|
8
8
|
return pathname;
|
|
@@ -27,7 +27,7 @@ export async function scrapeFileGettingFileNameFromUrl(pathname, cliDir, origin,
|
|
|
27
27
|
const res = await axios.default.get(href);
|
|
28
28
|
html = res.data;
|
|
29
29
|
}
|
|
30
|
-
const { title, description, markdown } = await scrapePageFunc(html, origin, cliDir, imageBaseDir);
|
|
30
|
+
const { title, description, markdown } = await scrapePageFunc(html, origin, cliDir, imageBaseDir, version);
|
|
31
31
|
// Check if page didn't have content
|
|
32
32
|
if (!title && !markdown) {
|
|
33
33
|
return undefined;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeFileGettingFileNameFromUrl.js","sourceRoot":"","sources":["../../src/scraping/scrapeFileGettingFileNameFromUrl.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,CAAC,KAAK,UAAU,gCAAgC,CACpD,QAAgB,EAChB,MAAc,EACd,MAAc,EACd,SAAkB,EAClB,
|
|
1
|
+
{"version":3,"file":"scrapeFileGettingFileNameFromUrl.js","sourceRoot":"","sources":["../../src/scraping/scrapeFileGettingFileNameFromUrl.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,MAAM,CAAC,KAAK,UAAU,gCAAgC,CACpD,QAAgB,EAChB,MAAc,EACd,MAAc,EACd,SAAkB,EAClB,cAUE,EACF,SAAS,GAAG,KAAK,EACjB,OAA2B,EAC3B,YAAqB;IAErB,+BAA+B;IAC/B,IAAI,QAAQ,CAAC,UAAU,CAAC,UAAU,CAAC,IAAI,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE;QACrE,OAAO,QAAQ,CAAC;KACjB;IAED,iCAAiC;IACjC,MAAM,YAAY,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACzC,IAAI,OAAO,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAEvE,+BAA+B;IAC/B,IAAI,YAAY,IAAI,OAAO,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE;QACpD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;KAC7C;IAED,kEAAkE;IAClE,4CAA4C;IAC5C,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;IAE1D,8BAA8B;IAC9B,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC;IAC5C,IAAI,IAAY,CAAC;IACjB,IAAI,SAAS,EAAE;QACb,IAAI,GAAG,MAAM,oBAAoB,CAAC,IAAI,CAAC,CAAC;KACzC;SAAM;QACL,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAC1C,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;KACjB;IAED,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,GAAG,MAAM,cAAc,CAC3D,IAAI,EACJ,MAAM,EACN,MAAM,EACN,YAAY,EACZ,OAAO,CACR,CAAC;IAEF,oCAAoC;IACpC,IAAI,CAAC,KAAK,IAAI,CAAC,QAAQ,EAAE;QACvB,OAAO,SAAS,CAAC;KAClB;IAED,MAAM,eAAe,GAAG,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAEtE,2DAA2D;IAC3D,MAAM,QAAQ,GAAG,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,IAAI,cAAc,CAAC;IAEzE,mCAAmC;IACnC,UAAU,CACR,KAAK,EACL,WAAW,EACX,QAAQ,EACR,SAAS,EACT,eAAe,EACf,QAAQ,CACT,CAAC;IAEF,sEAAsE;IACtE,OAAO,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;AACxE,CAAC"}
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
import { isNavigation } from "../navigation.js";
|
|
2
2
|
import { scrapeFileGettingFileNameFromUrl } from "./scrapeFileGettingFileNameFromUrl.js";
|
|
3
|
-
export async function scrapeGettingFileNameFromUrl(navEntry, cliDir, origin, overwrite, scrapePageFunc, puppeteer = false, baseToRemove) {
|
|
3
|
+
export async function scrapeGettingFileNameFromUrl(navEntry, cliDir, origin, overwrite, scrapePageFunc, puppeteer = false, version, baseToRemove) {
|
|
4
4
|
if (isNavigation(navEntry)) {
|
|
5
5
|
const newPages = [];
|
|
6
6
|
for (const nestedNavEntry of navEntry.pages) {
|
|
7
|
-
newPages.push(await scrapeGettingFileNameFromUrl(nestedNavEntry, cliDir, origin, overwrite, scrapePageFunc, puppeteer, baseToRemove));
|
|
7
|
+
newPages.push(await scrapeGettingFileNameFromUrl(nestedNavEntry, cliDir, origin, overwrite, scrapePageFunc, puppeteer, version, baseToRemove));
|
|
8
8
|
}
|
|
9
9
|
navEntry.pages = newPages;
|
|
10
10
|
return navEntry;
|
|
11
11
|
}
|
|
12
|
-
return await scrapeFileGettingFileNameFromUrl(navEntry, cliDir, origin, overwrite, scrapePageFunc, puppeteer, baseToRemove);
|
|
12
|
+
return await scrapeFileGettingFileNameFromUrl(navEntry, cliDir, origin, overwrite, scrapePageFunc, puppeteer, version, baseToRemove);
|
|
13
13
|
}
|
|
14
14
|
//# sourceMappingURL=scrapeGettingFileNameFromUrl.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeGettingFileNameFromUrl.js","sourceRoot":"","sources":["../../src/scraping/scrapeGettingFileNameFromUrl.ts"],"names":[],"mappings":"AAAA,OAAO,EAAmB,YAAY,EAAE,MAAM,kBAAkB,CAAC;AACjE,OAAO,EAAE,gCAAgC,EAAE,MAAM,uCAAuC,CAAC;AAEzF,MAAM,CAAC,KAAK,UAAU,4BAA4B,CAChD,QAAyB,EACzB,MAAc,EACd,MAAc,EACd,SAAkB,EAClB,
|
|
1
|
+
{"version":3,"file":"scrapeGettingFileNameFromUrl.js","sourceRoot":"","sources":["../../src/scraping/scrapeGettingFileNameFromUrl.ts"],"names":[],"mappings":"AAAA,OAAO,EAAmB,YAAY,EAAE,MAAM,kBAAkB,CAAC;AACjE,OAAO,EAAE,gCAAgC,EAAE,MAAM,uCAAuC,CAAC;AAEzF,MAAM,CAAC,KAAK,UAAU,4BAA4B,CAChD,QAAyB,EACzB,MAAc,EACd,MAAc,EACd,SAAkB,EAClB,cAUE,EACF,SAAS,GAAG,KAAK,EACjB,OAA2B,EAC3B,YAAqB;IAErB,IAAI,YAAY,CAAC,QAAQ,CAAC,EAAE;QAC1B,MAAM,QAAQ,GAAG,EAAE,CAAC;QACpB,KAAK,MAAM,cAAc,IAAI,QAAQ,CAAC,KAAK,EAAE;YAC3C,QAAQ,CAAC,IAAI,CACX,MAAM,4BAA4B,CAChC,cAAc,EACd,MAAM,EACN,MAAM,EACN,SAAS,EACT,cAAc,EACd,SAAS,EACT,OAAO,EACP,YAAY,CACb,CACF,CAAC;SACH;QACD,QAAQ,CAAC,KAAK,GAAG,QAAQ,CAAC;QAC1B,OAAO,QAAQ,CAAC;KACjB;IAED,OAAO,MAAM,gCAAgC,CAC3C,QAAQ,EACR,MAAM,EACN,MAAM,EACN,SAAS,EACT,cAAc,EACd,SAAS,EACT,OAAO,EACP,YAAY,CACb,CAAC;AACJ,CAAC"}
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import path from "path";
|
|
2
2
|
import { createPage, getOrigin } from "../util.js";
|
|
3
|
-
export async function scrapePage(scrapeFunc, href, html, overwrite) {
|
|
3
|
+
export async function scrapePage(scrapeFunc, href, html, overwrite, version) {
|
|
4
4
|
const origin = getOrigin(href);
|
|
5
5
|
const imageBaseDir = path.join(process.cwd(), "images");
|
|
6
|
-
const { title, description, markdown } = await scrapeFunc(html, origin, process.cwd(), imageBaseDir);
|
|
6
|
+
const { title, description, markdown } = await scrapeFunc(html, origin, process.cwd(), imageBaseDir, version);
|
|
7
7
|
createPage(title, description, markdown, overwrite, process.cwd());
|
|
8
8
|
}
|
|
9
9
|
//# sourceMappingURL=scrapePage.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapePage.js","sourceRoot":"","sources":["../../src/scraping/scrapePage.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAEnD,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,
|
|
1
|
+
{"version":3,"file":"scrapePage.js","sourceRoot":"","sources":["../../src/scraping/scrapePage.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAEnD,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,UAMiB,EACjB,IAAY,EACZ,IAAY,EACZ,SAAkB,EAClB,OAA2B;IAE3B,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,QAAQ,CAAC,CAAC;IACxD,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,GAAG,MAAM,UAAU,CACvD,IAAI,EACJ,MAAM,EACN,OAAO,CAAC,GAAG,EAAE,EACb,YAAY,EACZ,OAAO,CACR,CAAC;IACF,UAAU,CAAC,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,SAAS,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;AACrE,CAAC"}
|
|
@@ -15,31 +15,31 @@ function validateFramework(framework) {
|
|
|
15
15
|
return process.exit(1);
|
|
16
16
|
}
|
|
17
17
|
}
|
|
18
|
-
export async function scrapePageWrapper(argv, scrapeFunc,
|
|
18
|
+
export async function scrapePageWrapper(argv, scrapeFunc, options) {
|
|
19
19
|
const href = getHrefFromArgs(argv);
|
|
20
20
|
let html;
|
|
21
|
-
if (puppeteer) {
|
|
21
|
+
if (options.puppeteer) {
|
|
22
22
|
html = await getHtmlWithPuppeteer(href);
|
|
23
23
|
}
|
|
24
24
|
else {
|
|
25
25
|
const res = await axios.default.get(href);
|
|
26
26
|
html = res.data;
|
|
27
27
|
}
|
|
28
|
-
await scrapePage(scrapeFunc, href, html, argv.overwrite);
|
|
28
|
+
await scrapePage(scrapeFunc, href, html, argv.overwrite, options.version);
|
|
29
29
|
process.exit(0);
|
|
30
30
|
}
|
|
31
31
|
export async function scrapePageAutomatically(argv) {
|
|
32
32
|
const href = getHrefFromArgs(argv);
|
|
33
33
|
const res = await axios.default.get(href);
|
|
34
34
|
const html = res.data;
|
|
35
|
-
const framework = detectFramework(html);
|
|
35
|
+
const { framework, version } = detectFramework(html);
|
|
36
36
|
validateFramework(framework);
|
|
37
37
|
console.log("Detected framework: " + framework);
|
|
38
38
|
if (framework === Frameworks.DOCUSAURUS) {
|
|
39
|
-
await scrapePageWrapper(argv, scrapeDocusaurusPage);
|
|
39
|
+
await scrapePageWrapper(argv, scrapeDocusaurusPage, { version });
|
|
40
40
|
}
|
|
41
41
|
else if (framework === Frameworks.GITBOOK) {
|
|
42
|
-
await scrapePageWrapper(argv, scrapeGitBookPage, true);
|
|
42
|
+
await scrapePageWrapper(argv, scrapeGitBookPage, { puppeteer: true });
|
|
43
43
|
}
|
|
44
44
|
else if (framework === Frameworks.README) {
|
|
45
45
|
await scrapePageWrapper(argv, scrapeReadMePage);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapePageCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapePageCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,oBAAoB,EAAE,MAAM,yCAAyC,CAAC;AAC/E,OAAO,EAAE,iBAAiB,EAAE,MAAM,sCAAsC,CAAC;AACzE,OAAO,EAAE,gBAAgB,EAAE,MAAM,qCAAqC,CAAC;AACvE,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AAErD,SAAS,iBAAiB,CAAC,SAAS;IAClC,IAAI,CAAC,SAAS,EAAE;QACd,OAAO,CAAC,GAAG,CACT,kEAAkE,CACnE,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;QAClC,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;KACxB;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,iBAAiB,
|
|
1
|
+
{"version":3,"file":"scrapePageCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapePageCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,oBAAoB,EAAE,MAAM,yCAAyC,CAAC;AAC/E,OAAO,EAAE,iBAAiB,EAAE,MAAM,sCAAsC,CAAC;AACzE,OAAO,EAAE,gBAAgB,EAAE,MAAM,qCAAqC,CAAC;AACvE,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AAErD,SAAS,iBAAiB,CAAC,SAAS;IAClC,IAAI,CAAC,SAAS,EAAE;QACd,OAAO,CAAC,GAAG,CACT,kEAAkE,CACnE,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;QAClC,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;KACxB;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,IAAS,EACT,UAAe,EACf,OAAmD;IAEnD,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,IAAI,IAAY,CAAC;IACjB,IAAI,OAAO,CAAC,SAAS,EAAE;QACrB,IAAI,GAAG,MAAM,oBAAoB,CAAC,IAAI,CAAC,CAAC;KACzC;SAAM;QACL,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAC1C,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;KACjB;IACD,MAAM,UAAU,CAAC,UAAU,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;IAC1E,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAAC,IAAS;IACrD,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IAErD,iBAAiB,CAAC,SAAS,CAAC,CAAC;IAE7B,OAAO,CAAC,GAAG,CAAC,sBAAsB,GAAG,SAAS,CAAC,CAAC;IAEhD,IAAI,SAAS,KAAK,UAAU,CAAC,UAAU,EAAE;QACvC,MAAM,iBAAiB,CAAC,IAAI,EAAE,oBAAoB,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;KAClE;SAAM,IAAI,SAAS,KAAK,UAAU,CAAC,OAAO,EAAE;QAC3C,MAAM,iBAAiB,CAAC,IAAI,EAAE,iBAAiB,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;KACvE;SAAM,IAAI,SAAS,KAAK,UAAU,CAAC,MAAM,EAAE;QAC1C,MAAM,iBAAiB,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAAC;KACjD;AACH,CAAC"}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { objToReadableString } from "../util.js";
|
|
2
|
-
export async function scrapeSection(scrapeFunc, html, origin, overwrite) {
|
|
2
|
+
export async function scrapeSection(scrapeFunc, html, origin, overwrite, version) {
|
|
3
3
|
console.log(`Started scraping${overwrite ? ", overwrite mode is on" : ""}...`);
|
|
4
|
-
const groupsConfig = await scrapeFunc(html, origin, process.cwd(), overwrite);
|
|
4
|
+
const groupsConfig = await scrapeFunc(html, origin, process.cwd(), overwrite, version);
|
|
5
5
|
console.log("Finished scraping.");
|
|
6
6
|
console.log("Add the following to your navigation in mint.json:");
|
|
7
7
|
console.log(objToReadableString(groupsConfig));
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeSection.js","sourceRoot":"","sources":["../../src/scraping/scrapeSection.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC;AAEjD,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,UAAe,EACf,IAAY,EACZ,MAAc,EACd,SAAkB;
|
|
1
|
+
{"version":3,"file":"scrapeSection.js","sourceRoot":"","sources":["../../src/scraping/scrapeSection.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC;AAEjD,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,UAAe,EACf,IAAY,EACZ,MAAc,EACd,SAAkB,EAClB,OAA2B;IAE3B,OAAO,CAAC,GAAG,CACT,mBAAmB,SAAS,CAAC,CAAC,CAAC,wBAAwB,CAAC,CAAC,CAAC,EAAE,KAAK,CAClE,CAAC;IACF,MAAM,YAAY,GAAG,MAAM,UAAU,CACnC,IAAI,EACJ,MAAM,EACN,OAAO,CAAC,GAAG,EAAE,EACb,SAAS,EACT,OAAO,CACR,CAAC;IACF,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;IAClC,OAAO,CAAC,GAAG,CAAC,oDAAoD,CAAC,CAAC;IAClE,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,YAAY,CAAC,CAAC,CAAC;AACjD,CAAC"}
|
|
@@ -12,16 +12,17 @@ export async function scrapeSectionAxiosWrapper(argv, scrapeFunc) {
|
|
|
12
12
|
const href = getHrefFromArgs(argv);
|
|
13
13
|
const res = await axios.default.get(href);
|
|
14
14
|
const html = res.data;
|
|
15
|
-
await scrapeSection(scrapeFunc, html, getOrigin(href), argv.overwrite);
|
|
15
|
+
await scrapeSection(scrapeFunc, html, getOrigin(href), argv.overwrite, undefined);
|
|
16
16
|
process.exit(0);
|
|
17
17
|
}
|
|
18
|
-
export async function scrapeDocusaurusSectionCommand(argv
|
|
19
|
-
|
|
18
|
+
export async function scrapeDocusaurusSectionCommand(argv, version // "1" | "2" | "3"
|
|
19
|
+
) {
|
|
20
|
+
await scrapeSectionOpeningAllNested(argv, openNestedDocusaurusMenus, scrapeDocusaurusSection, version);
|
|
20
21
|
}
|
|
21
22
|
export async function scrapeGitbookSectionCommand(argv) {
|
|
22
23
|
await scrapeSectionOpeningAllNested(argv, openNestedGitbookMenus, scrapeGitBookSection);
|
|
23
24
|
}
|
|
24
|
-
async function scrapeSectionOpeningAllNested(argv, openLinks, scrapeFunc) {
|
|
25
|
+
async function scrapeSectionOpeningAllNested(argv, openLinks, scrapeFunc, version) {
|
|
25
26
|
const href = getHrefFromArgs(argv);
|
|
26
27
|
const browser = await startBrowser();
|
|
27
28
|
const page = await browser.newPage();
|
|
@@ -30,18 +31,18 @@ async function scrapeSectionOpeningAllNested(argv, openLinks, scrapeFunc) {
|
|
|
30
31
|
});
|
|
31
32
|
const html = await openLinks(page);
|
|
32
33
|
browser.close();
|
|
33
|
-
await scrapeSection(scrapeFunc, html, getOrigin(href), argv.overwrite);
|
|
34
|
+
await scrapeSection(scrapeFunc, html, getOrigin(href), argv.overwrite, version);
|
|
34
35
|
process.exit(0);
|
|
35
36
|
}
|
|
36
37
|
export async function scrapeSectionAutomatically(argv) {
|
|
37
38
|
const href = getHrefFromArgs(argv);
|
|
38
39
|
const res = await axios.default.get(href);
|
|
39
40
|
const html = res.data;
|
|
40
|
-
const framework = detectFramework(html);
|
|
41
|
+
const { framework, version } = detectFramework(html);
|
|
41
42
|
validateFramework(framework);
|
|
42
43
|
console.log("Detected framework: " + framework);
|
|
43
44
|
if (framework === Frameworks.DOCUSAURUS) {
|
|
44
|
-
await scrapeDocusaurusSectionCommand(argv);
|
|
45
|
+
await scrapeDocusaurusSectionCommand(argv, version);
|
|
45
46
|
}
|
|
46
47
|
else if (framework === Frameworks.GITBOOK) {
|
|
47
48
|
await scrapeGitbookSectionCommand(argv);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeSectionCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapeSectionCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACxD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,uBAAuB,EAAE,MAAM,4CAA4C,CAAC;AACrF,OAAO,yBAAyB,MAAM,8CAA8C,CAAC;AACrF,OAAO,EAAE,oBAAoB,EAAE,MAAM,yCAAyC,CAAC;AAC/E,OAAO,sBAAsB,MAAM,2CAA2C,CAAC;AAC/E,OAAO,EAAE,mBAAmB,EAAE,MAAM,wCAAwC,CAAC;AAC7E,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAE7C,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAAC,IAAS,EAAE,UAAe;IACxE,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,MAAM,aAAa,
|
|
1
|
+
{"version":3,"file":"scrapeSectionCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapeSectionCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACxD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,uBAAuB,EAAE,MAAM,4CAA4C,CAAC;AACrF,OAAO,yBAAyB,MAAM,8CAA8C,CAAC;AACrF,OAAO,EAAE,oBAAoB,EAAE,MAAM,yCAAyC,CAAC;AAC/E,OAAO,sBAAsB,MAAM,2CAA2C,CAAC;AAC/E,OAAO,EAAE,mBAAmB,EAAE,MAAM,wCAAwC,CAAC;AAC7E,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAE7C,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAAC,IAAS,EAAE,UAAe;IACxE,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,MAAM,aAAa,CACjB,UAAU,EACV,IAAI,EACJ,SAAS,CAAC,IAAI,CAAC,EACf,IAAI,CAAC,SAAS,EACd,SAAS,CACV,CAAC;IACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,8BAA8B,CAClD,IAAS,EACT,OAAe,CAAC,kBAAkB;;IAElC,MAAM,6BAA6B,CACjC,IAAI,EACJ,yBAAyB,EACzB,uBAAuB,EACvB,OAAO,CACR,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,IAAS;IACzD,MAAM,6BAA6B,CACjC,IAAI,EACJ,sBAAsB,EACtB,oBAAoB,CACrB,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,6BAA6B,CAC1C,IAAS,EACT,SAAc,EACd,UAAe,EACf,OAAgB;IAEhB,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IAEnC,MAAM,OAAO,GAAG,MAAM,YAAY,EAAE,CAAC;IACrC,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;IACrC,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;QACpB,SAAS,EAAE,cAAc;KAC1B,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,CAAC;IACnC,OAAO,CAAC,KAAK,EAAE,CAAC;IAChB,MAAM,aAAa,CACjB,UAAU,EACV,IAAI,EACJ,SAAS,CAAC,IAAI,CAAC,EACf,IAAI,CAAC,SAAS,EACd,OAAO,CACR,CAAC;IACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAAC,IAAS;IACxD,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IAErD,iBAAiB,CAAC,SAA
S,CAAC,CAAC;IAE7B,OAAO,CAAC,GAAG,CAAC,sBAAsB,GAAG,SAAS,CAAC,CAAC;IAEhD,IAAI,SAAS,KAAK,UAAU,CAAC,UAAU,EAAE;QACvC,MAAM,8BAA8B,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;KACrD;SAAM,IAAI,SAAS,KAAK,UAAU,CAAC,OAAO,EAAE;QAC3C,MAAM,2BAA2B,CAAC,IAAI,CAAC,CAAC;KACzC;SAAM,IAAI,SAAS,KAAK,UAAU,CAAC,MAAM,EAAE;QAC1C,MAAM,yBAAyB,CAAC,IAAI,EAAE,mBAAmB,CAAC,CAAC;KAC5D;AACH,CAAC;AAED,SAAS,iBAAiB,CAAC,SAAiC;IAC1D,IAAI,CAAC,SAAS,EAAE;QACd,OAAO,CAAC,GAAG,CACT,kEAAkE,CACnE,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;QAClC,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;KACxB;AACH,CAAC"}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import alternateGroupTitle from "../alternateGroupTitle.js";
|
|
2
|
+
import getLinksRecursively from "./getLinksRecursively.js";
|
|
3
|
+
/**
 * Builds the list of navigation groups for a Docusaurus sidebar.
 *
 * Versions "2" and "3" share one implementation
 * (getDocusaurusLinksPerGroupLoop). Every other version value returns an
 * empty array.
 *
 * @param navigationSections - Collection of top-level sidebar items, passed through to the loop helper.
 * @param $ - Selector function used to wrap each item (presumably a cheerio instance).
 * @param version - Docusaurus version string; only "2" and "3" produce results.
 * @returns Array of { group, pages } objects, or [] for unsupported versions.
 */
export function getDocusaurusLinksPerGroup(navigationSections, $, version) {
|
|
4
|
+
if (version === "3" || version === "2") {
|
|
5
|
+
return getDocusaurusLinksPerGroupLoop(navigationSections, $);
|
|
6
|
+
}
|
|
7
|
+
// Any other version value (including undefined) yields no groups.
return [];
|
|
8
|
+
}
|
|
9
|
+
/**
 * Converts each top-level sidebar item into a { group, pages } object.
 *
 * An item with the "theme-doc-sidebar-item-link" class becomes a single-page
 * entry with an empty group title. Any other item is treated as a group whose
 * pages are collected with getLinksRecursively.
 *
 * @param navigationSections - Collection of top-level sidebar items; must support .map((index, element) => ...) and .toArray().
 * @param $ - Selector function used to wrap each element (presumably a cheerio instance).
 * @returns Array of { group, pages } objects, one per sidebar item.
 */
function getDocusaurusLinksPerGroupLoop(navigationSections, $) {
|
|
10
|
+
return navigationSections
|
|
11
|
+
.map((i, s) => {
|
|
12
|
+
const section = $(s);
|
|
13
|
+
// Top-level link that is not inside a group: emit it under an empty group title
|
|
14
|
+
if (section.hasClass("theme-doc-sidebar-item-link")) {
|
|
15
|
+
const linkHref = section.find("a[href]").first().attr("href");
|
|
16
|
+
return {
|
|
17
|
+
group: "",
|
|
18
|
+
pages: [linkHref],
|
|
19
|
+
};
|
|
20
|
+
}
|
|
21
|
+
// The link inside the first collapsible menu item supplies the group title and href
const firstLink = section
|
|
22
|
+
.find(".menu__list-item-collapsible")
|
|
23
|
+
.first()
|
|
24
|
+
.find("a[href]");
|
|
25
|
+
const sectionTitle = firstLink.text();
|
|
26
|
+
const firstHref = firstLink.attr("href");
|
|
27
|
+
// The children of the section's second child are the nested entries to walk
const linkSections = section.children().eq(1).children();
|
|
28
|
+
const pages = getLinksRecursively(linkSections, $);
|
|
29
|
+
return {
|
|
30
|
+
// Fall back to alternateGroupTitle when the link text is empty
group: sectionTitle || alternateGroupTitle(firstLink, pages),
|
|
31
|
+
// When the group header itself has an href, list it as the first page
pages: firstHref ? [firstHref, ...pages] : pages,
|
|
32
|
+
};
|
|
33
|
+
})
|
|
34
|
+
.toArray();
|
|
35
|
+
}
|
|
36
|
+
//# sourceMappingURL=getDocusaurusLinksPerGroup.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"getDocusaurusLinksPerGroup.js","sourceRoot":"","sources":["../../../../src/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.ts"],"names":[],"mappings":"AAAA,OAAO,mBAAmB,MAAM,2BAA2B,CAAC;AAC5D,OAAO,mBAAmB,MAAM,0BAA0B,CAAC;AAE3D,MAAM,UAAU,0BAA0B,CACxC,kBAAuB,EACvB,CAAM,EACN,OAA2B;IAE3B,IAAI,OAAO,KAAK,GAAG,IAAI,OAAO,KAAK,GAAG,EAAE;QACtC,OAAO,8BAA8B,CAAC,kBAAkB,EAAE,CAAC,CAAC,CAAC;KAC9D;IACD,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,SAAS,8BAA8B,CAAC,kBAAuB,EAAE,CAAM;IACrE,OAAO,kBAAkB;SACtB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACZ,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAErB,wBAAwB;QACxB,IAAI,OAAO,CAAC,QAAQ,CAAC,6BAA6B,CAAC,EAAE;YACnD,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC9D,OAAO;gBACL,KAAK,EAAE,EAAE;gBACT,KAAK,EAAE,CAAC,QAAQ,CAAC;aAClB,CAAC;SACH;QAED,MAAM,SAAS,GAAG,OAAO;aACtB,IAAI,CAAC,8BAA8B,CAAC;aACpC,KAAK,EAAE;aACP,IAAI,CAAC,SAAS,CAAC,CAAC;QAEnB,MAAM,YAAY,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;QACtC,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACzC,MAAM,YAAY,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;QAEzD,MAAM,KAAK,GAAG,mBAAmB,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC;QAEnD,OAAO;YACL,KAAK,EAAE,YAAY,IAAI,mBAAmB,CAAC,SAAS,EAAE,KAAK,CAAC;YAC5D,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK;SACjD,CAAC;IACJ,CAAC,CAAC;SACD,OAAO,EAAE,CAAC;AACf,CAAC"}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
// Used by Docusaurus, GitBook, and ReadMe section scrapers
|
|
2
|
+
/**
 * Walks a collection of navigation entries and returns their links.
 *
 * Each entry yields its href string. An entry with nested children instead
 * yields a { group, pages } object, where pages starts with the entry's own
 * href followed by the recursively collected child links.
 *
 * Entries with no href, an href of "#", or an href starting with "http://"
 * or "https://" are skipped.
 *
 * @param linkSections - Collection of navigation entries; null, undefined, or empty returns [].
 * @param $ - Selector function used to wrap each element (presumably a cheerio instance).
 * @returns Array of href strings and nested { group, pages } objects.
 */
export default function getLinksRecursively(linkSections, $) {
|
|
3
|
+
if (linkSections == null || linkSections.length === 0) {
|
|
4
|
+
return [];
|
|
5
|
+
}
|
|
6
|
+
return linkSections
|
|
7
|
+
.map((i, s) => {
|
|
8
|
+
const subsection = $(s);
|
|
9
|
+
let link = subsection.children().first();
|
|
10
|
+
if (!link.attr("href")) {
|
|
11
|
+
// Docusaurus nests the <a> inside a <div>
|
|
12
|
+
link = link.find("a[href]").first();
|
|
13
|
+
}
|
|
14
|
+
const linkHref = link.attr("href");
|
|
15
|
+
// Skip missing links. For example, GitBook uses
|
|
16
|
+
// empty divs for styling a line beside the nav.
|
|
17
|
+
// Skip external links until Mintlify supports them
|
|
18
|
+
if (!linkHref ||
|
|
19
|
+
linkHref === "#" ||
|
|
20
|
+
linkHref.startsWith("https://") ||
|
|
21
|
+
linkHref.startsWith("http://")) {
|
|
22
|
+
return undefined;
|
|
23
|
+
}
|
|
24
|
+
// The children of the entry's second child are its nested links, if any
const childLinks = subsection.children().eq(1).children();
|
|
25
|
+
if (childLinks.length > 0) {
|
|
26
|
+
// Put the section link in the list of pages.
|
|
27
|
+
// When we support the section itself being a link we should update this
|
|
28
|
+
return {
|
|
29
|
+
group: link.text(),
|
|
30
|
+
pages: [linkHref, ...getLinksRecursively(childLinks, $)],
|
|
31
|
+
};
|
|
32
|
+
}
|
|
33
|
+
return linkHref;
|
|
34
|
+
})
|
|
35
|
+
.toArray()
|
|
36
|
+
// Drop any falsy entries left over from skipped links
.filter(Boolean);
|
|
37
|
+
}
|
|
38
|
+
//# sourceMappingURL=getLinksRecursively.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"getLinksRecursively.js","sourceRoot":"","sources":["../../../../src/scraping/site-scrapers/links-per-group/getLinksRecursively.ts"],"names":[],"mappings":"AAAA,2DAA2D;AAC3D,MAAM,CAAC,OAAO,UAAU,mBAAmB,CAAC,YAAiB,EAAE,CAAM;IACnE,IAAI,YAAY,IAAI,IAAI,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE;QACrD,OAAO,EAAE,CAAC;KACX;IAED,OAAO,YAAY;SAChB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACZ,MAAM,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACxB,IAAI,IAAI,GAAG,UAAU,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,CAAC;QAEzC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE;YACtB,0CAA0C;YAC1C,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC;SACrC;QACD,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEnC,gDAAgD;QAChD,yDAAyD;QACzD,mDAAmD;QACnD,IACE,CAAC,QAAQ;YACT,QAAQ,KAAK,GAAG;YAChB,QAAQ,CAAC,UAAU,CAAC,UAAU,CAAC;YAC/B,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,EAC9B;YACA,OAAO,SAAS,CAAC;SAClB;QAED,MAAM,UAAU,GAAG,UAAU,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;QAE1D,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE;YACzB,6CAA6C;YAC7C,wEAAwE;YACxE,OAAO;gBACL,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE;gBAClB,KAAK,EAAE,CAAC,QAAQ,EAAE,GAAG,mBAAmB,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;aACzD,CAAC;SACH;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC,CAAC;SACD,OAAO,EAAE;SACT,MAAM,CAAC,OAAO,CAAC,CAAC;AACrB,CAAC"}
|
|
@@ -2,21 +2,27 @@ import cheerio from "cheerio";
|
|
|
2
2
|
import { NodeHtmlMarkdown } from "node-html-markdown";
|
|
3
3
|
import downloadAllImages from "../downloadAllImages.js";
|
|
4
4
|
import replaceImagePaths from "../replaceImagePaths.js";
|
|
5
|
-
export async function scrapeDocusaurusPage(html, origin, cliDir, imageBaseDir
|
|
5
|
+
export async function scrapeDocusaurusPage(html, origin, cliDir, imageBaseDir, version // expects "2", or "3". Have not written support for "1" yet
|
|
6
|
+
) {
|
|
6
7
|
const $ = cheerio.load(html);
|
|
7
|
-
const
|
|
8
|
-
|
|
9
|
-
|
|
8
|
+
const article = version === "3" ? $(".theme-doc-markdown").first() : $("article").first();
|
|
9
|
+
if (article.length === 0) {
|
|
10
|
+
// Index pages with no additional text don't have the markdown class
|
|
10
11
|
return {};
|
|
11
12
|
}
|
|
12
|
-
const titleComponent =
|
|
13
|
+
const titleComponent = article.find("h1");
|
|
13
14
|
const title = titleComponent.text().trim();
|
|
14
15
|
// Do not include title in the content when we insert it in our metadata
|
|
15
16
|
titleComponent.remove();
|
|
16
|
-
const
|
|
17
|
-
const
|
|
17
|
+
const markdownContent = version === "3" ? article : article.find(".markdown").first();
|
|
18
|
+
const origToWritePath = await downloadAllImages($, markdownContent, origin, imageBaseDir);
|
|
19
|
+
const markdownHtml = markdownContent.html();
|
|
18
20
|
const nhm = new NodeHtmlMarkdown();
|
|
19
|
-
let markdown = nhm.translate(
|
|
21
|
+
let markdown = nhm.translate(markdownHtml);
|
|
22
|
+
if (markdown == null) {
|
|
23
|
+
console.error("We do not support scraping this page. Content will be empty");
|
|
24
|
+
return { title, description: null, markdown: "" };
|
|
25
|
+
}
|
|
20
26
|
// Description only exists in meta tags. The code is commented out because it's prone to incorrectly
|
|
21
27
|
// including a description if the first line of text had markdown annotations like `.
|
|
22
28
|
// The commented out alternative is to ignore description if it's the first line of text,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeDocusaurusPage.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeDocusaurusPage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB
|
|
1
|
+
{"version":3,"file":"scrapeDocusaurusPage.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeDocusaurusPage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,OAA2B,CAAC,4DAA4D;;IAExF,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,OAAO,GACX,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC;IAE5E,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE;QACxB,oEAAoE;QACpE,OAAO,EAAE,CAAC;KACX;IAED,MAAM,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAE3C,wEAAwE;IACxE,cAAc,CAAC,MAAM,EAAE,CAAC;IAExB,MAAM,eAAe,GACnB,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,KAAK,EAAE,CAAC;IAEhE,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,eAAe,EACf,MAAM,EACN,YAAY,CACb,CAAC;IAEF,MAAM,YAAY,GAAG,eAAe,CAAC,IAAI,EAAE,CAAC;IAE5C,MAAM,GAAG,GAAG,IAAI,gBAAgB,EAAE,CAAC;IACnC,IAAI,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;IAE3C,IAAI,QAAQ,IAAI,IAAI,EAAE;QACpB,OAAO,CAAC,KAAK,CACX,6DAA6D,CAC9D,CAAC;QACF,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;KACnD;IAED,mGAAmG;IACnG,qFAAqF;IACrF,yFAAyF;IACzF,kFAAkF;IAClF,MAAM,WAAW,GAAG,IAAI,CAAC;IACzB,0EAA0E;IAC1E,0CAA0C;IAC1C,wBAAwB;IACxB,IAAI;IAEJ,uCAAuC;IACvC,wDAAwD;IACxD,sCAAsC;IACtC,6BAA6B;IAC7B,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,qBAAqB,EAAE,IAAI,CAAC,CAAC;IAEzD,qDAAqD;IACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAE3C,iCAAiC;IACjC,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,gEAAgE;IAChE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;IAEjE,QAAQ,GAAG,iBAAiB,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAEhE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC"}
|
|
@@ -2,38 +2,13 @@ import cheerio from "cheerio";
|
|
|
2
2
|
import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js";
|
|
3
3
|
import combineNavWithEmptyGroupTitles from "../combineNavWithEmptyGroupTitles.js";
|
|
4
4
|
import { scrapeDocusaurusPage } from "./scrapeDocusaurusPage.js";
|
|
5
|
-
import
|
|
6
|
-
|
|
7
|
-
export async function scrapeDocusaurusSection(html, origin, cliDir, overwrite) {
|
|
5
|
+
import { getDocusaurusLinksPerGroup } from "./links-per-group/getDocusaurusLinksPerGroup.js";
|
|
6
|
+
export async function scrapeDocusaurusSection(html, origin, cliDir, overwrite, version) {
|
|
8
7
|
const $ = cheerio.load(html);
|
|
9
8
|
// Get all the navigation sections
|
|
10
9
|
const navigationSections = $(".theme-doc-sidebar-menu").first().children();
|
|
11
10
|
// Get all links per group
|
|
12
|
-
const groupsConfig = navigationSections
|
|
13
|
-
.map((i, s) => {
|
|
14
|
-
const section = $(s);
|
|
15
|
-
// Links without a group
|
|
16
|
-
if (section.hasClass("theme-doc-sidebar-item-link")) {
|
|
17
|
-
const linkHref = section.find("a[href]").first().attr("href");
|
|
18
|
-
return {
|
|
19
|
-
group: "",
|
|
20
|
-
pages: [linkHref],
|
|
21
|
-
};
|
|
22
|
-
}
|
|
23
|
-
const firstLink = section
|
|
24
|
-
.find(".menu__list-item-collapsible")
|
|
25
|
-
.first()
|
|
26
|
-
.find("a[href]");
|
|
27
|
-
const sectionTitle = firstLink.text();
|
|
28
|
-
const firstHref = firstLink.attr("href");
|
|
29
|
-
const linkSections = section.children().eq(1).children();
|
|
30
|
-
const pages = getLinksRecursively(linkSections, $);
|
|
31
|
-
return {
|
|
32
|
-
group: sectionTitle || alternateGroupTitle(firstLink, pages),
|
|
33
|
-
pages: firstHref ? [firstHref, ...pages] : pages,
|
|
34
|
-
};
|
|
35
|
-
})
|
|
36
|
-
.toArray();
|
|
11
|
+
const groupsConfig = getDocusaurusLinksPerGroup(navigationSections, $, version);
|
|
37
12
|
// Merge groups with empty titles together
|
|
38
13
|
const reducedGroupsConfig = combineNavWithEmptyGroupTitles(groupsConfig);
|
|
39
14
|
// Scrape each link in the navigation.
|
|
@@ -41,7 +16,7 @@ export async function scrapeDocusaurusSection(html, origin, cliDir, overwrite) {
|
|
|
41
16
|
groupConfig.pages = (await Promise.all(groupConfig.pages.map(async (navEntry) =>
|
|
42
17
|
// Docusaurus requires a directory on all sections whereas we use root.
|
|
43
18
|
// /docs is their default directory so we remove it
|
|
44
|
-
scrapeGettingFileNameFromUrl(navEntry, cliDir, origin, overwrite, scrapeDocusaurusPage, false, "/docs"))))
|
|
19
|
+
scrapeGettingFileNameFromUrl(navEntry, cliDir, origin, overwrite, scrapeDocusaurusPage, false, version, "/docs"))))
|
|
45
20
|
// Remove skipped index pages (they return undefined from the above function)
|
|
46
21
|
.filter(Boolean);
|
|
47
22
|
return groupConfig;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeDocusaurusSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeDocusaurusSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,8BAA8B,MAAM,sCAAsC,CAAC;AAClF,OAAO,EAAE,oBAAoB,EAAE,MAAM,2BAA2B,CAAC;AACjE,OAAO,
|
|
1
|
+
{"version":3,"file":"scrapeDocusaurusSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeDocusaurusSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,8BAA8B,MAAM,sCAAsC,CAAC;AAClF,OAAO,EAAE,oBAAoB,EAAE,MAAM,2BAA2B,CAAC;AACjE,OAAO,EAAE,0BAA0B,EAAE,MAAM,iDAAiD,CAAC;AAE7F,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAC3C,IAAY,EACZ,MAAc,EACd,MAAc,EACd,SAAkB,EAClB,OAAe;IAEf,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,kCAAkC;IAClC,MAAM,kBAAkB,GAAG,CAAC,CAAC,yBAAyB,CAAC,CAAC,KAAK,EAAE,CAAC,QAAQ,EAAE,CAAC;IAE3E,0BAA0B;IAC1B,MAAM,YAAY,GAAG,0BAA0B,CAC7C,kBAAkB,EAClB,CAAC,EACD,OAAO,CACR,CAAC;IAEF,0CAA0C;IAC1C,MAAM,mBAAmB,GAAG,8BAA8B,CAAC,YAAY,CAAC,CAAC;IAEzE,sCAAsC;IACtC,MAAM,sBAAsB,GAAG,MAAM,OAAO,CAAC,GAAG,CAC9C,mBAAmB,CAAC,GAAG,CAAC,KAAK,EAAE,WAAW,EAAE,EAAE;QAC5C,WAAW,CAAC,KAAK,GAAG,CAClB,MAAM,OAAO,CAAC,GAAG,CACf,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,QAAyB,EAAE,EAAE;QACxD,sEAAsE;QACtE,mDAAmD;QACnD,4BAA4B,CAC1B,QAAQ,EACR,MAAM,EACN,MAAM,EACN,SAAS,EACT,oBAAoB,EACpB,KAAK,EACL,OAAO,EACP,OAAO,CACR,CACF,CACF,CACF;YACC,6EAA6E;aAC5E,MAAM,CAAC,OAAO,CAAC,CAAC;QACnB,OAAO,WAAW,CAAC;IACrB,CAAC,CAAC,CACH,CAAC;IAEF,OAAO,sBAAsB,CAAC;AAChC,CAAC"}
|
|
@@ -2,7 +2,8 @@ import cheerio from "cheerio";
|
|
|
2
2
|
import { NodeHtmlMarkdown } from "node-html-markdown";
|
|
3
3
|
import downloadAllImages from "../downloadAllImages.js";
|
|
4
4
|
import replaceImagePaths from "../replaceImagePaths.js";
|
|
5
|
-
export async function scrapeGitBookPage(html, origin, cliDir, imageBaseDir
|
|
5
|
+
export async function scrapeGitBookPage(html, origin, cliDir, imageBaseDir, _ // version
|
|
6
|
+
) {
|
|
6
7
|
const $ = cheerio.load(html);
|
|
7
8
|
const titleComponent = $('[data-testid="page.title"]').first();
|
|
8
9
|
const titleAndDescription = titleComponent.parent().parent().parent().text();
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeGitBookPage.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeGitBookPage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB
|
|
1
|
+
{"version":3,"file":"scrapeGitBookPage.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeGitBookPage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,CAAqB,CAAC,UAAU;;IAEhC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,cAAc,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,KAAK,EAAE,CAAC;IAC/D,MAAM,mBAAmB,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC;IAE7E,MAAM,WAAW,GAAG,mBAAmB;SACpC,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC;SAClC,IAAI,EAAE,CAAC;IACV,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAE3C,MAAM,OAAO,GAAG,CAAC,CAAC,oCAAoC,CAAC,CAAC,KAAK,EAAE,CAAC;IAChE,MAAM,WAAW,GAAG,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAEpC,MAAM,cAAc,GAAG,CAAC,QAAQ,EAAE,EAAE;IAClC,yCAAyC;IACzC,sFAAsF;IACtF,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAE7C,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,OAAO,EACP,MAAM,EACN,YAAY,EACZ,cAAc,CACf,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,gBAAgB,EAAE,CAAC;IACnC,IAAI,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAE1C,2DAA2D;IAC3D,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IAE9C,qDAAqD;IACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAE3C,iCAAiC;IACjC,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,gEAAgE;IAChE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;IAEjE,QAAQ,GAAG,iBAAiB,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAEhE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC"}
|
|
@@ -2,14 +2,16 @@ import cheerio from "cheerio";
|
|
|
2
2
|
import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js";
|
|
3
3
|
import { scrapeGitBookPage } from "./scrapeGitBookPage.js";
|
|
4
4
|
import combineNavWithEmptyGroupTitles from "../combineNavWithEmptyGroupTitles.js";
|
|
5
|
-
import getLinksRecursively from "./getLinksRecursively.js";
|
|
5
|
+
import getLinksRecursively from "./links-per-group/getLinksRecursively.js";
|
|
6
6
|
import alternateGroupTitle from "./alternateGroupTitle.js";
|
|
7
|
-
export async function scrapeGitBookSection(html, origin, cliDir, overwrite) {
|
|
7
|
+
export async function scrapeGitBookSection(html, origin, cliDir, overwrite, version) {
|
|
8
8
|
const $ = cheerio.load(html);
|
|
9
9
|
// Get all the navigation sections
|
|
10
|
-
|
|
10
|
+
// Some variants of the GitBook UI show the logo and search bar in the side navigation bar,
|
|
11
|
+
// but the navigation sections are always the last value.
|
|
12
|
+
const navigationSections = $('div[data-testid="page.desktopTableOfContents"] > nav > div:first-child')
|
|
11
13
|
.children()
|
|
12
|
-
.
|
|
14
|
+
.eq(-1)
|
|
13
15
|
.children()
|
|
14
16
|
.first()
|
|
15
17
|
.children();
|
|
@@ -37,7 +39,7 @@ export async function scrapeGitBookSection(html, origin, cliDir, overwrite) {
|
|
|
37
39
|
const reducedGroupsConfig = combineNavWithEmptyGroupTitles(groupsConfig);
|
|
38
40
|
// Scrape each link in the navigation.
|
|
39
41
|
const groupsConfigCleanPaths = await Promise.all(reducedGroupsConfig.map(async (navEntry) => {
|
|
40
|
-
return await scrapeGettingFileNameFromUrl(navEntry, cliDir, origin, overwrite, scrapeGitBookPage, true);
|
|
42
|
+
return await scrapeGettingFileNameFromUrl(navEntry, cliDir, origin, overwrite, scrapeGitBookPage, true, version);
|
|
41
43
|
}));
|
|
42
44
|
return groupsConfigCleanPaths;
|
|
43
45
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeGitBookSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeGitBookSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAC3D,OAAO,8BAA8B,MAAM,sCAAsC,CAAC;AAClF,OAAO,mBAAmB,MAAM,
|
|
1
|
+
{"version":3,"file":"scrapeGitBookSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeGitBookSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAC3D,OAAO,8BAA8B,MAAM,sCAAsC,CAAC;AAClF,OAAO,mBAAmB,MAAM,0CAA0C,CAAC;AAC3E,OAAO,mBAAmB,MAAM,0BAA0B,CAAC;AAE3D,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,SAAkB,EAClB,OAA2B;IAE3B,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,kCAAkC;IAClC,4FAA4F;IAC5F,yDAAyD;IACzD,MAAM,kBAAkB,GAAG,CAAC,CAC1B,wEAAwE,CACzE;SACE,QAAQ,EAAE;SACV,EAAE,CAAC,CAAC,CAAC,CAAC;SACN,QAAQ,EAAE;SACV,KAAK,EAAE;SACP,QAAQ,EAAE,CAAC;IAEd,0BAA0B;IAC1B,MAAM,YAAY,GAAG,kBAAkB;SACpC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACZ,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,YAAY,GAAG,CAAC,CAAC,OAAO,CAAC;aAC5B,IAAI,CAAC,uBAAuB,CAAC;aAC7B,KAAK,EAAE;aACP,IAAI,EAAE,CAAC;QAEV,0DAA0D;QAC1D,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC3C,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEzC,MAAM,YAAY,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;QACzD,MAAM,KAAK,GAAG,mBAAmB,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC;QAEnD,OAAO;YACL,KAAK,EAAE,YAAY,IAAI,mBAAmB,CAAC,SAAS,EAAE,KAAK,CAAC;YAC5D,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK;SACjD,CAAC;IACJ,CAAC,CAAC;SACD,OAAO,EAAE;SACT,MAAM,CAAC,OAAO,CAAC,CAAC;IAEnB,0CAA0C;IAC1C,MAAM,mBAAmB,GAAG,8BAA8B,CAAC,YAAY,CAAC,CAAC;IAEzE,sCAAsC;IACtC,MAAM,sBAAsB,GAAG,MAAM,OAAO,CAAC,GAAG,CAC9C,mBAAmB,CAAC,GAAG,CAAC,KAAK,EAAE,QAAyB,EAAE,EAAE;QAC1D,OAAO,MAAM,4BAA4B,CACvC,QAAQ,EACR,MAAM,EACN,MAAM,EACN,SAAS,EACT,iBAAiB,EACjB,IAAI,EACJ,OAAO,CACR,CAAC;IACJ,CAAC,CAAC,CACH,CAAC;IAEF,OAAO,sBAAsB,CAAC;AAChC,CAAC"}
|
|
@@ -2,7 +2,8 @@ import cheerio from "cheerio";
|
|
|
2
2
|
import { NodeHtmlMarkdown } from "node-html-markdown";
|
|
3
3
|
import downloadAllImages from "../downloadAllImages.js";
|
|
4
4
|
import replaceImagePaths from "../replaceImagePaths.js";
|
|
5
|
-
export async function scrapeReadMePage(html, origin, cliDir, imageBaseDir
|
|
5
|
+
export async function scrapeReadMePage(html, origin, cliDir, imageBaseDir, _ // version
|
|
6
|
+
) {
|
|
6
7
|
const $ = cheerio.load(html);
|
|
7
8
|
const titleComponent = $("h1").first();
|
|
8
9
|
const title = titleComponent.text().trim();
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeReadMePage.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeReadMePage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB
|
|
1
|
+
{"version":3,"file":"scrapeReadMePage.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeReadMePage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AACxD,OAAO,iBAAiB,MAAM,yBAAyB,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,CAAqB,CAAC,UAAU;;IAEhC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,cAAc,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;IACvC,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAC3C,IAAI,WAAW,GAAG,CAAC,CAAC,gBAAgB,EAAE,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAC7E,IAAI,CAAC,WAAW,EAAE;QAChB,WAAW,GAAG,CAAC,CAAC,wBAAwB,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;KACzD;IAED,IAAI,OAAO,GAAG,CAAC,CAAC,8BAA8B,CAAC,CAAC,KAAK,EAAE,CAAC;IACxD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE;QACxB,OAAO,GAAG,CAAC,CAAC,8BAA8B,CAAC,CAAC;KAC7C;IAED,+EAA+E;IAC/E,IAAI,WAAW,GAAG,OAAO,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC;IAEvC,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,OAAO,EACP,MAAM,EACN,YAAY,CACb,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,gBAAgB,EAAE,CAAC;IACnC,IAAI,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAE1C,2DAA2D;IAC3D,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IAE9C,qDAAqD;IACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAE3C,6BAA6B;IAC7B,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,kBAAkB,EAAE,IAAI,CAAC,CAAC;IAEtD,iCAAiC;IACjC,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,gEAAgE;IAChE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;IAEjE,QAAQ,GAAG,iBAAiB,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAEhE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC"}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import cheerio from "cheerio";
|
|
2
2
|
import { scrapeReadMePage } from "./scrapeReadMePage.js";
|
|
3
3
|
import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js";
|
|
4
|
-
import getLinksRecursively from "./getLinksRecursively.js";
|
|
5
|
-
export async function scrapeReadMeSection(html, origin, cliDir, overwrite) {
|
|
4
|
+
import getLinksRecursively from "./links-per-group/getLinksRecursively.js";
|
|
5
|
+
export async function scrapeReadMeSection(html, origin, cliDir, overwrite, version) {
|
|
6
6
|
const $ = cheerio.load(html);
|
|
7
7
|
// Get all the navigation sections, but only from the first
|
|
8
8
|
// sidebar found. There are multiple in the HTML for mobile
|
|
@@ -32,7 +32,7 @@ export async function scrapeReadMeSection(html, origin, cliDir, overwrite) {
|
|
|
32
32
|
return await scrapeGettingFileNameFromUrl(
|
|
33
33
|
// ReadMe requires a directory on all sections whereas we use root.
|
|
34
34
|
// /docs is their default directory so we remove it
|
|
35
|
-
navEntry, cliDir, origin, overwrite, scrapeReadMePage, false, "/docs");
|
|
35
|
+
navEntry, cliDir, origin, overwrite, scrapeReadMePage, false, version, "/docs");
|
|
36
36
|
}));
|
|
37
37
|
}
|
|
38
38
|
//# sourceMappingURL=scrapeReadMeSection.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeReadMeSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeReadMeSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,mBAAmB,MAAM,
|
|
1
|
+
{"version":3,"file":"scrapeReadMeSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeReadMeSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,mBAAmB,MAAM,0CAA0C,CAAC;AAG3E,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,SAAkB,EAClB,OAA2B;IAE3B,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,2DAA2D;IAC3D,2DAA2D;IAC3D,mDAAmD;IACnD,MAAM,kBAAkB,GAAG,CAAC,CAAC,aAAa,CAAC;SACxC,KAAK,EAAE;SACP,IAAI,CAAC,qBAAqB,CAAC,CAAC;IAE/B,MAAM,YAAY,GAAG,kBAAkB;SACpC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACZ,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC;QAEvD,uDAAuD;QACvD,yDAAyD;QACzD,+BAA+B;QAC/B,iDAAiD;QACjD,iCAAiC;QACjC,MAAM,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC,KAAK,EAAE,CAAC,QAAQ,EAAE,CAAC;QACzE,MAAM,KAAK,GAAG,mBAAmB,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,MAAM,CACvD,CAAC,KAAa,EAAE,KAAa,EAAE,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,KAAK,CACtE,CAAC;QAEF,0CAA0C;QAC1C,OAAO;YACL,KAAK,EAAE,YAAY;YACnB,KAAK,EAAE,KAAK;SACb,CAAC;IACJ,CAAC,CAAC;SACD,OAAO,EAAE,CAAC;IAEb,OAAO,MAAM,OAAO,CAAC,GAAG,CACtB,YAAY,CAAC,GAAG,CAAC,KAAK,EAAE,QAAyB,EAAE,EAAE;QACnD,OAAO,MAAM,4BAA4B;QACvC,kEAAkE;QAClE,mDAAmD;QACnD,QAAQ,EACR,MAAM,EACN,MAAM,EACN,SAAS,EACT,gBAAgB,EAChB,KAAK,EACL,OAAO,EACP,OAAO,CACR,CAAC;IACJ,CAAC,CAAC,CACH,CAAC;AACJ,CAAC"}
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -54,14 +54,6 @@ yargs(hideBin(process.argv))
|
|
|
54
54
|
await scrapePageAutomatically(argv);
|
|
55
55
|
}
|
|
56
56
|
)
|
|
57
|
-
.command(
|
|
58
|
-
"scrape-docusaurus-page [url]",
|
|
59
|
-
"Scrapes a Docusaurus page",
|
|
60
|
-
() => {},
|
|
61
|
-
async (argv) => {
|
|
62
|
-
await scrapePageWrapper(argv, scrapeDocusaurusPage);
|
|
63
|
-
}
|
|
64
|
-
)
|
|
65
57
|
.command(
|
|
66
58
|
"scrape-gitbook-page [url]",
|
|
67
59
|
"Scrapes a GitBook page",
|
|
@@ -86,14 +78,6 @@ yargs(hideBin(process.argv))
|
|
|
86
78
|
await scrapeSectionAutomatically(argv);
|
|
87
79
|
}
|
|
88
80
|
)
|
|
89
|
-
.command(
|
|
90
|
-
"scrape-docusaurus-section [url]",
|
|
91
|
-
"Scrapes the Docusaurus section",
|
|
92
|
-
() => {},
|
|
93
|
-
async (argv) => {
|
|
94
|
-
await scrapeDocusaurusSectionCommand(argv);
|
|
95
|
-
}
|
|
96
|
-
)
|
|
97
81
|
.command(
|
|
98
82
|
"scrape-gitbook-section [url]",
|
|
99
83
|
"Scrapes the Gitbook section",
|