@mintlify/scraping 3.0.187 → 3.0.188
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/assert.d.ts +5 -0
- package/bin/assert.js +13 -0
- package/bin/assert.js.map +1 -0
- package/bin/cli.js +43 -72
- package/bin/cli.js.map +1 -1
- package/bin/components/Accordion.d.ts +5 -0
- package/bin/components/Accordion.js +54 -0
- package/bin/components/Accordion.js.map +1 -0
- package/bin/components/AccordionGroup.d.ts +5 -0
- package/bin/components/AccordionGroup.js +52 -0
- package/bin/components/AccordionGroup.js.map +1 -0
- package/bin/components/Callout.d.ts +5 -0
- package/bin/components/Callout.js +114 -0
- package/bin/components/Callout.js.map +1 -0
- package/bin/components/Card.d.ts +5 -0
- package/bin/components/Card.js +135 -0
- package/bin/components/Card.js.map +1 -0
- package/bin/components/CardGroup.d.ts +5 -0
- package/bin/components/CardGroup.js +52 -0
- package/bin/components/CardGroup.js.map +1 -0
- package/bin/components/CodeGroup.d.ts +5 -0
- package/bin/components/CodeGroup.js +166 -0
- package/bin/components/CodeGroup.js.map +1 -0
- package/bin/components/Frame.d.ts +5 -0
- package/bin/components/Frame.js +51 -0
- package/bin/components/Frame.js.map +1 -0
- package/bin/components/Tabs.d.ts +5 -0
- package/bin/components/Tabs.js +122 -0
- package/bin/components/Tabs.js.map +1 -0
- package/bin/components/link.d.ts +2 -0
- package/bin/components/link.js +16 -0
- package/bin/components/link.js.map +1 -0
- package/bin/constants.d.ts +6 -7
- package/bin/constants.js +31 -12
- package/bin/constants.js.map +1 -1
- package/bin/customComponents/create.d.ts +10 -0
- package/bin/customComponents/create.js +69 -0
- package/bin/customComponents/create.js.map +1 -0
- package/bin/customComponents/plugin.d.ts +2 -0
- package/bin/customComponents/plugin.js +26 -0
- package/bin/customComponents/plugin.js.map +1 -0
- package/bin/customComponents/selective.d.ts +6 -0
- package/bin/customComponents/selective.js +29 -0
- package/bin/customComponents/selective.js.map +1 -0
- package/bin/nav/iterate.d.ts +2 -0
- package/bin/nav/iterate.js +15 -0
- package/bin/nav/iterate.js.map +1 -0
- package/bin/nav/listItems.d.ts +8 -0
- package/bin/nav/listItems.js +62 -0
- package/bin/nav/listItems.js.map +1 -0
- package/bin/nav/retrieve.d.ts +3 -0
- package/bin/nav/retrieve.js +75 -0
- package/bin/nav/retrieve.js.map +1 -0
- package/bin/nav/root.d.ts +2 -0
- package/bin/nav/root.js +40 -0
- package/bin/nav/root.js.map +1 -0
- package/bin/openapi/generateOpenApiPages.js +2 -2
- package/bin/openapi/generateOpenApiPages.js.map +1 -1
- package/bin/root/retrieve.d.ts +2 -0
- package/bin/root/retrieve.js +46 -0
- package/bin/root/retrieve.js.map +1 -0
- package/bin/scrapingPipeline/group.d.ts +5 -0
- package/bin/scrapingPipeline/group.js +46 -0
- package/bin/scrapingPipeline/group.js.map +1 -0
- package/bin/scrapingPipeline/icon.d.ts +2 -0
- package/bin/scrapingPipeline/icon.js +22 -0
- package/bin/scrapingPipeline/icon.js.map +1 -0
- package/bin/scrapingPipeline/images.d.ts +3 -0
- package/bin/scrapingPipeline/images.js +50 -0
- package/bin/scrapingPipeline/images.js.map +1 -0
- package/bin/scrapingPipeline/logo.d.ts +5 -0
- package/bin/scrapingPipeline/logo.js +92 -0
- package/bin/scrapingPipeline/logo.js.map +1 -0
- package/bin/scrapingPipeline/page.d.ts +6 -0
- package/bin/scrapingPipeline/page.js +102 -0
- package/bin/scrapingPipeline/page.js.map +1 -0
- package/bin/scrapingPipeline/root.d.ts +2 -0
- package/bin/scrapingPipeline/root.js +8 -0
- package/bin/scrapingPipeline/root.js.map +1 -0
- package/bin/scrapingPipeline/site.d.ts +7 -0
- package/bin/scrapingPipeline/site.js +129 -0
- package/bin/scrapingPipeline/site.js.map +1 -0
- package/bin/scrapingPipeline/tabs.d.ts +3 -0
- package/bin/scrapingPipeline/tabs.js +67 -0
- package/bin/scrapingPipeline/tabs.js.map +1 -0
- package/bin/tabs/retrieveReadme.d.ts +3 -0
- package/bin/tabs/retrieveReadme.js +78 -0
- package/bin/tabs/retrieveReadme.js.map +1 -0
- package/bin/tsconfig.build.tsbuildinfo +1 -1
- package/bin/types/components.d.ts +2 -0
- package/bin/types/components.js +2 -0
- package/bin/types/components.js.map +1 -0
- package/bin/types/framework.d.ts +8 -0
- package/bin/types/framework.js +3 -0
- package/bin/types/framework.js.map +1 -0
- package/bin/types/hast.d.ts +6 -0
- package/bin/types/hast.js +2 -0
- package/bin/types/hast.js.map +1 -0
- package/bin/types/result.d.ts +7 -0
- package/bin/types/result.js +2 -0
- package/bin/types/result.js.map +1 -0
- package/bin/types/scrapeFunc.d.ts +3 -0
- package/bin/types/scrapeFunc.js +2 -0
- package/bin/types/scrapeFunc.js.map +1 -0
- package/bin/utils/append.d.ts +1 -0
- package/bin/utils/append.js +12 -0
- package/bin/utils/append.js.map +1 -0
- package/bin/utils/children.d.ts +5 -0
- package/bin/utils/children.js +35 -0
- package/bin/utils/children.js.map +1 -0
- package/bin/utils/className.d.ts +3 -0
- package/bin/utils/className.js +13 -0
- package/bin/utils/className.js.map +1 -0
- package/bin/utils/detectFramework.d.ts +4 -0
- package/bin/utils/detectFramework.js +60 -0
- package/bin/utils/detectFramework.js.map +1 -0
- package/bin/utils/emptyParagraphs.d.ts +3 -0
- package/bin/utils/emptyParagraphs.js +19 -0
- package/bin/utils/emptyParagraphs.js.map +1 -0
- package/bin/utils/errors.d.ts +3 -0
- package/bin/utils/errors.js +16 -0
- package/bin/utils/errors.js.map +1 -0
- package/bin/utils/escape.d.ts +2 -0
- package/bin/utils/escape.js +25 -0
- package/bin/utils/escape.js.map +1 -0
- package/bin/utils/extension.d.ts +3 -0
- package/bin/utils/extension.js +18 -0
- package/bin/utils/extension.js.map +1 -0
- package/bin/utils/file.d.ts +4 -0
- package/bin/utils/file.js +43 -0
- package/bin/utils/file.js.map +1 -0
- package/bin/utils/firstChild.d.ts +2 -0
- package/bin/utils/firstChild.js +12 -0
- package/bin/utils/firstChild.js.map +1 -0
- package/bin/utils/images.d.ts +5 -0
- package/bin/utils/images.js +86 -0
- package/bin/utils/images.js.map +1 -0
- package/bin/utils/img.d.ts +2 -0
- package/bin/utils/img.js +15 -0
- package/bin/utils/img.js.map +1 -0
- package/bin/utils/log.d.ts +18 -0
- package/bin/utils/log.js +68 -0
- package/bin/utils/log.js.map +1 -0
- package/bin/utils/nestedRoots.d.ts +7 -0
- package/bin/utils/nestedRoots.js +19 -0
- package/bin/utils/nestedRoots.js.map +1 -0
- package/bin/utils/network.d.ts +5 -0
- package/bin/utils/network.js +82 -0
- package/bin/utils/network.js.map +1 -0
- package/bin/utils/path.d.ts +1 -0
- package/bin/utils/path.js +22 -0
- package/bin/utils/path.js.map +1 -0
- package/bin/utils/position.d.ts +3 -0
- package/bin/utils/position.js +12 -0
- package/bin/utils/position.js.map +1 -0
- package/bin/utils/reservedNames.d.ts +4 -0
- package/bin/utils/reservedNames.js +27 -0
- package/bin/utils/reservedNames.js.map +1 -0
- package/bin/utils/strings.d.ts +2 -0
- package/bin/utils/strings.js +7 -0
- package/bin/utils/strings.js.map +1 -0
- package/bin/utils/text.d.ts +2 -0
- package/bin/utils/text.js +11 -0
- package/bin/utils/text.js.map +1 -0
- package/bin/utils/title.d.ts +10 -0
- package/bin/utils/title.js +58 -0
- package/bin/utils/title.js.map +1 -0
- package/bin/utils/url.d.ts +3 -0
- package/bin/utils/url.js +10 -0
- package/bin/utils/url.js.map +1 -0
- package/package.json +17 -8
- package/src/assert.ts +15 -0
- package/src/cli.ts +53 -90
- package/src/components/Accordion.ts +84 -0
- package/src/components/AccordionGroup.ts +69 -0
- package/src/components/Callout.ts +159 -0
- package/src/components/Card.ts +168 -0
- package/src/components/CardGroup.ts +69 -0
- package/src/components/CodeGroup.ts +209 -0
- package/src/components/Frame.ts +86 -0
- package/src/components/Tabs.ts +154 -0
- package/src/components/link.ts +17 -0
- package/src/constants.ts +37 -19
- package/src/customComponents/create.ts +106 -0
- package/src/customComponents/plugin.ts +31 -0
- package/src/customComponents/selective.ts +37 -0
- package/src/nav/iterate.ts +18 -0
- package/src/nav/listItems.ts +82 -0
- package/src/nav/retrieve.ts +88 -0
- package/src/nav/root.ts +47 -0
- package/src/openapi/generateOpenApiPages.ts +2 -2
- package/src/root/retrieve.ts +52 -0
- package/src/scrapingPipeline/group.ts +62 -0
- package/src/scrapingPipeline/icon.ts +26 -0
- package/src/scrapingPipeline/images.ts +67 -0
- package/src/scrapingPipeline/logo.ts +127 -0
- package/src/scrapingPipeline/page.ts +130 -0
- package/src/scrapingPipeline/root.ts +10 -0
- package/src/scrapingPipeline/site.ts +161 -0
- package/src/scrapingPipeline/tabs.ts +87 -0
- package/src/tabs/retrieveReadme.ts +99 -0
- package/src/types/components.ts +3 -0
- package/src/types/framework.ts +10 -0
- package/src/types/hast.ts +12 -0
- package/src/types/result.ts +1 -0
- package/src/types/scrapeFunc.ts +9 -0
- package/src/utils/append.ts +9 -0
- package/src/utils/children.ts +51 -0
- package/src/utils/className.ts +14 -0
- package/src/utils/detectFramework.ts +72 -0
- package/src/utils/emptyParagraphs.ts +21 -0
- package/src/utils/errors.ts +24 -0
- package/src/utils/escape.ts +30 -0
- package/src/utils/extension.ts +19 -0
- package/src/utils/file.ts +58 -0
- package/src/utils/firstChild.ts +13 -0
- package/src/utils/images.ts +101 -0
- package/src/utils/img.ts +17 -0
- package/src/utils/log.ts +82 -0
- package/src/utils/nestedRoots.ts +20 -0
- package/src/utils/network.ts +95 -0
- package/src/utils/path.ts +27 -0
- package/src/utils/position.ts +14 -0
- package/src/utils/reservedNames.ts +31 -0
- package/src/utils/strings.ts +7 -0
- package/src/utils/text.ts +11 -0
- package/src/utils/title.ts +68 -0
- package/src/utils/url.ts +8 -0
- package/bin/browser.d.ts +0 -2
- package/bin/browser.js +0 -24
- package/bin/browser.js.map +0 -1
- package/bin/checks.d.ts +0 -8
- package/bin/checks.js +0 -24
- package/bin/checks.js.map +0 -1
- package/bin/downloadImage.d.ts +0 -5
- package/bin/downloadImage.js +0 -88
- package/bin/downloadImage.js.map +0 -1
- package/bin/scraping/combineNavWithEmptyGroupTitles.d.ts +0 -2
- package/bin/scraping/combineNavWithEmptyGroupTitles.js +0 -20
- package/bin/scraping/combineNavWithEmptyGroupTitles.js.map +0 -1
- package/bin/scraping/detectFramework.d.ts +0 -9
- package/bin/scraping/detectFramework.js +0 -36
- package/bin/scraping/detectFramework.js.map +0 -1
- package/bin/scraping/downloadAllImages.d.ts +0 -4
- package/bin/scraping/downloadAllImages.js +0 -36
- package/bin/scraping/downloadAllImages.js.map +0 -1
- package/bin/scraping/downloadLogoImage.d.ts +0 -1
- package/bin/scraping/downloadLogoImage.js +0 -12
- package/bin/scraping/downloadLogoImage.js.map +0 -1
- package/bin/scraping/replaceImagePaths.d.ts +0 -1
- package/bin/scraping/replaceImagePaths.js +0 -14
- package/bin/scraping/replaceImagePaths.js.map +0 -1
- package/bin/scraping/scrapeFileGettingFileNameFromUrl.d.ts +0 -6
- package/bin/scraping/scrapeFileGettingFileNameFromUrl.js +0 -46
- package/bin/scraping/scrapeFileGettingFileNameFromUrl.js.map +0 -1
- package/bin/scraping/scrapeGettingFileNameFromUrl.d.ts +0 -6
- package/bin/scraping/scrapeGettingFileNameFromUrl.js +0 -13
- package/bin/scraping/scrapeGettingFileNameFromUrl.js.map +0 -1
- package/bin/scraping/scrapePage.d.ts +0 -8
- package/bin/scraping/scrapePage.js +0 -10
- package/bin/scraping/scrapePage.js.map +0 -1
- package/bin/scraping/scrapePageCommands.d.ts +0 -7
- package/bin/scraping/scrapePageCommands.js +0 -50
- package/bin/scraping/scrapePageCommands.js.map +0 -1
- package/bin/scraping/scrapeSection.d.ts +0 -3
- package/bin/scraping/scrapeSection.js +0 -12
- package/bin/scraping/scrapeSection.js.map +0 -1
- package/bin/scraping/scrapeSectionCommands.d.ts +0 -6
- package/bin/scraping/scrapeSectionCommands.js +0 -63
- package/bin/scraping/scrapeSectionCommands.js.map +0 -1
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.d.ts +0 -5
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js +0 -29
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js.map +0 -1
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.d.ts +0 -2
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js +0 -31
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js.map +0 -1
- package/bin/scraping/site-scrapers/alternateGroupTitle.d.ts +0 -3
- package/bin/scraping/site-scrapers/alternateGroupTitle.js +0 -9
- package/bin/scraping/site-scrapers/alternateGroupTitle.js.map +0 -1
- package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.d.ts +0 -5
- package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js +0 -33
- package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js.map +0 -1
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.d.ts +0 -3
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js +0 -35
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js.map +0 -1
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.d.ts +0 -3
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js +0 -33
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js.map +0 -1
- package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.d.ts +0 -2
- package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js +0 -30
- package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js.map +0 -1
- package/bin/scraping/site-scrapers/openNestedGitbookMenus.d.ts +0 -2
- package/bin/scraping/site-scrapers/openNestedGitbookMenus.js +0 -21
- package/bin/scraping/site-scrapers/openNestedGitbookMenus.js.map +0 -1
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.d.ts +0 -5
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js +0 -53
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js.map +0 -1
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.d.ts +0 -2
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js +0 -32
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js.map +0 -1
- package/bin/scraping/site-scrapers/scrapeGitBookPage.d.ts +0 -5
- package/bin/scraping/site-scrapers/scrapeGitBookPage.js +0 -56
- package/bin/scraping/site-scrapers/scrapeGitBookPage.js.map +0 -1
- package/bin/scraping/site-scrapers/scrapeGitBookSection.d.ts +0 -2
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js +0 -42
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js.map +0 -1
- package/bin/scraping/site-scrapers/scrapeReadMePage.d.ts +0 -5
- package/bin/scraping/site-scrapers/scrapeReadMePage.js +0 -38
- package/bin/scraping/site-scrapers/scrapeReadMePage.js.map +0 -1
- package/bin/scraping/site-scrapers/scrapeReadMeSection.d.ts +0 -2
- package/bin/scraping/site-scrapers/scrapeReadMeSection.js +0 -39
- package/bin/scraping/site-scrapers/scrapeReadMeSection.js.map +0 -1
- package/bin/util.d.ts +0 -29
- package/bin/util.js +0 -97
- package/bin/util.js.map +0 -1
- package/src/browser.ts +0 -24
- package/src/checks.ts +0 -32
- package/src/downloadImage.ts +0 -102
- package/src/scraping/combineNavWithEmptyGroupTitles.ts +0 -21
- package/src/scraping/detectFramework.ts +0 -55
- package/src/scraping/downloadAllImages.ts +0 -61
- package/src/scraping/downloadLogoImage.ts +0 -24
- package/src/scraping/replaceImagePaths.ts +0 -17
- package/src/scraping/scrapeFileGettingFileNameFromUrl.ts +0 -84
- package/src/scraping/scrapeGettingFileNameFromUrl.ts +0 -56
- package/src/scraping/scrapePage.ts +0 -40
- package/src/scraping/scrapePageCommands.ts +0 -68
- package/src/scraping/scrapeSection.ts +0 -30
- package/src/scraping/scrapeSectionCommands.ts +0 -98
- package/src/scraping/site-scrapers/Intercom/scrapeIntercomPage.ts +0 -52
- package/src/scraping/site-scrapers/Intercom/scrapeIntercomSection.ts +0 -54
- package/src/scraping/site-scrapers/alternateGroupTitle.ts +0 -11
- package/src/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.ts +0 -45
- package/src/scraping/site-scrapers/links-per-group/getLinksRecursively.ts +0 -47
- package/src/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.ts +0 -44
- package/src/scraping/site-scrapers/openNestedDocusaurusMenus.ts +0 -42
- package/src/scraping/site-scrapers/openNestedGitbookMenus.ts +0 -27
- package/src/scraping/site-scrapers/scrapeDocusaurusPage.ts +0 -85
- package/src/scraping/site-scrapers/scrapeDocusaurusSection.ts +0 -63
- package/src/scraping/site-scrapers/scrapeGitBookPage.ts +0 -82
- package/src/scraping/site-scrapers/scrapeGitBookSection.ts +0 -69
- package/src/scraping/site-scrapers/scrapeReadMePage.ts +0 -56
- package/src/scraping/site-scrapers/scrapeReadMeSection.ts +0 -66
- package/src/util.ts +0 -122
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
import { Navigation, NavigationEntry } from '@mintlify/models';
|
|
2
|
-
import * as cheerio from 'cheerio';
|
|
3
|
-
|
|
4
|
-
import combineNavWithEmptyGroupTitles from '../combineNavWithEmptyGroupTitles.js';
|
|
5
|
-
import downloadLogoImage from '../downloadLogoImage.js';
|
|
6
|
-
import { scrapeGettingFileNameFromUrl } from '../scrapeGettingFileNameFromUrl.js';
|
|
7
|
-
import { getDocusaurusLinksPerGroup } from './links-per-group/getDocusaurusLinksPerGroup.js';
|
|
8
|
-
import { scrapeDocusaurusPage } from './scrapeDocusaurusPage.js';
|
|
9
|
-
|
|
10
|
-
/**
 * Scrapes a Docusaurus documentation site starting from the HTML of one of its pages.
 *
 * Starts the logo download, reads the sidebar navigation out of `html`, merges groups
 * with empty titles, and then scrapes every page linked from the navigation.
 *
 * @param html - HTML of a page that contains the site's sidebar navigation.
 * @param origin - Origin of the site being scraped; forwarded to the download/scrape helpers.
 * @param cliDir - Forwarded to `scrapeGettingFileNameFromUrl`.
 * @param imageBaseDir - Forwarded to `downloadLogoImage`.
 * @param overwrite - Forwarded to the download/scrape helpers.
 * @param version - Optional version, forwarded to the link collection and page scraping helpers.
 * @returns The navigation groups, with each group's `pages` replaced by the scrape results
 *          (falsy results are dropped).
 */
export async function scrapeDocusaurusSection(
  html: string,
  origin: string,
  cliDir: string,
  imageBaseDir: string,
  overwrite: boolean,
  version?: string
): Promise<Navigation> {
  const $ = cheerio.load(html);

  // Download the logo in the background. A failed download is logged instead of
  // being left as an unhandled rejection, and it never blocks the page scrape.
  const logoSrc = $('.navbar__logo img').attr('src');
  downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite).catch(console.error);

  // Get all the navigation sections. Prefer the themed sidebar menu and fall back
  // to the first menu list in the main wrapper when the themed menu has no children.
  const themeSidebarItems = $('.theme-doc-sidebar-menu').first().children();
  const navigationSections = themeSidebarItems.length
    ? themeSidebarItems
    : $('.main-wrapper').first().find('.menu__list').first().children();

  // Get all links per group
  const groupsConfig: Navigation = getDocusaurusLinksPerGroup(navigationSections, $, version);

  // Merge groups with empty titles together
  const reducedGroupsConfig = combineNavWithEmptyGroupTitles(groupsConfig);

  // Scrape each link in the navigation.
  const groupsConfigCleanPaths = await Promise.all(
    reducedGroupsConfig.map(async (groupConfig) => {
      const scrapedPages = await Promise.all(
        groupConfig.pages.map(async (navEntry: NavigationEntry) =>
          // Docusaurus requires a directory on all sections whereas we use root.
          // /docs is their default directory so we remove it
          scrapeGettingFileNameFromUrl(
            navEntry,
            cliDir,
            origin,
            overwrite,
            scrapeDocusaurusPage,
            false,
            version,
            '/docs'
          )
        )
      );

      // Remove skipped index pages (they return undefined from the above function)
      groupConfig.pages = scrapedPages.filter(Boolean);
      return groupConfig;
    })
  );

  return groupsConfigCleanPaths;
}
|
|
@@ -1,82 +0,0 @@
|
|
|
1
|
-
import * as cheerio from 'cheerio';
|
|
2
|
-
import { NodeHtmlMarkdown } from 'node-html-markdown';
|
|
3
|
-
|
|
4
|
-
import { SUPPORTED_MEDIA_EXTENSIONS } from '../../constants.js';
|
|
5
|
-
import { getLengthUntilMetadata } from '../../downloadImage.js';
|
|
6
|
-
import downloadAllImages from '../downloadAllImages.js';
|
|
7
|
-
import replaceImagePaths from '../replaceImagePaths.js';
|
|
8
|
-
|
|
9
|
-
/**
 * Scrapes a single GitBook page and converts its main content to markdown.
 *
 * @param html - HTML of the GitBook page.
 * @param origin - Forwarded to `downloadAllImages`.
 * @param cliDir - Forwarded to `replaceImagePaths`.
 * @param imageBaseDir - Forwarded to `downloadAllImages`.
 * @param overwrite - Forwarded to `downloadAllImages`.
 * @param _ - Version; unused by this scraper.
 * @returns The page title, its description (header text minus the title) and the markdown body.
 */
export async function scrapeGitBookPage(
  html: string,
  origin: string,
  cliDir: string,
  imageBaseDir: string,
  overwrite: boolean,
  _: string | undefined // version
) {
  const $ = cheerio.load(html);

  const mainContent = $('body > div > div > div > div > main');
  const titleAndDescription = mainContent.children('header');
  const titleComponent = titleAndDescription.children('h1');
  const description = titleAndDescription.text().replace(titleComponent.text(), '').trim();
  const title = titleComponent.text().trim();

  const content = titleAndDescription.next('div');

  // `.text()` yields decoded text, so it has to be re-escaped before being spliced
  // into an HTML string; otherwise `<`, `>` and `&` inside code samples would be
  // parsed as markup by `replaceWith`.
  const escapeHtmlText = (text: string) =>
    text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

  // Replace code blocks with parseable html: one line per non-empty child element.
  const codeBlocks = content.find('pre > code');
  codeBlocks.each((_index, c) => {
    const code = $(c);
    const codeContent = code
      .children()
      .toArray()
      .map((d) => $(d).text())
      .filter((text) => text !== '')
      .join('\n');
    code.replaceWith(`<pre><code>${escapeHtmlText(codeContent)}</code></pre>`);
  });

  const contentHtml = $.html(content);

  const modifyFileName = (fileName: string) => {
    // Remove GitBook metadata from the start
    // The first four %2F split metadata fields. Remaining ones are part of the file name.
    const withoutMetadata = fileName.split('%2F').slice(4).join('%2F');
    for (const ext of SUPPORTED_MEDIA_EXTENSIONS) {
      if (fileName.includes(`.${ext}`)) {
        return getLengthUntilMetadata(withoutMetadata, ext);
      }
    }
    return withoutMetadata;
  };

  const origToWritePath = await downloadAllImages(
    $,
    content,
    origin,
    imageBaseDir,
    overwrite,
    modifyFileName
  );

  const nhm = new NodeHtmlMarkdown({ useInlineLinks: false });
  let markdown = nhm.translate(contentHtml);

  // Keep headers on one line
  markdown = markdown.replace(/# \n\n/g, '# ');

  // Remove unnecessary zero-width space characters
  markdown = markdown.replace(/\u200b/g, '');

  // Reduce unnecessary blank lines
  markdown = markdown.replace(/\n\n\n/g, '\n\n');

  // Mintlify doesn't support bolded headers, remove the asterisks
  markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, '$1 $2\n');
  if (origToWritePath) {
    markdown = replaceImagePaths(origToWritePath, cliDir, markdown);
  }

  return { title, description, markdown };
}
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
import { Navigation, NavigationEntry } from '@mintlify/models';
|
|
2
|
-
import * as cheerio from 'cheerio';
|
|
3
|
-
|
|
4
|
-
import combineNavWithEmptyGroupTitles from '../combineNavWithEmptyGroupTitles.js';
|
|
5
|
-
import downloadLogoImage from '../downloadLogoImage.js';
|
|
6
|
-
import { scrapeGettingFileNameFromUrl } from '../scrapeGettingFileNameFromUrl.js';
|
|
7
|
-
import alternateGroupTitle from './alternateGroupTitle.js';
|
|
8
|
-
import getLinksRecursivelyGitBook from './links-per-group/getLinksRecursivelyGitBook.js';
|
|
9
|
-
import { scrapeGitBookPage } from './scrapeGitBookPage.js';
|
|
10
|
-
|
|
11
|
-
/**
 * Scrapes a GitBook documentation site starting from the HTML of one of its pages.
 *
 * Starts the logo download, builds one navigation group per sidebar section, merges
 * groups with empty titles, and scrapes every resulting navigation entry.
 *
 * @param html - HTML of a page that contains the site's sidebar navigation.
 * @param origin - Origin of the site being scraped; forwarded to the download/scrape helpers.
 * @param cliDir - Forwarded to `scrapeGettingFileNameFromUrl`.
 * @param imageBaseDir - Forwarded to `downloadLogoImage`.
 * @param overwrite - Forwarded to the download/scrape helpers.
 * @param version - Forwarded to `scrapeGettingFileNameFromUrl`.
 * @returns The scrape result for each merged navigation group.
 */
export async function scrapeGitBookSection(
  html: string,
  origin: string,
  cliDir: string,
  imageBaseDir: string,
  overwrite: boolean,
  version: string | undefined
): Promise<NavigationEntry[]> {
  const $ = cheerio.load(html);

  // Download the logo in the background; a failed download is only logged.
  const logoSrc = $('body > header > div > div > div > a > img').first().attr('src');
  downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite).catch(console.error);

  // Get all the navigation sections
  // Some variants of the GitBook UI show the logo and search base in the side navigation bar,
  // but the navigation sections are always the last value.
  const navigationSections = $('body > div > div > aside > div > ul > li');

  // Get all links per group
  const groupsConfig: Navigation = navigationSections.toArray().map((s: cheerio.Element) => {
    const section = $(s);
    const sectionHeader = section.children('div').first();
    const sectionTitle = sectionHeader.text();

    // Only present if the nested navigation is not in a group
    const firstLink = section.find('li > div > a').first();
    // The '/' fallback means a first href is always prepended to the group's pages.
    // NOTE(review): confirm the fallback is intended when the section has no such link.
    const firstHref = firstLink.attr('href') || '/';

    const linkSections = section.find('ul').first().children();
    const pages = getLinksRecursivelyGitBook(linkSections, $);

    return {
      group: sectionTitle || alternateGroupTitle(firstLink, pages),
      pages: [firstHref, ...pages],
    };
  });

  // Merge groups with empty titles together
  const reducedGroupsConfig = combineNavWithEmptyGroupTitles(groupsConfig);

  // Scrape each link in the navigation.
  return Promise.all(
    reducedGroupsConfig.map(async (navEntry: NavigationEntry) =>
      scrapeGettingFileNameFromUrl(
        navEntry,
        cliDir,
        origin,
        overwrite,
        scrapeGitBookPage,
        true,
        version
      )
    )
  );
}
|
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
import * as cheerio from 'cheerio';
|
|
2
|
-
import { NodeHtmlMarkdown } from 'node-html-markdown';
|
|
3
|
-
|
|
4
|
-
import downloadAllImages from '../downloadAllImages.js';
|
|
5
|
-
import replaceImagePaths from '../replaceImagePaths.js';
|
|
6
|
-
|
|
7
|
-
/**
 * Scrapes a single ReadMe page and converts its markdown body to markdown text.
 *
 * @param html - HTML of the ReadMe page.
 * @param origin - Forwarded to `downloadAllImages`.
 * @param cliDir - Forwarded to `replaceImagePaths`.
 * @param imageBaseDir - Forwarded to `downloadAllImages`.
 * @param overwrite - Forwarded to `downloadAllImages`.
 * @param _ - Version; unused by this scraper.
 * @returns The page title, description and converted markdown.
 */
export async function scrapeReadMePage(
  html: string,
  origin: string,
  cliDir: string,
  imageBaseDir: string,
  overwrite: boolean,
  _: string | undefined // version
) {
  const $ = cheerio.load(html);

  const heading = $('h1').first();
  const title = heading.text().trim();

  // Prefer the markdown body next to the heading; otherwise use the article header paragraph.
  const description =
    $('.markdown-body', heading.parent()).text().trim() ||
    $('.rm-Article > header p').text().trim();

  const primaryBody = $('.content-body .markdown-body').first();
  const content = primaryBody.length === 0 ? $('.rm-Article > .markdown-body') : primaryBody;

  // API Pages don't have a markdown body in the same position so there's no HTML
  const contentHtml = content.html() || '';

  const origToWritePath = await downloadAllImages($, content, origin, imageBaseDir, overwrite);

  const converter = new NodeHtmlMarkdown({ useInlineLinks: false });
  const cleaned = converter
    .translate(contentHtml)
    // Keep headers on one line and increase their depth by one
    .replace(/# \n\n/g, '## ')
    // Remove unnecessary zero-width space characters
    .replace(/\u200b/g, '')
    // Remove ReadMe anchor links
    .replace(/\n\[\]\(#.+\)\n/g, '\n')
    // Reduce unnecessary blank lines
    .replace(/\n\n\n/g, '\n\n')
    // Mintlify doesn't support bolded headers, remove the asterisks
    .replace(/(\n#+) \*\*(.*)\*\*\n/g, '$1 $2\n');

  const markdown = origToWritePath
    ? replaceImagePaths(origToWritePath, cliDir, cleaned)
    : cleaned;

  return { title, description, markdown };
}
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
import { Navigation, NavigationEntry } from '@mintlify/models';
|
|
2
|
-
import * as cheerio from 'cheerio';
|
|
3
|
-
|
|
4
|
-
import downloadLogoImage from '../downloadLogoImage.js';
|
|
5
|
-
import { scrapeGettingFileNameFromUrl } from '../scrapeGettingFileNameFromUrl.js';
|
|
6
|
-
import getLinksRecursively from './links-per-group/getLinksRecursively.js';
|
|
7
|
-
import { scrapeReadMePage } from './scrapeReadMePage.js';
|
|
8
|
-
|
|
9
|
-
/**
 * Scrapes a ReadMe documentation site starting from the HTML of one of its pages.
 *
 * Starts the logo download, builds one navigation group per sidebar section and
 * scrapes every resulting navigation entry.
 *
 * @param html - HTML of a page that contains the site's sidebar navigation.
 * @param origin - Origin of the site being scraped; forwarded to the download/scrape helpers.
 * @param cliDir - Forwarded to `scrapeGettingFileNameFromUrl`.
 * @param imageBaseDir - Forwarded to `downloadLogoImage`.
 * @param overwrite - Forwarded to the download/scrape helpers.
 * @param version - Forwarded to `scrapeGettingFileNameFromUrl`.
 * @returns The scrape result for each navigation group.
 */
export async function scrapeReadMeSection(
  html: string,
  origin: string,
  cliDir: string,
  imageBaseDir: string,
  overwrite: boolean,
  version: string | undefined
): Promise<NavigationEntry[]> {
  const $ = cheerio.load(html);

  // Download the logo in the background; a failed download is only logged.
  const logoSrc = $('.rm-Logo-img').first().attr('src');
  downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite).catch(console.error);

  // Get all the navigation sections, but only from the first
  // sidebar found. There are multiple in the HTML for mobile
  // responsiveness but they all have the same links.
  const navigationSections = $('.rm-Sidebar').first().find('.rm-Sidebar-section');

  const groupsConfig: Navigation = navigationSections.toArray().map((s) => {
    const section = $(s);
    const sectionTitle = section.find('h3').first().text();

    // Get all links, then drop duplicates while keeping first-occurrence order.
    // There are duplicates because of nested navigation, eg:
    // subgroupTitle -> /first-page
    // -- First Page -> /first-page ** DUPLICATE **
    // -- Second Page -> /second-page
    const linkSections = section.find('.rm-Sidebar-list').first().children();
    const pages = Array.from(new Set(getLinksRecursively(linkSections, $)));

    // Follows the same structure as mint.json
    return {
      group: sectionTitle,
      pages,
    };
  });

  // Scrape each link in the navigation.
  // ReadMe requires a directory on all sections whereas we use root.
  // /docs is their default directory so we remove it
  return Promise.all(
    groupsConfig.map(async (navEntry: NavigationEntry) =>
      scrapeGettingFileNameFromUrl(
        navEntry,
        cliDir,
        origin,
        overwrite,
        scrapeReadMePage,
        false,
        version,
        '/docs'
      )
    )
  );
}
|
package/src/util.ts
DELETED
|
@@ -1,122 +0,0 @@
|
|
|
1
|
-
import { NavigationEntry } from '@mintlify/models';
|
|
2
|
-
import { existsSync, mkdirSync, writeFileSync } from 'fs';
|
|
3
|
-
import Ora, { Ora as OraType } from 'ora';
|
|
4
|
-
import path from 'path';
|
|
5
|
-
|
|
6
|
-
/**
 * Builds a starter config object with a single "Home" navigation group.
 *
 * @param name - Value for the `name` field.
 * @param color - Value for `colors.primary`.
 * @param ctaName - Label of the top bar call-to-action button.
 * @param ctaUrl - URL of the top bar call-to-action button.
 * @param filename - The only page listed in the "Home" group.
 * @returns The config object; `logo`, `favicon`, `topbarLinks` and `anchors` start empty.
 */
export const MintConfig = (
  name: string,
  color: string,
  ctaName: string,
  ctaUrl: string,
  filename: string
) => ({
  name,
  logo: '',
  favicon: '',
  colors: { primary: color },
  topbarLinks: [],
  topbarCtaButton: { name: ctaName, url: ctaUrl },
  anchors: [],
  navigation: [{ group: 'Home', pages: [filename] }],
  // footerSocials: {}, // support object type for footer types
});
|
|
35
|
-
|
|
36
|
-
/**
 * Renders the text of an MDX page: a frontmatter block followed by the markdown body.
 *
 * The title is wrapped in double quotes unless it already carries them.
 *
 * @param title - Page title, with or without surrounding double quotes.
 * @param description - Optional description; the `description` line is omitted when falsy.
 * @param markdown - Optional page body; an omitted body renders as an empty string.
 * @returns The full page text.
 */
export const Page = (title: string, description?: string, markdown?: string) => {
  // Both checks are made against the original title before anything is added.
  // A lone `"` (or an empty title) must not have its opening quote mistaken
  // for the closing one, hence the length guard.
  const hasOpeningQuote = title.startsWith('"');
  const hasClosingQuote = title.length > 1 && title.endsWith('"');
  const quotedTitle = `${hasOpeningQuote ? '' : '"'}${title}${hasClosingQuote ? '' : '"'}`;

  const optionalDescription = description ? `\ndescription: "${description}"` : '';
  return `---\ntitle: ${quotedTitle}${optionalDescription}\n---\n\n${markdown ?? ''}`;
};
|
|
52
|
-
|
|
53
|
-
/**
 * Returns the origin (scheme, host and port) of a URL.
 *
 * e.g. https://google.com      -> https://google.com
 *      https://google.com/page -> https://google.com
 *
 * @param url - An absolute URL; the `URL` constructor throws if it cannot be parsed.
 */
export function getOrigin(url: string) {
  const { origin } = new URL(url);
  return origin;
}
|
|
58
|
-
|
|
59
|
-
/**
 * Serializes each navigation entry as JSON indented with two spaces and joins
 * the results with a comma followed by a newline.
 *
 * @param objs - The entries to serialize.
 * @returns The joined JSON text; an empty string for an empty array.
 */
export function objToReadableString(objs: NavigationEntry[]) {
  const serialized: string[] = [];
  for (const entry of objs) {
    // Two spaces as indentation
    serialized.push(JSON.stringify(entry, null, 2));
  }
  return serialized.join(',\n');
}
|
|
63
|
-
|
|
64
|
-
/**
 * Turns a title into a lowercase, dash-separated file name.
 *
 * Every character that is not a letter or digit becomes a space, the ends are
 * trimmed, and each remaining space becomes a dash, so a run of special
 * characters inside the title yields a run of dashes.
 *
 * @param title - The title to convert.
 * @returns The file name, without an extension.
 */
export const toFilename = (title: string) => {
  // Special characters at the start and end disappear via the trim.
  const spaced = title.replace(/[^a-z0-9]/gi, ' ').trim();
  return spaced.split(' ').join('-').toLowerCase();
};
|
|
73
|
-
|
|
74
|
-
/**
 * Ensures a file name ends with the `.mdx` extension.
 *
 * @param fileName - File name with or without the extension.
 * @returns `fileName` unchanged when it already ends with `.mdx`, otherwise with `.mdx` appended.
 */
export const addMdx = (fileName: string) =>
  fileName.endsWith('.mdx') ? fileName : `${fileName}.mdx`;
|
|
80
|
-
|
|
81
|
-
/**
 * Writes an MDX page to disk.
 *
 * The file is placed at `rootDir/<fileName>.mdx`; when `fileName` is not given the
 * name is derived from `title`. An existing file is left untouched unless
 * `overwrite` is set. Write failures are logged, not thrown.
 *
 * @param title - Page title; also the source of the file name when `fileName` is omitted.
 * @param description - Optional description for the page frontmatter.
 * @param markdown - Optional page body.
 * @param overwrite - Whether an existing file may be replaced. Defaults to `false`.
 * @param rootDir - Directory the page is written under. Defaults to `''`.
 * @param fileName - Optional file name, with or without the `.mdx` extension.
 */
export const createPage = (
  title: string,
  description?: string,
  markdown?: string,
  overwrite = false,
  rootDir = '',
  fileName?: string
) => {
  const writePath = path.join(rootDir, addMdx(fileName || toFilename(title)));

  // Create the folders needed if they're missing. The target's own directory is
  // used rather than `rootDir` so that sub-folders contained in `fileName` exist
  // too, and so that an empty `rootDir` resolves to '.' instead of ''.
  mkdirSync(path.dirname(writePath), { recursive: true });

  if (!overwrite && existsSync(writePath)) {
    console.log(`❌ Skipping existing file ${writePath}`);
    return;
  }

  // Write the page to disk
  try {
    writeFileSync(writePath, Page(title, description, markdown));
    console.log('✏️ - ' + writePath);
  } catch (e) {
    console.error(e);
  }
};
|
|
107
|
-
|
|
108
|
-
/**
 * Creates an Ora spinner and starts it immediately.
 *
 * @param startText - Text passed to the spinner's `start` call. Defaults to `''`.
 * @returns The started spinner.
 */
export const buildLogger = (startText = ''): OraType => Ora().start(startText);
|
|
112
|
-
|
|
113
|
-
/**
 * Returns the lowercased text after the last `.` in a file name.
 *
 * @param filename - The file name to inspect.
 * @returns The extension without the dot, `''` when the name ends with a dot,
 *          or `undefined` when the name contains no dot at all.
 */
export const getFileExtension = (filename: string) => {
  const lastDot = filename.lastIndexOf('.');
  if (lastDot === -1) return undefined;
  return filename.slice(lastDot + 1).toLowerCase();
};
|
|
118
|
-
|
|
119
|
-
/**
 * Tells whether a file's extension is one of `mdx`, `md` or `tsx`.
 *
 * @param filename - The file name to inspect.
 * @returns `true` or `false` for a non-empty extension; when the extension is
 *          missing or empty, that falsy value (`undefined` or `''`) is returned as is.
 */
export const fileBelongsInPagesFolder = (filename: string) => {
  const extension = getFileExtension(filename);
  if (!extension) return extension;

  switch (extension) {
    case 'mdx':
    case 'md':
    case 'tsx':
      return true;
    default:
      return false;
  }
};
|