@mintlify/scraping 3.0.187 → 3.0.188
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/assert.d.ts +5 -0
- package/bin/assert.js +13 -0
- package/bin/assert.js.map +1 -0
- package/bin/cli.js +43 -72
- package/bin/cli.js.map +1 -1
- package/bin/components/Accordion.d.ts +5 -0
- package/bin/components/Accordion.js +54 -0
- package/bin/components/Accordion.js.map +1 -0
- package/bin/components/AccordionGroup.d.ts +5 -0
- package/bin/components/AccordionGroup.js +52 -0
- package/bin/components/AccordionGroup.js.map +1 -0
- package/bin/components/Callout.d.ts +5 -0
- package/bin/components/Callout.js +114 -0
- package/bin/components/Callout.js.map +1 -0
- package/bin/components/Card.d.ts +5 -0
- package/bin/components/Card.js +135 -0
- package/bin/components/Card.js.map +1 -0
- package/bin/components/CardGroup.d.ts +5 -0
- package/bin/components/CardGroup.js +52 -0
- package/bin/components/CardGroup.js.map +1 -0
- package/bin/components/CodeGroup.d.ts +5 -0
- package/bin/components/CodeGroup.js +166 -0
- package/bin/components/CodeGroup.js.map +1 -0
- package/bin/components/Frame.d.ts +5 -0
- package/bin/components/Frame.js +51 -0
- package/bin/components/Frame.js.map +1 -0
- package/bin/components/Tabs.d.ts +5 -0
- package/bin/components/Tabs.js +122 -0
- package/bin/components/Tabs.js.map +1 -0
- package/bin/components/link.d.ts +2 -0
- package/bin/components/link.js +16 -0
- package/bin/components/link.js.map +1 -0
- package/bin/constants.d.ts +6 -7
- package/bin/constants.js +31 -12
- package/bin/constants.js.map +1 -1
- package/bin/customComponents/create.d.ts +10 -0
- package/bin/customComponents/create.js +69 -0
- package/bin/customComponents/create.js.map +1 -0
- package/bin/customComponents/plugin.d.ts +2 -0
- package/bin/customComponents/plugin.js +26 -0
- package/bin/customComponents/plugin.js.map +1 -0
- package/bin/customComponents/selective.d.ts +6 -0
- package/bin/customComponents/selective.js +29 -0
- package/bin/customComponents/selective.js.map +1 -0
- package/bin/nav/iterate.d.ts +2 -0
- package/bin/nav/iterate.js +15 -0
- package/bin/nav/iterate.js.map +1 -0
- package/bin/nav/listItems.d.ts +8 -0
- package/bin/nav/listItems.js +62 -0
- package/bin/nav/listItems.js.map +1 -0
- package/bin/nav/retrieve.d.ts +3 -0
- package/bin/nav/retrieve.js +75 -0
- package/bin/nav/retrieve.js.map +1 -0
- package/bin/nav/root.d.ts +2 -0
- package/bin/nav/root.js +40 -0
- package/bin/nav/root.js.map +1 -0
- package/bin/openapi/generateOpenApiPages.js +2 -2
- package/bin/openapi/generateOpenApiPages.js.map +1 -1
- package/bin/root/retrieve.d.ts +2 -0
- package/bin/root/retrieve.js +46 -0
- package/bin/root/retrieve.js.map +1 -0
- package/bin/scrapingPipeline/group.d.ts +5 -0
- package/bin/scrapingPipeline/group.js +46 -0
- package/bin/scrapingPipeline/group.js.map +1 -0
- package/bin/scrapingPipeline/icon.d.ts +2 -0
- package/bin/scrapingPipeline/icon.js +22 -0
- package/bin/scrapingPipeline/icon.js.map +1 -0
- package/bin/scrapingPipeline/images.d.ts +3 -0
- package/bin/scrapingPipeline/images.js +50 -0
- package/bin/scrapingPipeline/images.js.map +1 -0
- package/bin/scrapingPipeline/logo.d.ts +5 -0
- package/bin/scrapingPipeline/logo.js +92 -0
- package/bin/scrapingPipeline/logo.js.map +1 -0
- package/bin/scrapingPipeline/page.d.ts +6 -0
- package/bin/scrapingPipeline/page.js +102 -0
- package/bin/scrapingPipeline/page.js.map +1 -0
- package/bin/scrapingPipeline/root.d.ts +2 -0
- package/bin/scrapingPipeline/root.js +8 -0
- package/bin/scrapingPipeline/root.js.map +1 -0
- package/bin/scrapingPipeline/site.d.ts +7 -0
- package/bin/scrapingPipeline/site.js +129 -0
- package/bin/scrapingPipeline/site.js.map +1 -0
- package/bin/scrapingPipeline/tabs.d.ts +3 -0
- package/bin/scrapingPipeline/tabs.js +67 -0
- package/bin/scrapingPipeline/tabs.js.map +1 -0
- package/bin/tabs/retrieveReadme.d.ts +3 -0
- package/bin/tabs/retrieveReadme.js +78 -0
- package/bin/tabs/retrieveReadme.js.map +1 -0
- package/bin/tsconfig.build.tsbuildinfo +1 -1
- package/bin/types/components.d.ts +2 -0
- package/bin/types/components.js +2 -0
- package/bin/types/components.js.map +1 -0
- package/bin/types/framework.d.ts +8 -0
- package/bin/types/framework.js +3 -0
- package/bin/types/framework.js.map +1 -0
- package/bin/types/hast.d.ts +6 -0
- package/bin/types/hast.js +2 -0
- package/bin/types/hast.js.map +1 -0
- package/bin/types/result.d.ts +7 -0
- package/bin/types/result.js +2 -0
- package/bin/types/result.js.map +1 -0
- package/bin/types/scrapeFunc.d.ts +3 -0
- package/bin/types/scrapeFunc.js +2 -0
- package/bin/types/scrapeFunc.js.map +1 -0
- package/bin/utils/append.d.ts +1 -0
- package/bin/utils/append.js +12 -0
- package/bin/utils/append.js.map +1 -0
- package/bin/utils/children.d.ts +5 -0
- package/bin/utils/children.js +35 -0
- package/bin/utils/children.js.map +1 -0
- package/bin/utils/className.d.ts +3 -0
- package/bin/utils/className.js +13 -0
- package/bin/utils/className.js.map +1 -0
- package/bin/utils/detectFramework.d.ts +4 -0
- package/bin/utils/detectFramework.js +60 -0
- package/bin/utils/detectFramework.js.map +1 -0
- package/bin/utils/emptyParagraphs.d.ts +3 -0
- package/bin/utils/emptyParagraphs.js +19 -0
- package/bin/utils/emptyParagraphs.js.map +1 -0
- package/bin/utils/errors.d.ts +3 -0
- package/bin/utils/errors.js +16 -0
- package/bin/utils/errors.js.map +1 -0
- package/bin/utils/escape.d.ts +2 -0
- package/bin/utils/escape.js +25 -0
- package/bin/utils/escape.js.map +1 -0
- package/bin/utils/extension.d.ts +3 -0
- package/bin/utils/extension.js +18 -0
- package/bin/utils/extension.js.map +1 -0
- package/bin/utils/file.d.ts +4 -0
- package/bin/utils/file.js +43 -0
- package/bin/utils/file.js.map +1 -0
- package/bin/utils/firstChild.d.ts +2 -0
- package/bin/utils/firstChild.js +12 -0
- package/bin/utils/firstChild.js.map +1 -0
- package/bin/utils/images.d.ts +5 -0
- package/bin/utils/images.js +86 -0
- package/bin/utils/images.js.map +1 -0
- package/bin/utils/img.d.ts +2 -0
- package/bin/utils/img.js +15 -0
- package/bin/utils/img.js.map +1 -0
- package/bin/utils/log.d.ts +18 -0
- package/bin/utils/log.js +68 -0
- package/bin/utils/log.js.map +1 -0
- package/bin/utils/nestedRoots.d.ts +7 -0
- package/bin/utils/nestedRoots.js +19 -0
- package/bin/utils/nestedRoots.js.map +1 -0
- package/bin/utils/network.d.ts +5 -0
- package/bin/utils/network.js +82 -0
- package/bin/utils/network.js.map +1 -0
- package/bin/utils/path.d.ts +1 -0
- package/bin/utils/path.js +22 -0
- package/bin/utils/path.js.map +1 -0
- package/bin/utils/position.d.ts +3 -0
- package/bin/utils/position.js +12 -0
- package/bin/utils/position.js.map +1 -0
- package/bin/utils/reservedNames.d.ts +4 -0
- package/bin/utils/reservedNames.js +27 -0
- package/bin/utils/reservedNames.js.map +1 -0
- package/bin/utils/strings.d.ts +2 -0
- package/bin/utils/strings.js +7 -0
- package/bin/utils/strings.js.map +1 -0
- package/bin/utils/text.d.ts +2 -0
- package/bin/utils/text.js +11 -0
- package/bin/utils/text.js.map +1 -0
- package/bin/utils/title.d.ts +10 -0
- package/bin/utils/title.js +58 -0
- package/bin/utils/title.js.map +1 -0
- package/bin/utils/url.d.ts +3 -0
- package/bin/utils/url.js +10 -0
- package/bin/utils/url.js.map +1 -0
- package/package.json +17 -8
- package/src/assert.ts +15 -0
- package/src/cli.ts +53 -90
- package/src/components/Accordion.ts +84 -0
- package/src/components/AccordionGroup.ts +69 -0
- package/src/components/Callout.ts +159 -0
- package/src/components/Card.ts +168 -0
- package/src/components/CardGroup.ts +69 -0
- package/src/components/CodeGroup.ts +209 -0
- package/src/components/Frame.ts +86 -0
- package/src/components/Tabs.ts +154 -0
- package/src/components/link.ts +17 -0
- package/src/constants.ts +37 -19
- package/src/customComponents/create.ts +106 -0
- package/src/customComponents/plugin.ts +31 -0
- package/src/customComponents/selective.ts +37 -0
- package/src/nav/iterate.ts +18 -0
- package/src/nav/listItems.ts +82 -0
- package/src/nav/retrieve.ts +88 -0
- package/src/nav/root.ts +47 -0
- package/src/openapi/generateOpenApiPages.ts +2 -2
- package/src/root/retrieve.ts +52 -0
- package/src/scrapingPipeline/group.ts +62 -0
- package/src/scrapingPipeline/icon.ts +26 -0
- package/src/scrapingPipeline/images.ts +67 -0
- package/src/scrapingPipeline/logo.ts +127 -0
- package/src/scrapingPipeline/page.ts +130 -0
- package/src/scrapingPipeline/root.ts +10 -0
- package/src/scrapingPipeline/site.ts +161 -0
- package/src/scrapingPipeline/tabs.ts +87 -0
- package/src/tabs/retrieveReadme.ts +99 -0
- package/src/types/components.ts +3 -0
- package/src/types/framework.ts +10 -0
- package/src/types/hast.ts +12 -0
- package/src/types/result.ts +1 -0
- package/src/types/scrapeFunc.ts +9 -0
- package/src/utils/append.ts +9 -0
- package/src/utils/children.ts +51 -0
- package/src/utils/className.ts +14 -0
- package/src/utils/detectFramework.ts +72 -0
- package/src/utils/emptyParagraphs.ts +21 -0
- package/src/utils/errors.ts +24 -0
- package/src/utils/escape.ts +30 -0
- package/src/utils/extension.ts +19 -0
- package/src/utils/file.ts +58 -0
- package/src/utils/firstChild.ts +13 -0
- package/src/utils/images.ts +101 -0
- package/src/utils/img.ts +17 -0
- package/src/utils/log.ts +82 -0
- package/src/utils/nestedRoots.ts +20 -0
- package/src/utils/network.ts +95 -0
- package/src/utils/path.ts +27 -0
- package/src/utils/position.ts +14 -0
- package/src/utils/reservedNames.ts +31 -0
- package/src/utils/strings.ts +7 -0
- package/src/utils/text.ts +11 -0
- package/src/utils/title.ts +68 -0
- package/src/utils/url.ts +8 -0
- package/bin/browser.d.ts +0 -2
- package/bin/browser.js +0 -24
- package/bin/browser.js.map +0 -1
- package/bin/checks.d.ts +0 -8
- package/bin/checks.js +0 -24
- package/bin/checks.js.map +0 -1
- package/bin/downloadImage.d.ts +0 -5
- package/bin/downloadImage.js +0 -88
- package/bin/downloadImage.js.map +0 -1
- package/bin/scraping/combineNavWithEmptyGroupTitles.d.ts +0 -2
- package/bin/scraping/combineNavWithEmptyGroupTitles.js +0 -20
- package/bin/scraping/combineNavWithEmptyGroupTitles.js.map +0 -1
- package/bin/scraping/detectFramework.d.ts +0 -9
- package/bin/scraping/detectFramework.js +0 -36
- package/bin/scraping/detectFramework.js.map +0 -1
- package/bin/scraping/downloadAllImages.d.ts +0 -4
- package/bin/scraping/downloadAllImages.js +0 -36
- package/bin/scraping/downloadAllImages.js.map +0 -1
- package/bin/scraping/downloadLogoImage.d.ts +0 -1
- package/bin/scraping/downloadLogoImage.js +0 -12
- package/bin/scraping/downloadLogoImage.js.map +0 -1
- package/bin/scraping/replaceImagePaths.d.ts +0 -1
- package/bin/scraping/replaceImagePaths.js +0 -14
- package/bin/scraping/replaceImagePaths.js.map +0 -1
- package/bin/scraping/scrapeFileGettingFileNameFromUrl.d.ts +0 -6
- package/bin/scraping/scrapeFileGettingFileNameFromUrl.js +0 -46
- package/bin/scraping/scrapeFileGettingFileNameFromUrl.js.map +0 -1
- package/bin/scraping/scrapeGettingFileNameFromUrl.d.ts +0 -6
- package/bin/scraping/scrapeGettingFileNameFromUrl.js +0 -13
- package/bin/scraping/scrapeGettingFileNameFromUrl.js.map +0 -1
- package/bin/scraping/scrapePage.d.ts +0 -8
- package/bin/scraping/scrapePage.js +0 -10
- package/bin/scraping/scrapePage.js.map +0 -1
- package/bin/scraping/scrapePageCommands.d.ts +0 -7
- package/bin/scraping/scrapePageCommands.js +0 -50
- package/bin/scraping/scrapePageCommands.js.map +0 -1
- package/bin/scraping/scrapeSection.d.ts +0 -3
- package/bin/scraping/scrapeSection.js +0 -12
- package/bin/scraping/scrapeSection.js.map +0 -1
- package/bin/scraping/scrapeSectionCommands.d.ts +0 -6
- package/bin/scraping/scrapeSectionCommands.js +0 -63
- package/bin/scraping/scrapeSectionCommands.js.map +0 -1
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.d.ts +0 -5
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js +0 -29
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js.map +0 -1
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.d.ts +0 -2
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js +0 -31
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js.map +0 -1
- package/bin/scraping/site-scrapers/alternateGroupTitle.d.ts +0 -3
- package/bin/scraping/site-scrapers/alternateGroupTitle.js +0 -9
- package/bin/scraping/site-scrapers/alternateGroupTitle.js.map +0 -1
- package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.d.ts +0 -5
- package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js +0 -33
- package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js.map +0 -1
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.d.ts +0 -3
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js +0 -35
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js.map +0 -1
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.d.ts +0 -3
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js +0 -33
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js.map +0 -1
- package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.d.ts +0 -2
- package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js +0 -30
- package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js.map +0 -1
- package/bin/scraping/site-scrapers/openNestedGitbookMenus.d.ts +0 -2
- package/bin/scraping/site-scrapers/openNestedGitbookMenus.js +0 -21
- package/bin/scraping/site-scrapers/openNestedGitbookMenus.js.map +0 -1
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.d.ts +0 -5
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js +0 -53
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js.map +0 -1
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.d.ts +0 -2
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js +0 -32
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js.map +0 -1
- package/bin/scraping/site-scrapers/scrapeGitBookPage.d.ts +0 -5
- package/bin/scraping/site-scrapers/scrapeGitBookPage.js +0 -56
- package/bin/scraping/site-scrapers/scrapeGitBookPage.js.map +0 -1
- package/bin/scraping/site-scrapers/scrapeGitBookSection.d.ts +0 -2
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js +0 -42
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js.map +0 -1
- package/bin/scraping/site-scrapers/scrapeReadMePage.d.ts +0 -5
- package/bin/scraping/site-scrapers/scrapeReadMePage.js +0 -38
- package/bin/scraping/site-scrapers/scrapeReadMePage.js.map +0 -1
- package/bin/scraping/site-scrapers/scrapeReadMeSection.d.ts +0 -2
- package/bin/scraping/site-scrapers/scrapeReadMeSection.js +0 -39
- package/bin/scraping/site-scrapers/scrapeReadMeSection.js.map +0 -1
- package/bin/util.d.ts +0 -29
- package/bin/util.js +0 -97
- package/bin/util.js.map +0 -1
- package/src/browser.ts +0 -24
- package/src/checks.ts +0 -32
- package/src/downloadImage.ts +0 -102
- package/src/scraping/combineNavWithEmptyGroupTitles.ts +0 -21
- package/src/scraping/detectFramework.ts +0 -55
- package/src/scraping/downloadAllImages.ts +0 -61
- package/src/scraping/downloadLogoImage.ts +0 -24
- package/src/scraping/replaceImagePaths.ts +0 -17
- package/src/scraping/scrapeFileGettingFileNameFromUrl.ts +0 -84
- package/src/scraping/scrapeGettingFileNameFromUrl.ts +0 -56
- package/src/scraping/scrapePage.ts +0 -40
- package/src/scraping/scrapePageCommands.ts +0 -68
- package/src/scraping/scrapeSection.ts +0 -30
- package/src/scraping/scrapeSectionCommands.ts +0 -98
- package/src/scraping/site-scrapers/Intercom/scrapeIntercomPage.ts +0 -52
- package/src/scraping/site-scrapers/Intercom/scrapeIntercomSection.ts +0 -54
- package/src/scraping/site-scrapers/alternateGroupTitle.ts +0 -11
- package/src/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.ts +0 -45
- package/src/scraping/site-scrapers/links-per-group/getLinksRecursively.ts +0 -47
- package/src/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.ts +0 -44
- package/src/scraping/site-scrapers/openNestedDocusaurusMenus.ts +0 -42
- package/src/scraping/site-scrapers/openNestedGitbookMenus.ts +0 -27
- package/src/scraping/site-scrapers/scrapeDocusaurusPage.ts +0 -85
- package/src/scraping/site-scrapers/scrapeDocusaurusSection.ts +0 -63
- package/src/scraping/site-scrapers/scrapeGitBookPage.ts +0 -82
- package/src/scraping/site-scrapers/scrapeGitBookSection.ts +0 -69
- package/src/scraping/site-scrapers/scrapeReadMePage.ts +0 -56
- package/src/scraping/site-scrapers/scrapeReadMeSection.ts +0 -66
- package/src/util.ts +0 -122
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import type { Root as HastRoot } from 'hast';
|
|
2
|
+
import type { Root as MdastRoot } from 'mdast';
|
|
3
|
+
import remarkGfm from 'remark-gfm';
|
|
4
|
+
import remarkMdx from 'remark-mdx';
|
|
5
|
+
import remarkStringify from 'remark-stringify';
|
|
6
|
+
import { unified } from 'unified';
|
|
7
|
+
|
|
8
|
+
import { convertHeaderLinksToText } from '../components/link.js';
|
|
9
|
+
import { CONTENT_FAILURE_MSG, MDAST_FAILURE_MSG } from '../constants.js';
|
|
10
|
+
import {
|
|
11
|
+
createCallout,
|
|
12
|
+
createCard,
|
|
13
|
+
createAccordion,
|
|
14
|
+
createAccordionGroup,
|
|
15
|
+
createFrame,
|
|
16
|
+
createCodeGroup,
|
|
17
|
+
createTabs,
|
|
18
|
+
createCardGroup,
|
|
19
|
+
} from '../customComponents/create.js';
|
|
20
|
+
import { rehypeToRemarkCustomComponents } from '../customComponents/plugin.js';
|
|
21
|
+
import { selectiveRehypeRemark } from '../customComponents/selective.js';
|
|
22
|
+
import { retrieveRootContent } from '../root/retrieve.js';
|
|
23
|
+
import type { Result } from '../types/result.js';
|
|
24
|
+
import { unifiedRemoveClassNames } from '../utils/className.js';
|
|
25
|
+
import { detectFramework, framework } from '../utils/detectFramework.js';
|
|
26
|
+
import { unifiedRemoveEmptyParagraphs } from '../utils/emptyParagraphs.js';
|
|
27
|
+
import { getErrorMessage, logErrorResults } from '../utils/errors.js';
|
|
28
|
+
import { escapeCharactersOutsideCodeBlocks } from '../utils/escape.js';
|
|
29
|
+
import { write, writePage } from '../utils/file.js';
|
|
30
|
+
import { log } from '../utils/log.js';
|
|
31
|
+
import { unifiedRemoveNestedRoots } from '../utils/nestedRoots.js';
|
|
32
|
+
import { unifiedRemovePositions } from '../utils/position.js';
|
|
33
|
+
import { removeLeadingSlash, removeTrailingSlash } from '../utils/strings.js';
|
|
34
|
+
import { getDescriptionFromRoot, getTitleFromHeading } from '../utils/title.js';
|
|
35
|
+
import { downloadImagesFromFile } from './images.js';
|
|
36
|
+
import { htmlToHast } from './root.js';
|
|
37
|
+
|
|
38
|
+
/**
 * Converts a single scraped HTML page into an MDX page written via `writePage`.
 *
 * When `opts.externalLink` is set, no HTML is parsed: `html` is used as the
 * file name of a stub page that points at `url`.
 *
 * Otherwise the HTML is parsed to hast, the root content is extracted, custom
 * components are converted, images are downloaded, and the resulting mdast
 * tree is stringified to MDX.
 *
 * @param html - Page markup, or the target file name when `opts.externalLink` is true.
 * @param url - Address the page was fetched from (or the external link target).
 * @param opts - `externalLink` switches to stub mode; `isOverviewPage` forces the
 *   title `'Overview'`; `rootPath` overrides the path the page is written under.
 * @returns A success result carrying `[key, replacement]` for external links and
 *   root-path pages (`data` is `undefined` otherwise), or a failure result with
 *   a message prefixed by the page URL.
 * @throws Re-throws any error raised while downloading images.
 */
export async function scrapePage(
  html: string,
  url: string | URL,
  opts: {
    externalLink: boolean;
    isOverviewPage?: boolean;
    rootPath?: string;
  } = { externalLink: false }
): Promise<Result<[string, string]>> {
  const pageUrl = new URL(url);

  if (opts.externalLink) {
    // In this mode `html` is not markup: it is the file name for the stub page.
    const externalHref = pageUrl.toString();
    writePage(`${html}.mdx`, '', '', '', externalHref);
    return { success: true, data: [externalHref, html] };
  }

  const hast = htmlToHast(html);

  // Only detect the framework when no vendor has been recorded yet.
  if (!framework.vendor) detectFramework(hast);

  const urlStr = pageUrl.toString();
  const content = retrieveRootContent(hast);
  if (!content) {
    return { success: false, message: `${urlStr}: ${CONTENT_FAILURE_MSG}` };
  }

  const contentAsRoot: HastRoot = { type: 'root', children: [content] };

  // Plugin order is significant; it is kept exactly as originally authored.
  const hastToMdast = unified()
    .use(createCard)
    .use(createAccordion)
    .use(createFrame)
    .use(createTabs)
    .use(createCallout)
    .use(createCardGroup)
    .use(createAccordionGroup)
    .use(createCodeGroup)
    .use(unifiedRemoveClassNames)
    .use(unifiedRemovePositions)
    .use(unifiedRemoveEmptyParagraphs)
    .use(escapeCharactersOutsideCodeBlocks)
    .use(selectiveRehypeRemark)

    // Cleans up any nested components left untouched
    // by `selectiveRehypeRemark`, and converts them to
    // MDX compatible components
    .use(rehypeToRemarkCustomComponents)
    .use(convertHeaderLinksToText)
    .use(unifiedRemoveNestedRoots);

  const mdastTree = hastToMdast.runSync(contentAsRoot) as MdastRoot;

  try {
    const imageResults = await downloadImagesFromFile(mdastTree, pageUrl);
    logErrorResults(`scraping images from ${pageUrl.toString()}`, imageResults);
  } catch (error) {
    const reason = getErrorMessage(error);
    log(`We encountered an error when scraping the images from ${pageUrl.toString()}${reason}`);
    throw error;
  }

  const title = getTitleFromHeading(mdastTree);
  const description = getDescriptionFromRoot(mdastTree);

  try {
    const mdx = unified()
      .use(remarkMdx)
      .use(remarkGfm)
      // @ts-expect-error remarkStringify errors even if used for valid code from documentation examples
      .use(remarkStringify)
      .stringify(mdastTree);

    // Decide where the page is written: an explicit root path wins, and a bare
    // origin (no path) is written as `home`.
    let outputUrl = pageUrl;
    if (opts.rootPath) {
      outputUrl = new URL(opts.rootPath, pageUrl.origin);
    } else if (pageUrl.origin === removeTrailingSlash(pageUrl.toString())) {
      outputUrl = new URL('home', pageUrl.origin);
    }

    const pageTitle = opts.isOverviewPage ? 'Overview' : title;
    writePage(outputUrl, pageTitle, description, String(mdx));

    if (!opts.rootPath) {
      return { success: true, data: undefined };
    }

    // Pair the page's original pathname with the root path it was written under.
    const originalPath = removeLeadingSlash(removeTrailingSlash(new URL(urlStr).pathname));
    return { success: true, data: [originalPath, opts.rootPath] };
  } catch (error) {
    // Dump the tree that failed so it can be inspected afterwards.
    write('error.json', JSON.stringify(mdastTree, undefined, 2));
    const reason = getErrorMessage(error);
    return { success: false, message: `${urlStr}: ${MDAST_FAILURE_MSG}${reason}` };
  }
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { Root as HastRoot } from 'hast';
|
|
2
|
+
import rehypeParse from 'rehype-parse';
|
|
3
|
+
import { unified } from 'unified';
|
|
4
|
+
|
|
5
|
+
import { unifiedRemovePositions } from '../utils/position.js';
|
|
6
|
+
|
|
7
|
+
/**
 * Parses an HTML string into a hast root node using `rehype-parse`.
 *
 * @param html - Raw HTML markup to parse.
 * @returns The parsed tree, cast to `HastRoot`.
 */
export function htmlToHast(html: string): HastRoot {
  // NOTE(review): the suppression text below mentions remarkStringify, but this
  // statement uses rehypeParse — the message was presumably copied from another
  // call site; confirm which call actually triggers the type error.
  // NOTE(review): `parse` presumably runs only the parser phase, in which case
  // `unifiedRemovePositions` would have no effect here — confirm against unified docs.
  // @ts-expect-error remarkStringify errors even if used for valid code from documentation examples
  return unified().use(rehypeParse).use(unifiedRemovePositions).parse(html) as HastRoot;
}
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import { MintConfig, Navigation, Tab } from '@mintlify/models';
|
|
2
|
+
import type { Root as HastRoot } from 'hast';
|
|
3
|
+
import traverse from 'traverse';
|
|
4
|
+
|
|
5
|
+
import { NAV_FAILURE_MSG } from '../constants.js';
|
|
6
|
+
import { iterateOverNavItems } from '../nav/iterate.js';
|
|
7
|
+
import { retrieveNavItems } from '../nav/retrieve.js';
|
|
8
|
+
import { retrieveRootNavElement } from '../nav/root.js';
|
|
9
|
+
import type { Result } from '../types/result.js';
|
|
10
|
+
import { detectFramework, framework } from '../utils/detectFramework.js';
|
|
11
|
+
import { logErrorResults } from '../utils/errors.js';
|
|
12
|
+
import { startPuppeteer } from '../utils/network.js';
|
|
13
|
+
import { INDEX_NAMES, iterateThroughReservedNames } from '../utils/reservedNames.js';
|
|
14
|
+
import { removeTrailingSlash } from '../utils/strings.js';
|
|
15
|
+
import { scrapePageGroup } from './group.js';
|
|
16
|
+
import { downloadFavicon } from './icon.js';
|
|
17
|
+
import { downloadLogos } from './logo.js';
|
|
18
|
+
import { htmlToHast } from './root.js';
|
|
19
|
+
|
|
20
|
+
/**
 * Scrapes every page linked from a site's navigation and builds a `MintConfig`
 * describing the result.
 *
 * Navigation links are split into external links (different origin), internal
 * links, and root links (the bare origin). Each group is scraped via
 * `scrapePageGroup`; navigation entries are then rewritten with the
 * replacements those scrapes reported.
 *
 * @param html - Markup of the entry page; parsed only when `opts.hast` is absent.
 * @param url - Address of the entry page.
 * @param opts - `hast` supplies an already-parsed tree; `tabs` is passed through
 *   to the returned config unchanged.
 * @returns A success result with the generated config, or a failure result when
 *   the navigation cannot be found, the URL has no origin, no links are found,
 *   or an error is thrown while scraping.
 */
export async function scrapeSite(
  html: string,
  url: string | URL,
  opts: { hast?: HastRoot; tabs?: Array<Tab> } = {}
): Promise<Result<MintConfig>> {
  let hast = opts.hast;
  if (!hast) hast = htmlToHast(html);

  url = new URL(url);
  const origin = url.origin;

  detectFramework(hast);

  const sidebar = retrieveRootNavElement(hast);
  if (!sidebar) return { success: false, message: `${url.toString()}: ${NAV_FAILURE_MSG}` };

  const navItems = retrieveNavItems(sidebar);

  if (origin === '') {
    return { success: false, message: `invalid URL provided to scrape site: ${url}` };
  }

  const listOfLinks = iterateOverNavItems(navItems, origin);
  if (listOfLinks.length === 0) {
    return { success: false, message: `no navigation links were able to be found: ${url}` };
  }

  const needsBrowser = framework.vendor === 'gitbook';

  // Partition links: other origins, same-origin pages, and the bare origin itself.
  const externalLinks = listOfLinks.filter((link) => link.origin !== origin);
  const internalLinks = listOfLinks.filter(
    (link) => link.origin === origin && removeTrailingSlash(link.toString()) !== origin
  );
  const rootLinks = listOfLinks.filter(
    (link) => link.origin === origin && removeTrailingSlash(link.toString()) === origin
  );

  const allPathnames = [
    ...internalLinks.map((link) => link.toString()),
    ...rootLinks.map((link) => link.toString()),
  ];

  // Each root link gets its own name; every chosen name is appended to
  // `allPathnames` before the next one is picked.
  // NOTE(review): presumably `iterateThroughReservedNames` avoids names already
  // present in the list — confirm against utils/reservedNames.
  const rootPaths = rootLinks.map(() => {
    const name = iterateThroughReservedNames(INDEX_NAMES, allPathnames);
    allPathnames.push(name);
    return name;
  });

  try {
    const externalResults = await scrapePageGroup(externalLinks, needsBrowser, {
      externalLinks: true,
    });
    const internalResults = await scrapePageGroup(internalLinks, needsBrowser);
    const rootResults = await scrapePageGroup(rootLinks, needsBrowser, {
      externalLinks: false,
      rootPaths,
    });

    const externalLinkReplaceMap = new Map<string, string>(
      externalResults
        .filter((result) => result.success)
        .map((result) => result.data as [string, string])
    );

    const rootPathReplaceMap = new Map<string, string>(
      rootResults
        .filter((result) => result.success)
        .map((result) => result.data as [string, string])
    );

    // Rewrite navigation entries using the replacements reported by the scrapes.
    traverse(navItems).forEach(function (value) {
      if (typeof value === 'string') {
        if (externalLinkReplaceMap.has(value)) {
          this.update(externalLinkReplaceMap.get(value) ?? value);
        } else if (rootPathReplaceMap.has(value)) {
          this.update(rootPathReplaceMap.get(value) ?? value);
        }
      } else if (Array.isArray(value)) {
        // `some` rather than `find`: `find` returns the matched element itself,
        // which is falsy when the matching key is an empty string.
        if (value.some((item) => externalLinkReplaceMap.has(item))) {
          this.update(value.map((item) => externalLinkReplaceMap.get(item) ?? item));
        } else if (value.some((item) => rootPathReplaceMap.has(item))) {
          this.update(value.map((item) => rootPathReplaceMap.get(item) ?? item));
        }
      }
    });

    // Strip the first `/mintie_overview` occurrence from every string entry.
    traverse(navItems).forEach(function (value) {
      if (typeof value === 'string') {
        this.update(value.replace('/mintie_overview', ''));
      } else if (Array.isArray(value)) {
        this.update(
          value.map((item) =>
            typeof item === 'string' ? item.replace('/mintie_overview', '') : item
          )
        );
      }
    });

    // Wrap bare top-level pages in a single-page group whose name is derived
    // from the page string ("getting-started" -> "Getting Started").
    navItems.forEach((navItem, index) => {
      if (typeof navItem !== 'string') return;
      const name = navItem
        .split('-')
        .map((str) => (str[0] ? `${str[0].toUpperCase()}${str.substring(1)}` : str))
        .join(' ');

      navItems[index] = {
        group: name,
        pages: [navItem],
      };
    });

    logErrorResults('linking to external pages', externalResults);
    logErrorResults('scraping your docs', [...internalResults, ...rootResults]);

    // NOTE(review): the browser is not closed in this function — confirm that
    // `downloadLogos` (or the caller) owns its lifetime.
    const browser = needsBrowser ? await startPuppeteer() : undefined;

    const favicon = await downloadFavicon(hast);
    const logo = await downloadLogos(url, browser);

    return {
      success: true,
      data: {
        $schema: 'https://mintlify.com/schema.json',
        name: '',
        logo,
        colors: {
          primary: '',
        },
        favicon: favicon ?? '',
        navigation: navItems as Navigation,
        tabs: opts.tabs,
      },
    };
  } catch (error) {
    if (error instanceof Error) {
      return { success: false, message: error.message };
    }
    return {
      success: false,
      message: 'An unknown error occurred when scraping this site. Please try again.',
    };
  }
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import type { Navigation, NavigationEntry } from '@mintlify/models';
|
|
2
|
+
import { MintConfig, Tab } from '@mintlify/models';
|
|
3
|
+
|
|
4
|
+
import { retrieveTabLinks } from '../tabs/retrieveReadme.js';
|
|
5
|
+
import type { Result } from '../types/result.js';
|
|
6
|
+
import { detectFramework, framework } from '../utils/detectFramework.js';
|
|
7
|
+
import { log } from '../utils/log.js';
|
|
8
|
+
import { fetchPageHtml, startPuppeteer } from '../utils/network.js';
|
|
9
|
+
import { getTitleFromLink } from '../utils/title.js';
|
|
10
|
+
import { downloadFavicon } from './icon.js';
|
|
11
|
+
import { downloadLogos } from './logo.js';
|
|
12
|
+
import { htmlToHast } from './root.js';
|
|
13
|
+
import { scrapeSite } from './site.js';
|
|
14
|
+
|
|
15
|
+
/**
 * Scrapes a site that may be split into several top-level tabs.
 *
 * For `readme` and `docusaurus` sites with more than one tab link, every tab is
 * fetched and scraped with `scrapeSite`, and the resulting navigation and tab
 * entries are merged into one config. In every other case the work is
 * delegated to a single `scrapeSite` call.
 *
 * The favicon and logo downloads (and the Puppeteer launch) are performed only
 * on the multi-tab path. The fallback paths delegate to `scrapeSite`, which
 * performs the same work itself, so doing it here as well would download
 * everything twice and start an extra browser for gitbook sites. On those
 * fallback paths, failures in that work now surface through `scrapeSite`'s
 * own error handling.
 *
 * @param html - Markup of the entry page.
 * @param url - Address of the entry page.
 * @returns The merged config on the multi-tab path (tabs that fail are logged
 *   and skipped), otherwise whatever `scrapeSite` returns.
 */
export async function scrapeAllSiteTabs(
  html: string,
  url: string | URL
): Promise<Result<MintConfig>> {
  const hast = htmlToHast(html);
  url = new URL(url);

  detectFramework(hast);

  // Captured right after detection, before any nested `scrapeSite` call runs
  // `detectFramework` again on another page.
  const needsBrowser = framework.vendor === 'gitbook';

  if (framework.vendor === 'readme' || framework.vendor === 'docusaurus') {
    const links = retrieveTabLinks(hast);
    if (
      !links ||
      !links.length ||
      (links.length === 1 && links[0] && links[0].url === url.pathname)
    )
      return scrapeSite(html, url, { hast });

    // Make sure the page we started from is covered by one of the tabs.
    if (!links.some((link) => url.pathname.startsWith(link.url))) {
      links.push({
        name: getTitleFromLink(url.pathname),
        url: url.pathname,
      });
    }

    const browser = needsBrowser ? await startPuppeteer() : undefined;

    const favicon = await downloadFavicon(hast);
    const logo = await downloadLogos(url, browser);

    const results = await Promise.all(
      links.map(async (tabEntry) => {
        const newUrl = new URL(url);
        newUrl.pathname = tabEntry.url;
        const newHtml = await fetchPageHtml(newUrl, undefined);
        return await scrapeSite(newHtml, newUrl, { tabs: [tabEntry] });
      })
    );

    const navigations: Array<NavigationEntry> = [];
    const tabs: Array<Tab> = [];

    const successes = results.filter((result) => result.success);
    successes.forEach((result) => {
      if (!result.data) return;
      navigations.push(...result.data.navigation);
      if (result.data.tabs) tabs.push(...result.data.tabs);
    });

    const failures = results.filter((result) => !result.success);
    failures.forEach((result) => {
      log(`Failed to scrape tab: ${result.message}`);
    });

    return {
      success: true,
      data: {
        $schema: 'https://mintlify.com/schema.json',
        name: '',
        logo,
        colors: {
          primary: '',
        },
        favicon: favicon ?? '',
        navigation: navigations as Navigation,
        tabs,
      },
    };
  }

  return scrapeSite(html, url, { hast });
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import { Tab } from '@mintlify/models';
|
|
2
|
+
import type { Root as HastRoot, Element } from 'hast';
|
|
3
|
+
import { visit, EXIT, CONTINUE } from 'unist-util-visit';
|
|
4
|
+
|
|
5
|
+
import { framework } from '../utils/detectFramework.js';
|
|
6
|
+
import { findTitle, getTitleFromLink } from '../utils/title.js';
|
|
7
|
+
|
|
8
|
+
/**
 * Collects the top-level tab links from a ReadMe or Docusaurus page.
 *
 * The vendor is read from the shared `framework` state. The function first
 * locates the site header (`header.rm-Header` for ReadMe, `nav.navbar` for
 * Docusaurus) and then gathers anchors inside it whose `href` is a non-empty
 * string that does not start with `http`.
 *
 * @param rootNode - hast tree of the page.
 * @returns The links found (possibly empty), or `undefined` when the vendor
 *   is neither ReadMe nor Docusaurus or the header element is not present.
 */
export function retrieveTabLinks(rootNode: HastRoot): Array<Tab> | undefined {
  const vendor = framework.vendor;
  if (vendor !== 'readme' && vendor !== 'docusaurus') return undefined;

  const hasClass = (node: Element, className: string): boolean => {
    const classNames = node.properties.className;
    return Array.isArray(classNames) && classNames.includes(className);
  };

  // Returns the anchor's href when it qualifies as a tab link, else undefined.
  const tabHref = (node: Element): string | undefined => {
    const href = node.properties.href;
    if (node.tagName !== 'a' || typeof href !== 'string' || href === '') return undefined;
    return href.startsWith('http') ? undefined : href;
  };

  const headerTag = vendor === 'readme' ? 'header' : 'nav';
  const headerClass = vendor === 'readme' ? 'rm-Header' : 'navbar';

  let header: Element | undefined = undefined;
  visit(rootNode, 'element', (candidate) => {
    if (candidate.tagName !== headerTag || !hasClass(candidate, headerClass)) return CONTINUE;
    header = candidate;
    return EXIT;
  });

  // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
  if (!header) return undefined;

  const tabLinks: Array<Tab> = [];
  const addTab = (anchor: Element, href: string): void => {
    const title = findTitle(anchor);
    tabLinks.push({ name: title || getTitleFromLink(href), url: href });
  };

  visit(header as Element, 'element', (section) => {
    if (vendor === 'readme') {
      // ReadMe keeps its links in <nav> elements and in div.rm-Header-right.
      const isLinkSection =
        section.tagName === 'nav' ||
        (section.tagName === 'div' && hasClass(section, 'rm-Header-right'));
      if (!isLinkSection) return CONTINUE;

      visit(section, 'element', (anchor) => {
        const href = tabHref(anchor);
        if (href === undefined) return CONTINUE;
        addTab(anchor, href);
      });
      return CONTINUE;
    }

    // Docusaurus: only anchors sitting directly inside an element whose
    // class list is exactly ['navbar__items'] are taken.
    if (section.tagName !== 'nav') return CONTINUE;

    visit(section, 'element', (anchor, _index, parent) => {
      const href = tabHref(anchor);
      if (href === undefined) return CONTINUE;
      if (!parent || parent.type !== 'element') return CONTINUE;

      const parentClasses = parent.properties.className;
      if (
        !Array.isArray(parentClasses) ||
        parentClasses.length !== 1 ||
        parentClasses[0] !== 'navbar__items'
      )
        return CONTINUE;

      addTab(anchor, href);
    });
    return CONTINUE;
  });

  return tabLinks;
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/** Vendor tags used by the scraper; `undefined` is one of the listed members. */
export const docVendors = ['gitbook', 'readme', 'docusaurus', undefined] as const;
/** Union of the entries of `docVendors`. */
export type FrameworkVendor = (typeof docVendors)[number];

/** Docusaurus major versions listed for scraping. */
export const docusaurusVersions = [2, 3] as const;
/** Union of the entries of `docusaurusVersions`. */
export type DocusaurusVersion = (typeof docusaurusVersions)[number];

/** Detected documentation framework: a vendor plus an optional Docusaurus version. */
export type Framework = {
  vendor: FrameworkVendor | undefined;
  version: DocusaurusVersion | undefined;
};
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { Root as HastRoot, Element, ElementContent } from 'hast';
|
|
2
|
+
import type { MdxJsxFlowElementHast, MdxJsxTextElementHast } from 'mdast-util-mdx-jsx';
|
|
3
|
+
|
|
4
|
+
/** A hast element node. */
export type HastNode = Element;

/** An optional numeric index (presumably the node's position in its parent; confirm against callers). */
export type HastNodeIndex = number | undefined;

/** Any node kind that may act as a parent, or `undefined` when there is none. */
export type HastNodeParent =
  | Element
  | MdxJsxTextElementHast
  | MdxJsxFlowElementHast
  | HastRoot
  | undefined;

/** A list of hast element children. */
export type HastChildrenType = ElementContent[];
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Outcome of an operation: either a success that may carry `data`, or a
 * failure that carries a `message`. Discriminated on `success`.
 */
export type Result<T> =
  | { success: true; data?: T }
  | { success: false; message: string };
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import {
|
|
2
|
+
type ElementContent,
|
|
3
|
+
type Root as HastRoot,
|
|
4
|
+
type Comment,
|
|
5
|
+
type Text,
|
|
6
|
+
type Element,
|
|
7
|
+
type RootContent as HastRootContent,
|
|
8
|
+
} from 'hast';
|
|
9
|
+
import { toMdast, defaultHandlers } from 'hast-util-to-mdast';
|
|
10
|
+
import type { State, Handle } from 'hast-util-to-mdast';
|
|
11
|
+
import type { RootContent as MdastRootContent, Root as MdastRoot } from 'mdast';
|
|
12
|
+
import { unified } from 'unified';
|
|
13
|
+
|
|
14
|
+
import { mdxJsxFlowElementHandler } from '../customComponents/selective.js';
|
|
15
|
+
|
|
16
|
+
/**
 * Converts a list of hast nodes into mdast content by wrapping them in a
 * synthetic root and running `hast-util-to-mdast` over it.
 *
 * @param children - hast nodes to convert.
 * @param opts - `jsxImages`: when true, `<img>` elements are stripped of every
 *   property except `src` and handed to `mdxJsxFlowElementHandler` instead of
 *   the default image handler.
 * @returns The converted top-level mdast nodes with all raw `html` nodes
 *   removed.
 */
export function turnChildrenIntoMdx(
  children: Array<HastRootContent | ElementContent | Element | Comment | Text>,
  opts: { jsxImages: boolean } = { jsxImages: false }
): Array<MdastRootContent> {
  const hast: HastRoot = {
    type: 'root',
    children: children,
  };

  const handlers: Record<string, Handle> = { ...defaultHandlers };
  if (opts.jsxImages) {
    handlers['img'] = function (_: State, node: Element) {
      // Note: this strips the properties on the caller's node in place.
      Object.keys(node.properties).forEach((key) => {
        if (key !== 'src') delete node.properties[key];
      });
      return mdxJsxFlowElementHandler(_, node);
    };
  }

  const mdxAst = unified()
    .use(function () {
      return function (tree: HastRoot): MdastRoot {
        const newTree = toMdast(tree, {
          handlers,
        }) as MdastRoot;
        return newTree;
      };
    })
    .runSync(hast);

  // Use filter rather than splice-inside-forEach: removing an element while
  // iterating shifts the next one into the visited slot, so the second of two
  // adjacent `html` nodes would be skipped and left in the output.
  return mdxAst.children.filter((child) => child.type !== 'html');
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { Element } from 'hast';
|
|
2
|
+
import { visit } from 'unist-util-visit';
|
|
3
|
+
|
|
4
|
+
/**
 * Plugin-style factory: returns a function that removes `className` from
 * every element in the tree it is given.
 */
export function unifiedRemoveClassNames() {
  return (tree: Element) => removeClassNames(tree);
}
|
|
9
|
+
|
|
10
|
+
/**
 * Deletes the `className` property from `node` and every element beneath it.
 * The tree is modified in place.
 *
 * @param node - Root of the subtree to clean.
 */
export function removeClassNames(node: Element) {
  return visit(node, 'element', (element) => {
    if (!('properties' in element)) return;
    delete element.properties.className;
  });
}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import type { Root } from 'hast';
|
|
2
|
+
import { visit, EXIT, CONTINUE } from 'unist-util-visit';
|
|
3
|
+
|
|
4
|
+
import type { Framework } from '../types/framework.js';
|
|
5
|
+
import { log } from './log.js';
|
|
6
|
+
|
|
7
|
+
/**
 * Shared, mutable record of the detected documentation framework.
 * Both fields start out `undefined`; `detectFramework` fills them in.
 */
export const framework: Framework = { vendor: undefined, version: undefined };
|
|
11
|
+
|
|
12
|
+
/**
 * Walks a page's hast tree and records the documentation vendor (and, for
 * Docusaurus, the major version) in the shared `framework` object.
 *
 * Detection rules, stopping at the first match:
 * - GitBook: a `<link rel="preconnect" href="https://api.gitbook.com">`.
 * - ReadMe: a `<meta name="readme-deploy">`.
 * - Docusaurus: a `<meta name="generator">` whose content contains
 *   `Docusaurus`; the version is taken from a `v3` / `v2` marker. A `v1`
 *   marker is reported as unsupported and clears the vendor.
 *
 * Logs the outcome. When no vendor is set after the walk, the process exits
 * with status 1.
 *
 * @param rootHast - hast tree of the page to inspect.
 */
export function detectFramework(rootHast: Root): void {
  visit(rootHast, 'element', function (node) {
    const { tagName, properties } = node;

    if (
      tagName === 'link' &&
      Array.isArray(properties.rel) &&
      properties.rel.includes('preconnect') &&
      properties.href === 'https://api.gitbook.com'
    ) {
      framework.vendor = 'gitbook';
      framework.version = undefined;
      return EXIT;
    }

    // All of the other docs vendors rely on
    // the `meta` element as well as a `name`
    // property which should be a string
    if (tagName !== 'meta' || typeof properties.name !== 'string') return CONTINUE;

    switch (properties.name) {
      case 'readme-deploy':
        framework.vendor = 'readme';
        framework.version = undefined;
        return EXIT;

      // case "intercom: trackingEvent":
      //   framework.vendor = "intercom";
      //   framework.version = undefined;
      //   return EXIT;

      case 'generator': {
        const generator = properties.content;
        if (typeof generator !== 'string' || !generator.includes('Docusaurus')) return CONTINUE;

        framework.vendor = 'docusaurus';
        // Reset first so a generator string without a version marker does
        // not inherit the version written by an earlier detection.
        framework.version = undefined;

        if (generator.includes('v3')) {
          framework.version = 3;
        } else if (generator.includes('v2')) {
          framework.version = 2;
        } else if (generator.includes('v1')) {
          log(
            'We detected Docusaurus version 1 but we only support scraping versions 2 and 3',
            'error'
          );
          framework.vendor = undefined;
        }
        return EXIT;
      }

      default:
        return CONTINUE;
    }
  });

  if (framework.vendor) {
    log('Successfully detected documentation vendor: ' + framework.vendor);
  } else {
    log('Failed to detect documentation vendor; please contact support@mintlify.com');
    framework.version = undefined;
    process.exit(1);
  }
}
|