@mintlify/scraping 3.0.140 → 3.0.142
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/browser.js +1 -1
- package/bin/browser.js.map +1 -1
- package/bin/checks.d.ts +8 -0
- package/bin/checks.js +24 -0
- package/bin/checks.js.map +1 -0
- package/bin/cli.js +49 -45
- package/bin/cli.js.map +1 -1
- package/bin/scraping/detectFramework.d.ts +7 -14
- package/bin/scraping/detectFramework.js +8 -14
- package/bin/scraping/detectFramework.js.map +1 -1
- package/bin/scraping/downloadAllImages.d.ts +2 -1
- package/bin/scraping/downloadAllImages.js +1 -2
- package/bin/scraping/downloadAllImages.js.map +1 -1
- package/bin/scraping/downloadLogoImage.js +0 -1
- package/bin/scraping/downloadLogoImage.js.map +1 -1
- package/bin/scraping/replaceImagePaths.d.ts +1 -1
- package/bin/scraping/replaceImagePaths.js +0 -3
- package/bin/scraping/replaceImagePaths.js.map +1 -1
- package/bin/scraping/scrapePageCommands.d.ts +3 -3
- package/bin/scraping/scrapePageCommands.js +22 -27
- package/bin/scraping/scrapePageCommands.js.map +1 -1
- package/bin/scraping/scrapeSectionCommands.d.ts +5 -5
- package/bin/scraping/scrapeSectionCommands.js +27 -30
- package/bin/scraping/scrapeSectionCommands.js.map +1 -1
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js +1 -1
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js.map +1 -1
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js +2 -2
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js.map +1 -1
- package/bin/scraping/site-scrapers/alternateGroupTitle.d.ts +3 -1
- package/bin/scraping/site-scrapers/alternateGroupTitle.js +1 -1
- package/bin/scraping/site-scrapers/alternateGroupTitle.js.map +1 -1
- package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.d.ts +5 -1
- package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js +1 -1
- package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js.map +1 -1
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.d.ts +3 -1
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js +1 -4
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js.map +1 -1
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.d.ts +3 -1
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js +0 -3
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js.map +1 -1
- package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js +3 -3
- package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js.map +1 -1
- package/bin/scraping/site-scrapers/openNestedGitbookMenus.js +2 -2
- package/bin/scraping/site-scrapers/openNestedGitbookMenus.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js +1 -1
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js +2 -2
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeGitBookPage.js +1 -1
- package/bin/scraping/site-scrapers/scrapeGitBookPage.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js +1 -1
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeReadMePage.js +1 -1
- package/bin/scraping/site-scrapers/scrapeReadMePage.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeReadMeSection.js +2 -2
- package/bin/scraping/site-scrapers/scrapeReadMeSection.js.map +1 -1
- package/bin/tsconfig.build.tsbuildinfo +1 -1
- package/bin/util.d.ts +0 -1
- package/bin/util.js +9 -26
- package/bin/util.js.map +1 -1
- package/package.json +7 -7
- package/src/browser.ts +1 -1
- package/src/checks.ts +32 -0
- package/src/cli.ts +48 -74
- package/src/scraping/detectFramework.ts +20 -15
- package/src/scraping/downloadAllImages.ts +7 -7
- package/src/scraping/downloadLogoImage.ts +0 -1
- package/src/scraping/replaceImagePaths.ts +1 -5
- package/src/scraping/scrapePageCommands.ts +32 -29
- package/src/scraping/scrapeSectionCommands.ts +38 -34
- package/src/scraping/site-scrapers/Intercom/scrapeIntercomPage.ts +1 -1
- package/src/scraping/site-scrapers/Intercom/scrapeIntercomSection.ts +2 -2
- package/src/scraping/site-scrapers/alternateGroupTitle.ts +5 -2
- package/src/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.ts +7 -5
- package/src/scraping/site-scrapers/links-per-group/getLinksRecursively.ts +8 -6
- package/src/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.ts +7 -5
- package/src/scraping/site-scrapers/openNestedDocusaurusMenus.ts +3 -3
- package/src/scraping/site-scrapers/openNestedGitbookMenus.ts +3 -3
- package/src/scraping/site-scrapers/scrapeDocusaurusPage.ts +1 -1
- package/src/scraping/site-scrapers/scrapeDocusaurusSection.ts +2 -2
- package/src/scraping/site-scrapers/scrapeGitBookPage.ts +1 -1
- package/src/scraping/site-scrapers/scrapeGitBookSection.ts +3 -3
- package/src/scraping/site-scrapers/scrapeReadMePage.ts +1 -1
- package/src/scraping/site-scrapers/scrapeReadMeSection.ts +3 -3
- package/src/util.ts +10 -26
- package/tsconfig.json +0 -1
- package/bin/validation/isValidLink.d.ts +0 -1
- package/bin/validation/isValidLink.js +0 -11
- package/bin/validation/isValidLink.js.map +0 -1
- package/bin/validation/stopIfInvalidLink.d.ts +0 -1
- package/bin/validation/stopIfInvalidLink.js +0 -9
- package/bin/validation/stopIfInvalidLink.js.map +0 -1
- package/src/validation/isValidLink.ts +0 -9
- package/src/validation/stopIfInvalidLink.ts +0 -9
package/bin/util.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
import { mkdirSync, writeFileSync } from 'fs';
|
|
1
|
+
import { existsSync, mkdirSync, writeFileSync } from 'fs';
|
|
2
2
|
import Ora from 'ora';
|
|
3
3
|
import path from 'path';
|
|
4
|
-
import stopIfInvalidLink from './validation/stopIfInvalidLink.js';
|
|
5
4
|
export const MintConfig = (name, color, ctaName, ctaUrl, filename) => {
|
|
6
5
|
return {
|
|
7
6
|
name,
|
|
@@ -68,35 +67,19 @@ export const createPage = (title, description, markdown, overwrite = false, root
|
|
|
68
67
|
const writePath = path.join(rootDir, addMdx(fileName || toFilename(title)));
|
|
69
68
|
// Create the folders needed if they're missing
|
|
70
69
|
mkdirSync(rootDir, { recursive: true });
|
|
71
|
-
|
|
72
|
-
|
|
70
|
+
if (!overwrite && existsSync(writePath)) {
|
|
71
|
+
console.log(`❌ Skipping existing file ${writePath}`);
|
|
72
|
+
return;
|
|
73
|
+
}
|
|
74
|
+
// Write the page to disk
|
|
75
|
+
try {
|
|
73
76
|
writeFileSync(writePath, Page(title, description, markdown));
|
|
74
77
|
console.log('✏️ - ' + writePath);
|
|
75
78
|
}
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
writeFileSync(writePath, Page(title, description, markdown), {
|
|
79
|
-
flag: 'wx',
|
|
80
|
-
});
|
|
81
|
-
console.log('✏️ - ' + writePath);
|
|
82
|
-
}
|
|
83
|
-
catch (e) {
|
|
84
|
-
// We do a try-catch instead of an if-statement to avoid a race condition
|
|
85
|
-
// of the file being created after we started writing.
|
|
86
|
-
if (e?.code === 'EEXIST') {
|
|
87
|
-
console.log(`❌ Skipping existing file ${writePath}`);
|
|
88
|
-
}
|
|
89
|
-
else {
|
|
90
|
-
console.error(e);
|
|
91
|
-
}
|
|
92
|
-
}
|
|
79
|
+
catch (e) {
|
|
80
|
+
console.error(e);
|
|
93
81
|
}
|
|
94
82
|
};
|
|
95
|
-
export function getHrefFromArgs(argv) {
|
|
96
|
-
const href = argv.url;
|
|
97
|
-
stopIfInvalidLink(href);
|
|
98
|
-
return href;
|
|
99
|
-
}
|
|
100
83
|
export const buildLogger = (startText = '') => {
|
|
101
84
|
const logger = Ora().start(startText);
|
|
102
85
|
return logger;
|
package/bin/util.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;
|
|
1
|
+
{"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAC1D,OAAO,GAAuB,MAAM,KAAK,CAAC;AAC1C,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,IAAY,EACZ,KAAa,EACb,OAAe,EACf,MAAc,EACd,QAAgB,EAChB,EAAE;IACF,OAAO;QACL,IAAI;QACJ,IAAI,EAAE,EAAE;QACR,OAAO,EAAE,EAAE;QACX,MAAM,EAAE;YACN,OAAO,EAAE,KAAK;SACf;QACD,WAAW,EAAE,EAAE;QACf,eAAe,EAAE;YACf,IAAI,EAAE,OAAO;YACb,GAAG,EAAE,MAAM;SACZ;QACD,OAAO,EAAE,EAAE;QACX,UAAU,EAAE;YACV;gBACE,KAAK,EAAE,MAAM;gBACb,KAAK,EAAE,CAAC,QAAQ,CAAC;aAClB;SACF;QACD,6DAA6D;KAC9D,CAAC;AACJ,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,IAAI,GAAG,CAAC,KAAa,EAAE,WAAoB,EAAE,QAAiB,EAAE,EAAE;IAC7E,uDAAuD;IACvD,yDAAyD;IACzD,wBAAwB;IACxB,MAAM,eAAe,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;IAC9C,MAAM,aAAa,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;IAC5C,IAAI,CAAC,eAAe,EAAE,CAAC;QACrB,KAAK,GAAG,GAAG,GAAG,KAAK,CAAC;IACtB,CAAC;IACD,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,KAAK,GAAG,KAAK,GAAG,GAAG,CAAC;IACtB,CAAC;IAED,MAAM,mBAAmB,GAAG,WAAW,CAAC,CAAC,CAAC,mBAAmB,WAAW,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IACjF,OAAO,eAAe,KAAK,GAAG,mBAAmB,YAAY,QAAQ,EAAE,CAAC;AAC1E,CAAC,CAAC;AAEF,MAAM,UAAU,SAAS,CAAC,GAAW;IACnC,+CAA+C;IAC/C,gDAAgD;IAChD,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;AAC7B,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,IAAuB;IACzD,4BAA4B;IAC5B,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AACrE,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAAG,CAAC,KAAa,EAAE,EAAE;IAC1C,sDAAsD;IACtD,uDAAuD;IACvD,OAAO,KAAK;SACT,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC;SAC3B,IAAI,EAAE;SACN,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC;SAClB,WAAW,EAAE,CAAC;AACnB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,MAAM,GAAG,CAAC,QAAgB,EAAE,EAAE;IACzC,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC9B,OAAO,QAAQ,CAAC;IAClB,CAAC;IACD,OAAO,QAAQ,GAAG,MAAM,CAAC;AAC3B,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,KAAa,EACb,WAAoB,EACpB,QAAiB,EACjB,SAAS,GAAG,KAAK,EACjB,OAAO,GAAG,EAAE,EACZ,QAAiB,EACjB,EAAE;IACF,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,QAAQ,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAE5E,+CAA+C;IAC/C,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAExC,IAAI,CAAC,SAAS,IAAI,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QACxC,OAAO,CAAC,GAAG,CAAC,4BAA4B,SAAS,EAAE,CAAC,CAAC;QACrD,OAAO;IACT,CAAC;IAED,yBAAyB;IACzB,IAAI,CAAC;QACH,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,KAAK,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC;QAC7D,OAAO,CAAC,GAAG,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC;IACnC,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACnB,CAAC;AACH,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,SAAS,GAAG,EAAE,EAAW,EAAE;IACrD,MAAM,MAAM,GAAG,GAAG,EAAE,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IACtC,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,QAAgB,EAAE,EAAE;IACnD,MAAM,GAAG,GAAG,QAAQ,CAAC,SAAS,CAAC,QAAQ,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;IAC/E,IAAI,QAAQ,KAAK,GAAG;QAAE,OAAO,SAAS,CAAC;IACvC,OAAO,GAAG,CAAC,WAAW,EAAE,CAAC;AAC3B,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,wBAAwB,GAAG,CAAC,QAAgB,EAAE,EAAE;IAC3D,MAAM,SAAS,GAAG,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IAC7C,OAAO,SAAS,IAAI,CAAC,SAAS,KAAK,KAAK,IAAI,SAAS,KAAK,IAAI,IAAI,SAAS,KAAK,KAAK,CAAC,CAAC;AACzF,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mintlify/scraping",
|
|
3
|
-
"version": "3.0.
|
|
3
|
+
"version": "3.0.142",
|
|
4
4
|
"description": "Scrape documentation frameworks to Mintlify docs",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=18.0.0"
|
|
@@ -39,22 +39,22 @@
|
|
|
39
39
|
},
|
|
40
40
|
"dependencies": {
|
|
41
41
|
"@apidevtools/swagger-parser": "^10.1.0",
|
|
42
|
-
"@mintlify/common": "1.0.
|
|
42
|
+
"@mintlify/common": "1.0.125",
|
|
43
43
|
"axios": "^1.2.2",
|
|
44
|
-
"cheerio": "^0.
|
|
44
|
+
"cheerio": "^1.0.0-rc.12",
|
|
45
45
|
"fs-extra": "^11.1.1",
|
|
46
46
|
"node-html-markdown": "^1.3.0",
|
|
47
47
|
"ora": "^6.1.2",
|
|
48
|
-
"puppeteer": "^
|
|
48
|
+
"puppeteer": "^22.14.0",
|
|
49
49
|
"yargs": "^17.6.0"
|
|
50
50
|
},
|
|
51
51
|
"devDependencies": {
|
|
52
52
|
"@mintlify/eslint-config": "1.0.5",
|
|
53
53
|
"@mintlify/eslint-config-typescript": "1.0.10",
|
|
54
|
-
"@mintlify/models": "0.0.
|
|
54
|
+
"@mintlify/models": "0.0.109",
|
|
55
55
|
"@mintlify/prettier-config": "1.0.4",
|
|
56
56
|
"@mintlify/ts-config": "2.0.2",
|
|
57
|
-
"@mintlify/validation": "0.1.
|
|
57
|
+
"@mintlify/validation": "0.1.171",
|
|
58
58
|
"@trivago/prettier-plugin-sort-imports": "^4.2.1",
|
|
59
59
|
"@tsconfig/recommended": "1.x",
|
|
60
60
|
"@types/cheerio": "^0.22.31",
|
|
@@ -71,5 +71,5 @@
|
|
|
71
71
|
"typescript": "^5.5.3",
|
|
72
72
|
"vitest": "^2.0.4"
|
|
73
73
|
},
|
|
74
|
-
"gitHead": "
|
|
74
|
+
"gitHead": "74d38f577b4f02953bfc0f9b0d493371cbb256d2"
|
|
75
75
|
}
|
package/src/browser.ts
CHANGED
package/src/checks.ts
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { Framework } from './scraping/detectFramework.js';
|
|
2
|
+
|
|
3
|
+
// This checks the link is written correctly, not that the page exists.
|
|
4
|
+
export function checkUrl({ url }: { url: string }) {
|
|
5
|
+
try {
|
|
6
|
+
new URL(url);
|
|
7
|
+
} catch {
|
|
8
|
+
throw Error(`Invalid link: ${url}\nMake sure the link starts with http:// or https://`);
|
|
9
|
+
}
|
|
10
|
+
return true;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
export function checkVersion({
|
|
14
|
+
tool,
|
|
15
|
+
docusaurusVersion,
|
|
16
|
+
}: {
|
|
17
|
+
tool: Framework | undefined;
|
|
18
|
+
docusaurusVersion: string | undefined;
|
|
19
|
+
}) {
|
|
20
|
+
if (tool === 'docusaurus') {
|
|
21
|
+
if (docusaurusVersion === undefined) {
|
|
22
|
+
throw Error(
|
|
23
|
+
'When using Docusaurus, you must specify the version (1,2,3) using the --docusaurusVersion flag'
|
|
24
|
+
);
|
|
25
|
+
}
|
|
26
|
+
} else {
|
|
27
|
+
if (docusaurusVersion !== undefined) {
|
|
28
|
+
throw Error('The --docusaurusVersion flag is only applicable when using Docusaurus.');
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
return true;
|
|
32
|
+
}
|
package/src/cli.ts
CHANGED
|
@@ -1,95 +1,71 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
/* eslint-disable @typescript-eslint/no-empty-function */
|
|
4
2
|
import yargs from 'yargs';
|
|
5
3
|
import { hideBin } from 'yargs/helpers';
|
|
6
4
|
|
|
5
|
+
import { checkUrl, checkVersion } from './checks.js';
|
|
7
6
|
import { generateOpenApiPages } from './openapi/generateOpenApiPages.js';
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
|
|
11
|
-
scrapeSectionAxiosWrapper,
|
|
12
|
-
scrapeGitbookSectionCommand,
|
|
13
|
-
} from './scraping/scrapeSectionCommands.js';
|
|
14
|
-
import { scrapeIntercomPage } from './scraping/site-scrapers/Intercom/scrapeIntercomPage.js';
|
|
15
|
-
import { scrapeIntercomSection } from './scraping/site-scrapers/Intercom/scrapeIntercomSection.js';
|
|
16
|
-
import { scrapeGitBookPage } from './scraping/site-scrapers/scrapeGitBookPage.js';
|
|
17
|
-
import { scrapeReadMePage } from './scraping/site-scrapers/scrapeReadMePage.js';
|
|
18
|
-
import { scrapeReadMeSection } from './scraping/site-scrapers/scrapeReadMeSection.js';
|
|
7
|
+
import { FrameworkHint, frameworks } from './scraping/detectFramework.js';
|
|
8
|
+
import { scrapePageAutomatically } from './scraping/scrapePageCommands.js';
|
|
9
|
+
import { scrapeSectionAutomatically } from './scraping/scrapeSectionCommands.js';
|
|
19
10
|
|
|
20
11
|
await yargs(hideBin(process.argv))
|
|
21
12
|
.command(
|
|
22
|
-
'page
|
|
13
|
+
'page <url>',
|
|
23
14
|
'Scrapes a page',
|
|
24
|
-
() =>
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
await
|
|
43
|
-
}
|
|
44
|
-
)
|
|
45
|
-
.command(
|
|
46
|
-
'intercom-page [url]',
|
|
47
|
-
'Scrapes a Intercom page',
|
|
48
|
-
() => {},
|
|
49
|
-
async (argv) => {
|
|
50
|
-
await scrapePageWrapper(argv, scrapeIntercomPage);
|
|
15
|
+
(yargs) =>
|
|
16
|
+
yargs
|
|
17
|
+
.positional('url', { type: 'string', demandOption: true })
|
|
18
|
+
.check(checkUrl)
|
|
19
|
+
.option('overwrite', { alias: 'O', type: 'boolean', default: false })
|
|
20
|
+
.option('tool', { alias: 't', choices: frameworks })
|
|
21
|
+
.option('docusaurusVersion', {
|
|
22
|
+
alias: 'd',
|
|
23
|
+
type: 'string',
|
|
24
|
+
choices: ['1', '2', '3'] as const,
|
|
25
|
+
})
|
|
26
|
+
.check(checkVersion),
|
|
27
|
+
async ({ url, overwrite, tool, docusaurusVersion }) => {
|
|
28
|
+
const frameworkHint: FrameworkHint = {
|
|
29
|
+
framework: tool,
|
|
30
|
+
version: docusaurusVersion ?? '3',
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
await scrapePageAutomatically(url, overwrite, frameworkHint);
|
|
51
34
|
}
|
|
52
35
|
)
|
|
53
36
|
.command(
|
|
54
|
-
'section
|
|
37
|
+
'section <url>',
|
|
55
38
|
'Scrapes the docs in the section',
|
|
56
|
-
() =>
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
await
|
|
75
|
-
}
|
|
76
|
-
)
|
|
77
|
-
.command(
|
|
78
|
-
'intercom-section [url]',
|
|
79
|
-
'Scrapes the Intercom section',
|
|
80
|
-
() => {},
|
|
81
|
-
async (argv) => {
|
|
82
|
-
await scrapeSectionAxiosWrapper(argv, scrapeIntercomSection);
|
|
39
|
+
(yargs) =>
|
|
40
|
+
yargs
|
|
41
|
+
.positional('url', { type: 'string', demandOption: true })
|
|
42
|
+
.check(checkUrl)
|
|
43
|
+
.option('overwrite', { alias: 'O', type: 'boolean', default: false })
|
|
44
|
+
.option('tool', { alias: 't', choices: frameworks })
|
|
45
|
+
.option('docusaurusVersion', {
|
|
46
|
+
alias: 'd',
|
|
47
|
+
type: 'string',
|
|
48
|
+
choices: ['1', '2', '3'] as const,
|
|
49
|
+
})
|
|
50
|
+
.check(checkVersion),
|
|
51
|
+
async ({ url, overwrite, tool, docusaurusVersion }) => {
|
|
52
|
+
const frameworkHint: FrameworkHint = {
|
|
53
|
+
framework: tool,
|
|
54
|
+
version: docusaurusVersion ?? '3',
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
await scrapeSectionAutomatically(url, overwrite, frameworkHint);
|
|
83
58
|
}
|
|
84
59
|
)
|
|
85
60
|
.command(
|
|
86
61
|
'openapi-file <openapiFilename>',
|
|
87
62
|
'Creates MDX files from an OpenAPI spec',
|
|
88
|
-
(yargs) =>
|
|
89
|
-
|
|
63
|
+
(yargs) =>
|
|
64
|
+
yargs
|
|
90
65
|
.positional('openapiFilename', {
|
|
91
66
|
describe: 'The filename of the OpenAPI spec',
|
|
92
67
|
type: 'string',
|
|
68
|
+
demandOption: true,
|
|
93
69
|
})
|
|
94
70
|
.option('writeFiles', {
|
|
95
71
|
describe: 'Whether or not to write the frontmatter files',
|
|
@@ -101,9 +77,7 @@ await yargs(hideBin(process.argv))
|
|
|
101
77
|
describe: 'The folder in which to write any created frontmatter files',
|
|
102
78
|
type: 'string',
|
|
103
79
|
alias: 'o',
|
|
104
|
-
})
|
|
105
|
-
.demandOption('openapiFilename');
|
|
106
|
-
},
|
|
80
|
+
}),
|
|
107
81
|
async (argv) => {
|
|
108
82
|
try {
|
|
109
83
|
const { nav } = await generateOpenApiPages(
|
|
@@ -1,14 +1,19 @@
|
|
|
1
|
-
import cheerio from 'cheerio';
|
|
1
|
+
import * as cheerio from 'cheerio';
|
|
2
2
|
|
|
3
|
-
export
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
3
|
+
export const frameworks = ['docusaurus', 'gitbook', 'readme', 'intercom'] as const;
|
|
4
|
+
export type Framework = (typeof frameworks)[number];
|
|
5
|
+
|
|
6
|
+
export type FrameworkHint =
|
|
7
|
+
| {
|
|
8
|
+
framework: 'docusaurus';
|
|
9
|
+
version: '1' | '2' | '3';
|
|
10
|
+
}
|
|
11
|
+
| {
|
|
12
|
+
framework: 'gitbook' | 'readme' | 'intercom' | undefined;
|
|
13
|
+
};
|
|
9
14
|
|
|
10
|
-
export function detectFramework(html) {
|
|
11
|
-
const
|
|
15
|
+
export function detectFramework(html: string): FrameworkHint {
|
|
16
|
+
const $ = cheerio.load(html);
|
|
12
17
|
const docusaurusMeta = $('meta[name="generator"]');
|
|
13
18
|
|
|
14
19
|
if (
|
|
@@ -19,31 +24,31 @@ export function detectFramework(html) {
|
|
|
19
24
|
) {
|
|
20
25
|
const metaAttrString = docusaurusMeta.attr('content') as string;
|
|
21
26
|
if (metaAttrString.includes('v3')) {
|
|
22
|
-
return { framework:
|
|
27
|
+
return { framework: 'docusaurus', version: '3' };
|
|
23
28
|
}
|
|
24
29
|
if (metaAttrString.includes('v2')) {
|
|
25
|
-
return { framework:
|
|
30
|
+
return { framework: 'docusaurus', version: '2' };
|
|
26
31
|
} else if (metaAttrString.includes('v1')) {
|
|
27
32
|
console.warn(
|
|
28
33
|
'WARNING: We detected Docusaurus version 1 but we only support scraping versions 2 and 3.'
|
|
29
34
|
);
|
|
30
|
-
return { framework:
|
|
35
|
+
return { framework: 'docusaurus', version: '1' };
|
|
31
36
|
}
|
|
32
37
|
}
|
|
33
38
|
|
|
34
39
|
const isGitBook = $('.gitbook-root').length > 0;
|
|
35
40
|
if (isGitBook) {
|
|
36
|
-
return { framework:
|
|
41
|
+
return { framework: 'gitbook' };
|
|
37
42
|
}
|
|
38
43
|
|
|
39
44
|
const isReadMe = $('meta[name="readme-deploy"]').length > 0;
|
|
40
45
|
if (isReadMe) {
|
|
41
|
-
return { framework:
|
|
46
|
+
return { framework: 'readme' };
|
|
42
47
|
}
|
|
43
48
|
|
|
44
49
|
const isIntercom = $("meta[name='intercom:trackingEvent']").length > 0;
|
|
45
50
|
if (isIntercom) {
|
|
46
|
-
return { framework:
|
|
51
|
+
return { framework: 'intercom' };
|
|
47
52
|
}
|
|
48
53
|
|
|
49
54
|
return { framework: undefined };
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { Cheerio, CheerioAPI, Element } from 'cheerio';
|
|
1
2
|
import path from 'path';
|
|
2
3
|
|
|
3
4
|
import downloadImage, {
|
|
@@ -6,14 +7,13 @@ import downloadImage, {
|
|
|
6
7
|
removeMetadataFromImageSrc,
|
|
7
8
|
} from '../downloadImage.js';
|
|
8
9
|
|
|
9
|
-
// To Do: Use CheerioElement instead of any when we bump the cheerio version
|
|
10
10
|
export default async function downloadAllImages(
|
|
11
|
-
$:
|
|
12
|
-
content:
|
|
11
|
+
$: CheerioAPI,
|
|
12
|
+
content: Cheerio<Element>,
|
|
13
13
|
origin: string,
|
|
14
14
|
baseDir: string,
|
|
15
15
|
overwrite: boolean,
|
|
16
|
-
modifyFileName?:
|
|
16
|
+
modifyFileName?: (fileName: string) => string
|
|
17
17
|
) {
|
|
18
18
|
if (!baseDir) {
|
|
19
19
|
console.debug('Skipping image downloading');
|
|
@@ -23,17 +23,17 @@ export default async function downloadAllImages(
|
|
|
23
23
|
// We remove duplicates because some frameworks duplicate img tags
|
|
24
24
|
// to show the image larger when clicked on.
|
|
25
25
|
const imageSrcs = [
|
|
26
|
-
...new Set
|
|
26
|
+
...new Set(
|
|
27
27
|
content
|
|
28
28
|
.find('img[src]')
|
|
29
|
-
.map((
|
|
29
|
+
.map((_, image) => $(image).attr('src'))
|
|
30
30
|
.toArray()
|
|
31
31
|
),
|
|
32
32
|
];
|
|
33
33
|
|
|
34
34
|
// Wait to all images to download before continuing
|
|
35
35
|
const origToNewArray = await Promise.all(
|
|
36
|
-
imageSrcs.map(async (imageSrc
|
|
36
|
+
imageSrcs.map(async (imageSrc) => {
|
|
37
37
|
if (!imageSrc || !isValidImageSrc(imageSrc)) {
|
|
38
38
|
return {};
|
|
39
39
|
}
|
|
@@ -7,7 +7,6 @@ import downloadImage, {
|
|
|
7
7
|
} from '../downloadImage.js';
|
|
8
8
|
import { getFileExtension } from '../util.js';
|
|
9
9
|
|
|
10
|
-
// To Do: Use CheerioElement instead of any when we bump the cheerio version
|
|
11
10
|
export default async function downloadLogoImage(
|
|
12
11
|
imageSrc: string | undefined,
|
|
13
12
|
imageBaseDir: string,
|
|
@@ -1,12 +1,8 @@
|
|
|
1
1
|
export default function replaceImagePaths(
|
|
2
|
-
origToWritePath:
|
|
2
|
+
origToWritePath: Record<string, string>,
|
|
3
3
|
cliDir: string,
|
|
4
4
|
markdown: string
|
|
5
5
|
) {
|
|
6
|
-
if (origToWritePath == null) {
|
|
7
|
-
return markdown;
|
|
8
|
-
}
|
|
9
|
-
|
|
10
6
|
// Change image paths to use the downloaded locations
|
|
11
7
|
for (const [origHref, writePath] of Object.entries(origToWritePath)) {
|
|
12
8
|
// Use relative paths within the folder we are in
|
|
@@ -1,65 +1,68 @@
|
|
|
1
1
|
import axios from 'axios';
|
|
2
|
-
import { ArgumentsCamelCase } from 'yargs';
|
|
3
2
|
|
|
4
3
|
import { getHtmlWithPuppeteer } from '../browser.js';
|
|
5
|
-
import {
|
|
6
|
-
import { detectFramework, Frameworks } from './detectFramework.js';
|
|
4
|
+
import { detectFramework, Framework, FrameworkHint, frameworks } from './detectFramework.js';
|
|
7
5
|
import { scrapePage, ScrapePageFn } from './scrapePage.js';
|
|
8
6
|
import { scrapeIntercomPage } from './site-scrapers/Intercom/scrapeIntercomPage.js';
|
|
9
7
|
import { scrapeDocusaurusPage } from './site-scrapers/scrapeDocusaurusPage.js';
|
|
10
8
|
import { scrapeGitBookPage } from './site-scrapers/scrapeGitBookPage.js';
|
|
11
9
|
import { scrapeReadMePage } from './site-scrapers/scrapeReadMePage.js';
|
|
12
10
|
|
|
13
|
-
function validateFramework(framework) {
|
|
11
|
+
function validateFramework(framework: Framework | undefined) {
|
|
14
12
|
if (!framework) {
|
|
15
|
-
console.log(
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
13
|
+
console.log(
|
|
14
|
+
`Could not detect the framework automatically. Please use the -t flag to specify one of: ${frameworks.join(
|
|
15
|
+
', '
|
|
16
|
+
)}`
|
|
17
|
+
);
|
|
20
18
|
return process.exit(1);
|
|
21
19
|
}
|
|
22
20
|
}
|
|
23
21
|
|
|
24
22
|
export async function scrapePageWrapper(
|
|
25
|
-
|
|
23
|
+
url: string,
|
|
24
|
+
overwrite: boolean,
|
|
26
25
|
scrapeFunc: ScrapePageFn,
|
|
27
26
|
options?: { version?: string; puppeteer?: boolean }
|
|
28
27
|
) {
|
|
29
|
-
const href = getHrefFromArgs(argv);
|
|
30
28
|
let html: string;
|
|
31
29
|
if (options?.puppeteer) {
|
|
32
|
-
html = await getHtmlWithPuppeteer(
|
|
30
|
+
html = await getHtmlWithPuppeteer(url);
|
|
33
31
|
} else {
|
|
34
|
-
const res = await axios.get(
|
|
32
|
+
const res = await axios.get(url);
|
|
35
33
|
html = res.data;
|
|
36
34
|
}
|
|
37
|
-
await scrapePage(scrapeFunc,
|
|
35
|
+
await scrapePage(scrapeFunc, url, html, overwrite, options?.version);
|
|
38
36
|
process.exit(0);
|
|
39
37
|
}
|
|
40
38
|
|
|
41
|
-
export async function scrapePageAutomatically(
|
|
42
|
-
|
|
43
|
-
|
|
39
|
+
export async function scrapePageAutomatically(
|
|
40
|
+
url: string,
|
|
41
|
+
overwrite: boolean,
|
|
42
|
+
frameworkHint: FrameworkHint
|
|
43
|
+
) {
|
|
44
|
+
const res = await axios.get(url);
|
|
44
45
|
const html = res.data;
|
|
45
|
-
|
|
46
|
+
frameworkHint = frameworkHint.framework ? frameworkHint : detectFramework(html);
|
|
46
47
|
|
|
47
|
-
validateFramework(framework);
|
|
48
|
+
validateFramework(frameworkHint.framework);
|
|
48
49
|
|
|
49
|
-
console.log('Detected framework: ' + framework);
|
|
50
|
+
console.log('Detected framework: ' + frameworkHint.framework);
|
|
50
51
|
|
|
51
|
-
switch (framework) {
|
|
52
|
-
case
|
|
53
|
-
await scrapePageWrapper(
|
|
52
|
+
switch (frameworkHint.framework) {
|
|
53
|
+
case 'docusaurus':
|
|
54
|
+
await scrapePageWrapper(url, overwrite, scrapeDocusaurusPage, {
|
|
55
|
+
version: frameworkHint.version,
|
|
56
|
+
});
|
|
54
57
|
break;
|
|
55
|
-
case
|
|
56
|
-
await scrapePageWrapper(
|
|
58
|
+
case 'gitbook':
|
|
59
|
+
await scrapePageWrapper(url, overwrite, scrapeGitBookPage, { puppeteer: true });
|
|
57
60
|
break;
|
|
58
|
-
case
|
|
59
|
-
await scrapePageWrapper(
|
|
61
|
+
case 'readme':
|
|
62
|
+
await scrapePageWrapper(url, overwrite, scrapeReadMePage);
|
|
60
63
|
break;
|
|
61
|
-
case
|
|
62
|
-
await scrapePageWrapper(
|
|
64
|
+
case 'intercom':
|
|
65
|
+
await scrapePageWrapper(url, overwrite, scrapeIntercomPage);
|
|
63
66
|
break;
|
|
64
67
|
}
|
|
65
68
|
}
|