@mintlify/scraping 3.0.141 → 3.0.142
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/browser.js +1 -1
- package/bin/browser.js.map +1 -1
- package/bin/checks.d.ts +8 -0
- package/bin/checks.js +24 -0
- package/bin/checks.js.map +1 -0
- package/bin/cli.js +49 -45
- package/bin/cli.js.map +1 -1
- package/bin/scraping/detectFramework.d.ts +7 -14
- package/bin/scraping/detectFramework.js +8 -14
- package/bin/scraping/detectFramework.js.map +1 -1
- package/bin/scraping/downloadAllImages.d.ts +2 -1
- package/bin/scraping/downloadAllImages.js +1 -2
- package/bin/scraping/downloadAllImages.js.map +1 -1
- package/bin/scraping/downloadLogoImage.js +0 -1
- package/bin/scraping/downloadLogoImage.js.map +1 -1
- package/bin/scraping/replaceImagePaths.d.ts +1 -1
- package/bin/scraping/replaceImagePaths.js +0 -3
- package/bin/scraping/replaceImagePaths.js.map +1 -1
- package/bin/scraping/scrapePageCommands.d.ts +3 -3
- package/bin/scraping/scrapePageCommands.js +22 -27
- package/bin/scraping/scrapePageCommands.js.map +1 -1
- package/bin/scraping/scrapeSectionCommands.d.ts +5 -5
- package/bin/scraping/scrapeSectionCommands.js +27 -30
- package/bin/scraping/scrapeSectionCommands.js.map +1 -1
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js +1 -1
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js.map +1 -1
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js +2 -2
- package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js.map +1 -1
- package/bin/scraping/site-scrapers/alternateGroupTitle.d.ts +3 -1
- package/bin/scraping/site-scrapers/alternateGroupTitle.js +1 -1
- package/bin/scraping/site-scrapers/alternateGroupTitle.js.map +1 -1
- package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.d.ts +5 -1
- package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js +1 -1
- package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js.map +1 -1
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.d.ts +3 -1
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js +1 -4
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js.map +1 -1
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.d.ts +3 -1
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js +0 -3
- package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js.map +1 -1
- package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js +3 -3
- package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js.map +1 -1
- package/bin/scraping/site-scrapers/openNestedGitbookMenus.js +2 -2
- package/bin/scraping/site-scrapers/openNestedGitbookMenus.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js +1 -1
- package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js +2 -2
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeGitBookPage.js +1 -1
- package/bin/scraping/site-scrapers/scrapeGitBookPage.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js +1 -1
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeReadMePage.js +1 -1
- package/bin/scraping/site-scrapers/scrapeReadMePage.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeReadMeSection.js +2 -2
- package/bin/scraping/site-scrapers/scrapeReadMeSection.js.map +1 -1
- package/bin/tsconfig.build.tsbuildinfo +1 -1
- package/bin/util.d.ts +0 -1
- package/bin/util.js +9 -26
- package/bin/util.js.map +1 -1
- package/package.json +4 -4
- package/src/browser.ts +1 -1
- package/src/checks.ts +32 -0
- package/src/cli.ts +48 -74
- package/src/scraping/detectFramework.ts +20 -15
- package/src/scraping/downloadAllImages.ts +7 -7
- package/src/scraping/downloadLogoImage.ts +0 -1
- package/src/scraping/replaceImagePaths.ts +1 -5
- package/src/scraping/scrapePageCommands.ts +32 -29
- package/src/scraping/scrapeSectionCommands.ts +38 -34
- package/src/scraping/site-scrapers/Intercom/scrapeIntercomPage.ts +1 -1
- package/src/scraping/site-scrapers/Intercom/scrapeIntercomSection.ts +2 -2
- package/src/scraping/site-scrapers/alternateGroupTitle.ts +5 -2
- package/src/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.ts +7 -5
- package/src/scraping/site-scrapers/links-per-group/getLinksRecursively.ts +8 -6
- package/src/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.ts +7 -5
- package/src/scraping/site-scrapers/openNestedDocusaurusMenus.ts +3 -3
- package/src/scraping/site-scrapers/openNestedGitbookMenus.ts +3 -3
- package/src/scraping/site-scrapers/scrapeDocusaurusPage.ts +1 -1
- package/src/scraping/site-scrapers/scrapeDocusaurusSection.ts +2 -2
- package/src/scraping/site-scrapers/scrapeGitBookPage.ts +1 -1
- package/src/scraping/site-scrapers/scrapeGitBookSection.ts +3 -3
- package/src/scraping/site-scrapers/scrapeReadMePage.ts +1 -1
- package/src/scraping/site-scrapers/scrapeReadMeSection.ts +3 -3
- package/src/util.ts +10 -26
- package/tsconfig.json +0 -1
- package/bin/validation/isValidLink.d.ts +0 -1
- package/bin/validation/isValidLink.js +0 -11
- package/bin/validation/isValidLink.js.map +0 -1
- package/bin/validation/stopIfInvalidLink.d.ts +0 -1
- package/bin/validation/stopIfInvalidLink.js +0 -9
- package/bin/validation/stopIfInvalidLink.js.map +0 -1
- package/src/validation/isValidLink.ts +0 -9
- package/src/validation/stopIfInvalidLink.ts +0 -9
package/bin/browser.js
CHANGED
package/bin/browser.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"browser.js","sourceRoot":"","sources":["../src/browser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,WAAW,CAAC;AAEnC,MAAM,CAAC,KAAK,UAAU,YAAY;IAChC,IAAI,CAAC;QACH,OAAO,MAAM,MAAM,CAAC;YAClB,QAAQ,EAAE,IAAI;YACd,iBAAiB,EAAE,IAAI;SACxB,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,GAAG,CAAC,uCAAuC,EAAE,GAAG,CAAC,CAAC;QAC1D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,IAAY;IACrD,MAAM,OAAO,GAAG,MAAM,YAAY,EAAE,CAAC;IACrC,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;IACrC,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;QACpB,SAAS,EAAE,cAAc;KAC1B,CAAC,CAAC;IACH,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;IAClC,OAAO,CAAC,KAAK,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"browser.js","sourceRoot":"","sources":["../src/browser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,WAAW,CAAC;AAEnC,MAAM,CAAC,KAAK,UAAU,YAAY;IAChC,IAAI,CAAC;QACH,OAAO,MAAM,MAAM,CAAC;YAClB,QAAQ,EAAE,IAAI;YACd,iBAAiB,EAAE,IAAI;SACxB,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,GAAG,CAAC,uCAAuC,EAAE,GAAG,CAAC,CAAC;QAC1D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,IAAY;IACrD,MAAM,OAAO,GAAG,MAAM,YAAY,EAAE,CAAC;IACrC,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;IACrC,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;QACpB,SAAS,EAAE,cAAc;KAC1B,CAAC,CAAC;IACH,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;IAClC,KAAK,OAAO,CAAC,KAAK,EAAE,CAAC;IACrB,OAAO,IAAI,CAAC;AACd,CAAC"}
|
package/bin/checks.d.ts
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { Framework } from './scraping/detectFramework.js';
|
|
2
|
+
export declare function checkUrl({ url }: {
|
|
3
|
+
url: string;
|
|
4
|
+
}): boolean;
|
|
5
|
+
export declare function checkVersion({ tool, docusaurusVersion, }: {
|
|
6
|
+
tool: Framework | undefined;
|
|
7
|
+
docusaurusVersion: string | undefined;
|
|
8
|
+
}): boolean;
|
package/bin/checks.js
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
// This checks the link is written correctly, not that the page exists.
|
|
2
|
+
export function checkUrl({ url }) {
|
|
3
|
+
try {
|
|
4
|
+
new URL(url);
|
|
5
|
+
}
|
|
6
|
+
catch {
|
|
7
|
+
throw Error(`Invalid link: ${url}\nMake sure the link starts with http:// or https://`);
|
|
8
|
+
}
|
|
9
|
+
return true;
|
|
10
|
+
}
|
|
11
|
+
export function checkVersion({ tool, docusaurusVersion, }) {
|
|
12
|
+
if (tool === 'docusaurus') {
|
|
13
|
+
if (docusaurusVersion === undefined) {
|
|
14
|
+
throw Error('When using Docusaurus, you must specify the version (1,2,3) using the --docusaurusVersion flag');
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
else {
|
|
18
|
+
if (docusaurusVersion !== undefined) {
|
|
19
|
+
throw Error('The --docusaurusVersion flag is only applicable when using Docusaurus.');
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
return true;
|
|
23
|
+
}
|
|
24
|
+
//# sourceMappingURL=checks.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"checks.js","sourceRoot":"","sources":["../src/checks.ts"],"names":[],"mappings":"AAEA,uEAAuE;AACvE,MAAM,UAAU,QAAQ,CAAC,EAAE,GAAG,EAAmB;IAC/C,IAAI,CAAC;QACH,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IACf,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,KAAK,CAAC,iBAAiB,GAAG,sDAAsD,CAAC,CAAC;IAC1F,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,EAC3B,IAAI,EACJ,iBAAiB,GAIlB;IACC,IAAI,IAAI,KAAK,YAAY,EAAE,CAAC;QAC1B,IAAI,iBAAiB,KAAK,SAAS,EAAE,CAAC;YACpC,MAAM,KAAK,CACT,gGAAgG,CACjG,CAAC;QACJ,CAAC;IACH,CAAC;SAAM,CAAC;QACN,IAAI,iBAAiB,KAAK,SAAS,EAAE,CAAC;YACpC,MAAM,KAAK,CAAC,wEAAwE,CAAC,CAAC;QACxF,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC"}
|
package/bin/cli.js
CHANGED
|
@@ -1,59 +1,63 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
/* eslint-disable @typescript-eslint/no-empty-function */
|
|
3
2
|
import yargs from 'yargs';
|
|
4
3
|
import { hideBin } from 'yargs/helpers';
|
|
4
|
+
import { checkUrl, checkVersion } from './checks.js';
|
|
5
5
|
import { generateOpenApiPages } from './openapi/generateOpenApiPages.js';
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import { scrapeIntercomSection } from './scraping/site-scrapers/Intercom/scrapeIntercomSection.js';
|
|
10
|
-
import { scrapeGitBookPage } from './scraping/site-scrapers/scrapeGitBookPage.js';
|
|
11
|
-
import { scrapeReadMePage } from './scraping/site-scrapers/scrapeReadMePage.js';
|
|
12
|
-
import { scrapeReadMeSection } from './scraping/site-scrapers/scrapeReadMeSection.js';
|
|
6
|
+
import { frameworks } from './scraping/detectFramework.js';
|
|
7
|
+
import { scrapePageAutomatically } from './scraping/scrapePageCommands.js';
|
|
8
|
+
import { scrapeSectionAutomatically } from './scraping/scrapeSectionCommands.js';
|
|
13
9
|
await yargs(hideBin(process.argv))
|
|
14
|
-
.command('page
|
|
15
|
-
|
|
10
|
+
.command('page <url>', 'Scrapes a page', (yargs) => yargs
|
|
11
|
+
.positional('url', { type: 'string', demandOption: true })
|
|
12
|
+
.check(checkUrl)
|
|
13
|
+
.option('overwrite', { alias: 'O', type: 'boolean', default: false })
|
|
14
|
+
.option('tool', { alias: 't', choices: frameworks })
|
|
15
|
+
.option('docusaurusVersion', {
|
|
16
|
+
alias: 'd',
|
|
17
|
+
type: 'string',
|
|
18
|
+
choices: ['1', '2', '3'],
|
|
16
19
|
})
|
|
17
|
-
.
|
|
18
|
-
|
|
20
|
+
.check(checkVersion), async ({ url, overwrite, tool, docusaurusVersion }) => {
|
|
21
|
+
const frameworkHint = {
|
|
22
|
+
framework: tool,
|
|
23
|
+
version: docusaurusVersion ?? '3',
|
|
24
|
+
};
|
|
25
|
+
await scrapePageAutomatically(url, overwrite, frameworkHint);
|
|
19
26
|
})
|
|
20
|
-
.command('
|
|
21
|
-
|
|
27
|
+
.command('section <url>', 'Scrapes the docs in the section', (yargs) => yargs
|
|
28
|
+
.positional('url', { type: 'string', demandOption: true })
|
|
29
|
+
.check(checkUrl)
|
|
30
|
+
.option('overwrite', { alias: 'O', type: 'boolean', default: false })
|
|
31
|
+
.option('tool', { alias: 't', choices: frameworks })
|
|
32
|
+
.option('docusaurusVersion', {
|
|
33
|
+
alias: 'd',
|
|
34
|
+
type: 'string',
|
|
35
|
+
choices: ['1', '2', '3'],
|
|
22
36
|
})
|
|
23
|
-
.
|
|
24
|
-
|
|
37
|
+
.check(checkVersion), async ({ url, overwrite, tool, docusaurusVersion }) => {
|
|
38
|
+
const frameworkHint = {
|
|
39
|
+
framework: tool,
|
|
40
|
+
version: docusaurusVersion ?? '3',
|
|
41
|
+
};
|
|
42
|
+
await scrapeSectionAutomatically(url, overwrite, frameworkHint);
|
|
25
43
|
})
|
|
26
|
-
.command('
|
|
27
|
-
|
|
44
|
+
.command('openapi-file <openapiFilename>', 'Creates MDX files from an OpenAPI spec', (yargs) => yargs
|
|
45
|
+
.positional('openapiFilename', {
|
|
46
|
+
describe: 'The filename of the OpenAPI spec',
|
|
47
|
+
type: 'string',
|
|
48
|
+
demandOption: true,
|
|
28
49
|
})
|
|
29
|
-
.
|
|
30
|
-
|
|
50
|
+
.option('writeFiles', {
|
|
51
|
+
describe: 'Whether or not to write the frontmatter files',
|
|
52
|
+
default: true,
|
|
53
|
+
type: 'boolean',
|
|
54
|
+
alias: 'w',
|
|
31
55
|
})
|
|
32
|
-
.
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
})
|
|
38
|
-
.command('openapi-file <openapiFilename>', 'Creates MDX files from an OpenAPI spec', (yargs) => {
|
|
39
|
-
return yargs
|
|
40
|
-
.positional('openapiFilename', {
|
|
41
|
-
describe: 'The filename of the OpenAPI spec',
|
|
42
|
-
type: 'string',
|
|
43
|
-
})
|
|
44
|
-
.option('writeFiles', {
|
|
45
|
-
describe: 'Whether or not to write the frontmatter files',
|
|
46
|
-
default: true,
|
|
47
|
-
type: 'boolean',
|
|
48
|
-
alias: 'w',
|
|
49
|
-
})
|
|
50
|
-
.option('outDir', {
|
|
51
|
-
describe: 'The folder in which to write any created frontmatter files',
|
|
52
|
-
type: 'string',
|
|
53
|
-
alias: 'o',
|
|
54
|
-
})
|
|
55
|
-
.demandOption('openapiFilename');
|
|
56
|
-
}, async (argv) => {
|
|
56
|
+
.option('outDir', {
|
|
57
|
+
describe: 'The folder in which to write any created frontmatter files',
|
|
58
|
+
type: 'string',
|
|
59
|
+
alias: 'o',
|
|
60
|
+
}), async (argv) => {
|
|
57
61
|
try {
|
|
58
62
|
const { nav } = await generateOpenApiPages(argv.openapiFilename, argv.writeFiles, argv.outDir);
|
|
59
63
|
console.log('navigation object suggestion:');
|
package/bin/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAExC,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AACrD,OAAO,EAAE,oBAAoB,EAAE,MAAM,mCAAmC,CAAC;AACzE,OAAO,EAAiB,UAAU,EAAE,MAAM,+BAA+B,CAAC;AAC1E,OAAO,EAAE,uBAAuB,EAAE,MAAM,kCAAkC,CAAC;AAC3E,OAAO,EAAE,0BAA0B,EAAE,MAAM,qCAAqC,CAAC;AAEjF,MAAM,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;KAC/B,OAAO,CACN,YAAY,EACZ,gBAAgB,EAChB,CAAC,KAAK,EAAE,EAAE,CACR,KAAK;KACF,UAAU,CAAC,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;KACzD,KAAK,CAAC,QAAQ,CAAC;KACf,MAAM,CAAC,WAAW,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;KACpE,MAAM,CAAC,MAAM,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC;KACnD,MAAM,CAAC,mBAAmB,EAAE;IAC3B,KAAK,EAAE,GAAG;IACV,IAAI,EAAE,QAAQ;IACd,OAAO,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAU;CAClC,CAAC;KACD,KAAK,CAAC,YAAY,CAAC,EACxB,KAAK,EAAE,EAAE,GAAG,EAAE,SAAS,EAAE,IAAI,EAAE,iBAAiB,EAAE,EAAE,EAAE;IACpD,MAAM,aAAa,GAAkB;QACnC,SAAS,EAAE,IAAI;QACf,OAAO,EAAE,iBAAiB,IAAI,GAAG;KAClC,CAAC;IAEF,MAAM,uBAAuB,CAAC,GAAG,EAAE,SAAS,EAAE,aAAa,CAAC,CAAC;AAC/D,CAAC,CACF;KACA,OAAO,CACN,eAAe,EACf,iCAAiC,EACjC,CAAC,KAAK,EAAE,EAAE,CACR,KAAK;KACF,UAAU,CAAC,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;KACzD,KAAK,CAAC,QAAQ,CAAC;KACf,MAAM,CAAC,WAAW,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;KACpE,MAAM,CAAC,MAAM,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC;KACnD,MAAM,CAAC,mBAAmB,EAAE;IAC3B,KAAK,EAAE,GAAG;IACV,IAAI,EAAE,QAAQ;IACd,OAAO,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAU;CAClC,CAAC;KACD,KAAK,CAAC,YAAY,CAAC,EACxB,KAAK,EAAE,EAAE,GAAG,EAAE,SAAS,EAAE,IAAI,EAAE,iBAAiB,EAAE,EAAE,EAAE;IACpD,MAAM,aAAa,GAAkB;QACnC,SAAS,EAAE,IAAI;QACf,OAAO,EAAE,iBAAiB,IAAI,GAAG;KAClC,CAAC;IAEF,MAAM,0BAA0B,CAAC,GAAG,EAAE,SAAS,EAAE,aAAa,CAAC,CAAC;AAClE,CAAC,CACF;KACA,OAAO,CACN,gCAAgC,EAChC,wCAAwC,EACxC,CAAC,KAAK,EAAE,EAAE,CACR,KAAK;KACF,UAAU,CAAC,iBAAiB,EAAE;IAC7B,QAAQ,EAAE,kCAAkC;IAC5C,IAAI,EAAE,QAAQ;IACd,YAAY,EAAE,IAAI;CACnB,CAAC;KACD,MAAM,CAAC,YAAY,EAAE;IACpB,QAAQ,EAAE,+CAA+C;IACzD,OAAO,EAAE,IAAI;IACb,IAAI,EAAE,SAAS;IACf,KAAK,EAAE,GAAG;CACX,CAAC;KACD,MAAM,CAAC,QAAQ,EAAE;IAChB,QAAQ,EAAE,4DAA4D;IACtE,IAAI,EAAE,QAAQ;IACd,KAAK,EAAE,GAAG;CACX,CAAC,EACN,KAAK,EAAE,IAAI,EAAE,EAAE;IACb,IAAI,CAAC;QACH,MAAM,EAAE,GAAG,EAAE,GAAG,MAAM,oBAAoB,CACxC,IAAI,CAAC,eAAe,EACpB,IAAI,CAAC,UAAU,EACf,IAAI,CAAC,MAAM,CACZ,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAAC,CAAC;QAC7C,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC;IACjD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;YAC3B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC/B,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;AACH,CAAC,CACF;IACD,+DAA+D;KAC9D,cAAc,EAAE;KAChB,aAAa,CAAC,CAAC,EAAE,gEAAgE,CAAC;IAEnF,iDAAiD;KAChD,KAAK,CAAC,GAAG,EAAE,MAAM,CAAC;KAClB,KAAK,CAAC,GAAG,EAAE,SAAS,CAAC;KAErB,KAAK,EAAE,CAAC"}
|
|
@@ -1,16 +1,9 @@
|
|
|
1
|
-
export declare
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
}
|
|
7
|
-
export declare function detectFramework(html: any): {
|
|
8
|
-
framework: Frameworks;
|
|
9
|
-
version: string;
|
|
1
|
+
export declare const frameworks: readonly ["docusaurus", "gitbook", "readme", "intercom"];
|
|
2
|
+
export type Framework = (typeof frameworks)[number];
|
|
3
|
+
export type FrameworkHint = {
|
|
4
|
+
framework: 'docusaurus';
|
|
5
|
+
version: '1' | '2' | '3';
|
|
10
6
|
} | {
|
|
11
|
-
framework:
|
|
12
|
-
version?: undefined;
|
|
13
|
-
} | {
|
|
14
|
-
framework: undefined;
|
|
15
|
-
version?: undefined;
|
|
7
|
+
framework: 'gitbook' | 'readme' | 'intercom' | undefined;
|
|
16
8
|
};
|
|
9
|
+
export declare function detectFramework(html: string): FrameworkHint;
|
|
@@ -1,11 +1,5 @@
|
|
|
1
|
-
import cheerio from 'cheerio';
|
|
2
|
-
export
|
|
3
|
-
(function (Frameworks) {
|
|
4
|
-
Frameworks["DOCUSAURUS"] = "DOCUSAURUS";
|
|
5
|
-
Frameworks["GITBOOK"] = "GITBOOK";
|
|
6
|
-
Frameworks["README"] = "README";
|
|
7
|
-
Frameworks["INTERCOM"] = "INTERCOM";
|
|
8
|
-
})(Frameworks || (Frameworks = {}));
|
|
1
|
+
import * as cheerio from 'cheerio';
|
|
2
|
+
export const frameworks = ['docusaurus', 'gitbook', 'readme', 'intercom'];
|
|
9
3
|
export function detectFramework(html) {
|
|
10
4
|
const $ = cheerio.load(html);
|
|
11
5
|
const docusaurusMeta = $('meta[name="generator"]');
|
|
@@ -15,27 +9,27 @@ export function detectFramework(html) {
|
|
|
15
9
|
docusaurusMeta.attr('content').includes('Docusaurus')) {
|
|
16
10
|
const metaAttrString = docusaurusMeta.attr('content');
|
|
17
11
|
if (metaAttrString.includes('v3')) {
|
|
18
|
-
return { framework:
|
|
12
|
+
return { framework: 'docusaurus', version: '3' };
|
|
19
13
|
}
|
|
20
14
|
if (metaAttrString.includes('v2')) {
|
|
21
|
-
return { framework:
|
|
15
|
+
return { framework: 'docusaurus', version: '2' };
|
|
22
16
|
}
|
|
23
17
|
else if (metaAttrString.includes('v1')) {
|
|
24
18
|
console.warn('WARNING: We detected Docusaurus version 1 but we only support scraping versions 2 and 3.');
|
|
25
|
-
return { framework:
|
|
19
|
+
return { framework: 'docusaurus', version: '1' };
|
|
26
20
|
}
|
|
27
21
|
}
|
|
28
22
|
const isGitBook = $('.gitbook-root').length > 0;
|
|
29
23
|
if (isGitBook) {
|
|
30
|
-
return { framework:
|
|
24
|
+
return { framework: 'gitbook' };
|
|
31
25
|
}
|
|
32
26
|
const isReadMe = $('meta[name="readme-deploy"]').length > 0;
|
|
33
27
|
if (isReadMe) {
|
|
34
|
-
return { framework:
|
|
28
|
+
return { framework: 'readme' };
|
|
35
29
|
}
|
|
36
30
|
const isIntercom = $("meta[name='intercom:trackingEvent']").length > 0;
|
|
37
31
|
if (isIntercom) {
|
|
38
|
-
return { framework:
|
|
32
|
+
return { framework: 'intercom' };
|
|
39
33
|
}
|
|
40
34
|
return { framework: undefined };
|
|
41
35
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"detectFramework.js","sourceRoot":"","sources":["../../src/scraping/detectFramework.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"detectFramework.js","sourceRoot":"","sources":["../../src/scraping/detectFramework.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,MAAM,CAAC,MAAM,UAAU,GAAG,CAAC,YAAY,EAAE,SAAS,EAAE,QAAQ,EAAE,UAAU,CAAU,CAAC;AAYnF,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,cAAc,GAAG,CAAC,CAAC,wBAAwB,CAAC,CAAC;IAEnD,IACE,cAAc,CAAC,MAAM,GAAG,CAAC;QACzB,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC;QAC9B,OAAO,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,QAAQ;QACjD,cAAc,CAAC,IAAI,CAAC,SAAS,CAAY,CAAC,QAAQ,CAAC,YAAY,CAAC,EACjE,CAAC;QACD,MAAM,cAAc,GAAG,cAAc,CAAC,IAAI,CAAC,SAAS,CAAW,CAAC;QAChE,IAAI,cAAc,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YAClC,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC;QACnD,CAAC;QACD,IAAI,cAAc,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YAClC,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC;QACnD,CAAC;aAAM,IAAI,cAAc,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YACzC,OAAO,CAAC,IAAI,CACV,0FAA0F,CAC3F,CAAC;YACF,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC;QACnD,CAAC;IACH,CAAC;IAED,MAAM,SAAS,GAAG,CAAC,CAAC,eAAe,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAChD,IAAI,SAAS,EAAE,CAAC;QACd,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC;IAClC,CAAC;IAED,MAAM,QAAQ,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAC5D,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC;IACjC,CAAC;IAED,MAAM,UAAU,GAAG,CAAC,CAAC,qCAAqC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IACvE,IAAI,UAAU,EAAE,CAAC;QACf,OAAO,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC;IACnC,CAAC;IAED,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC;AAClC,CAAC"}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
import { Cheerio, CheerioAPI, Element } from 'cheerio';
|
|
2
|
+
export default function downloadAllImages($: CheerioAPI, content: Cheerio<Element>, origin: string, baseDir: string, overwrite: boolean, modifyFileName?: (fileName: string) => string): Promise<{
|
|
2
3
|
[x: string]: string;
|
|
3
4
|
} | undefined>;
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import path from 'path';
|
|
2
2
|
import downloadImage, { cleanImageSrc, isValidImageSrc, removeMetadataFromImageSrc, } from '../downloadImage.js';
|
|
3
|
-
// To Do: Use CheerioElement instead of any when we bump the cheerio version
|
|
4
3
|
export default async function downloadAllImages($, content, origin, baseDir, overwrite, modifyFileName) {
|
|
5
4
|
if (!baseDir) {
|
|
6
5
|
console.debug('Skipping image downloading');
|
|
@@ -11,7 +10,7 @@ export default async function downloadAllImages($, content, origin, baseDir, ove
|
|
|
11
10
|
const imageSrcs = [
|
|
12
11
|
...new Set(content
|
|
13
12
|
.find('img[src]')
|
|
14
|
-
.map((
|
|
13
|
+
.map((_, image) => $(image).attr('src'))
|
|
15
14
|
.toArray()),
|
|
16
15
|
];
|
|
17
16
|
// Wait to all images to download before continuing
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"downloadAllImages.js","sourceRoot":"","sources":["../../src/scraping/downloadAllImages.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"downloadAllImages.js","sourceRoot":"","sources":["../../src/scraping/downloadAllImages.ts"],"names":[],"mappings":"AACA,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,OAAO,aAAa,EAAE,EACpB,aAAa,EACb,eAAe,EACf,0BAA0B,GAC3B,MAAM,qBAAqB,CAAC;AAE7B,MAAM,CAAC,OAAO,CAAC,KAAK,UAAU,iBAAiB,CAC7C,CAAa,EACb,OAAyB,EACzB,MAAc,EACd,OAAe,EACf,SAAkB,EAClB,cAA6C;IAE7C,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAC5C,OAAO;IACT,CAAC;IAED,kEAAkE;IAClE,4CAA4C;IAC5C,MAAM,SAAS,GAAG;QAChB,GAAG,IAAI,GAAG,CACR,OAAO;aACJ,IAAI,CAAC,UAAU,CAAC;aAChB,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;aACvC,OAAO,EAAE,CACb;KACF,CAAC;IAEF,mDAAmD;IACnD,MAAM,cAAc,GAAG,MAAM,OAAO,CAAC,GAAG,CACtC,SAAS,CAAC,GAAG,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE;QAC/B,IAAI,CAAC,QAAQ,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC5C,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,SAAS,GAAG,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QAElD,IAAI,QAAQ,GAAG,0BAA0B,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;QACpE,IAAI,cAAc,EAAE,CAAC;YACnB,QAAQ,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;QACtC,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;QAE/C,MAAM,aAAa,CAAC,SAAS,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;QAErD,OAAO,EAAE,CAAC,QAAQ,CAAC,EAAE,SAAS,EAAE,CAAC;IACnC,CAAC,CAAC,CACH,CAAC;IAEF,OAAO,cAAc,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,EAAE,CAAC,CAAC;AACxF,CAAC"}
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import path from 'path';
|
|
2
2
|
import downloadImage, { cleanImageSrc, isValidImageSrc, removeMetadataFromImageSrc, } from '../downloadImage.js';
|
|
3
3
|
import { getFileExtension } from '../util.js';
|
|
4
|
-
// To Do: Use CheerioElement instead of any when we bump the cheerio version
|
|
5
4
|
export default async function downloadLogoImage(imageSrc, imageBaseDir, origin, overwrite) {
|
|
6
5
|
if (!imageSrc || !isValidImageSrc(imageSrc))
|
|
7
6
|
return;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"downloadLogoImage.js","sourceRoot":"","sources":["../../src/scraping/downloadLogoImage.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,OAAO,aAAa,EAAE,EACpB,aAAa,EACb,eAAe,EACf,0BAA0B,GAC3B,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAE9C,
|
|
1
|
+
{"version":3,"file":"downloadLogoImage.js","sourceRoot":"","sources":["../../src/scraping/downloadLogoImage.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,OAAO,aAAa,EAAE,EACpB,aAAa,EACb,eAAe,EACf,0BAA0B,GAC3B,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAE9C,MAAM,CAAC,OAAO,CAAC,KAAK,UAAU,iBAAiB,CAC7C,QAA4B,EAC5B,YAAoB,EACpB,MAAc,EACd,SAAkB;IAElB,IAAI,CAAC,QAAQ,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC;QAAE,OAAO;IAEpD,MAAM,SAAS,GAAG,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAElD,MAAM,GAAG,GAAG,gBAAgB,CAAC,0BAA0B,CAAC,QAAQ,CAAC,CAAC,CAAC;IACnE,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,EAAE,kBAAkB,GAAG,GAAG,CAAC,CAAC;IAE5E,MAAM,aAAa,CAAC,SAAS,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;AACvD,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
export default function replaceImagePaths(origToWritePath:
|
|
1
|
+
export default function replaceImagePaths(origToWritePath: Record<string, string>, cliDir: string, markdown: string): string;
|
|
@@ -1,7 +1,4 @@
|
|
|
1
1
|
export default function replaceImagePaths(origToWritePath, cliDir, markdown) {
|
|
2
|
-
if (origToWritePath == null) {
|
|
3
|
-
return markdown;
|
|
4
|
-
}
|
|
5
2
|
// Change image paths to use the downloaded locations
|
|
6
3
|
for (const [origHref, writePath] of Object.entries(origToWritePath)) {
|
|
7
4
|
// Use relative paths within the folder we are in
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"replaceImagePaths.js","sourceRoot":"","sources":["../../src/scraping/replaceImagePaths.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,OAAO,UAAU,iBAAiB,CACvC,
|
|
1
|
+
{"version":3,"file":"replaceImagePaths.js","sourceRoot":"","sources":["../../src/scraping/replaceImagePaths.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,OAAO,UAAU,iBAAiB,CACvC,eAAuC,EACvC,MAAc,EACd,QAAgB;IAEhB,qDAAqD;IACrD,KAAK,MAAM,CAAC,QAAQ,EAAE,SAAS,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,EAAE,CAAC;QACpE,iDAAiD;QACjD,IAAI,SAAS,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;YACjC,QAAQ,GAAG,QAAQ,CAAC,UAAU,CAAC,QAAQ,EAAE,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;QAC3E,CAAC;aAAM,CAAC;YACN,QAAQ,GAAG,QAAQ,CAAC,UAAU,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;QACtD,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { FrameworkHint } from './detectFramework.js';
|
|
2
2
|
import { ScrapePageFn } from './scrapePage.js';
|
|
3
|
-
export declare function scrapePageWrapper(
|
|
3
|
+
export declare function scrapePageWrapper(url: string, overwrite: boolean, scrapeFunc: ScrapePageFn, options?: {
|
|
4
4
|
version?: string;
|
|
5
5
|
puppeteer?: boolean;
|
|
6
6
|
}): Promise<void>;
|
|
7
|
-
export declare function scrapePageAutomatically(
|
|
7
|
+
export declare function scrapePageAutomatically(url: string, overwrite: boolean, frameworkHint: FrameworkHint): Promise<void>;
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import axios from 'axios';
|
|
2
2
|
import { getHtmlWithPuppeteer } from '../browser.js';
|
|
3
|
-
import {
|
|
4
|
-
import { detectFramework, Frameworks } from './detectFramework.js';
|
|
3
|
+
import { detectFramework, frameworks } from './detectFramework.js';
|
|
5
4
|
import { scrapePage } from './scrapePage.js';
|
|
6
5
|
import { scrapeIntercomPage } from './site-scrapers/Intercom/scrapeIntercomPage.js';
|
|
7
6
|
import { scrapeDocusaurusPage } from './site-scrapers/scrapeDocusaurusPage.js';
|
|
@@ -9,46 +8,42 @@ import { scrapeGitBookPage } from './site-scrapers/scrapeGitBookPage.js';
|
|
|
9
8
|
import { scrapeReadMePage } from './site-scrapers/scrapeReadMePage.js';
|
|
10
9
|
function validateFramework(framework) {
|
|
11
10
|
if (!framework) {
|
|
12
|
-
console.log(
|
|
13
|
-
console.log('scrape-page-docusaurus');
|
|
14
|
-
console.log('scrape-page-gitbook');
|
|
15
|
-
console.log('scrape-page-readme');
|
|
16
|
-
console.log('scrape-page-intercom');
|
|
11
|
+
console.log(`Could not detect the framework automatically. Please use the -t flag to specify one of: ${frameworks.join(', ')}`);
|
|
17
12
|
return process.exit(1);
|
|
18
13
|
}
|
|
19
14
|
}
|
|
20
|
-
export async function scrapePageWrapper(
|
|
21
|
-
const href = getHrefFromArgs(argv);
|
|
15
|
+
export async function scrapePageWrapper(url, overwrite, scrapeFunc, options) {
|
|
22
16
|
let html;
|
|
23
17
|
if (options?.puppeteer) {
|
|
24
|
-
html = await getHtmlWithPuppeteer(
|
|
18
|
+
html = await getHtmlWithPuppeteer(url);
|
|
25
19
|
}
|
|
26
20
|
else {
|
|
27
|
-
const res = await axios.get(
|
|
21
|
+
const res = await axios.get(url);
|
|
28
22
|
html = res.data;
|
|
29
23
|
}
|
|
30
|
-
await scrapePage(scrapeFunc,
|
|
24
|
+
await scrapePage(scrapeFunc, url, html, overwrite, options?.version);
|
|
31
25
|
process.exit(0);
|
|
32
26
|
}
|
|
33
|
-
export async function scrapePageAutomatically(
|
|
34
|
-
const
|
|
35
|
-
const res = await axios.get(href);
|
|
27
|
+
export async function scrapePageAutomatically(url, overwrite, frameworkHint) {
|
|
28
|
+
const res = await axios.get(url);
|
|
36
29
|
const html = res.data;
|
|
37
|
-
|
|
38
|
-
validateFramework(framework);
|
|
39
|
-
console.log('Detected framework: ' + framework);
|
|
40
|
-
switch (framework) {
|
|
41
|
-
case
|
|
42
|
-
await scrapePageWrapper(
|
|
30
|
+
frameworkHint = frameworkHint.framework ? frameworkHint : detectFramework(html);
|
|
31
|
+
validateFramework(frameworkHint.framework);
|
|
32
|
+
console.log('Detected framework: ' + frameworkHint.framework);
|
|
33
|
+
switch (frameworkHint.framework) {
|
|
34
|
+
case 'docusaurus':
|
|
35
|
+
await scrapePageWrapper(url, overwrite, scrapeDocusaurusPage, {
|
|
36
|
+
version: frameworkHint.version,
|
|
37
|
+
});
|
|
43
38
|
break;
|
|
44
|
-
case
|
|
45
|
-
await scrapePageWrapper(
|
|
39
|
+
case 'gitbook':
|
|
40
|
+
await scrapePageWrapper(url, overwrite, scrapeGitBookPage, { puppeteer: true });
|
|
46
41
|
break;
|
|
47
|
-
case
|
|
48
|
-
await scrapePageWrapper(
|
|
42
|
+
case 'readme':
|
|
43
|
+
await scrapePageWrapper(url, overwrite, scrapeReadMePage);
|
|
49
44
|
break;
|
|
50
|
-
case
|
|
51
|
-
await scrapePageWrapper(
|
|
45
|
+
case 'intercom':
|
|
46
|
+
await scrapePageWrapper(url, overwrite, scrapeIntercomPage);
|
|
52
47
|
break;
|
|
53
48
|
}
|
|
54
49
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapePageCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapePageCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;
|
|
1
|
+
{"version":3,"file":"scrapePageCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapePageCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,eAAe,EAA4B,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAC7F,OAAO,EAAE,UAAU,EAAgB,MAAM,iBAAiB,CAAC;AAC3D,OAAO,EAAE,kBAAkB,EAAE,MAAM,gDAAgD,CAAC;AACpF,OAAO,EAAE,oBAAoB,EAAE,MAAM,yCAAyC,CAAC;AAC/E,OAAO,EAAE,iBAAiB,EAAE,MAAM,sCAAsC,CAAC;AACzE,OAAO,EAAE,gBAAgB,EAAE,MAAM,qCAAqC,CAAC;AAEvE,SAAS,iBAAiB,CAAC,SAAgC;IACzD,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,OAAO,CAAC,GAAG,CACT,2FAA2F,UAAU,CAAC,IAAI,CACxG,IAAI,CACL,EAAE,CACJ,CAAC;QACF,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACzB,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,GAAW,EACX,SAAkB,EAClB,UAAwB,EACxB,OAAmD;IAEnD,IAAI,IAAY,CAAC;IACjB,IAAI,OAAO,EAAE,SAAS,EAAE,CAAC;QACvB,IAAI,GAAG,MAAM,oBAAoB,CAAC,GAAG,CAAC,CAAC;IACzC,CAAC;SAAM,CAAC;QACN,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACjC,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IAClB,CAAC;IACD,MAAM,UAAU,CAAC,UAAU,EAAE,GAAG,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IACrE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAC3C,GAAW,EACX,SAAkB,EAClB,aAA4B;IAE5B,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACjC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,aAAa,GAAG,aAAa,CAAC,SAAS,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;IAEhF,iBAAiB,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;IAE3C,OAAO,CAAC,GAAG,CAAC,sBAAsB,GAAG,aAAa,CAAC,SAAS,CAAC,CAAC;IAE9D,QAAQ,aAAa,CAAC,SAAS,EAAE,CAAC;QAChC,KAAK,YAAY;YACf,MAAM,iBAAiB,CAAC,GAAG,EAAE,SAAS,EAAE,oBAAoB,EAAE;gBAC5D,OAAO,EAAE,aAAa,CAAC,OAAO;aAC/B,CAAC,CAAC;YACH,MAAM;QACR,KAAK,SAAS;YACZ,MAAM,iBAAiB,CAAC,GAAG,EAAE,SAAS,EAAE,iBAAiB,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YAChF,MAAM;QACR,KAAK,QAAQ;YACX,MAAM,iBAAiB,CAAC,GAAG,EAAE,SAAS,EAAE,gBAAgB,CAAC,CAAC;YAC1D,MAAM;QACR,KAAK,UAAU;YACb,MAAM,iBAAiB,CAAC,GAAG,EAAE,SAAS,EAAE,kBAAkB,CAAC,CAAC;YAC5D,MAAM;IACV,CAAC;AACH,CAAC"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { FrameworkHint } from './detectFramework.js';
|
|
2
2
|
import { ScrapeSectionFn } from './scrapeSection.js';
|
|
3
|
-
export declare function scrapeSectionAxiosWrapper(
|
|
4
|
-
export declare function scrapeDocusaurusSectionCommand(
|
|
5
|
-
export declare function scrapeGitbookSectionCommand(
|
|
6
|
-
export declare function scrapeSectionAutomatically(
|
|
3
|
+
export declare function scrapeSectionAxiosWrapper(url: string, overwrite: boolean, scrapeFunc: ScrapeSectionFn): Promise<void>;
|
|
4
|
+
export declare function scrapeDocusaurusSectionCommand(url: string, overwrite: boolean, version: string | undefined): Promise<void>;
|
|
5
|
+
export declare function scrapeGitbookSectionCommand(url: string, overwrite: boolean): Promise<void>;
|
|
6
|
+
export declare function scrapeSectionAutomatically(url: string, overwrite: boolean, frameworkHint: FrameworkHint): Promise<void>;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import axios from 'axios';
|
|
2
2
|
import { startBrowser } from '../browser.js';
|
|
3
|
-
import {
|
|
4
|
-
import { detectFramework
|
|
3
|
+
import { getOrigin } from '../util.js';
|
|
4
|
+
import { detectFramework } from './detectFramework.js';
|
|
5
5
|
import { scrapeSection } from './scrapeSection.js';
|
|
6
6
|
import { scrapeIntercomSection } from './site-scrapers/Intercom/scrapeIntercomSection.js';
|
|
7
7
|
import openNestedDocusaurusMenus from './site-scrapers/openNestedDocusaurusMenus.js';
|
|
@@ -9,51 +9,48 @@ import openNestedGitbookMenus from './site-scrapers/openNestedGitbookMenus.js';
|
|
|
9
9
|
import { scrapeDocusaurusSection } from './site-scrapers/scrapeDocusaurusSection.js';
|
|
10
10
|
import { scrapeGitBookSection } from './site-scrapers/scrapeGitBookSection.js';
|
|
11
11
|
import { scrapeReadMeSection } from './site-scrapers/scrapeReadMeSection.js';
|
|
12
|
-
export async function scrapeSectionAxiosWrapper(
|
|
13
|
-
const
|
|
14
|
-
const res = await axios.get(href);
|
|
12
|
+
export async function scrapeSectionAxiosWrapper(url, overwrite, scrapeFunc) {
|
|
13
|
+
const res = await axios.get(url);
|
|
15
14
|
const html = res.data;
|
|
16
|
-
await scrapeSection(scrapeFunc, html, getOrigin(
|
|
15
|
+
await scrapeSection(scrapeFunc, html, getOrigin(url), overwrite, undefined);
|
|
17
16
|
process.exit(0);
|
|
18
17
|
}
|
|
19
|
-
export async function scrapeDocusaurusSectionCommand(
|
|
18
|
+
export async function scrapeDocusaurusSectionCommand(url, overwrite, version // "1" | "2" | "3"
|
|
20
19
|
) {
|
|
21
|
-
await scrapeSectionOpeningAllNested(
|
|
20
|
+
await scrapeSectionOpeningAllNested(url, overwrite, openNestedDocusaurusMenus, scrapeDocusaurusSection, version);
|
|
22
21
|
}
|
|
23
|
-
export async function scrapeGitbookSectionCommand(
|
|
24
|
-
await scrapeSectionOpeningAllNested(
|
|
22
|
+
export async function scrapeGitbookSectionCommand(url, overwrite) {
|
|
23
|
+
await scrapeSectionOpeningAllNested(url, overwrite, openNestedGitbookMenus, scrapeGitBookSection);
|
|
25
24
|
}
|
|
26
|
-
async function scrapeSectionOpeningAllNested(
|
|
27
|
-
const href = getHrefFromArgs(argv);
|
|
25
|
+
async function scrapeSectionOpeningAllNested(url, overwrite, openLinks, scrapeFunc, version) {
|
|
28
26
|
const browser = await startBrowser();
|
|
29
27
|
const page = await browser.newPage();
|
|
30
|
-
await page.goto(
|
|
28
|
+
await page.goto(url, {
|
|
31
29
|
waitUntil: 'networkidle2',
|
|
32
30
|
});
|
|
33
31
|
const html = await openLinks(page);
|
|
34
|
-
browser.close();
|
|
35
|
-
await scrapeSection(scrapeFunc, html, getOrigin(
|
|
32
|
+
void browser.close();
|
|
33
|
+
await scrapeSection(scrapeFunc, html, getOrigin(url), overwrite, version);
|
|
36
34
|
process.exit(0);
|
|
37
35
|
}
|
|
38
|
-
export async function scrapeSectionAutomatically(
|
|
39
|
-
const
|
|
40
|
-
const res = await axios.get(href);
|
|
36
|
+
export async function scrapeSectionAutomatically(url, overwrite, frameworkHint) {
|
|
37
|
+
const res = await axios.get(url);
|
|
41
38
|
const html = res.data;
|
|
42
|
-
|
|
43
|
-
validateFramework(framework);
|
|
44
|
-
console.log('Detected framework: ' + framework);
|
|
45
|
-
switch (framework) {
|
|
46
|
-
case
|
|
47
|
-
await scrapeDocusaurusSectionCommand(
|
|
39
|
+
frameworkHint = frameworkHint.framework ? frameworkHint : detectFramework(html);
|
|
40
|
+
validateFramework(frameworkHint.framework);
|
|
41
|
+
console.log('Detected framework: ' + frameworkHint.framework);
|
|
42
|
+
switch (frameworkHint.framework) {
|
|
43
|
+
case 'docusaurus':
|
|
44
|
+
await scrapeDocusaurusSectionCommand(url, overwrite, frameworkHint.version);
|
|
48
45
|
break;
|
|
49
|
-
case
|
|
50
|
-
await scrapeGitbookSectionCommand(
|
|
46
|
+
case 'gitbook':
|
|
47
|
+
await scrapeGitbookSectionCommand(url, overwrite);
|
|
51
48
|
break;
|
|
52
|
-
case
|
|
53
|
-
await scrapeSectionAxiosWrapper(
|
|
49
|
+
case 'readme':
|
|
50
|
+
await scrapeSectionAxiosWrapper(url, overwrite, scrapeReadMeSection);
|
|
54
51
|
break;
|
|
55
|
-
case
|
|
56
|
-
await scrapeSectionAxiosWrapper(
|
|
52
|
+
case 'intercom':
|
|
53
|
+
await scrapeSectionAxiosWrapper(url, overwrite, scrapeIntercomSection);
|
|
57
54
|
break;
|
|
58
55
|
}
|
|
59
56
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeSectionCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapeSectionCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAG1B,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"scrapeSectionCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapeSectionCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAG1B,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACvC,OAAO,EAAE,eAAe,EAA4B,MAAM,sBAAsB,CAAC;AACjF,OAAO,EAAmB,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACpE,OAAO,EAAE,qBAAqB,EAAE,MAAM,mDAAmD,CAAC;AAC1F,OAAO,yBAAyB,MAAM,8CAA8C,CAAC;AACrF,OAAO,sBAAsB,MAAM,2CAA2C,CAAC;AAC/E,OAAO,EAAE,uBAAuB,EAAE,MAAM,4CAA4C,CAAC;AACrF,OAAO,EAAE,oBAAoB,EAAE,MAAM,yCAAyC,CAAC;AAC/E,OAAO,EAAE,mBAAmB,EAAE,MAAM,wCAAwC,CAAC;AAE7E,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAC7C,GAAW,EACX,SAAkB,EAClB,UAA2B;IAE3B,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACjC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,MAAM,aAAa,CAAC,UAAU,EAAE,IAAI,EAAE,SAAS,CAAC,GAAG,CAAC,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;IAC5E,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,8BAA8B,CAClD,GAAW,EACX,SAAkB,EAClB,OAA2B,CAAC,kBAAkB;;IAE9C,MAAM,6BAA6B,CACjC,GAAG,EACH,SAAS,EACT,yBAAyB,EACzB,uBAAuB,EACvB,OAAO,CACR,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,GAAW,EAAE,SAAkB;IAC/E,MAAM,6BAA6B,CAAC,GAAG,EAAE,SAAS,EAAE,sBAAsB,EAAE,oBAAoB,CAAC,CAAC;AACpG,CAAC;AAED,KAAK,UAAU,6BAA6B,CAC1C,GAAW,EACX,SAAkB,EAClB,SAA0C,EAC1C,UAA2B,EAC3B,OAAgB;IAEhB,MAAM,OAAO,GAAG,MAAM,YAAY,EAAE,CAAC;IACrC,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;IACrC,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE;QACnB,SAAS,EAAE,cAAc;KAC1B,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,CAAC;IACnC,KAAK,OAAO,CAAC,KAAK,EAAE,CAAC;IACrB,MAAM,aAAa,CAAC,UAAU,EAAE,IAAI,EAAE,SAAS,CAAC,GAAG,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;IAC1E,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAC9C,GAAW,EACX,SAAkB,EAClB,aAA4B;IAE5B,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACjC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,aAAa,GAAG,aAAa,CAAC,SAAS,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;IAEhF,iBAAiB,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;IAC3C,OAAO,CAAC,GAAG,CAAC,sBAAsB,GAAG,aAAa,CAAC,SAAS,CAAC,CAAC;IAE9D,QAAQ,aAAa,CAAC,SAAS,EAAE,CAAC;QAChC,KAAK,YAAY;YACf,MAAM,8BAA8B,CAAC,GAAG,EAAE,SAAS,EAAE,aAAa,CAAC,OAAO,CAAC,CAAC;YAC5E,MAAM;QACR,KAAK,SAAS;YACZ,MAAM,2BAA2B,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;YAClD,MAAM;QACR,KAAK,QAAQ;YACX,MAAM,yBAAyB,CAAC,GAAG,EAAE,SAAS,EAAE,mBAAmB,CAAC,CAAC;YACrE,MAAM;QACR,KAAK,UAAU;YACb,MAAM,yBAAyB,CAAC,GAAG,EAAE,SAAS,EAAE,qBAAqB,CAAC,CAAC;YACvE,MAAM;IACV,CAAC;AACH,CAAC;AAED,SAAS,iBAAiB,CAAC,SAAgC;IACzD,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,OAAO,CAAC,GAAG,CACT,4GAA4G,CAC7G,CAAC;QACF,OAAO,CAAC,IAAI,EAAE,CAAC;IACjB,CAAC;AACH,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeIntercomPage.js","sourceRoot":"","sources":["../../../../src/scraping/site-scrapers/Intercom/scrapeIntercomPage.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"scrapeIntercomPage.js","sourceRoot":"","sources":["../../../../src/scraping/site-scrapers/Intercom/scrapeIntercomPage.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAEtD,OAAO,iBAAiB,MAAM,4BAA4B,CAAC;AAC3D,OAAO,iBAAiB,MAAM,4BAA4B,CAAC;AAE3D,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAkB,EAClB,CAAqB,CAAC,UAAU;;IAEhC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,cAAc,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,KAAK,EAAE,CAAC;IAC3C,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAC3C,MAAM,WAAW,GAAG,CAAC,CAAC,gBAAgB,EAAE,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;IAE/E,MAAM,OAAO,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC;IACrC,MAAM,WAAW,GAAG,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAEpC,MAAM,eAAe,GAAG,MAAM,iBAAiB,CAC7C,CAAC,EACD,OAAO,EACP,MAAM,EACN,YAAY,EACZ,SAAS,EACT,SAAS,CACV,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,gBAAgB,CAAC,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;IAC5D,IAAI,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAE1C,2BAA2B;IAC3B,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IAE7C,qDAAqD;IACrD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAE3C,iCAAiC;IACjC,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAE/C,gEAAgE;IAChE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;IACjE,IAAI,eAAe,EAAE,CAAC;QACpB,QAAQ,GAAG,iBAAiB,CAAC,eAAe,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAClE,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC;AAC1C,CAAC"}
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import axios from 'axios';
|
|
2
|
-
import cheerio from 'cheerio';
|
|
2
|
+
import * as cheerio from 'cheerio';
|
|
3
3
|
import downloadLogoImage from '../../downloadLogoImage.js';
|
|
4
4
|
import { scrapeGettingFileNameFromUrl } from '../../scrapeGettingFileNameFromUrl.js';
|
|
5
5
|
import { scrapeIntercomPage } from './scrapeIntercomPage.js';
|
|
6
6
|
export async function scrapeIntercomSection(html, origin, cliDir, imageBaseDir, overwrite, version) {
|
|
7
7
|
let $ = cheerio.load(html);
|
|
8
8
|
const logoSrc = $('.header__logo img').first().attr('src');
|
|
9
|
-
downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite);
|
|
9
|
+
void downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite);
|
|
10
10
|
const collectionsLink = $('.section .g__space a');
|
|
11
11
|
const collectionsMap = collectionsLink.toArray().map(async (s) => {
|
|
12
12
|
const href = $(s).attr('href');
|