mintlify 1.1.1 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/index.js +3 -4
- package/bin/index.js.map +1 -1
- package/bin/local-preview/helper-commands/clearCommand.js +5 -4
- package/bin/local-preview/helper-commands/clearCommand.js.map +1 -1
- package/bin/local-preview/index.js +43 -4
- package/bin/local-preview/index.js.map +1 -1
- package/bin/local-preview/utils/listener.js +1 -1
- package/bin/local-preview/utils/listener.js.map +1 -1
- package/bin/scraping/downloadAllImages.js +4 -0
- package/bin/scraping/downloadAllImages.js.map +1 -1
- package/bin/scraping/scrapeSectionCommands.js +18 -46
- package/bin/scraping/scrapeSectionCommands.js.map +1 -1
- package/bin/scraping/site-scrapers/alternateGroupTitle.js +9 -0
- package/bin/scraping/site-scrapers/alternateGroupTitle.js.map +1 -0
- package/bin/scraping/site-scrapers/getLinksRecursively.js +7 -2
- package/bin/scraping/site-scrapers/getLinksRecursively.js.map +1 -1
- package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js +30 -0
- package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js.map +1 -0
- package/bin/scraping/site-scrapers/openNestedGitbookMenus.js +37 -0
- package/bin/scraping/site-scrapers/openNestedGitbookMenus.js.map +1 -0
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js +20 -21
- package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js.map +1 -1
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js +2 -9
- package/bin/scraping/site-scrapers/scrapeGitBookSection.js.map +1 -1
- package/package.json +1 -1
- package/src/index.ts +8 -3
- package/src/local-preview/helper-commands/clearCommand.ts +11 -4
- package/src/local-preview/index.ts +48 -3
- package/src/local-preview/utils/listener.ts +1 -1
- package/src/scraping/downloadAllImages.ts +5 -0
- package/src/scraping/scrapeSectionCommands.ts +34 -60
- package/src/scraping/site-scrapers/alternateGroupTitle.ts +8 -0
- package/src/scraping/site-scrapers/getLinksRecursively.ts +7 -2
- package/src/scraping/site-scrapers/openNestedDocusaurusMenus.ts +42 -0
- package/src/scraping/site-scrapers/openNestedGitbookMenus.ts +49 -0
- package/src/scraping/site-scrapers/scrapeDocusaurusSection.ts +23 -21
- package/src/scraping/site-scrapers/scrapeGitBookSection.ts +2 -10
package/bin/index.js
CHANGED
|
@@ -7,8 +7,7 @@ import { scrapePageAutomatically, scrapePageWrapper, } from "./scraping/scrapePa
|
|
|
7
7
|
import { scrapeDocusaurusPage } from "./scraping/site-scrapers/scrapeDocusaurusPage.js";
|
|
8
8
|
import { scrapeGitBookPage } from "./scraping/site-scrapers/scrapeGitBookPage.js";
|
|
9
9
|
import { scrapeReadMePage } from "./scraping/site-scrapers/scrapeReadMePage.js";
|
|
10
|
-
import { scrapeSectionAutomatically, scrapeSectionAxiosWrapper, scrapeGitbookSectionCommand, } from "./scraping/scrapeSectionCommands.js";
|
|
11
|
-
import { scrapeDocusaurusSection } from "./scraping/site-scrapers/scrapeDocusaurusSection.js";
|
|
10
|
+
import { scrapeSectionAutomatically, scrapeSectionAxiosWrapper, scrapeGitbookSectionCommand, scrapeDocusaurusSectionCommand, } from "./scraping/scrapeSectionCommands.js";
|
|
12
11
|
import { scrapeReadMeSection } from "./scraping/site-scrapers/scrapeReadMeSection.js";
|
|
13
12
|
import dev from "./local-preview/index.js";
|
|
14
13
|
import installDepsCommand from "./local-preview/helper-commands/installDepsCommand.js";
|
|
@@ -19,7 +18,7 @@ yargs(hideBin(process.argv))
|
|
|
19
18
|
await dev();
|
|
20
19
|
})
|
|
21
20
|
.command("install", "Install dependencies for local Mintlify", () => { }, installDepsCommand)
|
|
22
|
-
.command("clear", "Clear cache", () => { }, clearCommand)
|
|
21
|
+
.command("clear", "Clear cache", () => { }, (args) => clearCommand(args))
|
|
23
22
|
.command("init", "Generate a mintlify template", () => { }, initCommand)
|
|
24
23
|
.command("page", "Generate a new page", () => { }, generatePageTemplate)
|
|
25
24
|
.command("scrape-page [url]", "Scrapes a page", () => { }, async (argv) => {
|
|
@@ -38,7 +37,7 @@ yargs(hideBin(process.argv))
|
|
|
38
37
|
await scrapeSectionAutomatically(argv);
|
|
39
38
|
})
|
|
40
39
|
.command("scrape-docusaurus-section [url]", "Scrapes the Docusaurus section", () => { }, async (argv) => {
|
|
41
|
-
await
|
|
40
|
+
await scrapeDocusaurusSectionCommand(argv);
|
|
42
41
|
})
|
|
43
42
|
.command("scrape-gitbook-section [url]", "Scrapes the Gitbook section", () => { }, async (argv) => {
|
|
44
43
|
await scrapeGitbookSectionCommand(argv);
|
package/bin/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AACxC,OAAO,WAAW,MAAM,yBAAyB,CAAC;AAClD,OAAO,oBAAoB,MAAM,mBAAmB,CAAC;AACrD,OAAO,EACL,uBAAuB,EACvB,iBAAiB,GAClB,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAAE,oBAAoB,EAAE,MAAM,kDAAkD,CAAC;AACxF,OAAO,EAAE,iBAAiB,EAAE,MAAM,+CAA+C,CAAC;AAClF,OAAO,EAAE,gBAAgB,EAAE,MAAM,8CAA8C,CAAC;AAChF,OAAO,EACL,0BAA0B,EAC1B,yBAAyB,EACzB,2BAA2B,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AACxC,OAAO,WAAW,MAAM,yBAAyB,CAAC;AAClD,OAAO,oBAAoB,MAAM,mBAAmB,CAAC;AACrD,OAAO,EACL,uBAAuB,EACvB,iBAAiB,GAClB,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAAE,oBAAoB,EAAE,MAAM,kDAAkD,CAAC;AACxF,OAAO,EAAE,iBAAiB,EAAE,MAAM,+CAA+C,CAAC;AAClF,OAAO,EAAE,gBAAgB,EAAE,MAAM,8CAA8C,CAAC;AAChF,OAAO,EACL,0BAA0B,EAC1B,yBAAyB,EACzB,2BAA2B,EAC3B,8BAA8B,GAC/B,MAAM,qCAAqC,CAAC;AAC7C,OAAO,EAAE,mBAAmB,EAAE,MAAM,iDAAiD,CAAC;AACtF,OAAO,GAAG,MAAM,0BAA0B,CAAC;AAC3C,OAAO,kBAAkB,MAAM,uDAAuD,CAAC;AACvF,OAAO,YAAY,MAAM,iDAAiD,CAAC;AAE3E,oHAAoH;AACpH,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;KACzB,OAAO,CACN,KAAK,EACL,8DAA8D,EAC9D,GAAG,EAAE,GAAE,CAAC,EACR,KAAK,IAAI,EAAE;IACT,MAAM,GAAG,EAAE,CAAC;AACd,CAAC,CACF;KACA,OAAO,CACN,SAAS,EACT,yCAAyC,EACzC,GAAG,EAAE,GAAE,CAAC,EACR,kBAAkB,CACnB;KACA,OAAO,CACN,OAAO,EACP,aAAa,EACb,GAAG,EAAE,GAAE,CAAC,EACR,CAAC,IAAI,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,CAC7B;KACA,OAAO,CAAC,MAAM,EAAE,8BAA8B,EAAE,GAAG,EAAE,GAAE,CAAC,EAAE,WAAW,CAAC;KACtE,OAAO,CAAC,MAAM,EAAE,qBAAqB,EAAE,GAAG,EAAE,GAAE,CAAC,EAAE,oBAAoB,CAAC;KACtE,OAAO,CACN,mBAAmB,EACnB,gBAAgB,EAChB,GAAG,EAAE,GAAE,CAAC,EACR,KAAK,EAAE,IAAI,EAAE,EAAE;IACb,MAAM,uBAAuB,CAAC,IAAI,CAAC,CAAC;AACtC,CAAC,CACF;KACA,OAAO,CACN,8BAA8B,EAC9B,2BAA2B,EAC3B,GAAG,EAAE,GAAE,CAAC,EACR,KAAK,EAAE,IAAI,EAAE,EAAE;IACb,MAAM,iBAAiB,CAAC,IAAI,EAAE,oBAAoB,CAAC,CAAC;AACtD,CAAC,CACF;KACA,OAAO,CACN,2BAA2B,EAC3B,wBAAwB,EACxB,GAAG,EAAE,GAAE,CAAC,EACR,KAAK,EAAE,IAAI,EAAE,EAAE;IACb,MAAM,iBAAiB,CAAC,IAAI,EAAE,iBAAiB,CAAC,CAAC;AACnD,CAAC,CACF;KACA,OAAO,CACN,0BAA0B,EAC1B,uBAAuB,EACvB,GAAG,EAAE,GAAE,CAAC,EACR,KAAK,EAAE,IAAI,EAAE,EAAE;IACb,MAAM,iBAAiB,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAAC;AAClD,CAAC,CACF;KACA,OAAO,CACN,sBAAsB,EACtB,iCAAiC,EACjC,GAAG,EAAE,GAAE,CAAC,EACR,KAAK,EAAE,IAAI,EAAE,EAAE;IACb,MAAM,0BAA0B,CAAC,IAAI,CAAC,CAAC;AACzC,CAAC,CACF;KACA,OAAO,CACN,iCAAiC,EACjC,gCAAgC,EAChC,GAAG,EAAE,GAAE,CAAC,EACR,KAAK,EAAE,IAAI,EAAE,EAAE;IACb,MAAM,8BAA8B,CAAC,IAAI,CAAC,CAAC;AAC7C,CAAC,CACF;KACA,OAAO,CACN,8BAA8B,EAC9B,6BAA6B,EAC7B,GAAG,EAAE,GAAE,CAAC,EACR,KAAK,EAAE,IAAI,EAAE,EAAE;IACb,MAAM,2BAA2B,CAAC,IAAI,CAAC,CAAC;AAC1C,CAAC,CACF;KACA,OAAO,CACN,6BAA6B,EAC7B,4BAA4B,EAC5B,GAAG,EAAE,GAAE,CAAC,EACR,KAAK,EAAE,IAAI,EAAE,EAAE;IACb,MAAM,yBAAyB,CAAC,IAAI,EAAE,mBAAmB,CAAC,CAAC;AAC7D,CAAC,CACF;KAEA,KAAK,EAAE,CAAC"}
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import inquirer from "inquirer";
|
|
2
2
|
import shell from "shelljs";
|
|
3
3
|
import { CLIENT_PATH } from "../../constants.js";
|
|
4
|
-
const clearCommand = () => {
|
|
4
|
+
const clearCommand = async (args) => {
|
|
5
|
+
const all = args === null || args === void 0 ? void 0 : args.a;
|
|
5
6
|
shell.cd(CLIENT_PATH);
|
|
6
|
-
shell.exec(
|
|
7
|
-
inquirer
|
|
7
|
+
shell.exec(`git clean -d -x ${all ? "" : "-e node_modules "}-e last-invocation-path -n`);
|
|
8
|
+
await inquirer
|
|
8
9
|
.prompt([
|
|
9
10
|
{
|
|
10
11
|
type: "confirm",
|
|
@@ -15,7 +16,7 @@ const clearCommand = () => {
|
|
|
15
16
|
])
|
|
16
17
|
.then(({ confirm }) => {
|
|
17
18
|
if (confirm) {
|
|
18
|
-
shell.exec(
|
|
19
|
+
shell.exec(`git clean -d -x ${all ? "" : "-e node_modules "}-e last-invocation-path -f`);
|
|
19
20
|
}
|
|
20
21
|
else {
|
|
21
22
|
console.log("Clear cancelled.");
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"clearCommand.js","sourceRoot":"","sources":["../../../src/local-preview/helper-commands/clearCommand.ts"],"names":[],"mappings":"AAAA,OAAO,QAAQ,MAAM,UAAU,CAAC;AAChC,OAAO,KAAK,MAAM,SAAS,CAAC;AAC5B,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAEjD,MAAM,YAAY,GAAG,
|
|
1
|
+
{"version":3,"file":"clearCommand.js","sourceRoot":"","sources":["../../../src/local-preview/helper-commands/clearCommand.ts"],"names":[],"mappings":"AAAA,OAAO,QAAQ,MAAM,UAAU,CAAC;AAChC,OAAO,KAAK,MAAM,SAAS,CAAC;AAC5B,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAEjD,MAAM,YAAY,GAAG,KAAK,EAAE,IAAI,EAAE,EAAE;IAClC,MAAM,GAAG,GAAG,IAAI,aAAJ,IAAI,uBAAJ,IAAI,CAAE,CAAC,CAAC;IACpB,KAAK,CAAC,EAAE,CAAC,WAAW,CAAC,CAAC;IACtB,KAAK,CAAC,IAAI,CACR,mBAAmB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,kBAAkB,4BAA4B,CAC7E,CAAC;IACF,MAAM,QAAQ;SACX,MAAM,CAAC;QACN;YACE,IAAI,EAAE,SAAS;YACf,IAAI,EAAE,SAAS;YACf,OAAO,EAAE,8BAA8B;YACvC,OAAO,EAAE,IAAI;SACd;KACF,CAAC;SACD,IAAI,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE;QACpB,IAAI,OAAO,EAAE;YACX,KAAK,CAAC,IAAI,CACR,mBACE,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,kBACb,4BAA4B,CAC7B,CAAC;SACH;aAAM;YACL,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;SACjC;IACH,CAAC,CAAC,CAAC;AACP,CAAC,CAAC;AAEF,eAAe,YAAY,CAAC"}
|
|
@@ -2,16 +2,31 @@ import Chalk from "chalk";
|
|
|
2
2
|
import open from "open";
|
|
3
3
|
import { promises as _promises } from "fs";
|
|
4
4
|
import fse, { pathExists } from "fs-extra";
|
|
5
|
+
import inquirer from "inquirer";
|
|
5
6
|
import { isInternetAvailable } from "is-internet-available";
|
|
6
7
|
import path from "path";
|
|
7
8
|
import shell from "shelljs";
|
|
8
9
|
import categorizeFiles from "./utils/categorizeFiles.js";
|
|
9
|
-
import { CMD_EXEC_PATH, CLIENT_PATH, HOME_DIR, DOT_MINTLIFY, } from "../constants.js";
|
|
10
|
+
import { CMD_EXEC_PATH, CLIENT_PATH, HOME_DIR, DOT_MINTLIFY, LAST_INVOCATION_PATH_FILE_LOCATION, } from "../constants.js";
|
|
10
11
|
import { injectFavicons } from "./utils/injectFavicons.js";
|
|
11
12
|
import listener from "./utils/listener.js";
|
|
12
13
|
import { createPage, createMetadataFileFromPages } from "./utils/metadata.js";
|
|
13
14
|
import { updateConfigFile } from "./utils/mintConfigFile.js";
|
|
14
15
|
import { buildLogger, ensureYarn } from "../util.js";
|
|
16
|
+
import clearCommand from "./helper-commands/clearCommand.js";
|
|
17
|
+
const saveInvocationPath = async () => {
|
|
18
|
+
await fse.outputFile(LAST_INVOCATION_PATH_FILE_LOCATION, CMD_EXEC_PATH);
|
|
19
|
+
};
|
|
20
|
+
const cleanOldFiles = async () => {
|
|
21
|
+
const lastInvocationPathExists = await pathExists(LAST_INVOCATION_PATH_FILE_LOCATION);
|
|
22
|
+
if (!lastInvocationPathExists)
|
|
23
|
+
return;
|
|
24
|
+
const lastInvocationPath = (await readFile(LAST_INVOCATION_PATH_FILE_LOCATION)).toString();
|
|
25
|
+
if (lastInvocationPath !== CMD_EXEC_PATH) {
|
|
26
|
+
// clean if invoked in new location
|
|
27
|
+
await clearCommand({});
|
|
28
|
+
}
|
|
29
|
+
};
|
|
15
30
|
const { readFile } = _promises;
|
|
16
31
|
const copyFiles = async (logger) => {
|
|
17
32
|
logger.start("Syncing doc files...");
|
|
@@ -69,8 +84,32 @@ const shellExec = (cmd) => {
|
|
|
69
84
|
const nodeModulesExists = async () => {
|
|
70
85
|
return pathExists(path.join(DOT_MINTLIFY, "mint", "client", "node_modules"));
|
|
71
86
|
};
|
|
87
|
+
const promptForYarn = async () => {
|
|
88
|
+
const yarnInstalled = shell.which("yarn");
|
|
89
|
+
if (!yarnInstalled) {
|
|
90
|
+
await inquirer
|
|
91
|
+
.prompt([
|
|
92
|
+
{
|
|
93
|
+
type: "confirm",
|
|
94
|
+
name: "confirm",
|
|
95
|
+
message: "yarn must be globally installed. Install yarn?",
|
|
96
|
+
default: true,
|
|
97
|
+
},
|
|
98
|
+
])
|
|
99
|
+
.then(({ confirm }) => {
|
|
100
|
+
if (confirm) {
|
|
101
|
+
shell.exec("npm install --global yarn");
|
|
102
|
+
}
|
|
103
|
+
else {
|
|
104
|
+
console.log("Installation cancelled.");
|
|
105
|
+
}
|
|
106
|
+
});
|
|
107
|
+
}
|
|
108
|
+
};
|
|
72
109
|
const dev = async () => {
|
|
73
110
|
shell.cd(HOME_DIR);
|
|
111
|
+
await cleanOldFiles();
|
|
112
|
+
await promptForYarn();
|
|
74
113
|
const logger = buildLogger("Starting a local Mintlify instance...");
|
|
75
114
|
await fse.ensureDir(path.join(DOT_MINTLIFY, "mint"));
|
|
76
115
|
shell.cd(path.join(HOME_DIR, ".mintlify", "mint"));
|
|
@@ -103,8 +142,7 @@ const dev = async () => {
|
|
|
103
142
|
runYarn = false;
|
|
104
143
|
}
|
|
105
144
|
shell.cd(CLIENT_PATH);
|
|
106
|
-
runYarn
|
|
107
|
-
if (internet && runYarn) {
|
|
145
|
+
if (internet && (runYarn || !(await nodeModulesExists()))) {
|
|
108
146
|
if (firstInstallation) {
|
|
109
147
|
logger.succeed("Local Mintlify instance initialized");
|
|
110
148
|
}
|
|
@@ -126,12 +164,13 @@ const dev = async () => {
|
|
|
126
164
|
`);
|
|
127
165
|
process.exit(1);
|
|
128
166
|
}
|
|
167
|
+
await saveInvocationPath();
|
|
129
168
|
await copyFiles(logger);
|
|
130
169
|
run();
|
|
131
170
|
};
|
|
132
171
|
const run = () => {
|
|
133
172
|
shell.cd(CLIENT_PATH);
|
|
134
|
-
console.log(`🌿 ${Chalk.green("Navigate to your local preview at
|
|
173
|
+
console.log(`🌿 ${Chalk.green("Navigate to your local preview at http://localhost:3000")}`);
|
|
135
174
|
shell.exec("npm run dev", { async: true });
|
|
136
175
|
open("http://localhost:3000");
|
|
137
176
|
listener();
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/local-preview/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,QAAQ,IAAI,SAAS,EAAE,MAAM,IAAI,CAAC;AAC3C,OAAO,GAAG,EAAE,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAC3C,OAAO,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAC5D,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,SAAS,CAAC;AAC5B,OAAO,eAAe,MAAM,4BAA4B,CAAC;AACzD,OAAO,EACL,aAAa,EACb,WAAW,EACX,QAAQ,EACR,YAAY,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/local-preview/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,QAAQ,IAAI,SAAS,EAAE,MAAM,IAAI,CAAC;AAC3C,OAAO,GAAG,EAAE,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAC3C,OAAO,QAAQ,MAAM,UAAU,CAAC;AAChC,OAAO,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAC5D,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,SAAS,CAAC;AAC5B,OAAO,eAAe,MAAM,4BAA4B,CAAC;AACzD,OAAO,EACL,aAAa,EACb,WAAW,EACX,QAAQ,EACR,YAAY,EACZ,kCAAkC,GACnC,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,QAAQ,MAAM,qBAAqB,CAAC;AAC3C,OAAO,EAAE,UAAU,EAAE,2BAA2B,EAAE,MAAM,qBAAqB,CAAC;AAC9E,OAAO,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAC7D,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AACrD,OAAO,YAAY,MAAM,mCAAmC,CAAC;AAE7D,MAAM,kBAAkB,GAAG,KAAK,IAAI,EAAE;IACpC,MAAM,GAAG,CAAC,UAAU,CAAC,kCAAkC,EAAE,aAAa,CAAC,CAAC;AAC1E,CAAC,CAAC;AAEF,MAAM,aAAa,GAAG,KAAK,IAAI,EAAE;IAC/B,MAAM,wBAAwB,GAAG,MAAM,UAAU,CAC/C,kCAAkC,CACnC,CAAC;IACF,IAAI,CAAC,wBAAwB;QAAE,OAAO;IACtC,MAAM,kBAAkB,GAAG,CACzB,MAAM,QAAQ,CAAC,kCAAkC,CAAC,CACnD,CAAC,QAAQ,EAAE,CAAC;IACb,IAAI,kBAAkB,KAAK,aAAa,EAAE;QACxC,mCAAmC;QACnC,MAAM,YAAY,CAAC,EAAE,CAAC,CAAC;KACxB;AACH,CAAC,CAAC;AAEF,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC;AAE/B,MAAM,SAAS,GAAG,KAAK,EAAE,MAAW,EAAE,EAAE;IACtC,MAAM,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC;IACrC,KAAK,CAAC,EAAE,CAAC,aAAa,CAAC,CAAC;IACxB,MAAM,EAAE,aAAa,EAAE,WAAW,EAAE,aAAa,EAAE,GAAG,MAAM,eAAe,EAAE,CAAC;IAE9E,MAAM,SAAS,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,CAAC;IAEjD,MAAM,iBAAiB,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,EAAE,cAAc,CAAC,CAAC;IACxE,IAAI,UAAU,GAAG,IAAI,CAAC;IACtB,IAAI,aAAa,EAAE;QACjB,MAAM,CAAC,OAAO,CAAC,qBAAqB,CAAC,CAAC;QACtC,MAAM,GAAG,CAAC,UAAU,CAAC,iBAAiB,EAAE,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,EAAE;YAClE,IAAI,EAAE,GAAG;SACV,CAAC,CAAC;QACH,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,CAAC,QAAQ,EAAE,CAAC,CAAC;KACnD;SAAM;QACL,MAAM,GAAG,CAAC,UAAU,CAAC,iBAAiB,EAAE,IAAI,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,CAAC,CAAC;KAC9D;IACD,IAAI,KAAK,GAAG,EAAE,CAAC;IACf,MAAM,UAAU,GAAG,EAAE,CAAC;IACtB,aAAa,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,EAAE;QACjC,UAAU,CAAC,IAAI,CACb,CAAC,KAAK,IAAI,EAAE;YACV,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;YACtD,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,EAAE,OAAO,CAAC,CAAC;YACxD,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;YAEjD,MAAM,GAAG,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;YAC7B,MAAM,GAAG,CAAC,IAAI,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;YAEvC,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,UAAU,CAAC,CAAC;YAC/C,MAAM,UAAU,GAAG,WAAW,CAAC,QAAQ,EAAE,CAAC;YAC1C,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,EAAE,UAAU,EAAE,UAAU,CAAC,CAAC;YAC1D,KAAK,mCACA,KAAK,GACL,IAAI,CACR,CAAC;QACJ,CAAC,CAAC,EAAE,CACL,CAAC;IACJ,CAAC,CAAC,CAAC;IACH,MAAM,kBAAkB,GAAG,EAAE,CAAC;IAC9B,WAAW,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,EAAE;QAC/B,kBAAkB,CAAC,IAAI,CACrB,CAAC,KAAK,IAAI,EAAE;YACV,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;YACtD,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;YACnD,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;YAElD,MAAM,GAAG,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;YAC7B,MAAM,GAAG,CAAC,IAAI,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;QACzC,CAAC,CAAC,EAAE,CACL,CAAC;IACJ,CAAC,CAAC,CAAC;IACH,MAAM,OAAO,CAAC,GAAG,CAAC;QAChB,GAAG,UAAU;QACb,GAAG,kBAAkB;QACrB,MAAM,cAAc,CAAC,SAAS,EAAE,MAAM,CAAC;KACxC,CAAC,CAAC;IACH,2BAA2B,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IAC9C,MAAM,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC;AACrC,CAAC,CAAC;AAEF,MAAM,SAAS,GAAG,CAAC,GAAW,EAAE,EAAE;IAChC,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;AAC3C,CAAC,CAAC;AAEF,MAAM,iBAAiB,GAAG,KAAK,IAAI,EAAE;IACnC,OAAO,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,EAAE,QAAQ,EAAE,cAAc,CAAC,CAAC,CAAC;AAC/E,CAAC,CAAC;AAEF,MAAM,aAAa,GAAG,KAAK,IAAI,EAAE;IAC/B,MAAM,aAAa,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC1C,IAAI,CAAC,aAAa,EAAE;QAClB,MAAM,QAAQ;aACX,MAAM,CAAC;YACN;gBACE,IAAI,EAAE,SAAS;gBACf,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,gDAAgD;gBACzD,OAAO,EAAE,IAAI;aACd;SACF,CAAC;aACD,IAAI,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE;YACpB,IAAI,OAAO,EAAE;gBACX,KAAK,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;aACzC;iBAAM;gBACL,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC,CAAC;aACxC;QACH,CAAC,CAAC,CAAC;KACN;AACH,CAAC,CAAC;AAEF,MAAM,GAAG,GAAG,KAAK,IAAI,EAAE;IACrB,KAAK,CAAC,EAAE,CAAC,QAAQ,CAAC,CAAC;IACnB,MAAM,aAAa,EAAE,CAAC;IACtB,MAAM,aAAa,EAAE,CAAC;IACtB,MAAM,MAAM,GAAG,WAAW,CAAC,uCAAuC,CAAC,CAAC;IACpE,MAAM,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IACrD,KAAK,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC,CAAC;IACnD,IAAI,OAAO,GAAG,IAAI,CAAC;IACnB,MAAM,YAAY,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACxC,IAAI,iBAAiB,GAAG,KAAK,CAAC;IAC9B,MAAM,kBAAkB,GAAG,MAAM,UAAU,CACzC,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,EAAE,MAAM,CAAC,CACxC,CAAC;IACF,IAAI,CAAC,kBAAkB,EAAE;QACvB,iBAAiB,GAAG,IAAI,CAAC;QACzB,IAAI,YAAY,EAAE;YAChB,MAAM,CAAC,KAAK,CAAC,yCAAyC,CAAC,CAAC;YACxD,SAAS,CAAC,UAAU,CAAC,CAAC;YACtB,SAAS,CACP,oEAAoE,CACrE,CAAC;SACH;aAAM;YACL,MAAM,CAAC,IAAI,CACT,mEAAmE,CACpE,CAAC;YACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;SACjB;KACF;IAED,MAAM,QAAQ,GAAG,MAAM,mBAAmB,EAAE,CAAC;IAC7C,IAAI,UAAU,GAAG,IAAI,CAAC;IACtB,IAAI,QAAQ,IAAI,YAAY,EAAE;QAC5B,SAAS,CAAC,qCAAqC,CAAC,CAAC;QACjD,SAAS,CAAC,6CAA6C,CAAC,CAAC;QACzD,UAAU,GAAG,SAAS,CAAC,2BAA2B,CAAC,CAAC,MAAM,CAAC;QAC3D,SAAS,CAAC,sCAAsC,CAAC,CAAC;QAClD,SAAS,CAAC,8BAA8B,CAAC,CAAC;KAC3C;IACD,IAAI,UAAU,KAAK,uBAAuB,EAAE;QAC1C,OAAO,GAAG,KAAK,CAAC;KACjB;IACD,KAAK,CAAC,EAAE,CAAC,WAAW,CAAC,CAAC;IACtB,IAAI,QAAQ,IAAI,CAAC,OAAO,IAAI,CAAC,CAAC,MAAM,iBAAiB,EAAE,CAAC,CAAC,EAAE;QACzD,IAAI,iBAAiB,EAAE;YACrB,MAAM,CAAC,OAAO,CAAC,qCAAqC,CAAC,CAAC;SACvD;QACD,MAAM,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;QACzC,UAAU,CAAC,MAAM,CAAC,CAAC;QACnB,SAAS,CAAC,MAAM,CAAC,CAAC;QAClB,IAAI,iBAAiB,EAAE;YACrB,MAAM,CAAC,OAAO,CAAC,uBAAuB,CAAC,CAAC;SACzC;aAAM;YACL,MAAM,CAAC,OAAO,CAAC,sBAAsB,CAAC,CAAC;SACxC;KACF;IAED,IAAI,CAAC,CAAC,MAAM,iBAAiB,EAAE,CAAC,EAAE;QAChC,MAAM,CAAC,IAAI,CAAC;;;;KAIX,CAAC,CAAC;QACH,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;KACjB;IACD,MAAM,kBAAkB,EAAE,CAAC;IAC3B,MAAM,SAAS,CAAC,MAAM,CAAC,CAAC;IACxB,GAAG,EAAE,CAAC;AACR,CAAC,CAAC;AAEF,MAAM,GAAG,GAAG,GAAG,EAAE;IACf,KAAK,CAAC,EAAE,CAAC,WAAW,CAAC,CAAC;IACtB,OAAO,CAAC,GAAG,CACT,MAAM,KAAK,CAAC,KAAK,CACf,yDAAyD,CAC1D,EAAE,CACJ,CAAC;IACF,KAAK,CAAC,IAAI,CAAC,aAAa,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IAC3C,IAAI,CAAC,uBAAuB,CAAC,CAAC;IAC9B,QAAQ,EAAE,CAAC;AACb,CAAC,CAAC;AAEF,eAAe,GAAG,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"listener.js","sourceRoot":"","sources":["../../../src/local-preview/utils/listener.ts"],"names":[],"mappings":"AAAA,OAAO,QAAQ,MAAM,UAAU,CAAC;AAChC,OAAO,GAAG,MAAM,UAAU,CAAC;AAC3B,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAChE,OAAO,EAAE,wBAAwB,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAC3E,OAAO,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAC;AACnD,OAAO,YAAY,MAAM,mBAAmB,CAAC;AAE7C,MAAM,QAAQ,GAAG,GAAG,EAAE;IACpB,QAAQ;SACL,KAAK,CAAC,aAAa,EAAE;QACpB,aAAa,EAAE,IAAI;QACnB,OAAO,EAAE,cAAc;
|
|
1
|
+
{"version":3,"file":"listener.js","sourceRoot":"","sources":["../../../src/local-preview/utils/listener.ts"],"names":[],"mappings":"AAAA,OAAO,QAAQ,MAAM,UAAU,CAAC;AAChC,OAAO,GAAG,MAAM,UAAU,CAAC;AAC3B,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAChE,OAAO,EAAE,wBAAwB,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAC3E,OAAO,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAC;AACnD,OAAO,YAAY,MAAM,mBAAmB,CAAC;AAE7C,MAAM,QAAQ,GAAG,GAAG,EAAE;IACpB,QAAQ;SACL,KAAK,CAAC,aAAa,EAAE;QACpB,aAAa,EAAE,IAAI;QACnB,OAAO,EAAE,CAAC,cAAc,EAAE,MAAM,CAAC;QACjC,GAAG,EAAE,aAAa;KACnB,CAAC;SACD,EAAE,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE;QACnC,IAAI,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,WAAW,EAAE;YAC/C,IAAI,wBAAwB,CAAC,QAAQ,CAAC,EAAE;gBACtC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;gBACpE,MAAM,GAAG,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;gBAC7B,OAAO,CAAC,GAAG,CAAC,gBAAgB,EAAE,QAAQ,CAAC,CAAC;aACzC;iBAAM,IACL,QAAQ,KAAK,kBAAkB;gBAC/B,QAAQ,KAAK,WAAW,EACxB;gBACA,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,EAAE,WAAW,CAAC,CAAC;gBAC9D,MAAM,GAAG,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;gBAC7B,OAAO,CAAC,GAAG,CACT,8EAA8E,CAC/E,CAAC;gBACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;aACjB;iBAAM;gBACL,kBAAkB;gBAClB,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;gBAC9D,MAAM,GAAG,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;gBAC7B,OAAO,CAAC,GAAG,CAAC,uBAAuB,EAAE,QAAQ,CAAC,CAAC;aAChD;SACF;aAAM;YACL,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;YACpD,IAAI,cAAc,GAAG,KAAK,CAAC;YAC3B,IAAI,wBAAwB,CAAC,QAAQ,CAAC,EAAE;gBACtC,cAAc,GAAG,IAAI,CAAC;gBACtB,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;gBACpE,MAAM,GAAG,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;gBACrC,QAAQ,KAAK,EAAE;oBACb,KAAK,KAAK,CAAC;oBACX,KAAK,QAAQ;wBACX,OAAO,CAAC,GAAG,CAAC,qBAAqB,EAAE,QAAQ,CAAC,CAAC;wBAC7C,MAAM;oBACR;wBACE,OAAO,CAAC,GAAG,CAAC,eAAe,EAAE,QAAQ,CAAC,CAAC;wBACvC,MAAM;iBACT;aACF;iBAAM,IACL,QAAQ,KAAK,kBAAkB;gBAC/B,QAAQ,KAAK,WAAW,EACxB;gBACA,cAAc,GAAG,IAAI,CAAC;gBACtB,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,EAAE,WAAW,CAAC,CAAC;gBAC9D,MAAM,GAAG,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;gBACrC,QAAQ,KAAK,EAAE;oBACb,KAAK,KAAK,CAAC;oBACX,KAAK,QAAQ;wBACX,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;wBAC5B,MAAM;oBACR;wBACE,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;wBAC7B,MAAM;iBACT;aACF;iBAAM;gBACL,MAAM,SAAS,GAAG,gBAAgB,CAAC,QAAQ,CAAC,CAAC;gBAC7C,IAAI,SAAS,GAAG,KAAK,CAAC;gBACtB,IACE,SAAS;oBACT,CAAC,SAAS,KAAK,MAAM;wBACnB,SAAS,KAAK,MAAM;wBACpB,SAAS,KAAK,KAAK,CAAC,EACtB;oBACA,MAAM,WAAW,GAAG,MAAM,YAAY,CACpC,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,CACnC,CAAC;oBACF,SAAS,GAAG,WAAW,CAAC,SAAS,CAAC;iBACnC;gBACD,IAAI,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;gBAC5D,IAAI,SAAS,EAAE;oBACb,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,EAAE,cAAc,CAAC,CAAC;oBAC3D,cAAc,GAAG,IAAI,CAAC;iBACvB;gBACD,kBAAkB;gBAClB,MAAM,GAAG,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;gBACrC,QAAQ,KAAK,EAAE;oBACb,KAAK,KAAK,CAAC;oBACX,KAAK,QAAQ;wBACX,IAAI,SAAS,EAAE;4BACb,OAAO,CAAC,GAAG,CAAC,sBAAsB,EAAE,QAAQ,CAAC,CAAC;yBAC/C;6BAAM;4BACL,OAAO,CAAC,GAAG,CAAC,qBAAqB,EAAE,QAAQ,CAAC,CAAC;yBAC9C;wBACD,MAAM;oBACR;wBACE,IAAI,SAAS,EAAE;4BACb,OAAO,CAAC,GAAG,CAAC,uBAAuB,EAAE,QAAQ,CAAC,CAAC;yBAChD;6BAAM;4BACL,OAAO,CAAC,GAAG,CAAC,sBAAsB,EAAE,QAAQ,CAAC,CAAC;yBAC/C;wBACD,MAAM;iBACT;aACF;YACD,IAAI,cAAc,EAAE;gBAClB,MAAM,kBAAkB,EAAE,CAAC;aAC5B;SACF;IACH,CAAC,CAAC,CAAC;AACP,CAAC,CAAC;AAEF,eAAe,QAAQ,CAAC"}
|
|
@@ -16,6 +16,10 @@ export default async function downloadAllImages($, content, origin, baseDir, mod
|
|
|
16
16
|
];
|
|
17
17
|
// Wait to all images to download before continuing
|
|
18
18
|
const origToNewArray = await Promise.all(imageSrcs.map(async (origImageSrc) => {
|
|
19
|
+
// We do not support downloading base64 in-line images.
|
|
20
|
+
if (origImageSrc.startsWith("data:")) {
|
|
21
|
+
return undefined;
|
|
22
|
+
}
|
|
19
23
|
// Add origin if the image tags are using relative sources
|
|
20
24
|
const imageHref = origImageSrc.startsWith("http")
|
|
21
25
|
? origImageSrc
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"downloadAllImages.js","sourceRoot":"","sources":["../../src/scraping/downloadAllImages.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,aAAa,MAAM,qBAAqB,CAAC;AAEhD,4EAA4E;AAC5E,MAAM,CAAC,OAAO,CAAC,KAAK,UAAU,iBAAiB,CAC7C,CAAM,EACN,OAAY,EACZ,MAAc,EACd,OAAe,EACf,cAAoB;IAEpB,IAAI,CAAC,OAAO,EAAE;QACZ,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAC5C,OAAO;KACR;IAED,kEAAkE;IAClE,4CAA4C;IAC5C,MAAM,SAAS,GAAG;QAChB,GAAG,IAAI,GAAG,CACR,OAAO;aACJ,IAAI,CAAC,UAAU,CAAC;aAChB,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;aACvC,OAAO,EAAE,CACb;KACF,CAAC;IAEF,mDAAmD;IACnD,MAAM,cAAc,GAAG,MAAM,OAAO,CAAC,GAAG,CACtC,SAAS,CAAC,GAAG,CAAC,KAAK,EAAE,YAAoB,EAAE,EAAE;QAC3C,0DAA0D;QAC1D,MAAM,SAAS,GAAG,YAAY,CAAC,UAAU,CAAC,MAAM,CAAC;YAC/C,CAAC,CAAC,YAAY;YACd,CAAC,CAAC,IAAI,GAAG,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC;QAEvC,IAAI,QAAQ,GAAG,2BAA2B,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;QACrE,IAAI,cAAc,EAAE;YAClB,QAAQ,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;SACrC;QAED,IAAI,CAAC,QAAQ,EAAE;YACb,OAAO,CAAC,KAAK,CAAC,qBAAqB,GAAG,SAAS,CAAC,CAAC;YACjD,OAAO;SACR;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;QAE/C,MAAM,aAAa,CAAC,SAAS,EAAE,SAAS,CAAC;aACtC,IAAI,CAAC,GAAG,EAAE;YACT,OAAO,CAAC,GAAG,CAAC,QAAQ,GAAG,SAAS,CAAC,CAAC;QACpC,CAAC,CAAC;aACD,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;YACX,IAAI,CAAC,CAAC,IAAI,KAAK,QAAQ,EAAE;gBACvB,OAAO,CAAC,GAAG,CAAC,6BAA6B,SAAS,EAAE,CAAC,CAAC;aACvD;iBAAM;gBACL,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;aAClB;QACH,CAAC,CAAC,CAAC;QAEL,OAAO,EAAE,CAAC,YAAY,CAAC,EAAE,SAAS,EAAE,CAAC;IACvC,CAAC,CAAC,CACH,CAAC;IAEF,OAAO,cAAc,CAAC,MAAM,CAC1B,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EACnD,EAAE,CACH,CAAC;AACJ,CAAC;AAED,SAAS,2BAA2B,CAAC,GAAW;IAC9C,2BAA2B;IAC3B,MAAM,eAAe,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IAEnC,eAAe,CAAC,OAAO,CAAC,CAAC,aAAa,EAAE,EAAE;QACxC,iFAAiF;QACjF,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IACH,OAAO,GAAG,CAAC;AACb,CAAC"}
|
|
1
|
+
{"version":3,"file":"downloadAllImages.js","sourceRoot":"","sources":["../../src/scraping/downloadAllImages.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,aAAa,MAAM,qBAAqB,CAAC;AAEhD,4EAA4E;AAC5E,MAAM,CAAC,OAAO,CAAC,KAAK,UAAU,iBAAiB,CAC7C,CAAM,EACN,OAAY,EACZ,MAAc,EACd,OAAe,EACf,cAAoB;IAEpB,IAAI,CAAC,OAAO,EAAE;QACZ,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAC5C,OAAO;KACR;IAED,kEAAkE;IAClE,4CAA4C;IAC5C,MAAM,SAAS,GAAG;QAChB,GAAG,IAAI,GAAG,CACR,OAAO;aACJ,IAAI,CAAC,UAAU,CAAC;aAChB,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;aACvC,OAAO,EAAE,CACb;KACF,CAAC;IAEF,mDAAmD;IACnD,MAAM,cAAc,GAAG,MAAM,OAAO,CAAC,GAAG,CACtC,SAAS,CAAC,GAAG,CAAC,KAAK,EAAE,YAAoB,EAAE,EAAE;QAC3C,uDAAuD;QACvD,IAAI,YAAY,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE;YACpC,OAAO,SAAS,CAAC;SAClB;QAED,0DAA0D;QAC1D,MAAM,SAAS,GAAG,YAAY,CAAC,UAAU,CAAC,MAAM,CAAC;YAC/C,CAAC,CAAC,YAAY;YACd,CAAC,CAAC,IAAI,GAAG,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC;QAEvC,IAAI,QAAQ,GAAG,2BAA2B,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;QACrE,IAAI,cAAc,EAAE;YAClB,QAAQ,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;SACrC;QAED,IAAI,CAAC,QAAQ,EAAE;YACb,OAAO,CAAC,KAAK,CAAC,qBAAqB,GAAG,SAAS,CAAC,CAAC;YACjD,OAAO;SACR;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;QAE/C,MAAM,aAAa,CAAC,SAAS,EAAE,SAAS,CAAC;aACtC,IAAI,CAAC,GAAG,EAAE;YACT,OAAO,CAAC,GAAG,CAAC,QAAQ,GAAG,SAAS,CAAC,CAAC;QACpC,CAAC,CAAC;aACD,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;YACX,IAAI,CAAC,CAAC,IAAI,KAAK,QAAQ,EAAE;gBACvB,OAAO,CAAC,GAAG,CAAC,6BAA6B,SAAS,EAAE,CAAC,CAAC;aACvD;iBAAM;gBACL,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;aAClB;QACH,CAAC,CAAC,CAAC;QAEL,OAAO,EAAE,CAAC,YAAY,CAAC,EAAE,SAAS,EAAE,CAAC;IACvC,CAAC,CAAC,CACH,CAAC;IAEF,OAAO,cAAc,CAAC,MAAM,CAC1B,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EACnD,EAAE,CACH,CAAC;AACJ,CAAC;AAED,SAAS,2BAA2B,CAAC,GAAW;IAC9C,2BAA2B;IAC3B,MAAM,eAAe,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IAEnC,eAAe,CAAC,OAAO,CAAC,CAAC,aAAa,EAAE,EAAE;QACxC,iFAAiF;QACjF,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IACH,OAAO,GAAG,CAAC;AACb,CAAC"}
|
|
@@ -3,18 +3,11 @@ import { detectFramework, Frameworks } from "./detectFramework.js";
|
|
|
3
3
|
import { getHrefFromArgs, getOrigin } from "../util.js";
|
|
4
4
|
import { scrapeSection } from "./scrapeSection.js";
|
|
5
5
|
import { scrapeDocusaurusSection } from "./site-scrapers/scrapeDocusaurusSection.js";
|
|
6
|
+
import openNestedDocusaurusMenus from "./site-scrapers/openNestedDocusaurusMenus.js";
|
|
6
7
|
import { scrapeGitBookSection } from "./site-scrapers/scrapeGitBookSection.js";
|
|
8
|
+
import openNestedGitbookMenus from "./site-scrapers/openNestedGitbookMenus.js";
|
|
7
9
|
import { scrapeReadMeSection } from "./site-scrapers/scrapeReadMeSection.js";
|
|
8
10
|
import { startBrowser } from "../browser.js";
|
|
9
|
-
function validateFramework(framework) {
|
|
10
|
-
if (!framework) {
|
|
11
|
-
console.log("Could not detect the framework automatically. Please use one of:");
|
|
12
|
-
console.log("scrape-page-docusaurus");
|
|
13
|
-
console.log("scrape-page-gitbook");
|
|
14
|
-
console.log("scrape-page-readme");
|
|
15
|
-
return process.exit(1);
|
|
16
|
-
}
|
|
17
|
-
}
|
|
18
11
|
export async function scrapeSectionAxiosWrapper(argv, scrapeFunc) {
|
|
19
12
|
const href = getHrefFromArgs(argv);
|
|
20
13
|
const res = await axios.default.get(href);
|
|
@@ -22,50 +15,20 @@ export async function scrapeSectionAxiosWrapper(argv, scrapeFunc) {
|
|
|
22
15
|
await scrapeSection(scrapeFunc, html, getOrigin(href), argv.overwrite);
|
|
23
16
|
process.exit(0);
|
|
24
17
|
}
|
|
18
|
+
export async function scrapeDocusaurusSectionCommand(argv) {
|
|
19
|
+
await scrapeSectionOpeningAllNested(argv, openNestedDocusaurusMenus, scrapeDocusaurusSection);
|
|
20
|
+
}
|
|
25
21
|
export async function scrapeGitbookSectionCommand(argv) {
|
|
26
|
-
await
|
|
22
|
+
await scrapeSectionOpeningAllNested(argv, openNestedGitbookMenus, scrapeGitBookSection);
|
|
27
23
|
}
|
|
28
|
-
async function
|
|
24
|
+
async function scrapeSectionOpeningAllNested(argv, openLinks, scrapeFunc) {
|
|
29
25
|
const href = getHrefFromArgs(argv);
|
|
30
26
|
const browser = await startBrowser();
|
|
31
27
|
const page = await browser.newPage();
|
|
32
28
|
await page.goto(href, {
|
|
33
29
|
waitUntil: "networkidle2",
|
|
34
30
|
});
|
|
35
|
-
|
|
36
|
-
let encounteredHref = ["fake"];
|
|
37
|
-
// Loop until we've encountered every link
|
|
38
|
-
while (!encounteredHref.every((href) => prevEncountered.includes(href))) {
|
|
39
|
-
prevEncountered = encounteredHref;
|
|
40
|
-
encounteredHref = await page.evaluate((encounteredHref) => {
|
|
41
|
-
const icons = Array.from(document.querySelectorAll('path[d="M9 18l6-6-6-6"]'));
|
|
42
|
-
const linksFound = [];
|
|
43
|
-
icons.forEach(async (icon) => {
|
|
44
|
-
var _a, _b;
|
|
45
|
-
const toClick = (_a = icon === null || icon === void 0 ? void 0 : icon.parentElement) === null || _a === void 0 ? void 0 : _a.parentElement;
|
|
46
|
-
const link = (_b = toClick === null || toClick === void 0 ? void 0 : toClick.parentElement) === null || _b === void 0 ? void 0 : _b.parentElement;
|
|
47
|
-
// Skip icons not in the side navigation
|
|
48
|
-
if (!(link === null || link === void 0 ? void 0 : link.hasAttribute("href"))) {
|
|
49
|
-
return;
|
|
50
|
-
}
|
|
51
|
-
const href = link.getAttribute("href");
|
|
52
|
-
// Should never occur but we keep it as a fail-safe
|
|
53
|
-
if ((href === null || href === void 0 ? void 0 : href.startsWith("https://")) || (href === null || href === void 0 ? void 0 : href.startsWith("http://"))) {
|
|
54
|
-
return;
|
|
55
|
-
}
|
|
56
|
-
// Click any links we haven't seen before
|
|
57
|
-
if (href && !encounteredHref.includes(href)) {
|
|
58
|
-
toClick === null || toClick === void 0 ? void 0 : toClick.click();
|
|
59
|
-
}
|
|
60
|
-
if (href) {
|
|
61
|
-
linksFound.push(href);
|
|
62
|
-
}
|
|
63
|
-
});
|
|
64
|
-
return linksFound;
|
|
65
|
-
}, encounteredHref // Need to pass array into the browser
|
|
66
|
-
);
|
|
67
|
-
}
|
|
68
|
-
const html = await page.content();
|
|
31
|
+
const html = await openLinks(page);
|
|
69
32
|
browser.close();
|
|
70
33
|
await scrapeSection(scrapeFunc, html, getOrigin(href), argv.overwrite);
|
|
71
34
|
process.exit(0);
|
|
@@ -78,7 +41,7 @@ export async function scrapeSectionAutomatically(argv) {
|
|
|
78
41
|
validateFramework(framework);
|
|
79
42
|
console.log("Detected framework: " + framework);
|
|
80
43
|
if (framework === Frameworks.DOCUSAURUS) {
|
|
81
|
-
await
|
|
44
|
+
await scrapeDocusaurusSectionCommand(argv);
|
|
82
45
|
}
|
|
83
46
|
else if (framework === Frameworks.GITBOOK) {
|
|
84
47
|
await scrapeGitbookSectionCommand(argv);
|
|
@@ -87,4 +50,13 @@ export async function scrapeSectionAutomatically(argv) {
|
|
|
87
50
|
await scrapeSectionAxiosWrapper(argv, scrapeReadMeSection);
|
|
88
51
|
}
|
|
89
52
|
}
|
|
53
|
+
function validateFramework(framework) {
|
|
54
|
+
if (!framework) {
|
|
55
|
+
console.log("Could not detect the framework automatically. Please use one of:");
|
|
56
|
+
console.log("scrape-page-docusaurus");
|
|
57
|
+
console.log("scrape-page-gitbook");
|
|
58
|
+
console.log("scrape-page-readme");
|
|
59
|
+
return process.exit(1);
|
|
60
|
+
}
|
|
61
|
+
}
|
|
90
62
|
//# sourceMappingURL=scrapeSectionCommands.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeSectionCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapeSectionCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACxD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,uBAAuB,EAAE,MAAM,4CAA4C,CAAC;AACrF,OAAO,
|
|
1
|
+
{"version":3,"file":"scrapeSectionCommands.js","sourceRoot":"","sources":["../../src/scraping/scrapeSectionCommands.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AACnE,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACxD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,uBAAuB,EAAE,MAAM,4CAA4C,CAAC;AACrF,OAAO,yBAAyB,MAAM,8CAA8C,CAAC;AACrF,OAAO,EAAE,oBAAoB,EAAE,MAAM,yCAAyC,CAAC;AAC/E,OAAO,sBAAsB,MAAM,2CAA2C,CAAC;AAC/E,OAAO,EAAE,mBAAmB,EAAE,MAAM,wCAAwC,CAAC;AAC7E,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAE7C,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAAC,IAAS,EAAE,UAAe;IACxE,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,MAAM,aAAa,CAAC,UAAU,EAAE,IAAI,EAAE,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;IACvE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,8BAA8B,CAAC,IAAS;IAC5D,MAAM,6BAA6B,CACjC,IAAI,EACJ,yBAAyB,EACzB,uBAAuB,CACxB,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,IAAS;IACzD,MAAM,6BAA6B,CACjC,IAAI,EACJ,sBAAsB,EACtB,oBAAoB,CACrB,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,6BAA6B,CAC1C,IAAS,EACT,SAAc,EACd,UAAe;IAEf,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IAEnC,MAAM,OAAO,GAAG,MAAM,YAAY,EAAE,CAAC;IACrC,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;IACrC,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;QACpB,SAAS,EAAE,cAAc;KAC1B,CAAC,CAAC;IAEH,MAAM,IAAI,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,CAAC;IACnC,OAAO,CAAC,KAAK,EAAE,CAAC;IAChB,MAAM,aAAa,CAAC,UAAU,EAAE,IAAI,EAAE,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;IACvE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAAC,IAAS;IACxD,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IACtB,MAAM,SAAS,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IAExC,iBAAiB,CAAC,SAAS,CAAC,CAAC;IAE7B,OAAO,CAAC,GAAG,CAAC,sBAAsB,GAAG,SAAS,CAAC,CAAC;IAEhD,IAAI,SAAS,KAAK,UAAU,CAAC,UAAU,EAAE;QACvC,MAAM,8BAA8B,CAAC,IAAI,CAAC,CAAC;KAC5C;SAAM,IAAI,SAAS,KAAK,UAAU,CAAC,OAAO,EAAE;QAC3C,MAAM,2BAA2B,CAAC,IAAI,CAAC,CAAC;KACzC;SAAM,IAAI,SAAS,KAAK,UAAU,CAAC,MAAM,EAAE;QAC1C,MAAM,yBAAyB,CAAC,IAAI,EAAE,mBAAmB,CAAC,CAAC;KAC5D;AACH,CAAC;AAED,SAAS,iBAAiB,CAAC,SAAiC;IAC1D,IAAI,CAAC,SAAS,EAAE;QACd,OAAO,CAAC,GAAG,CACT,kEAAkE,CACnE,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;QAClC,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;KACxB;AACH,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export default function alternateGroupTitle(firstLink, pages) {
|
|
2
|
+
// Only assign titles to nested navigation menus outside a section.
|
|
3
|
+
// Others should not have a title so we can merge them into one section.
|
|
4
|
+
if (pages.length > 0) {
|
|
5
|
+
return firstLink === null || firstLink === void 0 ? void 0 : firstLink.text();
|
|
6
|
+
}
|
|
7
|
+
return "";
|
|
8
|
+
}
|
|
9
|
+
//# sourceMappingURL=alternateGroupTitle.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"alternateGroupTitle.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/alternateGroupTitle.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,OAAO,UAAU,mBAAmB,CAAC,SAAS,EAAE,KAAK;IAC1D,mEAAmE;IACnE,wEAAwE;IACxE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE;QACpB,OAAO,SAAS,aAAT,SAAS,uBAAT,SAAS,CAAE,IAAI,EAAE,CAAC;KAC1B;IACD,OAAO,EAAE,CAAC;AACZ,CAAC"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// Used by GitBook and ReadMe section scrapers
|
|
1
|
+
// Used by Docusaurus, GitBook, and ReadMe section scrapers
|
|
2
2
|
export default function getLinksRecursively(linkSections, $) {
|
|
3
3
|
if (linkSections == null || linkSections.length === 0) {
|
|
4
4
|
return [];
|
|
@@ -6,12 +6,17 @@ export default function getLinksRecursively(linkSections, $) {
|
|
|
6
6
|
return linkSections
|
|
7
7
|
.map((i, s) => {
|
|
8
8
|
const subsection = $(s);
|
|
9
|
-
|
|
9
|
+
let link = subsection.children().first();
|
|
10
|
+
if (!link.attr("href")) {
|
|
11
|
+
// Docusaurus nests the <a> inside a <div>
|
|
12
|
+
link = link.find("a[href]").first();
|
|
13
|
+
}
|
|
10
14
|
const linkHref = link.attr("href");
|
|
11
15
|
// Skip missing links. For example, GitBook uses
|
|
12
16
|
// empty divs are used for styling a line beside the nav.
|
|
13
17
|
// Skip external links until Mintlify supports them
|
|
14
18
|
if (!linkHref ||
|
|
19
|
+
linkHref === "#" ||
|
|
15
20
|
linkHref.startsWith("https://") ||
|
|
16
21
|
linkHref.startsWith("http://")) {
|
|
17
22
|
return undefined;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"getLinksRecursively.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/getLinksRecursively.ts"],"names":[],"mappings":"AAAA,
|
|
1
|
+
{"version":3,"file":"getLinksRecursively.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/getLinksRecursively.ts"],"names":[],"mappings":"AAAA,2DAA2D;AAC3D,MAAM,CAAC,OAAO,UAAU,mBAAmB,CAAC,YAAiB,EAAE,CAAM;IACnE,IAAI,YAAY,IAAI,IAAI,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE;QACrD,OAAO,EAAE,CAAC;KACX;IAED,OAAO,YAAY;SAChB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACZ,MAAM,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACxB,IAAI,IAAI,GAAG,UAAU,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,CAAC;QAEzC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE;YACtB,0CAA0C;YAC1C,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC;SACrC;QACD,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEnC,gDAAgD;QAChD,yDAAyD;QACzD,mDAAmD;QACnD,IACE,CAAC,QAAQ;YACT,QAAQ,KAAK,GAAG;YAChB,QAAQ,CAAC,UAAU,CAAC,UAAU,CAAC;YAC/B,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,EAC9B;YACA,OAAO,SAAS,CAAC;SAClB;QAED,MAAM,UAAU,GAAG,UAAU,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;QAE1D,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE;YACzB,6CAA6C;YAC7C,wEAAwE;YACxE,OAAO;gBACL,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE;gBAClB,KAAK,EAAE,CAAC,QAAQ,EAAE,GAAG,mBAAmB,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;aACzD,CAAC;SACH;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC,CAAC;SACD,OAAO,EAAE;SACT,MAAM,CAAC,OAAO,CAAC,CAAC;AACrB,CAAC"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
export default async function openNestedDocusaurusMenus(page) {
|
|
2
|
+
let prevEncountered = [];
|
|
3
|
+
let encounteredHref = ["fake-href-to-make-loop-run-at-least-once"];
|
|
4
|
+
// Loop until we've encountered every link
|
|
5
|
+
while (!encounteredHref.every((href) => prevEncountered.includes(href))) {
|
|
6
|
+
prevEncountered = encounteredHref;
|
|
7
|
+
encounteredHref = await page.evaluate((encounteredHref) => {
|
|
8
|
+
const collapsible = Array.from(document.querySelectorAll(".menu__link.menu__link--sublist"));
|
|
9
|
+
const linksFound = [];
|
|
10
|
+
collapsible.forEach(async (collapsibleItem) => {
|
|
11
|
+
const href = collapsibleItem === null || collapsibleItem === void 0 ? void 0 : collapsibleItem.getAttribute("href");
|
|
12
|
+
// Should never occur but we keep it as a fail-safe
|
|
13
|
+
if ((href === null || href === void 0 ? void 0 : href.startsWith("https://")) || (href === null || href === void 0 ? void 0 : href.startsWith("http://"))) {
|
|
14
|
+
return;
|
|
15
|
+
}
|
|
16
|
+
// Click any links we haven't seen before
|
|
17
|
+
if (href && !encounteredHref.includes(href)) {
|
|
18
|
+
collapsibleItem === null || collapsibleItem === void 0 ? void 0 : collapsibleItem.click();
|
|
19
|
+
}
|
|
20
|
+
if (href) {
|
|
21
|
+
linksFound.push(href);
|
|
22
|
+
}
|
|
23
|
+
});
|
|
24
|
+
return linksFound;
|
|
25
|
+
}, encounteredHref // Need to pass array into the browser
|
|
26
|
+
);
|
|
27
|
+
}
|
|
28
|
+
return await page.content();
|
|
29
|
+
}
|
|
30
|
+
//# sourceMappingURL=openNestedDocusaurusMenus.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openNestedDocusaurusMenus.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/openNestedDocusaurusMenus.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,OAAO,CAAC,KAAK,UAAU,yBAAyB,CAAC,IAAU;IAChE,IAAI,eAAe,GAAa,EAAE,CAAC;IACnC,IAAI,eAAe,GAAG,CAAC,0CAA0C,CAAC,CAAC;IAEnE,0CAA0C;IAC1C,OAAO,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,EAAE;QACvE,eAAe,GAAG,eAAe,CAAC;QAClC,eAAe,GAAG,MAAM,IAAI,CAAC,QAAQ,CACnC,CAAC,eAAe,EAAE,EAAE;YAClB,MAAM,WAAW,GAAkB,KAAK,CAAC,IAAI,CAC3C,QAAQ,CAAC,gBAAgB,CAAC,iCAAiC,CAAC,CAC7D,CAAC;YAEF,MAAM,UAAU,GAAa,EAAE,CAAC;YAChC,WAAW,CAAC,OAAO,CAAC,KAAK,EAAE,eAA4B,EAAE,EAAE;gBACzD,MAAM,IAAI,GAAG,eAAe,aAAf,eAAe,uBAAf,eAAe,CAAE,YAAY,CAAC,MAAM,CAAC,CAAC;gBAEnD,mDAAmD;gBACnD,IAAI,CAAA,IAAI,aAAJ,IAAI,uBAAJ,IAAI,CAAE,UAAU,CAAC,UAAU,CAAC,MAAI,IAAI,aAAJ,IAAI,uBAAJ,IAAI,CAAE,UAAU,CAAC,SAAS,CAAC,CAAA,EAAE;oBAC/D,OAAO;iBACR;gBAED,yCAAyC;gBACzC,IAAI,IAAI,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE;oBAC3C,eAAe,aAAf,eAAe,uBAAf,eAAe,CAAE,KAAK,EAAE,CAAC;iBAC1B;gBAED,IAAI,IAAI,EAAE;oBACR,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBACvB;YACH,CAAC,CAAC,CAAC;YAEH,OAAO,UAAU,CAAC;QACpB,CAAC,EACD,eAAe,CAAC,sCAAsC;SACvD,CAAC;KACH;IAED,OAAO,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;AAC9B,CAAC"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
export default async function openNestedGitbookMenus(page) {
|
|
2
|
+
let prevEncountered = [];
|
|
3
|
+
let encounteredHref = ["fake-href-to-make-loop-run-at-least-once"];
|
|
4
|
+
// Loop until we've encountered every link
|
|
5
|
+
while (!encounteredHref.every((href) => prevEncountered.includes(href))) {
|
|
6
|
+
prevEncountered = encounteredHref;
|
|
7
|
+
encounteredHref = await page.evaluate((encounteredHref) => {
|
|
8
|
+
const icons = Array.from(document.querySelectorAll('path[d="M9 18l6-6-6-6"]'));
|
|
9
|
+
const linksFound = [];
|
|
10
|
+
icons.forEach(async (icon) => {
|
|
11
|
+
var _a, _b;
|
|
12
|
+
const toClick = (_a = icon === null || icon === void 0 ? void 0 : icon.parentElement) === null || _a === void 0 ? void 0 : _a.parentElement;
|
|
13
|
+
const link = (_b = toClick === null || toClick === void 0 ? void 0 : toClick.parentElement) === null || _b === void 0 ? void 0 : _b.parentElement;
|
|
14
|
+
// Skip icons not in the side navigation
|
|
15
|
+
if (!(link === null || link === void 0 ? void 0 : link.hasAttribute("href"))) {
|
|
16
|
+
return;
|
|
17
|
+
}
|
|
18
|
+
const href = link.getAttribute("href");
|
|
19
|
+
// Should never occur but we keep it as a fail-safe
|
|
20
|
+
if ((href === null || href === void 0 ? void 0 : href.startsWith("https://")) || (href === null || href === void 0 ? void 0 : href.startsWith("http://"))) {
|
|
21
|
+
return;
|
|
22
|
+
}
|
|
23
|
+
// Click any links we haven't seen before
|
|
24
|
+
if (href && !encounteredHref.includes(href)) {
|
|
25
|
+
toClick === null || toClick === void 0 ? void 0 : toClick.click();
|
|
26
|
+
}
|
|
27
|
+
if (href) {
|
|
28
|
+
linksFound.push(href);
|
|
29
|
+
}
|
|
30
|
+
});
|
|
31
|
+
return linksFound;
|
|
32
|
+
}, encounteredHref // Need to pass array into the browser
|
|
33
|
+
);
|
|
34
|
+
}
|
|
35
|
+
return await page.content();
|
|
36
|
+
}
|
|
37
|
+
//# sourceMappingURL=openNestedGitbookMenus.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openNestedGitbookMenus.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/openNestedGitbookMenus.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,OAAO,CAAC,KAAK,UAAU,sBAAsB,CAAC,IAAU;IAC7D,IAAI,eAAe,GAAa,EAAE,CAAC;IACnC,IAAI,eAAe,GAAG,CAAC,0CAA0C,CAAC,CAAC;IAEnE,0CAA0C;IAC1C,OAAO,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,EAAE;QACvE,eAAe,GAAG,eAAe,CAAC;QAClC,eAAe,GAAG,MAAM,IAAI,CAAC,QAAQ,CACnC,CAAC,eAAe,EAAE,EAAE;YAClB,MAAM,KAAK,GAAkB,KAAK,CAAC,IAAI,CACrC,QAAQ,CAAC,gBAAgB,CAAC,yBAAyB,CAAC,CACrD,CAAC;YAEF,MAAM,UAAU,GAAa,EAAE,CAAC;YAChC,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,IAAiB,EAAE,EAAE;;gBACxC,MAAM,OAAO,GAAG,MAAA,IAAI,aAAJ,IAAI,uBAAJ,IAAI,CAAE,aAAa,0CAAE,aAAa,CAAC;gBACnD,MAAM,IAAI,GAAG,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,aAAa,0CAAE,aAAa,CAAC;gBAEnD,wCAAwC;gBACxC,IAAI,CAAC,CAAA,IAAI,aAAJ,IAAI,uBAAJ,IAAI,CAAE,YAAY,CAAC,MAAM,CAAC,CAAA,EAAE;oBAC/B,OAAO;iBACR;gBAED,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;gBAEvC,mDAAmD;gBACnD,IAAI,CAAA,IAAI,aAAJ,IAAI,uBAAJ,IAAI,CAAE,UAAU,CAAC,UAAU,CAAC,MAAI,IAAI,aAAJ,IAAI,uBAAJ,IAAI,CAAE,UAAU,CAAC,SAAS,CAAC,CAAA,EAAE;oBAC/D,OAAO;iBACR;gBAED,yCAAyC;gBACzC,IAAI,IAAI,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE;oBAC3C,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,KAAK,EAAE,CAAC;iBAClB;gBACD,IAAI,IAAI,EAAE;oBACR,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBACvB;YACH,CAAC,CAAC,CAAC;YAEH,OAAO,UAAU,CAAC;QACpB,CAAC,EACD,eAAe,CAAC,sCAAsC;SACvD,CAAC;KACH;IAED,OAAO,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;AAC9B,CAAC"}
|
|
@@ -1,48 +1,47 @@
|
|
|
1
1
|
import cheerio from "cheerio";
|
|
2
2
|
import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js";
|
|
3
|
+
import combineNavWithEmptyGroupTitles from "../combineNavWithEmptyGroupTitles.js";
|
|
3
4
|
import { scrapeDocusaurusPage } from "./scrapeDocusaurusPage.js";
|
|
5
|
+
import getLinksRecursively from "./getLinksRecursively.js";
|
|
6
|
+
import alternateGroupTitle from "./alternateGroupTitle.js";
|
|
4
7
|
export async function scrapeDocusaurusSection(html, origin, cliDir, overwrite) {
|
|
5
8
|
const $ = cheerio.load(html);
|
|
6
9
|
// Get all the navigation sections
|
|
7
10
|
const navigationSections = $(".theme-doc-sidebar-menu").first().children();
|
|
8
11
|
// Get all links per group
|
|
9
12
|
const groupsConfig = navigationSections
|
|
10
|
-
.map((i,
|
|
11
|
-
const
|
|
13
|
+
.map((i, s) => {
|
|
14
|
+
const section = $(s);
|
|
12
15
|
// Links without a group
|
|
13
|
-
if (
|
|
14
|
-
const linkHref =
|
|
16
|
+
if (section.hasClass("theme-doc-sidebar-item-link")) {
|
|
17
|
+
const linkHref = section.find("a[href]").first().attr("href");
|
|
15
18
|
return {
|
|
16
19
|
group: "",
|
|
17
20
|
pages: [linkHref],
|
|
18
21
|
};
|
|
19
22
|
}
|
|
20
|
-
const
|
|
23
|
+
const firstLink = section
|
|
21
24
|
.find(".menu__list-item-collapsible")
|
|
22
25
|
.first()
|
|
23
|
-
.
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
const
|
|
27
|
-
|
|
28
|
-
.map((i, link) => {
|
|
29
|
-
return $(link).attr("href");
|
|
30
|
-
})
|
|
31
|
-
.filter((i, link) => link !== "#")
|
|
32
|
-
.toArray();
|
|
33
|
-
// Follows the same structure as mint.json
|
|
26
|
+
.find("a[href]");
|
|
27
|
+
const sectionTitle = firstLink.text();
|
|
28
|
+
const firstHref = firstLink.attr("href");
|
|
29
|
+
const linkSections = section.children().eq(1).children();
|
|
30
|
+
const pages = getLinksRecursively(linkSections, $);
|
|
34
31
|
return {
|
|
35
|
-
group: sectionTitle,
|
|
36
|
-
pages:
|
|
32
|
+
group: sectionTitle || alternateGroupTitle(firstLink, pages),
|
|
33
|
+
pages: firstHref ? [firstHref, ...pages] : pages,
|
|
37
34
|
};
|
|
38
35
|
})
|
|
39
36
|
.toArray();
|
|
37
|
+
// Merge groups with empty titles together
|
|
38
|
+
const reducedGroupsConfig = combineNavWithEmptyGroupTitles(groupsConfig);
|
|
40
39
|
// Scrape each link in the navigation.
|
|
41
|
-
const groupsConfigCleanPaths = await Promise.all(
|
|
42
|
-
groupConfig.pages = (await Promise.all(groupConfig.pages.map(async (
|
|
40
|
+
const groupsConfigCleanPaths = await Promise.all(reducedGroupsConfig.map(async (groupConfig) => {
|
|
41
|
+
groupConfig.pages = (await Promise.all(groupConfig.pages.map(async (navEntry) =>
|
|
43
42
|
// Docusaurus requires a directory on all sections wheras we use root.
|
|
44
43
|
// /docs is their default directory so we remove it
|
|
45
|
-
scrapeGettingFileNameFromUrl(
|
|
44
|
+
scrapeGettingFileNameFromUrl(navEntry, cliDir, origin, overwrite, scrapeDocusaurusPage, false, "/docs"))))
|
|
46
45
|
// Remove skipped index pages (they return undefined from the above function)
|
|
47
46
|
.filter(Boolean);
|
|
48
47
|
return groupConfig;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeDocusaurusSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeDocusaurusSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"scrapeDocusaurusSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeDocusaurusSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,8BAA8B,MAAM,sCAAsC,CAAC;AAClF,OAAO,EAAE,oBAAoB,EAAE,MAAM,2BAA2B,CAAC;AACjE,OAAO,mBAAmB,MAAM,0BAA0B,CAAC;AAC3D,OAAO,mBAAmB,MAAM,0BAA0B,CAAC;AAE3D,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAC3C,IAAY,EACZ,MAAc,EACd,MAAc,EACd,SAAkB;IAElB,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,kCAAkC;IAClC,MAAM,kBAAkB,GAAG,CAAC,CAAC,yBAAyB,CAAC,CAAC,KAAK,EAAE,CAAC,QAAQ,EAAE,CAAC;IAE3E,0BAA0B;IAC1B,MAAM,YAAY,GAAG,kBAAkB;SACpC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACZ,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAErB,wBAAwB;QACxB,IAAI,OAAO,CAAC,QAAQ,CAAC,6BAA6B,CAAC,EAAE;YACnD,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC9D,OAAO;gBACL,KAAK,EAAE,EAAE;gBACT,KAAK,EAAE,CAAC,QAAQ,CAAC;aAClB,CAAC;SACH;QAED,MAAM,SAAS,GAAG,OAAO;aACtB,IAAI,CAAC,8BAA8B,CAAC;aACpC,KAAK,EAAE;aACP,IAAI,CAAC,SAAS,CAAC,CAAC;QAEnB,MAAM,YAAY,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;QACtC,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACzC,MAAM,YAAY,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;QAEzD,MAAM,KAAK,GAAG,mBAAmB,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC;QAEnD,OAAO;YACL,KAAK,EAAE,YAAY,IAAI,mBAAmB,CAAC,SAAS,EAAE,KAAK,CAAC;YAC5D,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK;SACjD,CAAC;IACJ,CAAC,CAAC;SACD,OAAO,EAAE,CAAC;IAEb,0CAA0C;IAC1C,MAAM,mBAAmB,GAAG,8BAA8B,CAAC,YAAY,CAAC,CAAC;IAEzE,sCAAsC;IACtC,MAAM,sBAAsB,GAAG,MAAM,OAAO,CAAC,GAAG,CAC9C,mBAAmB,CAAC,GAAG,CAAC,KAAK,EAAE,WAAW,EAAE,EAAE;QAC5C,WAAW,CAAC,KAAK,GAAG,CAClB,MAAM,OAAO,CAAC,GAAG,CACf,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,QAAyB,EAAE,EAAE;QACxD,sEAAsE;QACtE,mDAAmD;QACnD,4BAA4B,CAC1B,QAAQ,EACR,MAAM,EACN,MAAM,EACN,SAAS,EACT,oBAAoB,EACpB,KAAK,EACL,OAAO,CACR,CACF,CACF,CACF;YACC,6EAA6E;aAC5E,MAAM,CAAC,OAAO,CAAC,CAAC;QACnB,OAAO,WAAW,CAAC;IACrB,CAAC,CAAC,CACH,CAAC;IAEF,OAAO,sBAAsB,CAAC;AAChC,CAAC"}
|
|
@@ -3,6 +3,7 @@ import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js
|
|
|
3
3
|
import { scrapeGitBookPage } from "./scrapeGitBookPage.js";
|
|
4
4
|
import combineNavWithEmptyGroupTitles from "../combineNavWithEmptyGroupTitles.js";
|
|
5
5
|
import getLinksRecursively from "./getLinksRecursively.js";
|
|
6
|
+
import alternateGroupTitle from "./alternateGroupTitle.js";
|
|
6
7
|
export async function scrapeGitBookSection(html, origin, cliDir, overwrite) {
|
|
7
8
|
const $ = cheerio.load(html);
|
|
8
9
|
// Get all the navigation sections
|
|
@@ -26,7 +27,7 @@ export async function scrapeGitBookSection(html, origin, cliDir, overwrite) {
|
|
|
26
27
|
const linkSections = section.children().eq(1).children();
|
|
27
28
|
const pages = getLinksRecursively(linkSections, $);
|
|
28
29
|
return {
|
|
29
|
-
group: sectionTitle ||
|
|
30
|
+
group: sectionTitle || alternateGroupTitle(firstLink, pages),
|
|
30
31
|
pages: firstHref ? [firstHref, ...pages] : pages,
|
|
31
32
|
};
|
|
32
33
|
})
|
|
@@ -40,12 +41,4 @@ export async function scrapeGitBookSection(html, origin, cliDir, overwrite) {
|
|
|
40
41
|
}));
|
|
41
42
|
return groupsConfigCleanPaths;
|
|
42
43
|
}
|
|
43
|
-
function alternateTitle(firstLink, pages) {
|
|
44
|
-
// Only assign titles to nested navigation menus outside a section.
|
|
45
|
-
// Others should not have a title so we can merge them into one section.
|
|
46
|
-
if (pages.length > 0) {
|
|
47
|
-
return firstLink === null || firstLink === void 0 ? void 0 : firstLink.text();
|
|
48
|
-
}
|
|
49
|
-
return "";
|
|
50
|
-
}
|
|
51
44
|
//# sourceMappingURL=scrapeGitBookSection.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeGitBookSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeGitBookSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAC3D,OAAO,8BAA8B,MAAM,sCAAsC,CAAC;AAClF,OAAO,mBAAmB,MAAM,0BAA0B,CAAC;AAE3D,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,SAAkB;IAElB,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,kCAAkC;IAClC,MAAM,kBAAkB,GAAG,CAAC,CAC1B,wEAAwE,CACzE;SACE,QAAQ,EAAE;SACV,KAAK,EAAE;SACP,QAAQ,EAAE;SACV,KAAK,EAAE;SACP,QAAQ,EAAE,CAAC;IAEd,0BAA0B;IAC1B,MAAM,YAAY,GAAG,kBAAkB;SACpC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACZ,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,YAAY,GAAG,CAAC,CAAC,OAAO,CAAC;aAC5B,IAAI,CAAC,uBAAuB,CAAC;aAC7B,KAAK,EAAE;aACP,IAAI,EAAE,CAAC;QAEV,0DAA0D;QAC1D,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC3C,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEzC,MAAM,YAAY,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;QACzD,MAAM,KAAK,GAAG,mBAAmB,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC;QAEnD,OAAO;YACL,KAAK,EAAE,YAAY,IAAI,
|
|
1
|
+
{"version":3,"file":"scrapeGitBookSection.js","sourceRoot":"","sources":["../../../src/scraping/site-scrapers/scrapeGitBookSection.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,OAAO,EAAE,4BAA4B,EAAE,MAAM,oCAAoC,CAAC;AAClF,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAC3D,OAAO,8BAA8B,MAAM,sCAAsC,CAAC;AAClF,OAAO,mBAAmB,MAAM,0BAA0B,CAAC;AAC3D,OAAO,mBAAmB,MAAM,0BAA0B,CAAC;AAE3D,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,IAAY,EACZ,MAAc,EACd,MAAc,EACd,SAAkB;IAElB,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE7B,kCAAkC;IAClC,MAAM,kBAAkB,GAAG,CAAC,CAC1B,wEAAwE,CACzE;SACE,QAAQ,EAAE;SACV,KAAK,EAAE;SACP,QAAQ,EAAE;SACV,KAAK,EAAE;SACP,QAAQ,EAAE,CAAC;IAEd,0BAA0B;IAC1B,MAAM,YAAY,GAAG,kBAAkB;SACpC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACZ,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,YAAY,GAAG,CAAC,CAAC,OAAO,CAAC;aAC5B,IAAI,CAAC,uBAAuB,CAAC;aAC7B,KAAK,EAAE;aACP,IAAI,EAAE,CAAC;QAEV,0DAA0D;QAC1D,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC3C,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEzC,MAAM,YAAY,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;QACzD,MAAM,KAAK,GAAG,mBAAmB,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC;QAEnD,OAAO;YACL,KAAK,EAAE,YAAY,IAAI,mBAAmB,CAAC,SAAS,EAAE,KAAK,CAAC;YAC5D,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK;SACjD,CAAC;IACJ,CAAC,CAAC;SACD,OAAO,EAAE;SACT,MAAM,CAAC,OAAO,CAAC,CAAC;IAEnB,0CAA0C;IAC1C,MAAM,mBAAmB,GAAG,8BAA8B,CAAC,YAAY,CAAC,CAAC;IAEzE,sCAAsC;IACtC,MAAM,sBAAsB,GAAG,MAAM,OAAO,CAAC,GAAG,CAC9C,mBAAmB,CAAC,GAAG,CAAC,KAAK,EAAE,QAAyB,EAAE,EAAE;QAC1D,OAAO,MAAM,4BAA4B,CACvC,QAAQ,EACR,MAAM,EACN,MAAM,EACN,SAAS,EACT,iBAAiB,EACjB,IAAI,CACL,CAAC;IACJ,CAAC,CAAC,CACH,CAAC;IAEF,OAAO,sBAAsB,CAAC;AAChC,CAAC"}
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -15,8 +15,8 @@ import {
|
|
|
15
15
|
scrapeSectionAutomatically,
|
|
16
16
|
scrapeSectionAxiosWrapper,
|
|
17
17
|
scrapeGitbookSectionCommand,
|
|
18
|
+
scrapeDocusaurusSectionCommand,
|
|
18
19
|
} from "./scraping/scrapeSectionCommands.js";
|
|
19
|
-
import { scrapeDocusaurusSection } from "./scraping/site-scrapers/scrapeDocusaurusSection.js";
|
|
20
20
|
import { scrapeReadMeSection } from "./scraping/site-scrapers/scrapeReadMeSection.js";
|
|
21
21
|
import dev from "./local-preview/index.js";
|
|
22
22
|
import installDepsCommand from "./local-preview/helper-commands/installDepsCommand.js";
|
|
@@ -38,7 +38,12 @@ yargs(hideBin(process.argv))
|
|
|
38
38
|
() => {},
|
|
39
39
|
installDepsCommand
|
|
40
40
|
)
|
|
41
|
-
.command(
|
|
41
|
+
.command(
|
|
42
|
+
"clear",
|
|
43
|
+
"Clear cache",
|
|
44
|
+
() => {},
|
|
45
|
+
(args) => clearCommand(args)
|
|
46
|
+
)
|
|
42
47
|
.command("init", "Generate a mintlify template", () => {}, initCommand)
|
|
43
48
|
.command("page", "Generate a new page", () => {}, generatePageTemplate)
|
|
44
49
|
.command(
|
|
@@ -86,7 +91,7 @@ yargs(hideBin(process.argv))
|
|
|
86
91
|
"Scrapes the Docusaurus section",
|
|
87
92
|
() => {},
|
|
88
93
|
async (argv) => {
|
|
89
|
-
await
|
|
94
|
+
await scrapeDocusaurusSectionCommand(argv);
|
|
90
95
|
}
|
|
91
96
|
)
|
|
92
97
|
.command(
|
|
@@ -2,10 +2,13 @@ import inquirer from "inquirer";
|
|
|
2
2
|
import shell from "shelljs";
|
|
3
3
|
import { CLIENT_PATH } from "../../constants.js";
|
|
4
4
|
|
|
5
|
-
const clearCommand = () => {
|
|
5
|
+
const clearCommand = async (args) => {
|
|
6
|
+
const all = args?.a;
|
|
6
7
|
shell.cd(CLIENT_PATH);
|
|
7
|
-
shell.exec(
|
|
8
|
-
|
|
8
|
+
shell.exec(
|
|
9
|
+
`git clean -d -x ${all ? "" : "-e node_modules "}-e last-invocation-path -n`
|
|
10
|
+
);
|
|
11
|
+
await inquirer
|
|
9
12
|
.prompt([
|
|
10
13
|
{
|
|
11
14
|
type: "confirm",
|
|
@@ -16,7 +19,11 @@ const clearCommand = () => {
|
|
|
16
19
|
])
|
|
17
20
|
.then(({ confirm }) => {
|
|
18
21
|
if (confirm) {
|
|
19
|
-
shell.exec(
|
|
22
|
+
shell.exec(
|
|
23
|
+
`git clean -d -x ${
|
|
24
|
+
all ? "" : "-e node_modules "
|
|
25
|
+
}-e last-invocation-path -f`
|
|
26
|
+
);
|
|
20
27
|
} else {
|
|
21
28
|
console.log("Clear cancelled.");
|
|
22
29
|
}
|
|
@@ -2,6 +2,7 @@ import Chalk from "chalk";
|
|
|
2
2
|
import open from "open";
|
|
3
3
|
import { promises as _promises } from "fs";
|
|
4
4
|
import fse, { pathExists } from "fs-extra";
|
|
5
|
+
import inquirer from "inquirer";
|
|
5
6
|
import { isInternetAvailable } from "is-internet-available";
|
|
6
7
|
import path from "path";
|
|
7
8
|
import shell from "shelljs";
|
|
@@ -18,6 +19,25 @@ import listener from "./utils/listener.js";
|
|
|
18
19
|
import { createPage, createMetadataFileFromPages } from "./utils/metadata.js";
|
|
19
20
|
import { updateConfigFile } from "./utils/mintConfigFile.js";
|
|
20
21
|
import { buildLogger, ensureYarn } from "../util.js";
|
|
22
|
+
import clearCommand from "./helper-commands/clearCommand.js";
|
|
23
|
+
|
|
24
|
+
const saveInvocationPath = async () => {
|
|
25
|
+
await fse.outputFile(LAST_INVOCATION_PATH_FILE_LOCATION, CMD_EXEC_PATH);
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
const cleanOldFiles = async () => {
|
|
29
|
+
const lastInvocationPathExists = await pathExists(
|
|
30
|
+
LAST_INVOCATION_PATH_FILE_LOCATION
|
|
31
|
+
);
|
|
32
|
+
if (!lastInvocationPathExists) return;
|
|
33
|
+
const lastInvocationPath = (
|
|
34
|
+
await readFile(LAST_INVOCATION_PATH_FILE_LOCATION)
|
|
35
|
+
).toString();
|
|
36
|
+
if (lastInvocationPath !== CMD_EXEC_PATH) {
|
|
37
|
+
// clean if invoked in new location
|
|
38
|
+
await clearCommand({});
|
|
39
|
+
}
|
|
40
|
+
};
|
|
21
41
|
|
|
22
42
|
const { readFile } = _promises;
|
|
23
43
|
|
|
@@ -90,8 +110,33 @@ const shellExec = (cmd: string) => {
|
|
|
90
110
|
const nodeModulesExists = async () => {
|
|
91
111
|
return pathExists(path.join(DOT_MINTLIFY, "mint", "client", "node_modules"));
|
|
92
112
|
};
|
|
113
|
+
|
|
114
|
+
const promptForYarn = async () => {
|
|
115
|
+
const yarnInstalled = shell.which("yarn");
|
|
116
|
+
if (!yarnInstalled) {
|
|
117
|
+
await inquirer
|
|
118
|
+
.prompt([
|
|
119
|
+
{
|
|
120
|
+
type: "confirm",
|
|
121
|
+
name: "confirm",
|
|
122
|
+
message: "yarn must be globally installed. Install yarn?",
|
|
123
|
+
default: true,
|
|
124
|
+
},
|
|
125
|
+
])
|
|
126
|
+
.then(({ confirm }) => {
|
|
127
|
+
if (confirm) {
|
|
128
|
+
shell.exec("npm install --global yarn");
|
|
129
|
+
} else {
|
|
130
|
+
console.log("Installation cancelled.");
|
|
131
|
+
}
|
|
132
|
+
});
|
|
133
|
+
}
|
|
134
|
+
};
|
|
135
|
+
|
|
93
136
|
const dev = async () => {
|
|
94
137
|
shell.cd(HOME_DIR);
|
|
138
|
+
await cleanOldFiles();
|
|
139
|
+
await promptForYarn();
|
|
95
140
|
const logger = buildLogger("Starting a local Mintlify instance...");
|
|
96
141
|
await fse.ensureDir(path.join(DOT_MINTLIFY, "mint"));
|
|
97
142
|
shell.cd(path.join(HOME_DIR, ".mintlify", "mint"));
|
|
@@ -130,8 +175,7 @@ const dev = async () => {
|
|
|
130
175
|
runYarn = false;
|
|
131
176
|
}
|
|
132
177
|
shell.cd(CLIENT_PATH);
|
|
133
|
-
runYarn
|
|
134
|
-
if (internet && runYarn) {
|
|
178
|
+
if (internet && (runYarn || !(await nodeModulesExists()))) {
|
|
135
179
|
if (firstInstallation) {
|
|
136
180
|
logger.succeed("Local Mintlify instance initialized");
|
|
137
181
|
}
|
|
@@ -153,6 +197,7 @@ const dev = async () => {
|
|
|
153
197
|
`);
|
|
154
198
|
process.exit(1);
|
|
155
199
|
}
|
|
200
|
+
await saveInvocationPath();
|
|
156
201
|
await copyFiles(logger);
|
|
157
202
|
run();
|
|
158
203
|
};
|
|
@@ -161,7 +206,7 @@ const run = () => {
|
|
|
161
206
|
shell.cd(CLIENT_PATH);
|
|
162
207
|
console.log(
|
|
163
208
|
`🌿 ${Chalk.green(
|
|
164
|
-
"Navigate to your local preview at
|
|
209
|
+
"Navigate to your local preview at http://localhost:3000"
|
|
165
210
|
)}`
|
|
166
211
|
);
|
|
167
212
|
shell.exec("npm run dev", { async: true });
|
|
@@ -28,6 +28,11 @@ export default async function downloadAllImages(
|
|
|
28
28
|
// Wait to all images to download before continuing
|
|
29
29
|
const origToNewArray = await Promise.all(
|
|
30
30
|
imageSrcs.map(async (origImageSrc: string) => {
|
|
31
|
+
// We do not support downloading base64 in-line images.
|
|
32
|
+
if (origImageSrc.startsWith("data:")) {
|
|
33
|
+
return undefined;
|
|
34
|
+
}
|
|
35
|
+
|
|
31
36
|
// Add origin if the image tags are using relative sources
|
|
32
37
|
const imageHref = origImageSrc.startsWith("http")
|
|
33
38
|
? origImageSrc
|
|
@@ -3,22 +3,12 @@ import { detectFramework, Frameworks } from "./detectFramework.js";
|
|
|
3
3
|
import { getHrefFromArgs, getOrigin } from "../util.js";
|
|
4
4
|
import { scrapeSection } from "./scrapeSection.js";
|
|
5
5
|
import { scrapeDocusaurusSection } from "./site-scrapers/scrapeDocusaurusSection.js";
|
|
6
|
+
import openNestedDocusaurusMenus from "./site-scrapers/openNestedDocusaurusMenus.js";
|
|
6
7
|
import { scrapeGitBookSection } from "./site-scrapers/scrapeGitBookSection.js";
|
|
8
|
+
import openNestedGitbookMenus from "./site-scrapers/openNestedGitbookMenus.js";
|
|
7
9
|
import { scrapeReadMeSection } from "./site-scrapers/scrapeReadMeSection.js";
|
|
8
10
|
import { startBrowser } from "../browser.js";
|
|
9
11
|
|
|
10
|
-
function validateFramework(framework: Frameworks | undefined) {
|
|
11
|
-
if (!framework) {
|
|
12
|
-
console.log(
|
|
13
|
-
"Could not detect the framework automatically. Please use one of:"
|
|
14
|
-
);
|
|
15
|
-
console.log("scrape-page-docusaurus");
|
|
16
|
-
console.log("scrape-page-gitbook");
|
|
17
|
-
console.log("scrape-page-readme");
|
|
18
|
-
return process.exit(1);
|
|
19
|
-
}
|
|
20
|
-
}
|
|
21
|
-
|
|
22
12
|
export async function scrapeSectionAxiosWrapper(argv: any, scrapeFunc: any) {
|
|
23
13
|
const href = getHrefFromArgs(argv);
|
|
24
14
|
const res = await axios.default.get(href);
|
|
@@ -27,11 +17,27 @@ export async function scrapeSectionAxiosWrapper(argv: any, scrapeFunc: any) {
|
|
|
27
17
|
process.exit(0);
|
|
28
18
|
}
|
|
29
19
|
|
|
20
|
+
export async function scrapeDocusaurusSectionCommand(argv: any) {
|
|
21
|
+
await scrapeSectionOpeningAllNested(
|
|
22
|
+
argv,
|
|
23
|
+
openNestedDocusaurusMenus,
|
|
24
|
+
scrapeDocusaurusSection
|
|
25
|
+
);
|
|
26
|
+
}
|
|
27
|
+
|
|
30
28
|
export async function scrapeGitbookSectionCommand(argv: any) {
|
|
31
|
-
await
|
|
29
|
+
await scrapeSectionOpeningAllNested(
|
|
30
|
+
argv,
|
|
31
|
+
openNestedGitbookMenus,
|
|
32
|
+
scrapeGitBookSection
|
|
33
|
+
);
|
|
32
34
|
}
|
|
33
35
|
|
|
34
|
-
async function
|
|
36
|
+
async function scrapeSectionOpeningAllNested(
|
|
37
|
+
argv: any,
|
|
38
|
+
openLinks: any,
|
|
39
|
+
scrapeFunc: any
|
|
40
|
+
) {
|
|
35
41
|
const href = getHrefFromArgs(argv);
|
|
36
42
|
|
|
37
43
|
const browser = await startBrowser();
|
|
@@ -40,51 +46,7 @@ async function scrapeSectionGitBookWrapper(argv: any, scrapeFunc: any) {
|
|
|
40
46
|
waitUntil: "networkidle2",
|
|
41
47
|
});
|
|
42
48
|
|
|
43
|
-
|
|
44
|
-
let encounteredHref = ["fake"];
|
|
45
|
-
|
|
46
|
-
// Loop until we've encountered every link
|
|
47
|
-
while (!encounteredHref.every((href) => prevEncountered.includes(href))) {
|
|
48
|
-
prevEncountered = encounteredHref;
|
|
49
|
-
encounteredHref = await page.evaluate(
|
|
50
|
-
(encounteredHref) => {
|
|
51
|
-
const icons: HTMLElement[] = Array.from(
|
|
52
|
-
document.querySelectorAll('path[d="M9 18l6-6-6-6"]')
|
|
53
|
-
);
|
|
54
|
-
|
|
55
|
-
const linksFound: string[] = [];
|
|
56
|
-
icons.forEach(async (icon: HTMLElement) => {
|
|
57
|
-
const toClick = icon?.parentElement?.parentElement;
|
|
58
|
-
const link = toClick?.parentElement?.parentElement;
|
|
59
|
-
|
|
60
|
-
// Skip icons not in the side navigation
|
|
61
|
-
if (!link?.hasAttribute("href")) {
|
|
62
|
-
return;
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
const href = link.getAttribute("href");
|
|
66
|
-
|
|
67
|
-
// Should never occur but we keep it as a fail-safe
|
|
68
|
-
if (href?.startsWith("https://") || href?.startsWith("http://")) {
|
|
69
|
-
return;
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
// Click any links we haven't seen before
|
|
73
|
-
if (href && !encounteredHref.includes(href)) {
|
|
74
|
-
toClick?.click();
|
|
75
|
-
}
|
|
76
|
-
if (href) {
|
|
77
|
-
linksFound.push(href);
|
|
78
|
-
}
|
|
79
|
-
});
|
|
80
|
-
|
|
81
|
-
return linksFound;
|
|
82
|
-
},
|
|
83
|
-
encounteredHref // Need to pass array into the browser
|
|
84
|
-
);
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
const html = await page.content();
|
|
49
|
+
const html = await openLinks(page);
|
|
88
50
|
browser.close();
|
|
89
51
|
await scrapeSection(scrapeFunc, html, getOrigin(href), argv.overwrite);
|
|
90
52
|
process.exit(0);
|
|
@@ -101,10 +63,22 @@ export async function scrapeSectionAutomatically(argv: any) {
|
|
|
101
63
|
console.log("Detected framework: " + framework);
|
|
102
64
|
|
|
103
65
|
if (framework === Frameworks.DOCUSAURUS) {
|
|
104
|
-
await
|
|
66
|
+
await scrapeDocusaurusSectionCommand(argv);
|
|
105
67
|
} else if (framework === Frameworks.GITBOOK) {
|
|
106
68
|
await scrapeGitbookSectionCommand(argv);
|
|
107
69
|
} else if (framework === Frameworks.README) {
|
|
108
70
|
await scrapeSectionAxiosWrapper(argv, scrapeReadMeSection);
|
|
109
71
|
}
|
|
110
72
|
}
|
|
73
|
+
|
|
74
|
+
function validateFramework(framework: Frameworks | undefined) {
|
|
75
|
+
if (!framework) {
|
|
76
|
+
console.log(
|
|
77
|
+
"Could not detect the framework automatically. Please use one of:"
|
|
78
|
+
);
|
|
79
|
+
console.log("scrape-page-docusaurus");
|
|
80
|
+
console.log("scrape-page-gitbook");
|
|
81
|
+
console.log("scrape-page-readme");
|
|
82
|
+
return process.exit(1);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export default function alternateGroupTitle(firstLink, pages) {
|
|
2
|
+
// Only assign titles to nested navigation menus outside a section.
|
|
3
|
+
// Others should not have a title so we can merge them into one section.
|
|
4
|
+
if (pages.length > 0) {
|
|
5
|
+
return firstLink?.text();
|
|
6
|
+
}
|
|
7
|
+
return "";
|
|
8
|
+
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// Used by GitBook and ReadMe section scrapers
|
|
1
|
+
// Used by Docusaurus, GitBook, and ReadMe section scrapers
|
|
2
2
|
export default function getLinksRecursively(linkSections: any, $: any) {
|
|
3
3
|
if (linkSections == null || linkSections.length === 0) {
|
|
4
4
|
return [];
|
|
@@ -7,8 +7,12 @@ export default function getLinksRecursively(linkSections: any, $: any) {
|
|
|
7
7
|
return linkSections
|
|
8
8
|
.map((i, s) => {
|
|
9
9
|
const subsection = $(s);
|
|
10
|
-
|
|
10
|
+
let link = subsection.children().first();
|
|
11
11
|
|
|
12
|
+
if (!link.attr("href")) {
|
|
13
|
+
// Docusaurus nests the <a> inside a <div>
|
|
14
|
+
link = link.find("a[href]").first();
|
|
15
|
+
}
|
|
12
16
|
const linkHref = link.attr("href");
|
|
13
17
|
|
|
14
18
|
// Skip missing links. For example, GitBook uses
|
|
@@ -16,6 +20,7 @@ export default function getLinksRecursively(linkSections: any, $: any) {
|
|
|
16
20
|
// Skip external links until Mintlify supports them
|
|
17
21
|
if (
|
|
18
22
|
!linkHref ||
|
|
23
|
+
linkHref === "#" ||
|
|
19
24
|
linkHref.startsWith("https://") ||
|
|
20
25
|
linkHref.startsWith("http://")
|
|
21
26
|
) {
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { Page } from "puppeteer";
|
|
2
|
+
|
|
3
|
+
export default async function openNestedDocusaurusMenus(page: Page) {
|
|
4
|
+
let prevEncountered: string[] = [];
|
|
5
|
+
let encounteredHref = ["fake-href-to-make-loop-run-at-least-once"];
|
|
6
|
+
|
|
7
|
+
// Loop until we've encountered every link
|
|
8
|
+
while (!encounteredHref.every((href) => prevEncountered.includes(href))) {
|
|
9
|
+
prevEncountered = encounteredHref;
|
|
10
|
+
encounteredHref = await page.evaluate(
|
|
11
|
+
(encounteredHref) => {
|
|
12
|
+
const collapsible: HTMLElement[] = Array.from(
|
|
13
|
+
document.querySelectorAll(".menu__link.menu__link--sublist")
|
|
14
|
+
);
|
|
15
|
+
|
|
16
|
+
const linksFound: string[] = [];
|
|
17
|
+
collapsible.forEach(async (collapsibleItem: HTMLElement) => {
|
|
18
|
+
const href = collapsibleItem?.getAttribute("href");
|
|
19
|
+
|
|
20
|
+
// Should never occur but we keep it as a fail-safe
|
|
21
|
+
if (href?.startsWith("https://") || href?.startsWith("http://")) {
|
|
22
|
+
return;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
// Click any links we haven't seen before
|
|
26
|
+
if (href && !encounteredHref.includes(href)) {
|
|
27
|
+
collapsibleItem?.click();
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
if (href) {
|
|
31
|
+
linksFound.push(href);
|
|
32
|
+
}
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
return linksFound;
|
|
36
|
+
},
|
|
37
|
+
encounteredHref // Need to pass array into the browser
|
|
38
|
+
);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
return await page.content();
|
|
42
|
+
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { Page } from "puppeteer";
|
|
2
|
+
|
|
3
|
+
export default async function openNestedGitbookMenus(page: Page) {
|
|
4
|
+
let prevEncountered: string[] = [];
|
|
5
|
+
let encounteredHref = ["fake-href-to-make-loop-run-at-least-once"];
|
|
6
|
+
|
|
7
|
+
// Loop until we've encountered every link
|
|
8
|
+
while (!encounteredHref.every((href) => prevEncountered.includes(href))) {
|
|
9
|
+
prevEncountered = encounteredHref;
|
|
10
|
+
encounteredHref = await page.evaluate(
|
|
11
|
+
(encounteredHref) => {
|
|
12
|
+
const icons: HTMLElement[] = Array.from(
|
|
13
|
+
document.querySelectorAll('path[d="M9 18l6-6-6-6"]')
|
|
14
|
+
);
|
|
15
|
+
|
|
16
|
+
const linksFound: string[] = [];
|
|
17
|
+
icons.forEach(async (icon: HTMLElement) => {
|
|
18
|
+
const toClick = icon?.parentElement?.parentElement;
|
|
19
|
+
const link = toClick?.parentElement?.parentElement;
|
|
20
|
+
|
|
21
|
+
// Skip icons not in the side navigation
|
|
22
|
+
if (!link?.hasAttribute("href")) {
|
|
23
|
+
return;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
const href = link.getAttribute("href");
|
|
27
|
+
|
|
28
|
+
// Should never occur but we keep it as a fail-safe
|
|
29
|
+
if (href?.startsWith("https://") || href?.startsWith("http://")) {
|
|
30
|
+
return;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
// Click any links we haven't seen before
|
|
34
|
+
if (href && !encounteredHref.includes(href)) {
|
|
35
|
+
toClick?.click();
|
|
36
|
+
}
|
|
37
|
+
if (href) {
|
|
38
|
+
linksFound.push(href);
|
|
39
|
+
}
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
return linksFound;
|
|
43
|
+
},
|
|
44
|
+
encounteredHref // Need to pass array into the browser
|
|
45
|
+
);
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
return await page.content();
|
|
49
|
+
}
|
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
import cheerio from "cheerio";
|
|
2
|
+
import { NavigationEntry } from "../..//navigation.js";
|
|
2
3
|
import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js";
|
|
4
|
+
import combineNavWithEmptyGroupTitles from "../combineNavWithEmptyGroupTitles.js";
|
|
3
5
|
import { scrapeDocusaurusPage } from "./scrapeDocusaurusPage.js";
|
|
6
|
+
import getLinksRecursively from "./getLinksRecursively.js";
|
|
7
|
+
import alternateGroupTitle from "./alternateGroupTitle.js";
|
|
4
8
|
|
|
5
9
|
export async function scrapeDocusaurusSection(
|
|
6
10
|
html: string,
|
|
@@ -15,51 +19,49 @@ export async function scrapeDocusaurusSection(
|
|
|
15
19
|
|
|
16
20
|
// Get all links per group
|
|
17
21
|
const groupsConfig = navigationSections
|
|
18
|
-
.map((i,
|
|
19
|
-
const
|
|
22
|
+
.map((i, s) => {
|
|
23
|
+
const section = $(s);
|
|
20
24
|
|
|
21
25
|
// Links without a group
|
|
22
|
-
if (
|
|
23
|
-
const linkHref =
|
|
26
|
+
if (section.hasClass("theme-doc-sidebar-item-link")) {
|
|
27
|
+
const linkHref = section.find("a[href]").first().attr("href");
|
|
24
28
|
return {
|
|
25
29
|
group: "",
|
|
26
30
|
pages: [linkHref],
|
|
27
31
|
};
|
|
28
32
|
}
|
|
29
33
|
|
|
30
|
-
const
|
|
34
|
+
const firstLink = section
|
|
31
35
|
.find(".menu__list-item-collapsible")
|
|
32
36
|
.first()
|
|
33
|
-
.
|
|
37
|
+
.find("a[href]");
|
|
34
38
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
const
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
return $(link).attr("href");
|
|
41
|
-
})
|
|
42
|
-
.filter((i, link) => link !== "#")
|
|
43
|
-
.toArray();
|
|
39
|
+
const sectionTitle = firstLink.text();
|
|
40
|
+
const firstHref = firstLink.attr("href");
|
|
41
|
+
const linkSections = section.children().eq(1).children();
|
|
42
|
+
|
|
43
|
+
const pages = getLinksRecursively(linkSections, $);
|
|
44
44
|
|
|
45
|
-
// Follows the same structure as mint.json
|
|
46
45
|
return {
|
|
47
|
-
group: sectionTitle,
|
|
48
|
-
pages:
|
|
46
|
+
group: sectionTitle || alternateGroupTitle(firstLink, pages),
|
|
47
|
+
pages: firstHref ? [firstHref, ...pages] : pages,
|
|
49
48
|
};
|
|
50
49
|
})
|
|
51
50
|
.toArray();
|
|
52
51
|
|
|
52
|
+
// Merge groups with empty titles together
|
|
53
|
+
const reducedGroupsConfig = combineNavWithEmptyGroupTitles(groupsConfig);
|
|
54
|
+
|
|
53
55
|
// Scrape each link in the navigation.
|
|
54
56
|
const groupsConfigCleanPaths = await Promise.all(
|
|
55
|
-
|
|
57
|
+
reducedGroupsConfig.map(async (groupConfig) => {
|
|
56
58
|
groupConfig.pages = (
|
|
57
59
|
await Promise.all(
|
|
58
|
-
groupConfig.pages.map(async (
|
|
60
|
+
groupConfig.pages.map(async (navEntry: NavigationEntry) =>
|
|
59
61
|
// Docusaurus requires a directory on all sections wheras we use root.
|
|
60
62
|
// /docs is their default directory so we remove it
|
|
61
63
|
scrapeGettingFileNameFromUrl(
|
|
62
|
-
|
|
64
|
+
navEntry,
|
|
63
65
|
cliDir,
|
|
64
66
|
origin,
|
|
65
67
|
overwrite,
|
|
@@ -4,6 +4,7 @@ import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js
|
|
|
4
4
|
import { scrapeGitBookPage } from "./scrapeGitBookPage.js";
|
|
5
5
|
import combineNavWithEmptyGroupTitles from "../combineNavWithEmptyGroupTitles.js";
|
|
6
6
|
import getLinksRecursively from "./getLinksRecursively.js";
|
|
7
|
+
import alternateGroupTitle from "./alternateGroupTitle.js";
|
|
7
8
|
|
|
8
9
|
export async function scrapeGitBookSection(
|
|
9
10
|
html: string,
|
|
@@ -40,7 +41,7 @@ export async function scrapeGitBookSection(
|
|
|
40
41
|
const pages = getLinksRecursively(linkSections, $);
|
|
41
42
|
|
|
42
43
|
return {
|
|
43
|
-
group: sectionTitle ||
|
|
44
|
+
group: sectionTitle || alternateGroupTitle(firstLink, pages),
|
|
44
45
|
pages: firstHref ? [firstHref, ...pages] : pages,
|
|
45
46
|
};
|
|
46
47
|
})
|
|
@@ -66,12 +67,3 @@ export async function scrapeGitBookSection(
|
|
|
66
67
|
|
|
67
68
|
return groupsConfigCleanPaths;
|
|
68
69
|
}
|
|
69
|
-
|
|
70
|
-
function alternateTitle(firstLink, pages) {
|
|
71
|
-
// Only assign titles to nested navigation menus outside a section.
|
|
72
|
-
// Others should not have a title so we can merge them into one section.
|
|
73
|
-
if (pages.length > 0) {
|
|
74
|
-
return firstLink?.text();
|
|
75
|
-
}
|
|
76
|
-
return "";
|
|
77
|
-
}
|