epub2md 1.2.1 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -9
- package/lib/bin/cli.cjs +119 -38
- package/lib/bin/convert.cjs +238 -102
- package/lib/bin/helper.cjs +38 -20
- package/lib/bin/merge.cjs +50 -0
- package/lib/converter.cjs +3 -2
- package/lib/converter.d.ts +1 -1
- package/lib/converter.mjs +3 -2
- package/lib/index.cjs +0 -7
- package/lib/index.d.ts +1 -2
- package/lib/index.mjs +1 -2
- package/lib/logger.cjs +77 -0
- package/lib/logger.d.ts +65 -0
- package/lib/logger.mjs +59 -0
- package/lib/parseEpub.cjs +83 -31
- package/lib/parseEpub.d.ts +49 -3
- package/lib/parseEpub.mjs +86 -32
- package/lib/parseLink.cjs +5 -4
- package/lib/parseLink.d.ts +10 -2
- package/lib/parseLink.mjs +5 -4
- package/lib/utils.cjs +15 -32
- package/lib/utils.d.ts +2 -2
- package/lib/utils.mjs +12 -26
- package/package.json +22 -18
package/README.md
CHANGED
|
@@ -2,7 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
Although the package is primarily intended as a CLI for converting EPUB to Markdown, it can also be used programmatically.
|
|
4
4
|
|
|
5
|
-
主要的目标是转换 epub
|
|
5
|
+
主要的目标是转换 epub 为 多个 markdown 文件,或者合并为 单个 markdown 文件,可以处理其中的远程图片资源;当然了也可以当做 epub 的解析器库使用.
|
|
6
|
+
|
|
7
|
+
## Main Functions
|
|
8
|
+
|
|
9
|
+
- **Convert EPUB to Markdown**: By default, converts the book and outputs sequentially numbered markdown files.
|
|
10
|
+
- **Autocorrection**: Optionally fixes spaces and punctuation between Chinese and English text.
|
|
11
|
+
- **Merge Chapters**: Optionally merge all markdown files into a single Markdown file, with support for link jumping.
|
|
12
|
+
- **Image Processing**:
|
|
13
|
+
- Retain the original online image links.
|
|
14
|
+
- Download and localize online images (save remote images locally).
|
|
15
|
+
- **View Information**: Easily view the basic information, structure, and chapters of the EPUB.
|
|
16
|
+
- **Extraction Function**: No conversion needed; just extract the useful contents of the EPUB file.
|
|
6
17
|
|
|
7
18
|
## Global Install for CLI
|
|
8
19
|
|
|
@@ -27,22 +38,41 @@ $ npm install @uxiew/epub2md
|
|
|
27
38
|
## CLI
|
|
28
39
|
|
|
29
40
|
```bash
|
|
30
|
-
#
|
|
41
|
+
# Show usage help
|
|
31
42
|
$ epub2md -h
|
|
32
43
|
|
|
33
44
|
# Convert directly to markdown format
|
|
34
45
|
$ epub2md ../../fixtures/zhihu.epub
|
|
35
|
-
# or -m
|
|
46
|
+
# or use -m
|
|
36
47
|
$ epub2md -m ../../fixtures/zhihu.epub
|
|
37
48
|
|
|
38
|
-
# Convert
|
|
49
|
+
# Convert to markdown and automatically correct spaces and punctuation between Chinese and English (CLI only)
|
|
39
50
|
$ epub2md -M ../../fixtures/zhihu.epub
|
|
40
51
|
|
|
41
|
-
#
|
|
42
|
-
$ epub2md -
|
|
43
|
-
|
|
44
|
-
$ epub2md
|
|
45
|
-
|
|
52
|
+
# Convert and directly generate a single merged markdown file (no intermediate files)
|
|
53
|
+
$ epub2md -m ../../fixtures/zhihu.epub --merge
|
|
54
|
+
# You can also use the epub file path as the first parameter directly
|
|
55
|
+
$ epub2md ../../fixtures/zhihu.epub --merge
|
|
56
|
+
|
|
57
|
+
# Use --merge=filename.md
|
|
58
|
+
$ epub2md ../../fixtures/zhihu.epub --merge="merged-book.md"
|
|
59
|
+
|
|
60
|
+
# By default, remote images are NOT downloaded. The images are usually already included in the epub, so there is no need to download them.
|
|
61
|
+
# However, some epub image links are remote; you will see a warning, and those images may need to be downloaded.
|
|
62
|
+
# Download and localize online images (download remote images to local) (requires node > 18.0)
|
|
63
|
+
$ epub2md ../../fixtures/zhihu.epub --localize
|
|
64
|
+
|
|
65
|
+
# Download and localize online images, while merging all chapters into a single file
|
|
66
|
+
$ epub2md ../../fixtures/zhihu.epub --merge --localize
|
|
67
|
+
|
|
68
|
+
# Merge existing markdown files in a directory
|
|
69
|
+
$ epub2md --merge ./path/to/markdown/dir
|
|
70
|
+
|
|
71
|
+
# Show additional information
|
|
72
|
+
$ epub2md -u ../../fixtures/zhihu.epub # Extract epub
|
|
73
|
+
$ epub2md -i ../../fixtures/zhihu.epub # Show basic information
|
|
74
|
+
$ epub2md -S ../../fixtures/zhihu.epub # Show structure information
|
|
75
|
+
$ epub2md -s ../../fixtures/zhihu.epub # Show chapter information
|
|
46
76
|
```
|
|
47
77
|
|
|
48
78
|
## Usage
|
package/lib/bin/cli.cjs
CHANGED
|
@@ -6,59 +6,140 @@ Object.defineProperty(exports, "__esModule", {
|
|
|
6
6
|
});
|
|
7
7
|
exports.Commands = void 0;
|
|
8
8
|
var _args = _interopRequireDefault(require("args"));
|
|
9
|
-
var _chalk = _interopRequireDefault(require("chalk"));
|
|
10
9
|
var _nodeProcess = _interopRequireDefault(require("node:process"));
|
|
10
|
+
var _nodeFs = _interopRequireDefault(require("node:fs"));
|
|
11
11
|
var _parseEpub = _interopRequireDefault(require("../parseEpub.cjs"));
|
|
12
|
-
var _convert =
|
|
13
|
-
var
|
|
12
|
+
var _convert = require("./convert.cjs");
|
|
13
|
+
var _merge = require("./merge.cjs");
|
|
14
|
+
var _logger = _interopRequireDefault(require("../logger.cjs"));
|
|
14
15
|
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
|
|
15
16
|
const name = "epub2md";
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
const
|
|
27
|
-
|
|
17
|
+
const Commands = exports.Commands = {
|
|
18
|
+
convert: "convert",
|
|
19
|
+
autocorrect: "autocorrect",
|
|
20
|
+
unzip: "unzip",
|
|
21
|
+
info: "info",
|
|
22
|
+
structure: "structure",
|
|
23
|
+
sections: "sections",
|
|
24
|
+
merge: "merge",
|
|
25
|
+
localize: "localize"
|
|
26
|
+
};
|
|
27
|
+
const commands = [[Commands.convert, "convert the epub file to markdown format"], [Commands.autocorrect, "convert the epub file to markdown format with autocorrect"], [Commands.unzip, "unzip epub file"], [Commands.info, "get epub file basic info"], [Commands.structure, "get epub file structure"], [Commands.sections, "get epub file sections"], [Commands.merge, "merge all markdown files into a single file, can also specify output filename with --merge=filename.md"], [Commands.localize, "Retain the original online link and do not convert it to a local path", false]];
|
|
28
|
+
const DEFAULT_COMMAND = Commands.convert;
|
|
29
|
+
commands.forEach(cmd => _args.default.option(cmd[0], cmd[1], cmd[2]));
|
|
28
30
|
const flags = _args.default.parse(_nodeProcess.default.argv, {
|
|
29
31
|
name
|
|
30
32
|
});
|
|
31
|
-
|
|
33
|
+
const unprocessedArgs = _nodeProcess.default.argv.slice(2).filter(arg => !arg.startsWith("--") && !arg.startsWith("-"));
|
|
34
|
+
if (unprocessedArgs.length > 0) {
|
|
35
|
+
flags[DEFAULT_COMMAND] = unprocessedArgs[0];
|
|
36
|
+
}
|
|
37
|
+
let hasRun = false;
|
|
38
|
+
for (const cmd of [Commands.info, Commands.structure, Commands.sections]) {
|
|
32
39
|
if (flags[cmd]) {
|
|
33
|
-
|
|
34
|
-
|
|
40
|
+
if (typeof flags[cmd] !== "string") {
|
|
41
|
+
if (unprocessedArgs.length > 0) {
|
|
42
|
+
flags[cmd] = unprocessedArgs[0];
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
if (typeof flags[cmd] === "string") {
|
|
46
|
+
run(cmd);
|
|
47
|
+
hasRun = true;
|
|
48
|
+
break;
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
if (!hasRun && flags[Commands.unzip]) {
|
|
53
|
+
const epubPath = typeof flags[Commands.unzip] === "string" ? flags[Commands.unzip] : unprocessedArgs.length > 0 ? unprocessedArgs[0] : null;
|
|
54
|
+
if (epubPath) {
|
|
55
|
+
_logger.default.info("unzipping...");
|
|
56
|
+
new _convert.Converter(epubPath).run({
|
|
57
|
+
cmd: Commands.unzip,
|
|
58
|
+
// Use cmd to indicate unzip only
|
|
59
|
+
mergedFilename: void 0,
|
|
60
|
+
shouldMerge: false,
|
|
61
|
+
localize: false
|
|
62
|
+
}).then(outDir => {
|
|
63
|
+
_logger.default.info(`Unzip successful! output: ${outDir}`);
|
|
64
|
+
}).catch(error => {
|
|
65
|
+
_logger.default.error(error);
|
|
66
|
+
});
|
|
67
|
+
hasRun = true;
|
|
35
68
|
} else {
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
69
|
+
_logger.default.error("No valid epub file path provided for unzip command");
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
if (!hasRun) {
|
|
73
|
+
if (flags.merge && typeof flags.merge === "string" && flags.merge !== "") {
|
|
74
|
+
if (_nodeFs.default.existsSync(flags.merge) && _nodeFs.default.statSync(flags.merge).isDirectory()) {
|
|
75
|
+
_logger.default.info("merging markdown files in directory...");
|
|
76
|
+
(0, _merge.mergeMarkdowns)(flags.merge).then(outputPath => {
|
|
77
|
+
_logger.default.info(`Merging successful! Output file: ${outputPath}`);
|
|
78
|
+
}).catch(error => {
|
|
79
|
+
_logger.default.info(`Merging failed: ${error}`);
|
|
80
|
+
});
|
|
81
|
+
hasRun = true;
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
if (!hasRun) {
|
|
85
|
+
for (const cmd of [Commands.convert, Commands.autocorrect]) {
|
|
86
|
+
if (flags[cmd]) {
|
|
87
|
+
if (typeof flags[cmd] !== "string") {
|
|
88
|
+
if (unprocessedArgs.length > 0) {
|
|
89
|
+
flags[cmd] = unprocessedArgs[0];
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
run(cmd);
|
|
93
|
+
hasRun = true;
|
|
94
|
+
break;
|
|
41
95
|
}
|
|
96
|
+
}
|
|
97
|
+
if (!hasRun && unprocessedArgs.length > 0) {
|
|
98
|
+
run(DEFAULT_COMMAND);
|
|
99
|
+
} else if (!hasRun) {
|
|
42
100
|
_args.default.showHelp();
|
|
43
101
|
}
|
|
44
102
|
}
|
|
45
|
-
}
|
|
103
|
+
}
|
|
46
104
|
function run(cmd) {
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
105
|
+
if (cmd === Commands.convert || cmd === Commands.autocorrect) {
|
|
106
|
+
const epubPath = typeof flags[cmd] === "string" ? flags[cmd] : null;
|
|
107
|
+
if (!epubPath) {
|
|
108
|
+
_logger.default.error("No valid epub file path provided");
|
|
109
|
+
return;
|
|
110
|
+
}
|
|
111
|
+
_logger.default.info(`converting${cmd === Commands.autocorrect ? " with autocorrect" : ""}${flags[Commands.merge] ? " and merging" : ""}...`);
|
|
112
|
+
const shouldMerge = flags.merge === true || typeof flags.merge === "string" && flags.merge !== "";
|
|
113
|
+
let mergedFilename;
|
|
114
|
+
if (typeof flags.merge === "string" && flags.merge !== "") {
|
|
115
|
+
mergedFilename = flags.merge;
|
|
116
|
+
}
|
|
117
|
+
const localize = flags.localize === true;
|
|
118
|
+
new _convert.Converter(epubPath).run({
|
|
119
|
+
cmd,
|
|
120
|
+
mergedFilename,
|
|
121
|
+
shouldMerge,
|
|
122
|
+
localize
|
|
123
|
+
}).then(outDir => {
|
|
124
|
+
if (shouldMerge) {
|
|
125
|
+
_logger.default.info(`Merging successful! Output file: ${outDir}`);
|
|
126
|
+
} else {
|
|
127
|
+
_logger.default.info(`Conversion successful! output: ${outDir}`);
|
|
128
|
+
}
|
|
129
|
+
}).catch(error => {
|
|
130
|
+
_logger.default.error(error);
|
|
55
131
|
});
|
|
56
132
|
return;
|
|
57
133
|
}
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
134
|
+
const cmdPath = flags[cmd];
|
|
135
|
+
if (typeof cmdPath === "string") {
|
|
136
|
+
(0, _parseEpub.default)(cmdPath).then(res => {
|
|
137
|
+
_logger.default.success(`This book ${cmd}:`);
|
|
138
|
+
_logger.default.json(res[cmd]);
|
|
139
|
+
}).catch(error => {
|
|
140
|
+
_logger.default.error(error);
|
|
141
|
+
});
|
|
142
|
+
} else {
|
|
143
|
+
_logger.default.error(`Path must be a string, got ${typeof cmdPath}`);
|
|
144
|
+
}
|
|
64
145
|
}
|
package/lib/bin/convert.cjs
CHANGED
|
@@ -3,11 +3,11 @@
|
|
|
3
3
|
Object.defineProperty(exports, "__esModule", {
|
|
4
4
|
value: true
|
|
5
5
|
});
|
|
6
|
-
|
|
6
|
+
exports.Converter = void 0;
|
|
7
7
|
var _nodePath = require("node:path");
|
|
8
8
|
var _nodeFs = require("node:fs");
|
|
9
|
+
var _logger = _interopRequireDefault(require("../logger.cjs"));
|
|
9
10
|
var _writeFileSafe = require("write-file-safe");
|
|
10
|
-
var _chalk = _interopRequireDefault(require("chalk"));
|
|
11
11
|
var _parseEpub = _interopRequireDefault(require("../parseEpub.cjs"));
|
|
12
12
|
var _helper = require("./helper.cjs");
|
|
13
13
|
var _utils = require("../utils.cjs");
|
|
@@ -16,165 +16,301 @@ var _cli = require("./cli.cjs");
|
|
|
16
16
|
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
|
|
17
17
|
class Converter {
|
|
18
18
|
epub;
|
|
19
|
-
// epub parser
|
|
19
|
+
// epub parser object
|
|
20
20
|
epubFilePath;
|
|
21
21
|
// path of the current epub file
|
|
22
|
-
cmd;
|
|
23
|
-
// current using command flag
|
|
24
|
-
MD_FILE_EXT = ".md";
|
|
25
|
-
// out file extname
|
|
26
22
|
outDir;
|
|
27
23
|
// epub 's original directory to save markdown files
|
|
24
|
+
mergedFilename;
|
|
25
|
+
// The merged file name
|
|
26
|
+
// include images/html/css/js in the epub file
|
|
28
27
|
structure = [];
|
|
29
28
|
// epub dir structure
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
29
|
+
cmd = "convert";
|
|
30
|
+
// current using command flag
|
|
31
|
+
shouldMerge = false;
|
|
32
|
+
// Whether to directly generate the merged file
|
|
33
|
+
localize = false;
|
|
34
|
+
// Whether to download remote (http/https) images into the local images directory
|
|
35
|
+
IMAGE_DIR = "images";
|
|
36
|
+
// The directory to save images
|
|
37
|
+
MD_FILE_EXT = ".md";
|
|
38
|
+
// out file extname
|
|
39
|
+
/**
|
|
40
|
+
* Constructor
|
|
41
|
+
* @param epubPath - The path to the EPUB file
|
|
42
|
+
*/
|
|
43
|
+
constructor(epubPath) {
|
|
44
|
+
this.epubFilePath = epubPath;
|
|
45
|
+
this.outDir = (0, _nodePath.dirname)(epubPath);
|
|
37
46
|
if (!(0, _nodeFs.existsSync)(this.outDir)) (0, _nodeFs.mkdirSync)(this.outDir);
|
|
38
47
|
}
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
48
|
+
clearOutpath({
|
|
49
|
+
id,
|
|
50
|
+
outpath,
|
|
51
|
+
orderLabel
|
|
52
|
+
}) {
|
|
53
|
+
function _matchNav(id2, tocItems) {
|
|
54
|
+
if (Array.isArray(tocItems)) for (let i = 0; i < tocItems.length; i++) {
|
|
55
|
+
const item = tocItems[i];
|
|
56
|
+
if (item.sectionId === id2) {
|
|
57
|
+
return item;
|
|
58
|
+
}
|
|
59
|
+
if (item.children) {
|
|
60
|
+
const childMatch = _matchNav(id2, item.children);
|
|
61
|
+
if (childMatch) {
|
|
62
|
+
return childMatch;
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
return void 0;
|
|
67
|
+
}
|
|
68
|
+
const nav = _matchNav(id, this.epub.structure);
|
|
69
|
+
const fileName = (0, _helper.getClearFilename)(nav ? nav.name + this.MD_FILE_EXT : (0, _nodePath.basename)(outpath));
|
|
70
|
+
const outDir = (0, _nodePath.dirname)(outpath);
|
|
47
71
|
return {
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
72
|
+
fileName,
|
|
73
|
+
outDir,
|
|
74
|
+
outPath: (0, _nodePath.join)(outDir, orderLabel + "-" + fileName)
|
|
51
75
|
};
|
|
52
76
|
}
|
|
53
|
-
resolveHTMLId(fileName) {
|
|
54
|
-
return fileName.replace(/\.x?html?(?:.*)/, "");
|
|
55
|
-
}
|
|
56
|
-
// 文件名处理
|
|
57
|
-
getCleanFileName(fileName, ext = "") {
|
|
58
|
-
return (0, _utils.sanitizeFileName)(fileName).trim().replace(/\s/g, "_") + ext;
|
|
59
|
-
}
|
|
60
77
|
/**
|
|
61
78
|
* Build the output path for a file and normalize asset paths. Markdown output normally doesn't need the css/js files, so they are skipped.
|
|
62
79
|
* @return the target file path that will be created, like "xxx/xxx.md" or "xxx/images"
|
|
63
80
|
*/
|
|
64
|
-
|
|
81
|
+
parseFileInfo(filepath) {
|
|
65
82
|
const {
|
|
66
83
|
isImage,
|
|
67
84
|
isHTML
|
|
68
|
-
} =
|
|
69
|
-
|
|
70
|
-
const
|
|
71
|
-
return
|
|
85
|
+
} = (0, _helper.checkFileType)(filepath);
|
|
86
|
+
const name2 = (0, _nodePath.basename)(filepath);
|
|
87
|
+
const path = !isImage && !isHTML ? (0, _nodePath.join)(this.outDir, "static", isHTML ? (0, _helper.resolveHTMLId)(name2) + this.MD_FILE_EXT : name2) : (0, _nodePath.join)(this.outDir, isImage ? this.IMAGE_DIR : "", isHTML ? (0, _helper.resolveHTMLId)(name2) + this.MD_FILE_EXT : name2);
|
|
88
|
+
return {
|
|
89
|
+
// html => md
|
|
90
|
+
type: isHTML ? "md" : isImage ? "img" : "",
|
|
91
|
+
name: name2,
|
|
92
|
+
path
|
|
93
|
+
};
|
|
72
94
|
}
|
|
95
|
+
/**
|
|
96
|
+
* Retrieves and processes the manifest of an EPUB file.
|
|
97
|
+
*
|
|
98
|
+
* @param unzip - Optional flag to indicate whether to simply unzip the file contents
|
|
99
|
+
* @returns Populates the structure array with manifest items, either unzipped or converted
|
|
100
|
+
*
|
|
101
|
+
* This method parses the EPUB file, extracts its manifest, and creates a structure
|
|
102
|
+
* representing the file contents. When unzip is false, it skips certain files like
|
|
103
|
+
* the NCX file and title page, and generates appropriate output paths for other files.
|
|
104
|
+
*/
|
|
73
105
|
async getManifest(unzip) {
|
|
74
106
|
this.epub = await (0, _parseEpub.default)(this.epubFilePath, {
|
|
75
107
|
convertToMarkdown: _helper.convertHTML
|
|
76
108
|
});
|
|
77
109
|
this.outDir = this.epubFilePath.replace(".epub", "");
|
|
78
|
-
|
|
110
|
+
let num = 0;
|
|
111
|
+
const padding = Math.floor(Math.log10(this.epub?.sections?.length ?? 0));
|
|
112
|
+
for (const {
|
|
79
113
|
href: filepath,
|
|
80
114
|
id
|
|
81
|
-
})
|
|
82
|
-
let outpath
|
|
115
|
+
} of this.epub.getManifest()) {
|
|
116
|
+
let outpath = "",
|
|
117
|
+
type = "";
|
|
83
118
|
if (unzip) outpath = (0, _nodePath.join)(this.outDir, filepath);else {
|
|
84
|
-
if (filepath.endsWith("ncx") || id === "titlepage")
|
|
85
|
-
|
|
119
|
+
if (filepath.endsWith("ncx") || id === "titlepage") continue;
|
|
120
|
+
const file = this.parseFileInfo(filepath);
|
|
121
|
+
outpath = file.path;
|
|
122
|
+
type = file.type;
|
|
86
123
|
}
|
|
87
|
-
if (
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
124
|
+
if (type !== "") {
|
|
125
|
+
this.structure.push({
|
|
126
|
+
// current only label markdown file
|
|
127
|
+
orderLabel: type === "md" ? (num++, ("0".repeat(padding) + num).slice(-(padding + 1))) : "",
|
|
128
|
+
id,
|
|
129
|
+
type,
|
|
130
|
+
outpath,
|
|
131
|
+
filepath
|
|
132
|
+
});
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
/**
|
|
137
|
+
* Download remote images to the local images directory
|
|
138
|
+
*/
|
|
139
|
+
async downloadImage(url, dest) {
|
|
140
|
+
if ((0, _nodeFs.existsSync)(dest)) return;
|
|
141
|
+
const res = await fetch(url);
|
|
142
|
+
if (!res.ok) throw new Error(`Failed to download image: ${url}`);
|
|
143
|
+
const arrayBuffer = await res.arrayBuffer();
|
|
144
|
+
const buffer = Buffer.from(arrayBuffer);
|
|
145
|
+
(0, _writeFileSafe.writeFileSync)(dest, buffer, {
|
|
146
|
+
overwrite: true
|
|
92
147
|
});
|
|
93
148
|
}
|
|
94
149
|
/**
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
150
|
+
* Localize all http/https image links found in the markdown content
|
|
151
|
+
*/
|
|
152
|
+
async localizeImages(links, outDir) {
|
|
153
|
+
if (!(0, _nodeFs.existsSync)(outDir)) (0, _nodeFs.mkdirSync)(outDir);
|
|
154
|
+
const downloadTasks = [];
|
|
155
|
+
for (const link of links) {
|
|
156
|
+
const imgName = (0, _nodePath.basename)(link.split("?")[0]);
|
|
157
|
+
const localPath = (0, _nodePath.join)(outDir, imgName);
|
|
158
|
+
downloadTasks.push(this.downloadImage(link, localPath));
|
|
159
|
+
}
|
|
160
|
+
if (downloadTasks.length) await Promise.all(downloadTasks);
|
|
161
|
+
}
|
|
162
|
+
async getFileDataAsync(structure, handleContent) {
|
|
98
163
|
let {
|
|
99
164
|
id,
|
|
165
|
+
type,
|
|
100
166
|
filepath,
|
|
101
|
-
outpath
|
|
167
|
+
outpath,
|
|
168
|
+
orderLabel
|
|
102
169
|
} = structure;
|
|
103
|
-
let content = ""
|
|
170
|
+
let content = "",
|
|
171
|
+
links = [];
|
|
104
172
|
const needAutoCorrect = this.cmd === _cli.Commands.autocorrect;
|
|
105
|
-
if (
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
return void 0;
|
|
120
|
-
};
|
|
121
|
-
content = this.epub?.getSection(id)?.toMarkdown();
|
|
122
|
-
const nav = _matchNav(this.epub?.structure, id);
|
|
123
|
-
const cleanFilename = this.getCleanFileName(nav ? nav.name + this.MD_FILE_EXT : (0, _nodePath.basename)(outpath));
|
|
124
|
-
outpath = (0, _nodePath.join)((0, _nodePath.dirname)(outpath), cleanFilename);
|
|
125
|
-
content = (0, _helper.fixLinkPath)(content, (link, text) => {
|
|
126
|
-
if (text) {
|
|
173
|
+
if (type === "md") {
|
|
174
|
+
const section = this.epub?.getSection(id);
|
|
175
|
+
if (section) {
|
|
176
|
+
content = section.toMarkdown();
|
|
177
|
+
}
|
|
178
|
+
const {
|
|
179
|
+
outPath,
|
|
180
|
+
fileName
|
|
181
|
+
} = this.clearOutpath(structure);
|
|
182
|
+
outpath = outPath;
|
|
183
|
+
const resLinks = [];
|
|
184
|
+
const linkStartSep = this.shouldMerge ? "#" : "./";
|
|
185
|
+
content = (0, _helper.fixLinkPath)(content, (link, isText) => {
|
|
186
|
+
if (isText) {
|
|
127
187
|
const {
|
|
128
|
-
hash,
|
|
188
|
+
hash = "",
|
|
129
189
|
url
|
|
130
|
-
} = (0, _parseLink.default)(link);
|
|
190
|
+
} = (0, _parseLink.default)(link, true);
|
|
131
191
|
if (link.startsWith("#")) {
|
|
132
|
-
return
|
|
192
|
+
return linkStartSep + this.shouldMerge ? id : fileName + link;
|
|
133
193
|
}
|
|
134
|
-
link =
|
|
135
|
-
const
|
|
136
|
-
|
|
194
|
+
link = (0, _helper.resolveHTMLId)((0, _nodePath.basename)(url));
|
|
195
|
+
const sectionId = this.epub.getItemId(url);
|
|
196
|
+
const internalNav = (0, _utils.matchTOC)(sectionId, this.epub?.structure) || {
|
|
197
|
+
name: link,
|
|
198
|
+
sectionId: (0, _helper.getClearFilename)((0, _nodePath.basename)(link))
|
|
137
199
|
};
|
|
138
|
-
|
|
200
|
+
let validPath = (0, _helper.getClearFilename)((0, _nodePath.extname)(internalNav.name) ? internalNav.name : internalNav.name + this.MD_FILE_EXT);
|
|
201
|
+
for (const sfile of this.structure) {
|
|
202
|
+
if (sectionId === sfile.id) {
|
|
203
|
+
validPath = (0, _nodePath.basename)(this.clearOutpath(sfile).outPath);
|
|
204
|
+
break;
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
const toId = this.epub.getItemId((0, _nodePath.join)((0, _nodePath.dirname)(filepath), url));
|
|
208
|
+
links.push({
|
|
209
|
+
url,
|
|
210
|
+
hash,
|
|
211
|
+
id: internalNav.sectionId,
|
|
212
|
+
toId
|
|
213
|
+
});
|
|
214
|
+
return this.shouldMerge ? linkStartSep + toId + (hash ? "#" + hash : "") : linkStartSep + validPath + `${hash ? "#" + hash : ""}`;
|
|
139
215
|
} else {
|
|
140
|
-
|
|
216
|
+
if (link.startsWith("http")) {
|
|
217
|
+
resLinks.push(link);
|
|
218
|
+
}
|
|
219
|
+
return "./" + this.IMAGE_DIR + "/" + (0, _nodePath.basename)(link);
|
|
141
220
|
}
|
|
142
221
|
});
|
|
222
|
+
if (this.localize) {
|
|
223
|
+
try {
|
|
224
|
+
this.localizeImages(resLinks, (0, _nodePath.join)(this.outDir, this.IMAGE_DIR));
|
|
225
|
+
} catch (error) {
|
|
226
|
+
_logger.default.error("Failed to localize the image!", error);
|
|
227
|
+
}
|
|
228
|
+
} else if (resLinks.length > 0) {
|
|
229
|
+
_logger.default.warn("Remote images are detected, you can set --localize to true to localize the remote images");
|
|
230
|
+
}
|
|
143
231
|
content = needAutoCorrect ? require("autocorrect-node").format(content) : content;
|
|
144
232
|
} else {
|
|
145
233
|
content = this.epub.resolve(filepath).asNodeBuffer();
|
|
146
234
|
}
|
|
147
235
|
return {
|
|
236
|
+
id,
|
|
237
|
+
type,
|
|
238
|
+
filepath,
|
|
148
239
|
content,
|
|
240
|
+
links,
|
|
149
241
|
outFilePath: outpath
|
|
150
242
|
};
|
|
151
243
|
}
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
244
|
+
/**
|
|
245
|
+
* Runs the conversion process for an EPUB file.
|
|
246
|
+
*
|
|
247
|
+
* @param options - Configuration options (cmd, shouldMerge, localize, mergedFilename)
|
|
248
|
+
* @returns A promise resolving to the output directory, or to the merged file's path when merging
|
|
249
|
+
*/
|
|
250
|
+
async run(options) {
|
|
251
|
+
const isUnzipOnly = options?.cmd === "unzip";
|
|
252
|
+
if (options) {
|
|
253
|
+
this.cmd = options.cmd;
|
|
254
|
+
this.shouldMerge = options.shouldMerge;
|
|
255
|
+
this.localize = options.localize;
|
|
256
|
+
this.mergedFilename = options.mergedFilename;
|
|
257
|
+
}
|
|
258
|
+
await this.getManifest(isUnzipOnly);
|
|
259
|
+
if (this.shouldMerge && !isUnzipOnly) {
|
|
260
|
+
return this.generateMergedFile();
|
|
261
|
+
}
|
|
262
|
+
let num = 1;
|
|
263
|
+
for (const s of this.structure) {
|
|
158
264
|
const {
|
|
265
|
+
type,
|
|
159
266
|
outFilePath,
|
|
160
267
|
content
|
|
161
|
-
} = this.
|
|
162
|
-
|
|
163
|
-
if (
|
|
164
|
-
|
|
165
|
-
const parsedPath = (0, _nodePath.parse)(outFilePath);
|
|
166
|
-
numberedOutFilePath = (0, _nodePath.format)({
|
|
167
|
-
...parsedPath,
|
|
168
|
-
base: `${("0".repeat(padding) + num).slice(-(padding + 1))}-${parsedPath.base}`
|
|
169
|
-
});
|
|
170
|
-
console.log(_chalk.default.yellow(`${num++}: [${(0, _nodePath.basename)(numberedOutFilePath)}]`));
|
|
268
|
+
} = await this.getFileDataAsync(s);
|
|
269
|
+
if (content.toString() === "") continue;
|
|
270
|
+
if (type === "md") {
|
|
271
|
+
_logger.default.success(`${num++}: [${(0, _nodePath.basename)(outFilePath)}]`);
|
|
171
272
|
}
|
|
172
|
-
|
|
173
|
-
(0, _writeFileSafe.writeFileSync)(numberedOutFilePath ?? outFilePath, content, {
|
|
273
|
+
(0, _writeFileSafe.writeFileSync)(outFilePath, content, {
|
|
174
274
|
overwrite: true
|
|
175
275
|
});
|
|
176
|
-
}
|
|
276
|
+
}
|
|
177
277
|
return this.outDir;
|
|
178
278
|
}
|
|
279
|
+
/**
|
|
280
|
+
* Directly generate a single merged Markdown file
|
|
281
|
+
*/
|
|
282
|
+
async generateMergedFile() {
|
|
283
|
+
let num = 1,
|
|
284
|
+
mergedContent = "";
|
|
285
|
+
for (const s of this.structure) {
|
|
286
|
+
let {
|
|
287
|
+
id,
|
|
288
|
+
filepath,
|
|
289
|
+
outFilePath,
|
|
290
|
+
content
|
|
291
|
+
} = await this.getFileDataAsync(s);
|
|
292
|
+
const {
|
|
293
|
+
isHTML
|
|
294
|
+
} = (0, _helper.checkFileType)(filepath);
|
|
295
|
+
if (isHTML) {
|
|
296
|
+
content = `<a role="toc_link" id="${id}"></a>
|
|
297
|
+
` + content;
|
|
298
|
+
}
|
|
299
|
+
if ((0, _nodePath.extname)(outFilePath) === ".md" && content.toString() !== "") {
|
|
300
|
+
num++;
|
|
301
|
+
mergedContent += content.toString() + "\n\n---\n\n";
|
|
302
|
+
_logger.default.success(`${num}: [${(0, _nodePath.basename)(outFilePath)}]`);
|
|
303
|
+
} else if ((0, _nodePath.extname)(outFilePath) !== ".md") {
|
|
304
|
+
(0, _writeFileSafe.writeFileSync)(outFilePath, content, {
|
|
305
|
+
overwrite: true
|
|
306
|
+
});
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
const outputPath = (0, _nodePath.join)(this.outDir, this.mergedFilename || `${(0, _nodePath.basename)(this.outDir)}-merged.md`);
|
|
310
|
+
(0, _writeFileSafe.writeFileSync)(outputPath, mergedContent, {
|
|
311
|
+
overwrite: true
|
|
312
|
+
});
|
|
313
|
+
return outputPath;
|
|
314
|
+
}
|
|
179
315
|
}
|
|
180
|
-
|
|
316
|
+
exports.Converter = Converter;
|