epub2md 1.5.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -26
- package/lib/bin/cli.cjs +89 -39
- package/lib/bin/merge.cjs +1 -1
- package/lib/bin/utils.cjs +1 -1
- package/lib/convert/convert.cjs +205 -0
- package/lib/convert/convert.d.ts +37 -0
- package/lib/convert/convert.mjs +159 -0
- package/lib/convert/download-images.cjs +28 -0
- package/lib/convert/download-images.d.ts +4 -0
- package/lib/convert/download-images.mjs +20 -0
- package/lib/{bin → convert}/helper.cjs +5 -24
- package/lib/convert/helper.d.ts +11 -0
- package/lib/convert/helper.mjs +36 -0
- package/lib/epub/options.cjs +25 -0
- package/lib/epub/options.d.ts +16 -0
- package/lib/epub/options.mjs +8 -0
- package/lib/epub/parseEpub.cjs +96 -0
- package/lib/epub/parseEpub.d.ts +42 -0
- package/lib/epub/parseEpub.mjs +80 -0
- package/lib/epub/zip.cjs +27 -0
- package/lib/epub/zip.d.ts +8 -0
- package/lib/epub/zip.mjs +21 -0
- package/lib/index.cjs +1 -1
- package/lib/index.d.ts +1 -1
- package/lib/index.mjs +1 -1
- package/lib/mobi.d.ts +1 -1
- package/lib/parseHTML.cjs +49 -3
- package/lib/parseHTML.mjs +41 -1
- package/lib/parseSection.cjs +9 -9
- package/lib/parseSection.d.ts +8 -7
- package/lib/parseSection.mjs +8 -8
- package/lib/types.d.ts +0 -7
- package/lib/utils.cjs +19 -88
- package/lib/utils.d.ts +11 -24
- package/lib/utils.mjs +14 -76
- package/lib/xml/index.cjs +59 -0
- package/lib/xml/index.d.ts +20 -0
- package/lib/xml/index.mjs +38 -0
- package/lib/xml/meta-container.cjs +25 -0
- package/lib/xml/meta-container.d.ts +4 -0
- package/lib/xml/meta-container.mjs +15 -0
- package/lib/xml/opf.cjs +89 -0
- package/lib/xml/opf.d.ts +41 -0
- package/lib/xml/opf.mjs +78 -0
- package/lib/xml/parseXml.cjs +13 -0
- package/lib/xml/parseXml.d.ts +1 -0
- package/lib/xml/parseXml.mjs +6 -0
- package/lib/xml/toc.cjs +97 -0
- package/lib/xml/toc.d.ts +17 -0
- package/lib/xml/toc.mjs +84 -0
- package/package.json +13 -10
- package/lib/bin/convert.cjs +0 -316
- package/lib/parseEpub.cjs +0 -304
- package/lib/parseEpub.d.ts +0 -111
- package/lib/parseEpub.mjs +0 -273
package/README.md
CHANGED
|
@@ -13,7 +13,7 @@ Even though the package is primarily intended for CLI is to convert EPUB to Mark
|
|
|
13
13
|
- Retain the original online image links.
|
|
14
14
|
- Download and localize online images (save remote images locally).
|
|
15
15
|
- **View Information**: Easily view the basic information, structure, and chapters of the EPUB.
|
|
16
|
-
- **Extraction Function**:
|
|
16
|
+
- **Extraction Function**: Just extract the useful contents of the EPUB file.
|
|
17
17
|
|
|
18
18
|
## Global Install for CLI
|
|
19
19
|
|
|
@@ -41,10 +41,17 @@ $ npm install @uxiew/epub2md
|
|
|
41
41
|
# Show usage help
|
|
42
42
|
$ epub2md -h
|
|
43
43
|
|
|
44
|
-
#
|
|
45
|
-
|
|
46
|
-
#
|
|
47
|
-
$ epub2md
|
|
44
|
+
# ========== Basic Conversion ==========
|
|
45
|
+
|
|
46
|
+
# Convert directly to markdown format (default command)
|
|
47
|
+
$ epub2md book.epub
|
|
48
|
+
$ epub2md /path/to/book.epub
|
|
49
|
+
|
|
50
|
+
# Convert with autocorrect (spaces and punctuation between Chinese and English)
|
|
51
|
+
$ epub2md -a book.epub
|
|
52
|
+
$ epub2md --autocorrect book.epub
|
|
53
|
+
|
|
54
|
+
# ========== Batch Conversion (Wildcard Support) ==========
|
|
48
55
|
|
|
49
56
|
# Convert multiple files using wildcards
|
|
50
57
|
$ epub2md "fixtures/*.epub"
|
|
@@ -56,33 +63,58 @@ $ epub2md "fixtures/*.epub" --merge
|
|
|
56
63
|
|
|
57
64
|
# Note: Quotes are required around patterns with wildcards to prevent shell expansion
|
|
58
65
|
|
|
59
|
-
#
|
|
60
|
-
$ epub2md -M ../../fixtures/zhihu.epub
|
|
66
|
+
# ========== Merge Options ==========
|
|
61
67
|
|
|
62
|
-
# Convert and directly generate a single merged markdown file
|
|
63
|
-
$ epub2md -m
|
|
64
|
-
|
|
65
|
-
$ epub2md ../../fixtures/zhihu.epub --merge
|
|
68
|
+
# Convert and directly generate a single merged markdown file
|
|
69
|
+
$ epub2md -m book.epub
|
|
70
|
+
$ epub2md --merge book.epub
|
|
66
71
|
|
|
67
|
-
#
|
|
68
|
-
$ epub2md
|
|
72
|
+
# Specify custom output filename for merged file
|
|
73
|
+
$ epub2md --merge=custom-name.md book.epub
|
|
69
74
|
|
|
70
|
-
#
|
|
71
|
-
|
|
72
|
-
# Download and localize online images (download remote images to local) (need node > 18.0)
|
|
73
|
-
$ epub2md ../../fixtures/zhihu.epub --localize
|
|
75
|
+
# Merge existing markdown files in a directory (without conversion)
|
|
76
|
+
$ epub2md --merge ./path/to/markdown/dir
|
|
74
77
|
|
|
75
|
-
#
|
|
76
|
-
$ epub2md ../../fixtures/zhihu.epub --merge --localize
|
|
78
|
+
# ========== Image Processing ==========
|
|
77
79
|
|
|
78
|
-
#
|
|
79
|
-
|
|
80
|
+
# By default, remote images are NOT downloaded (only a warning is shown)
|
|
81
|
+
# Images embedded in EPUB are always extracted
|
|
82
|
+
|
|
83
|
+
# Download and localize remote images (requires Node.js >= 18.0)
|
|
84
|
+
$ epub2md -l book.epub
|
|
85
|
+
$ epub2md --localize book.epub
|
|
86
|
+
|
|
87
|
+
# Combine: convert + merge + download remote images
|
|
88
|
+
$ epub2md -m -l book.epub
|
|
89
|
+
$ epub2md --merge --localize book.epub
|
|
90
|
+
|
|
91
|
+
# ========== Information Display ==========
|
|
92
|
+
|
|
93
|
+
# Show basic information (title, author, language)
|
|
94
|
+
$ epub2md -i book.epub
|
|
95
|
+
$ epub2md --info book.epub
|
|
96
|
+
|
|
97
|
+
# Show structure/table of contents
|
|
98
|
+
$ epub2md -s book.epub
|
|
99
|
+
$ epub2md --structure book.epub
|
|
100
|
+
|
|
101
|
+
# Show all sections/chapters
|
|
102
|
+
$ epub2md -S book.epub
|
|
103
|
+
$ epub2md --sections book.epub
|
|
80
104
|
|
|
81
|
-
#
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
$ epub2md -
|
|
85
|
-
$ epub2md
|
|
105
|
+
# ========== Extraction ==========
|
|
106
|
+
|
|
107
|
+
# Extract/unzip EPUB contents
|
|
108
|
+
$ epub2md -u book.epub
|
|
109
|
+
$ epub2md --unzip book.epub
|
|
110
|
+
|
|
111
|
+
# ========== Command Priority ==========
|
|
112
|
+
|
|
113
|
+
# Info commands have highest priority
|
|
114
|
+
$ epub2md book.epub --info # Shows info (doesn't convert)
|
|
115
|
+
$ epub2md --convert --info book.epub # Shows info (info takes precedence)
|
|
116
|
+
|
|
117
|
+
# Priority order: info/structure/sections > unzip > merge(dir) > convert/autocorrect
|
|
86
118
|
```
|
|
87
119
|
|
|
88
120
|
## Usage
|
|
@@ -126,6 +158,22 @@ The return value is an object which contains`structure`, `sections`, `info`(priv
|
|
|
126
158
|
|
|
127
159
|
- `Section.prototype.toHtmlObjects`: converts the section to HTML objects. A note about `src` and `href`: in the raw HTML they stay untouched, but the `toHtmlObjects` method resolves `src` to a base64 string and alters `href` so that it makes sense in the parsed EPUB. The parsed `href` looks like `#{sectionId},{hash}`.
|
|
128
160
|
|
|
161
|
+
## Testing
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
# Run all tests
|
|
165
|
+
$ npm test
|
|
166
|
+
|
|
167
|
+
# Run specific test file
|
|
168
|
+
$ npm test -- test/bin.spec.ts
|
|
169
|
+
|
|
170
|
+
# Run tests with verbose output
|
|
171
|
+
$ npm test -- --reporter=verbose
|
|
172
|
+
|
|
173
|
+
# Run integration tests only
|
|
174
|
+
$ npm test -- test/integration.spec.ts
|
|
175
|
+
```
|
|
176
|
+
|
|
129
177
|
## How to contribute
|
|
130
178
|
|
|
131
179
|
- Raise an issue in the issue section.
|
package/lib/bin/cli.cjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
1
|
+
// #!/usr/bin/env node
|
|
2
2
|
"use strict";
|
|
3
3
|
|
|
4
4
|
Object.defineProperty(exports, "__esModule", {
|
|
@@ -8,8 +8,10 @@ exports.Commands = void 0;
|
|
|
8
8
|
var _args = _interopRequireDefault(require("args"));
|
|
9
9
|
var _nodeProcess = _interopRequireDefault(require("node:process"));
|
|
10
10
|
var _nodeFs = _interopRequireDefault(require("node:fs"));
|
|
11
|
-
var
|
|
12
|
-
var
|
|
11
|
+
var _nodePath = require("node:path");
|
|
12
|
+
var _writeFileSafe = require("write-file-safe");
|
|
13
|
+
var _parseEpub = _interopRequireDefault(require("../epub/parseEpub.cjs"));
|
|
14
|
+
var _convert = require("../convert/convert.cjs");
|
|
13
15
|
var _merge = require("./merge.cjs");
|
|
14
16
|
var _logger = _interopRequireDefault(require("../logger.cjs"));
|
|
15
17
|
var _utils = require("./utils.cjs");
|
|
@@ -25,7 +27,7 @@ const Commands = exports.Commands = {
|
|
|
25
27
|
merge: "merge",
|
|
26
28
|
localize: "localize"
|
|
27
29
|
};
|
|
28
|
-
const commands = [[Commands.convert, "convert the
|
|
30
|
+
// Option table registered with `args` below: each entry is [flag name, help text].
// No third element is supplied, so the `cmd[2]` passed to `args.option` is undefined.
const commands = [[Commands.convert, "convert the EPUB file to markdown format"], [Commands.autocorrect, "convert the EPUB file to markdown format with autocorrect"], [Commands.unzip, "unzip EPUB file"], [Commands.info, "get EPUB file basic info"], [Commands.structure, "get EPUB file structure"], [Commands.sections, "get EPUB file sections"], [Commands.merge, "Merge all Markdown files into one; specify output filename with --merge=filename.md"], [Commands.localize, 'Download all remote images to the local "images" folder']];
|
|
29
31
|
const DEFAULT_COMMAND = Commands.convert;
|
|
30
32
|
commands.forEach(cmd => _args.default.option(cmd[0], cmd[1], cmd[2]));
|
|
31
33
|
const flags = _args.default.parse(_nodeProcess.default.argv, {
|
|
@@ -52,19 +54,20 @@ for (const cmd of [Commands.info, Commands.structure, Commands.sections]) {
|
|
|
52
54
|
}
|
|
53
55
|
if (!hasRun && flags[Commands.unzip]) {
|
|
54
56
|
const epubPath = typeof flags[Commands.unzip] === "string" ? flags[Commands.unzip] : unprocessedArgs.length > 0 ? unprocessedArgs[0] : null;
|
|
57
|
+
const options = {
|
|
58
|
+
cmd: Commands.unzip,
|
|
59
|
+
mergedFilename: void 0,
|
|
60
|
+
shouldMerge: false,
|
|
61
|
+
localize: false
|
|
62
|
+
};
|
|
55
63
|
if (epubPath) {
|
|
56
64
|
_logger.default.info("unzipping...");
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
// Use cmd to indicate unzip only
|
|
60
|
-
mergedFilename: void 0,
|
|
61
|
-
shouldMerge: false,
|
|
62
|
-
localize: false
|
|
63
|
-
}).then(outDir => {
|
|
65
|
+
try {
|
|
66
|
+
const outDir = convert(epubPath, options);
|
|
64
67
|
_logger.default.info(`Unzip successful! output: ${outDir}`);
|
|
65
|
-
}
|
|
68
|
+
} catch (error) {
|
|
66
69
|
_logger.default.error(error);
|
|
67
|
-
}
|
|
70
|
+
}
|
|
68
71
|
hasRun = true;
|
|
69
72
|
} else {
|
|
70
73
|
_logger.default.error("No valid epub file path provided for unzip command");
|
|
@@ -104,14 +107,14 @@ if (!hasRun) {
|
|
|
104
107
|
}
|
|
105
108
|
async function run(cmd) {
|
|
106
109
|
if (cmd === Commands.convert || cmd === Commands.autocorrect) {
|
|
107
|
-
const
|
|
108
|
-
if (!
|
|
110
|
+
const pathArg = typeof flags[cmd] === "string" ? flags[cmd] : null;
|
|
111
|
+
if (!pathArg) {
|
|
109
112
|
_logger.default.error("No valid epub file path provided");
|
|
110
113
|
return;
|
|
111
114
|
}
|
|
112
|
-
const
|
|
113
|
-
if (
|
|
114
|
-
_logger.default.error(`No files found matching pattern: ${
|
|
115
|
+
const epubPaths = await (0, _utils.expandWildcard)(pathArg);
|
|
116
|
+
if (epubPaths.length === 0) {
|
|
117
|
+
_logger.default.error(`No files found matching pattern: ${pathArg}`);
|
|
115
118
|
return;
|
|
116
119
|
}
|
|
117
120
|
const shouldMerge = flags.merge === true || typeof flags.merge === "string" && flags.merge !== "";
|
|
@@ -119,47 +122,94 @@ async function run(cmd) {
|
|
|
119
122
|
if (typeof flags.merge === "string" && flags.merge !== "") {
|
|
120
123
|
mergedFilename = flags.merge;
|
|
121
124
|
}
|
|
122
|
-
if (mergedFilename &&
|
|
125
|
+
if (mergedFilename && epubPaths.length > 1) {
|
|
123
126
|
_logger.default.warn(`Warning: Using custom merge filename "${mergedFilename}" with multiple files. Each file will overwrite the previous merged output.`);
|
|
124
127
|
_logger.default.warn(`Consider using --merge (without filename) to generate separate merged files for each epub.`);
|
|
125
128
|
}
|
|
126
129
|
const localize = flags.localize === true;
|
|
127
|
-
if (
|
|
128
|
-
_logger.default.info(`Found ${
|
|
130
|
+
if (epubPaths.length > 1) {
|
|
131
|
+
_logger.default.info(`Found ${epubPaths.length} files matching pattern "${pathArg}"`);
|
|
129
132
|
}
|
|
130
|
-
for (let i = 0; i <
|
|
131
|
-
const
|
|
132
|
-
_logger.default.info(`[${i + 1}/${
|
|
133
|
+
for (let i = 0; i < epubPaths.length; i++) {
|
|
134
|
+
const epubPath = epubPaths[i];
|
|
135
|
+
_logger.default.info(`[${i + 1}/${epubPaths.length}] Converting ${epubPath}${cmd === Commands.autocorrect ? " with autocorrect" : ""}${flags[Commands.merge] ? " and merging" : ""}...`);
|
|
136
|
+
const options = {
|
|
137
|
+
cmd,
|
|
138
|
+
mergedFilename,
|
|
139
|
+
shouldMerge,
|
|
140
|
+
localize
|
|
141
|
+
};
|
|
133
142
|
try {
|
|
134
|
-
const outDir =
|
|
135
|
-
cmd,
|
|
136
|
-
mergedFilename,
|
|
137
|
-
shouldMerge,
|
|
138
|
-
localize
|
|
139
|
-
});
|
|
143
|
+
const outDir = convert(epubPath, options);
|
|
140
144
|
if (shouldMerge) {
|
|
141
|
-
_logger.default.info(`[${i + 1}/${
|
|
145
|
+
_logger.default.info(`[${i + 1}/${epubPaths.length}] Merging successful! Output file: ${outDir}`);
|
|
142
146
|
} else {
|
|
143
|
-
_logger.default.info(`[${i + 1}/${
|
|
147
|
+
_logger.default.info(`[${i + 1}/${epubPaths.length}] Conversion successful! output: ${outDir}`);
|
|
144
148
|
}
|
|
145
149
|
} catch (error) {
|
|
146
|
-
_logger.default.error(`[${i + 1}/${
|
|
150
|
+
_logger.default.error(`[${i + 1}/${epubPaths.length}] Failed to convert ${epubPath}:`, error);
|
|
147
151
|
}
|
|
148
152
|
}
|
|
149
|
-
if (
|
|
150
|
-
_logger.default.success(`Completed processing ${
|
|
153
|
+
if (epubPaths.length > 1) {
|
|
154
|
+
_logger.default.success(`Completed processing ${epubPaths.length} files`);
|
|
151
155
|
}
|
|
152
156
|
return;
|
|
153
157
|
}
|
|
154
158
|
const cmdPath = flags[cmd];
|
|
155
159
|
if (typeof cmdPath === "string") {
|
|
156
|
-
|
|
160
|
+
try {
|
|
161
|
+
const epub = (0, _parseEpub.default)(cmdPath);
|
|
162
|
+
const data = {
|
|
163
|
+
info: epub.structure.opf.metadata,
|
|
164
|
+
structure: epub.structure.toc?.tree,
|
|
165
|
+
sections: epub.sections
|
|
166
|
+
}[cmd];
|
|
157
167
|
_logger.default.success(`This book ${cmd}:`);
|
|
158
|
-
_logger.default.json(
|
|
159
|
-
}
|
|
168
|
+
_logger.default.json(data);
|
|
169
|
+
} catch (error) {
|
|
160
170
|
_logger.default.error(error);
|
|
161
|
-
}
|
|
171
|
+
}
|
|
162
172
|
} else {
|
|
163
173
|
_logger.default.error(`Path must be a string, got ${typeof cmdPath}`);
|
|
164
174
|
}
|
|
165
175
|
}
|
|
176
|
+
/**
 * Builds a Converter for one EPUB and persists what it produces.
 *
 * @param epubPath - Path of the EPUB file to process.
 * @param options - Run options forwarded unchanged to the Converter.
 * @returns The merged file path reported by handleMergedFile when
 *   `options.shouldMerge` is truthy; otherwise the converter's output directory.
 */
function convert(epubPath, options) {
  const job = new _convert.Converter(epubPath, options);
  const wantsMerge = Boolean(options?.shouldMerge);
  if (!wantsMerge) {
    // Plain conversion: write every produced file, then report the directory.
    handleFiles(job.files);
    return job.outDir;
  }
  return handleMergedFile(job.mergeProgress);
}
|
|
183
|
+
/**
 * Writes every produced file to its output path, overwriting existing files,
 * and logs a numbered success line for each markdown file.
 *
 * @param files - Iterable of `{ type, outputPath, content }` records.
 */
function handleFiles(files) {
  let written = 0;
  for (const entry of files) {
    const { type, outputPath, content } = entry;
    if (type === "md") {
      written += 1;
      const label = (0, _nodePath.basename)(outputPath);
      _logger.default.success(`${written}: [${label}]`);
    }
    (0, _writeFileSafe.writeFileSync)(outputPath, content, { overwrite: true });
  }
}
|
|
196
|
+
function handleMergedFile(mergeFileProcess) {
|
|
197
|
+
let markdownFileCount = 0;
|
|
198
|
+
for (const {
|
|
199
|
+
type,
|
|
200
|
+
outputPath,
|
|
201
|
+
content
|
|
202
|
+
} of mergeFileProcess) {
|
|
203
|
+
if (type === "markdown file processed") _logger.default.success(`${++markdownFileCount}: [${outputPath}]`);
|
|
204
|
+
if (type === "file processed") (0, _writeFileSafe.writeFileSync)(outputPath, content, {
|
|
205
|
+
overwrite: true
|
|
206
|
+
});
|
|
207
|
+
if (type === "markdown merged") {
|
|
208
|
+
(0, _writeFileSafe.writeFileSync)(outputPath, content, {
|
|
209
|
+
overwrite: true
|
|
210
|
+
});
|
|
211
|
+
return outputPath;
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
throw "No merged markdown file created";
|
|
215
|
+
}
|
package/lib/bin/merge.cjs
CHANGED
package/lib/bin/utils.cjs
CHANGED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.Converter = void 0;
|
|
7
|
+
var _nodePath = require("node:path");
|
|
8
|
+
var _logger = _interopRequireDefault(require("../logger.cjs"));
|
|
9
|
+
var iteratorHelpersPolyfill = _interopRequireWildcard(require("iterator-helpers-polyfill"));
|
|
10
|
+
var _parseEpub = _interopRequireDefault(require("../epub/parseEpub.cjs"));
|
|
11
|
+
var _helper = require("./helper.cjs");
|
|
12
|
+
var _utils = require("../utils.cjs");
|
|
13
|
+
var _parseLink = _interopRequireDefault(require("../parseLink.cjs"));
|
|
14
|
+
var _downloadImages = require("./download-images.cjs");
|
|
15
|
+
// Transpiler-emitted interop helper (presumably Babel). Returns null when WeakMap is unavailable; otherwise, on first call, creates two WeakMap caches and replaces itself with a function that returns one of them depending on the truthiness of its argument.
function _getRequireWildcardCache(e) { if ("function" != typeof WeakMap) return null; var r = new WeakMap(), t = new WeakMap(); return (_getRequireWildcardCache = function (e) { return e ? t : r; })(e); }
|
|
16
|
+
// Transpiler-emitted interop helper (presumably Babel). Returns `e` unchanged when it is flagged `__esModule` (and `r` is falsy), wraps non-object/non-function values as `{ default: e }`, and otherwise builds (and caches) a null-prototype namespace object that copies e's own properties except "default" (preserving accessors) and sets `default` to `e`.
function _interopRequireWildcard(e, r) { if (!r && e && e.__esModule) return e; if (null === e || "object" != typeof e && "function" != typeof e) return { default: e }; var t = _getRequireWildcardCache(r); if (t && t.has(e)) return t.get(e); var n = { __proto__: null }, a = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var u in e) if ("default" !== u && {}.hasOwnProperty.call(e, u)) { var i = a ? Object.getOwnPropertyDescriptor(e, u) : null; i && (i.get || i.set) ? Object.defineProperty(n, u, i) : n[u] = e[u]; } return n.default = e, t && t.set(e, n), n; }
|
|
17
|
+
// Transpiler-emitted interop helper: returns `e` as-is when it is truthy and flagged `__esModule`, otherwise wraps it as `{ default: e }`.
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
|
|
18
|
+
// Runs at module load, before the Converter class is defined. The Converter constructor chains `.map()` / `.filter()` directly on the iterator returned by `Array.prototype.values()`; presumably this polyfill is what provides those methods on older runtimes — confirm against the polyfill's docs.
iteratorHelpersPolyfill.installIntoGlobal();
|
|
19
|
+
// Baseline run options; the Converter constructor spreads the caller's options over these, so any field the caller omits keeps the value below.
const defaultOptions = {
|
|
20
|
+
cmd: "convert",
|
|
21
|
+
shouldMerge: false,
|
|
22
|
+
localize: false
|
|
23
|
+
};
|
|
24
|
+
// Name of the sub-directory (under the output directory) used for image files and as the prefix of rewritten image links.
const IMAGE_DIR = "images";
|
|
25
|
+
/**
 * Turns one EPUB into a stream of output-file records (markdown sections,
 * images and other manifest assets) and, optionally, a single merged
 * markdown document.
 *
 * The converted files themselves are not written by this class: consumers
 * iterate `files` or `mergeProgress` and persist the yielded records.
 */
class Converter {
  // Parsed EPUB object returned by parseEpub.
  epub;
  // Output directory: the EPUB path with its trailing ".epub" extension removed.
  outDir;
  // Iterator of { id, type, content, outputPath } records built with iterator
  // helpers; records whose content is empty are filtered out. Being an
  // iterator, it can only be consumed once.
  files;
  // Generator created by mergeFiles(); only assigned when options.shouldMerge is truthy.
  mergeProgress;
  // Effective options: defaultOptions overridden by the constructor argument.
  options;

  /**
   * Constructor
   * @param epubPath - The path to the EPUB file
   * @param options - Run options; omitted fields fall back to defaultOptions
   */
  constructor(epubPath, options) {
    this.options = {
      ...defaultOptions,
      ...options
    };
    this.epub = (0, _parseEpub.default)(epubPath, {
      convertToMarkdown: _helper.convertHTML
    });
    // Strip only a trailing extension (case-insensitively). A plain
    // `.replace(".epub", "")` would remove the first ".epub" found anywhere in
    // the path, e.g. inside a directory name.
    this.outDir = epubPath.replace(/\.epub$/i, "");
    const structures = processManifest(this.epub, this.options.cmd !== "unzip", this.outDir);
    this.files = structures.values().map(x => this.getFileData(x, structures)).filter(x => x.content.length > 0);
    if (this.options.shouldMerge) this.mergeProgress = this.mergeFiles();
  }

  /**
   * Produces the output record for one manifest entry.
   *
   * Markdown entries are rendered from their section, have their links
   * rewritten, optionally trigger remote-image download and autocorrect.
   * Every other entry is returned as the raw file buffer.
   *
   * @param structure - One entry produced by processManifest.
   * @param structures - All entries, used to resolve cross-file link targets.
   * @returns `{ id, type, content, outputPath }`.
   */
  getFileData(structure, structures) {
    let {
      id,
      type,
      filepath,
      outpath
    } = structure;
    let content = "";
    const needAutoCorrect = this.options.cmd === "autocorrect";
    if (type === "md") {
      const section = this.epub.getSection(id);
      if (section) content = section.toMarkdown();
      const {
        outPath
      } = clearOutpath(structure, this.epub.structure.toc);
      outpath = outPath;
      // Remote (http...) image links found in this section.
      const resLinks = [];
      // Merged output links to in-document anchors; separate files link to sibling files.
      const linkStartSep = this.options.shouldMerge ? "#" : "./";
      content = (0, _helper.fixLinkPath)(content, (link, isText) => {
        if (isText) {
          const {
            hash = "",
            url
          } = (0, _parseLink.default)(link, true);
          if (link.startsWith("#")) {
            // Same-document anchor. The conditional must be parenthesised:
            // `a + b ? c : d` parses as `(a + b) ? c : d`, which always took
            // the first branch. The non-merged target is the file actually
            // written (order prefix included), matching cross-file links below.
            const target = this.options.shouldMerge ? id : (0, _nodePath.basename)(outPath);
            return linkStartSep + target + link;
          }
          const sectionId = this.epub.structure.opf.manifest.getItemId(url);
          const internalNavName = this.epub.structure.toc?.getBySectionId(sectionId)?.name || link;
          let validPath = (0, _helper.sanitizeFileName)((0, _nodePath.extname)(internalNavName) ? internalNavName : internalNavName + ".md");
          const file = structures.find(file2 => file2.id === sectionId);
          if (file) validPath = (0, _nodePath.basename)(clearOutpath(file, this.epub.structure.toc).outPath);
          const toId = this.epub.structure.opf.manifest.getItemId((0, _nodePath.join)((0, _nodePath.dirname)(filepath), url));
          return this.options.shouldMerge ? linkStartSep + toId + (hash ? "#" + hash : "") : linkStartSep + validPath + `${hash ? "#" + hash : ""}`;
        } else {
          if (link.startsWith("http")) {
            resLinks.push(link);
          }
          // Image links always point into the local image directory.
          return "./" + IMAGE_DIR + "/" + (0, _nodePath.basename)(link);
        }
      });
      if (this.options.localize) {
        try {
          // NOTE(review): if downloadRemoteImages returns a promise, this
          // try/catch will not observe its rejection — confirm.
          (0, _downloadImages.downloadRemoteImages)(resLinks, (0, _nodePath.join)(this.outDir, IMAGE_DIR));
        } catch (error) {
          _logger.default.error("Failed to localize the image!", error);
        }
      } else if (resLinks.length > 0) {
        _logger.default.warn("Remote images are detected, use --localize to download the images");
      }
      // Loaded lazily so the module is only required when autocorrect is requested.
      content = needAutoCorrect ? require("autocorrect-node").format(content) : content;
    } else {
      content = this.epub.getFile(filepath).asNodeBuffer();
    }
    return {
      id,
      type,
      content,
      outputPath: outpath
    };
  }

  /**
   * Generator that walks `files` (consuming it) and reports merge progress.
   *
   * Yields, in order of iteration:
   *  - `{ type: "markdown file processed", outputPath }` for each markdown
   *    record (outputPath is the file's basename); its content is collected
   *    behind an `<a role="toc_link" id="...">` anchor.
   *  - `{ type: "file processed", outputPath, content }` for every other record.
   *  - finally `{ type: "markdown merged", outputPath, content }` with all
   *    chapters joined by a horizontal rule. The file name is
   *    `options.mergedFilename` or `<outDir basename>-merged.md`.
   */
  *mergeFiles() {
    const chapters = [];
    for (const {
      type,
      id,
      outputPath: outputPath2,
      content
    } of this.files) {
      if (type === "md") {
        chapters.push(`<a role="toc_link" id="${id}"></a>\n` + content);
        yield {
          type: "markdown file processed",
          outputPath: (0, _nodePath.basename)(outputPath2)
        };
      } else {
        yield {
          type: "file processed",
          outputPath: outputPath2,
          content
        };
      }
    }
    const outputPath = (0, _nodePath.join)(this.outDir, this.options.mergedFilename || `${(0, _nodePath.basename)(this.outDir)}-merged.md`);
    yield {
      type: "markdown merged",
      outputPath,
      content: chapters.join("\n\n---\n\n")
    };
  }
}
|
|
140
|
+
exports.Converter = Converter;
|
|
141
|
+
/**
 * Builds the list of output entries from the EPUB's OPF manifest.
 *
 * Items whose href ends with "ncx" and the item with id "titlepage" are
 * always skipped. Markdown entries receive a zero-padded order prefix;
 * other entries get an empty prefix.
 *
 * @param epub - Parsed EPUB object (reads `sections` and `structure.opf.manifest`).
 * @param unzip - When truthy, items that are neither HTML nor image
 *   (type "") are skipped. Note: the Converter constructor passes
 *   `cmd !== "unzip"` here, so the flag is true for conversions, not for
 *   the unzip command.
 * @param outDir - Output directory used to compute each entry's output path.
 * @returns Array of `{ orderPrefix, id, type, outpath, filepath }`.
 */
function processManifest(epub, unzip, outDir) {
  const entries = [];
  const prefixes = new OrderPrefix({
    maximum: epub.sections.length
  });
  for (const item of epub.structure.opf.manifest) {
    const { href: filepath, id } = item;
    const excluded = filepath.endsWith("ncx") || id === "titlepage";
    if (excluded) continue;
    const info = parseFileInfo(filepath, outDir);
    const type = info.type;
    const outpath = info.path;
    if (unzip && type === "") continue;
    // Only markdown files are numbered.
    const orderPrefix = type === "md" ? prefixes.next() : "";
    entries.push({ orderPrefix, id, type, outpath, filepath });
  }
  return entries;
}
|
|
167
|
+
/**
 * Generates sequential, zero-padded numeric prefixes ("01", "02", ...).
 * The pad width is the number of decimal digits in `maximum`.
 */
class OrderPrefix {
  // Number of prefixes handed out so far.
  count = 0;
  // Target width passed to padStart.
  length;

  /**
   * @param param0.maximum - Largest number expected; determines the pad width.
   */
  constructor({ maximum }) {
    const digits = Math.floor(Math.log10(maximum)) + 1;
    this.length = digits;
  }

  /** Advances the counter and returns it as a zero-padded string. */
  next() {
    this.count += 1;
    return String(this.count).padStart(this.length, "0");
  }
}
|
|
179
|
+
/**
 * Derives the final markdown file name and path for a manifest entry.
 *
 * The name comes from the table-of-contents entry for the section when one
 * exists (`<nav name>.md`); otherwise the basename of the entry's current
 * outpath is used. The name is sanitized and prefixed with `<orderPrefix>-`.
 *
 * @param param0 - Manifest entry (`id`, `outpath`, `orderPrefix`).
 * @param toc - Table of contents object; may be undefined.
 * @returns `{ fileName, outDir, outPath }`.
 */
function clearOutpath({ id, outpath, orderPrefix }, toc) {
  const tocEntry = toc?.getBySectionId(id);
  let rawName;
  if (tocEntry) {
    rawName = tocEntry.name + ".md";
  } else {
    rawName = (0, _nodePath.basename)(outpath);
  }
  const fileName = (0, _helper.sanitizeFileName)(rawName);
  const outDir = (0, _nodePath.dirname)(outpath);
  const outPath = (0, _nodePath.join)(outDir, orderPrefix + "-" + fileName);
  return { fileName, outDir, outPath };
}
|
|
193
|
+
/**
 * Classifies a manifest file and computes where it should be written.
 *
 * - HTML files become markdown: type "md", written directly under `outDir`
 *   as `<stem>.md` (unless the file is also classified as an image, in which
 *   case the image directory wins for the location).
 * - Images: type "img", written under the image directory.
 * - Anything else: type "", written under "static".
 *
 * @param filepath - Path of the file inside the EPUB.
 * @param outDir - Output directory.
 * @returns `{ type, path }`.
 */
function parseFileInfo(filepath, outDir) {
  const { isImage: image, isHTML: html } = (0, _helper.checkFileType)(filepath);
  const name = (0, _nodePath.basename)(filepath);

  let subDir;
  if (image) subDir = IMAGE_DIR;
  else if (html) subDir = "";
  else subDir = "static";

  // html => md
  const outName = html ? _utils.Path.fileStem(name) + ".md" : name;
  const path = (0, _nodePath.join)(outDir, subDir, outName);

  let type = "";
  if (html) type = "md";
  else if (image) type = "img";

  return { type, path };
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { Epub } from '../epub/parseEpub';
|
|
2
|
+
import { type CommandType } from '../bin/cli';
|
|
3
|
+
/**
 * Options accepted by the Converter.
 * - cmd: the CLI command being executed.
 * - shouldMerge: whether a single merged markdown document is produced.
 * - localize: whether remote images are downloaded.
 * - mergedFilename: optional file name for the merged document.
 */
export interface RunOptions {
|
|
4
|
+
cmd: CommandType;
|
|
5
|
+
shouldMerge: boolean;
|
|
6
|
+
localize: boolean;
|
|
7
|
+
mergedFilename?: string;
|
|
8
|
+
}
|
|
9
|
+
export declare class Converter {
|
|
10
|
+
epub: Epub;
|
|
11
|
+
outDir: string;
|
|
12
|
+
files: FileData;
|
|
13
|
+
mergeProgress?: MergeProgress;
|
|
14
|
+
options: RunOptions;
|
|
15
|
+
/**
|
|
16
|
+
* Constructor
|
|
17
|
+
* @param epubPath - The path to the EPUB file
|
|
18
|
+
* @param options - Partial run options; omitted fields fall back to the defaults
|
|
19
|
+
*/
|
|
20
|
+
constructor(epubPath: string, options?: Partial<RunOptions>);
|
|
21
|
+
private getFileData;
|
|
22
|
+
mergeFiles(): Generator<{
|
|
23
|
+
readonly type: "markdown file processed";
|
|
24
|
+
readonly outputPath: string;
|
|
25
|
+
readonly content?: undefined;
|
|
26
|
+
} | {
|
|
27
|
+
readonly type: "file processed";
|
|
28
|
+
readonly outputPath: any;
|
|
29
|
+
readonly content: any;
|
|
30
|
+
} | {
|
|
31
|
+
readonly type: "markdown merged";
|
|
32
|
+
readonly outputPath: string;
|
|
33
|
+
readonly content: string;
|
|
34
|
+
}, void, unknown>;
|
|
35
|
+
}
|
|
36
|
+
/** Iterator over the records returned by Converter's private `getFileData` method. */
export type FileData = IteratorObject<ReturnType<Converter['getFileData']>>;
|
|
37
|
+
/** The generator type returned by `Converter.mergeFiles()`. */
export type MergeProgress = ReturnType<Converter['mergeFiles']>;
|