epub2md 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +74 -26
  2. package/lib/bin/cli.cjs +89 -39
  3. package/lib/bin/merge.cjs +1 -1
  4. package/lib/bin/utils.cjs +1 -1
  5. package/lib/convert/convert.cjs +205 -0
  6. package/lib/convert/convert.d.ts +37 -0
  7. package/lib/convert/convert.mjs +159 -0
  8. package/lib/convert/download-images.cjs +28 -0
  9. package/lib/convert/download-images.d.ts +4 -0
  10. package/lib/convert/download-images.mjs +20 -0
  11. package/lib/{bin → convert}/helper.cjs +5 -24
  12. package/lib/convert/helper.d.ts +11 -0
  13. package/lib/convert/helper.mjs +36 -0
  14. package/lib/epub/options.cjs +25 -0
  15. package/lib/epub/options.d.ts +16 -0
  16. package/lib/epub/options.mjs +8 -0
  17. package/lib/epub/parseEpub.cjs +96 -0
  18. package/lib/epub/parseEpub.d.ts +42 -0
  19. package/lib/epub/parseEpub.mjs +80 -0
  20. package/lib/epub/zip.cjs +27 -0
  21. package/lib/epub/zip.d.ts +8 -0
  22. package/lib/epub/zip.mjs +21 -0
  23. package/lib/index.cjs +1 -1
  24. package/lib/index.d.ts +1 -1
  25. package/lib/index.mjs +1 -1
  26. package/lib/mobi.d.ts +1 -1
  27. package/lib/parseHTML.cjs +49 -3
  28. package/lib/parseHTML.mjs +41 -1
  29. package/lib/parseSection.cjs +9 -9
  30. package/lib/parseSection.d.ts +8 -7
  31. package/lib/parseSection.mjs +8 -8
  32. package/lib/types.d.ts +0 -7
  33. package/lib/utils.cjs +19 -88
  34. package/lib/utils.d.ts +11 -24
  35. package/lib/utils.mjs +14 -76
  36. package/lib/xml/index.cjs +59 -0
  37. package/lib/xml/index.d.ts +20 -0
  38. package/lib/xml/index.mjs +38 -0
  39. package/lib/xml/meta-container.cjs +25 -0
  40. package/lib/xml/meta-container.d.ts +4 -0
  41. package/lib/xml/meta-container.mjs +15 -0
  42. package/lib/xml/opf.cjs +89 -0
  43. package/lib/xml/opf.d.ts +41 -0
  44. package/lib/xml/opf.mjs +78 -0
  45. package/lib/xml/parseXml.cjs +13 -0
  46. package/lib/xml/parseXml.d.ts +1 -0
  47. package/lib/xml/parseXml.mjs +6 -0
  48. package/lib/xml/toc.cjs +97 -0
  49. package/lib/xml/toc.d.ts +17 -0
  50. package/lib/xml/toc.mjs +84 -0
  51. package/package.json +13 -10
  52. package/lib/bin/convert.cjs +0 -316
  53. package/lib/parseEpub.cjs +0 -304
  54. package/lib/parseEpub.d.ts +0 -111
  55. package/lib/parseEpub.mjs +0 -273
package/README.md CHANGED
@@ -13,7 +13,7 @@ Even though the package is primarily intended for CLI is to convert EPUB to Mark
13
13
  - Retain the original online image links.
14
14
  - Download and localize online images (save remote images locally).
15
15
  - **View Information**: Easy to View the basic information, structure, and chapters of the EPUB.
16
- - **Extraction Function**: Dont need convert, just extract the useful contents of the EPUB file.
16
+ - **Extraction Function**: Just extract the useful contents of the EPUB file.
17
17
 
18
18
  ## Global Install for CLI
19
19
 
@@ -41,10 +41,17 @@ $ npm install @uxiew/epub2md
41
41
  # Show usage help
42
42
  $ epub2md -h
43
43
 
44
- # Convert directly to markdown format
45
- $ epub2md ../../fixtures/zhihu.epub
46
- # or use -m
47
- $ epub2md -m ../../fixtures/zhihu.epub
44
+ # ========== Basic Conversion ==========
45
+
46
+ # Convert directly to markdown format (default command)
47
+ $ epub2md book.epub
48
+ $ epub2md /path/to/book.epub
49
+
50
+ # Convert with autocorrect (spaces and punctuation between Chinese and English)
51
+ $ epub2md -a book.epub
52
+ $ epub2md --autocorrect book.epub
53
+
54
+ # ========== Batch Conversion (Wildcard Support) ==========
48
55
 
49
56
  # Convert multiple files using wildcards
50
57
  $ epub2md "fixtures/*.epub"
@@ -56,33 +63,58 @@ $ epub2md "fixtures/*.epub" --merge
56
63
 
57
64
  # Note: Quotes are required around patterns with wildcards to prevent shell expansion
58
65
 
59
- # Convert to markdown and automatically correct spaces and punctuation between Chinese and English (CLI only)
60
- $ epub2md -M ../../fixtures/zhihu.epub
66
+ # ========== Merge Options ==========
61
67
 
62
- # Convert and directly generate a single merged markdown file (no intermediate files)
63
- $ epub2md -m ../../fixtures/zhihu.epub --merge
64
- # You can also use the epub file path as the first parameter directly
65
- $ epub2md ../../fixtures/zhihu.epub --merge
68
+ # Convert and directly generate a single merged markdown file
69
+ $ epub2md -m book.epub
70
+ $ epub2md --merge book.epub
66
71
 
67
- # Use --merge=filename.md
68
- $ epub2md ../../fixtures/zhihu.epub --merge="merged-book.md"
72
+ # Specify custom output filename for merged file
73
+ $ epub2md --merge=custom-name.md book.epub
69
74
 
70
- # By default, DONT downloaded. Basically, the images in the epub are already included, so there is no need to download.
71
- # However, some epub image links are remote, You will see some warning,maybe they need to be downloaded.
72
- # Download and localize online images (download remote images to local) (need node > 18.0)
73
- $ epub2md ../../fixtures/zhihu.epub --localize
75
+ # Merge existing markdown files in a directory (without conversion)
76
+ $ epub2md --merge ./path/to/markdown/dir
74
77
 
75
- # Download and localize online images, while merging all chapters into a single file
76
- $ epub2md ../../fixtures/zhihu.epub --merge --localize
78
+ # ========== Image Processing ==========
77
79
 
78
- # Merge existing markdown files in a directory
79
- $ epub2md --merge ./path/to/markdown/dir
80
+ # By default, remote images are NOT downloaded (only a warning is shown)
81
+ # Images embedded in EPUB are always extracted
82
+
83
+ # Download and localize remote images (requires Node.js >= 18.0)
84
+ $ epub2md -l book.epub
85
+ $ epub2md --localize book.epub
86
+
87
+ # Combine: convert + merge + download remote images
88
+ $ epub2md -m -l book.epub
89
+ $ epub2md --merge --localize book.epub
90
+
91
+ # ========== Information Display ==========
92
+
93
+ # Show basic information (title, author, language)
94
+ $ epub2md -i book.epub
95
+ $ epub2md --info book.epub
96
+
97
+ # Show structure/table of contents
98
+ $ epub2md -s book.epub
99
+ $ epub2md --structure book.epub
100
+
101
+ # Show all sections/chapters
102
+ $ epub2md -S book.epub
103
+ $ epub2md --sections book.epub
80
104
 
81
- # Show additional information
82
- $ epub2md -u ../../fixtures/zhihu.epub # Extract epub
83
- $ epub2md -i ../../fixtures/zhihu.epub # Show basic information
84
- $ epub2md -S ../../fixtures/zhihu.epub # Show structure information
85
- $ epub2md -s ../../fixtures/zhihu.epub # Show chapter information
105
+ # ========== Extraction ==========
106
+
107
+ # Extract/unzip EPUB contents
108
+ $ epub2md -u book.epub
109
+ $ epub2md --unzip book.epub
110
+
111
+ # ========== Command Priority ==========
112
+
113
+ # Info commands have highest priority
114
+ $ epub2md book.epub --info # Shows info (doesn't convert)
115
+ $ epub2md --convert --info book.epub # Shows info (info takes precedence)
116
+
117
+ # Priority order: info/structure/sections > unzip > merge(dir) > convert/autocorrect
86
118
  ```
87
119
 
88
120
  ## Usage
@@ -126,6 +158,22 @@ The return value is an object which contains`structure`, `sections`, `info`(priv
126
158
 
127
159
  - `Section.prototype.toHtmlObjects`: convert to html object. And a note about `src` and `href`: the `src` and `href` in raw html stay untouched, but the `toHtmlObjects` method resolves `src` to a base64 string, and alters `href` so that they make sense in the parsed epub. And the parsed `href` is something like `#{sectionId},{hash}`.
128
160
 
161
+ ## Testing
162
+
163
+ ```bash
164
+ # Run all tests
165
+ $ npm test
166
+
167
+ # Run specific test file
168
+ $ npm test -- test/bin.spec.ts
169
+
170
+ # Run tests with verbose output
171
+ $ npm test -- --reporter=verbose
172
+
173
+ # Run integration tests only
174
+ $ npm test -- test/integration.spec.ts
175
+ ```
176
+
129
177
  ## How to contribute
130
178
 
131
179
  - Raise an issue in the issue section.
package/lib/bin/cli.cjs CHANGED
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env node
1
+ // #!/usr/bin/env node
2
2
  "use strict";
3
3
 
4
4
  Object.defineProperty(exports, "__esModule", {
@@ -8,8 +8,10 @@ exports.Commands = void 0;
8
8
  var _args = _interopRequireDefault(require("args"));
9
9
  var _nodeProcess = _interopRequireDefault(require("node:process"));
10
10
  var _nodeFs = _interopRequireDefault(require("node:fs"));
11
- var _parseEpub = _interopRequireDefault(require("../parseEpub.cjs"));
12
- var _convert = require("./convert.cjs");
11
+ var _nodePath = require("node:path");
12
+ var _writeFileSafe = require("write-file-safe");
13
+ var _parseEpub = _interopRequireDefault(require("../epub/parseEpub.cjs"));
14
+ var _convert = require("../convert/convert.cjs");
13
15
  var _merge = require("./merge.cjs");
14
16
  var _logger = _interopRequireDefault(require("../logger.cjs"));
15
17
  var _utils = require("./utils.cjs");
@@ -25,7 +27,7 @@ const Commands = exports.Commands = {
25
27
  merge: "merge",
26
28
  localize: "localize"
27
29
  };
28
- const commands = [[Commands.convert, "convert the epub file to markdown format"], [Commands.autocorrect, "convert the epub file to markdown format with autocorrect"], [Commands.unzip, "unzip epub file"], [Commands.info, "get epub file basic info"], [Commands.structure, "get epub file structure"], [Commands.sections, "get epub file sections"], [Commands.merge, "merge all markdown files into a single file, can also specify output filename with --merge=filename.md"], [Commands.localize, "Retain the original online link and do not convert it to a local path", false]];
30
+ const commands = [[Commands.convert, "convert the EPUB file to markdown format"], [Commands.autocorrect, "convert the EPUB file to markdown format with autocorrect"], [Commands.unzip, "unzip EPUB file"], [Commands.info, "get EPUB file basic info"], [Commands.structure, "get EPUB file structure"], [Commands.sections, "get EPUB file sections"], [Commands.merge, "Merge all Markdown files into one; specify output filename with --merge=filename.md"], [Commands.localize, 'Download all remotes images to the local "images" folder']];
29
31
  const DEFAULT_COMMAND = Commands.convert;
30
32
  commands.forEach(cmd => _args.default.option(cmd[0], cmd[1], cmd[2]));
31
33
  const flags = _args.default.parse(_nodeProcess.default.argv, {
@@ -52,19 +54,20 @@ for (const cmd of [Commands.info, Commands.structure, Commands.sections]) {
52
54
  }
53
55
  if (!hasRun && flags[Commands.unzip]) {
54
56
  const epubPath = typeof flags[Commands.unzip] === "string" ? flags[Commands.unzip] : unprocessedArgs.length > 0 ? unprocessedArgs[0] : null;
57
+ const options = {
58
+ cmd: Commands.unzip,
59
+ mergedFilename: void 0,
60
+ shouldMerge: false,
61
+ localize: false
62
+ };
55
63
  if (epubPath) {
56
64
  _logger.default.info("unzipping...");
57
- new _convert.Converter(epubPath).run({
58
- cmd: Commands.unzip,
59
- // Use cmd to indicate unzip only
60
- mergedFilename: void 0,
61
- shouldMerge: false,
62
- localize: false
63
- }).then(outDir => {
65
+ try {
66
+ const outDir = convert(epubPath, options);
64
67
  _logger.default.info(`Unzip successful! output: ${outDir}`);
65
- }).catch(error => {
68
+ } catch (error) {
66
69
  _logger.default.error(error);
67
- });
70
+ }
68
71
  hasRun = true;
69
72
  } else {
70
73
  _logger.default.error("No valid epub file path provided for unzip command");
@@ -104,14 +107,14 @@ if (!hasRun) {
104
107
  }
105
108
  async function run(cmd) {
106
109
  if (cmd === Commands.convert || cmd === Commands.autocorrect) {
107
- const epubPath = typeof flags[cmd] === "string" ? flags[cmd] : null;
108
- if (!epubPath) {
110
+ const pathArg = typeof flags[cmd] === "string" ? flags[cmd] : null;
111
+ if (!pathArg) {
109
112
  _logger.default.error("No valid epub file path provided");
110
113
  return;
111
114
  }
112
- const epubFiles = await (0, _utils.expandWildcard)(epubPath);
113
- if (epubFiles.length === 0) {
114
- _logger.default.error(`No files found matching pattern: ${epubPath}`);
115
+ const epubPaths = await (0, _utils.expandWildcard)(pathArg);
116
+ if (epubPaths.length === 0) {
117
+ _logger.default.error(`No files found matching pattern: ${pathArg}`);
115
118
  return;
116
119
  }
117
120
  const shouldMerge = flags.merge === true || typeof flags.merge === "string" && flags.merge !== "";
@@ -119,47 +122,94 @@ async function run(cmd) {
119
122
  if (typeof flags.merge === "string" && flags.merge !== "") {
120
123
  mergedFilename = flags.merge;
121
124
  }
122
- if (mergedFilename && epubFiles.length > 1) {
125
+ if (mergedFilename && epubPaths.length > 1) {
123
126
  _logger.default.warn(`Warning: Using custom merge filename "${mergedFilename}" with multiple files. Each file will overwrite the previous merged output.`);
124
127
  _logger.default.warn(`Consider using --merge (without filename) to generate separate merged files for each epub.`);
125
128
  }
126
129
  const localize = flags.localize === true;
127
- if (epubFiles.length > 1) {
128
- _logger.default.info(`Found ${epubFiles.length} files matching pattern "${epubPath}"`);
130
+ if (epubPaths.length > 1) {
131
+ _logger.default.info(`Found ${epubPaths.length} files matching pattern "${pathArg}"`);
129
132
  }
130
- for (let i = 0; i < epubFiles.length; i++) {
131
- const currentFile = epubFiles[i];
132
- _logger.default.info(`[${i + 1}/${epubFiles.length}] Converting ${currentFile}${cmd === Commands.autocorrect ? " with autocorrect" : ""}${flags[Commands.merge] ? " and merging" : ""}...`);
133
+ for (let i = 0; i < epubPaths.length; i++) {
134
+ const epubPath = epubPaths[i];
135
+ _logger.default.info(`[${i + 1}/${epubPaths.length}] Converting ${epubPath}${cmd === Commands.autocorrect ? " with autocorrect" : ""}${flags[Commands.merge] ? " and merging" : ""}...`);
136
+ const options = {
137
+ cmd,
138
+ mergedFilename,
139
+ shouldMerge,
140
+ localize
141
+ };
133
142
  try {
134
- const outDir = await new _convert.Converter(currentFile).run({
135
- cmd,
136
- mergedFilename,
137
- shouldMerge,
138
- localize
139
- });
143
+ const outDir = convert(epubPath, options);
140
144
  if (shouldMerge) {
141
- _logger.default.info(`[${i + 1}/${epubFiles.length}] Merging successful! Output file: ${outDir}`);
145
+ _logger.default.info(`[${i + 1}/${epubPaths.length}] Merging successful! Output file: ${outDir}`);
142
146
  } else {
143
- _logger.default.info(`[${i + 1}/${epubFiles.length}] Conversion successful! output: ${outDir}`);
147
+ _logger.default.info(`[${i + 1}/${epubPaths.length}] Conversion successful! output: ${outDir}`);
144
148
  }
145
149
  } catch (error) {
146
- _logger.default.error(`[${i + 1}/${epubFiles.length}] Failed to convert ${currentFile}:`, error);
150
+ _logger.default.error(`[${i + 1}/${epubPaths.length}] Failed to convert ${epubPath}:`, error);
147
151
  }
148
152
  }
149
- if (epubFiles.length > 1) {
150
- _logger.default.success(`Completed processing ${epubFiles.length} files`);
153
+ if (epubPaths.length > 1) {
154
+ _logger.default.success(`Completed processing ${epubPaths.length} files`);
151
155
  }
152
156
  return;
153
157
  }
154
158
  const cmdPath = flags[cmd];
155
159
  if (typeof cmdPath === "string") {
156
- (0, _parseEpub.default)(cmdPath).then(res => {
160
+ try {
161
+ const epub = (0, _parseEpub.default)(cmdPath);
162
+ const data = {
163
+ info: epub.structure.opf.metadata,
164
+ structure: epub.structure.toc?.tree,
165
+ sections: epub.sections
166
+ }[cmd];
157
167
  _logger.default.success(`This book ${cmd}:`);
158
- _logger.default.json(res[cmd]);
159
- }).catch(error => {
168
+ _logger.default.json(data);
169
+ } catch (error) {
160
170
  _logger.default.error(error);
161
- });
171
+ }
162
172
  } else {
163
173
  _logger.default.error(`Path must be a string, got ${typeof cmdPath}`);
164
174
  }
165
175
  }
176
+ function convert(epubPath, options) {
177
+ const converter = new _convert.Converter(epubPath, options);
178
+ if (options?.shouldMerge) return handleMergedFile(converter.mergeProgress);else {
179
+ handleFiles(converter.files);
180
+ return converter.outDir;
181
+ }
182
+ }
183
+ function handleFiles(files) {
184
+ let markdownFileCount = 0;
185
+ for (const {
186
+ type,
187
+ outputPath,
188
+ content
189
+ } of files) {
190
+ if (type === "md") _logger.default.success(`${++markdownFileCount}: [${(0, _nodePath.basename)(outputPath)}]`);
191
+ (0, _writeFileSafe.writeFileSync)(outputPath, content, {
192
+ overwrite: true
193
+ });
194
+ }
195
+ }
196
+ function handleMergedFile(mergeFileProcess) {
197
+ let markdownFileCount = 0;
198
+ for (const {
199
+ type,
200
+ outputPath,
201
+ content
202
+ } of mergeFileProcess) {
203
+ if (type === "markdown file processed") _logger.default.success(`${++markdownFileCount}: [${outputPath}]`);
204
+ if (type === "file processed") (0, _writeFileSafe.writeFileSync)(outputPath, content, {
205
+ overwrite: true
206
+ });
207
+ if (type === "markdown merged") {
208
+ (0, _writeFileSafe.writeFileSync)(outputPath, content, {
209
+ overwrite: true
210
+ });
211
+ return outputPath;
212
+ }
213
+ }
214
+ throw "No merged markdown file created";
215
+ }
package/lib/bin/merge.cjs CHANGED
@@ -47,4 +47,4 @@ async function mergeMarkdowns(directory, outputFile) {
47
47
  _logger.default.error(`Failed to merge Markdown files: ${error}`);
48
48
  throw error;
49
49
  }
50
- }
50
+ }
package/lib/bin/utils.cjs CHANGED
@@ -23,4 +23,4 @@ async function expandWildcard(pattern) {
23
23
  }
24
24
  }
25
25
  return [pattern];
26
- }
26
+ }
@@ -0,0 +1,205 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.Converter = void 0;
7
+ var _nodePath = require("node:path");
8
+ var _logger = _interopRequireDefault(require("../logger.cjs"));
9
+ var iteratorHelpersPolyfill = _interopRequireWildcard(require("iterator-helpers-polyfill"));
10
+ var _parseEpub = _interopRequireDefault(require("../epub/parseEpub.cjs"));
11
+ var _helper = require("./helper.cjs");
12
+ var _utils = require("../utils.cjs");
13
+ var _parseLink = _interopRequireDefault(require("../parseLink.cjs"));
14
+ var _downloadImages = require("./download-images.cjs");
15
+ function _getRequireWildcardCache(e) { if ("function" != typeof WeakMap) return null; var r = new WeakMap(), t = new WeakMap(); return (_getRequireWildcardCache = function (e) { return e ? t : r; })(e); }
16
+ function _interopRequireWildcard(e, r) { if (!r && e && e.__esModule) return e; if (null === e || "object" != typeof e && "function" != typeof e) return { default: e }; var t = _getRequireWildcardCache(r); if (t && t.has(e)) return t.get(e); var n = { __proto__: null }, a = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var u in e) if ("default" !== u && {}.hasOwnProperty.call(e, u)) { var i = a ? Object.getOwnPropertyDescriptor(e, u) : null; i && (i.get || i.set) ? Object.defineProperty(n, u, i) : n[u] = e[u]; } return n.default = e, t && t.set(e, n), n; }
17
+ function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
18
+ iteratorHelpersPolyfill.installIntoGlobal();
19
+ const defaultOptions = {
20
+ cmd: "convert",
21
+ shouldMerge: false,
22
+ localize: false
23
+ };
24
+ const IMAGE_DIR = "images";
25
+ class Converter {
26
+ epub;
27
+ // epub parser object
28
+ outDir;
29
+ // epub 's original directory to save markdown files
30
+ files;
31
+ mergeProgress;
32
+ options;
33
+ /**
34
+ * Constructor
35
+ * @param epubPath - The path to the EPUB file
36
+ * @param RunOptions - Configuration options or boolean (backward compatibility)
37
+ */
38
+ constructor(epubPath, options) {
39
+ this.options = {
40
+ ...defaultOptions,
41
+ ...options
42
+ };
43
+ this.epub = (0, _parseEpub.default)(epubPath, {
44
+ convertToMarkdown: _helper.convertHTML
45
+ });
46
+ this.outDir = epubPath.replace(".epub", "");
47
+ const structures = processManifest(this.epub, this.options.cmd !== "unzip", this.outDir);
48
+ this.files = structures.values().map(x => this.getFileData(x, structures)).filter(x => x.content.length > 0);
49
+ if (this.options.shouldMerge) this.mergeProgress = this.mergeFiles();
50
+ }
51
+ getFileData(structure, structures) {
52
+ let {
53
+ id,
54
+ type,
55
+ filepath,
56
+ outpath
57
+ } = structure;
58
+ let content = "";
59
+ const needAutoCorrect = this.options.cmd === "autocorrect";
60
+ if (type === "md") {
61
+ const section = this.epub.getSection(id);
62
+ if (section) content = section.toMarkdown();
63
+ const {
64
+ outPath,
65
+ fileName
66
+ } = clearOutpath(structure, this.epub.structure.toc);
67
+ outpath = outPath;
68
+ const resLinks = [];
69
+ const linkStartSep = this.options.shouldMerge ? "#" : "./";
70
+ content = (0, _helper.fixLinkPath)(content, (link, isText) => {
71
+ if (isText) {
72
+ const {
73
+ hash = "",
74
+ url
75
+ } = (0, _parseLink.default)(link, true);
76
+ if (link.startsWith("#")) return linkStartSep + this.options.shouldMerge ? id : fileName + link;
77
+ const sectionId = this.epub.structure.opf.manifest.getItemId(url);
78
+ const internalNavName = this.epub.structure.toc?.getBySectionId(sectionId)?.name || link;
79
+ let validPath = (0, _helper.sanitizeFileName)((0, _nodePath.extname)(internalNavName) ? internalNavName : internalNavName + ".md");
80
+ const file = structures.find(file2 => file2.id === sectionId);
81
+ if (file) validPath = (0, _nodePath.basename)(clearOutpath(file, this.epub.structure.toc).outPath);
82
+ const toId = this.epub.structure.opf.manifest.getItemId((0, _nodePath.join)((0, _nodePath.dirname)(filepath), url));
83
+ return this.options.shouldMerge ? linkStartSep + toId + (hash ? "#" + hash : "") : linkStartSep + validPath + `${hash ? "#" + hash : ""}`;
84
+ } else {
85
+ if (link.startsWith("http")) {
86
+ resLinks.push(link);
87
+ }
88
+ return "./" + IMAGE_DIR + "/" + (0, _nodePath.basename)(link);
89
+ }
90
+ });
91
+ if (this.options.localize) {
92
+ try {
93
+ (0, _downloadImages.downloadRemoteImages)(resLinks, (0, _nodePath.join)(this.outDir, IMAGE_DIR));
94
+ } catch (error) {
95
+ _logger.default.error("Failed to localize the image!", error);
96
+ }
97
+ } else if (resLinks.length > 0) {
98
+ _logger.default.warn("Remote images are detected, use --localize to download the images");
99
+ }
100
+ content = needAutoCorrect ? require("autocorrect-node").format(content) : content;
101
+ } else {
102
+ content = this.epub.getFile(filepath).asNodeBuffer();
103
+ }
104
+ return {
105
+ id,
106
+ type,
107
+ content,
108
+ outputPath: outpath
109
+ };
110
+ }
111
+ *mergeFiles() {
112
+ const chapters = [];
113
+ for (const {
114
+ type,
115
+ id,
116
+ outputPath: outputPath2,
117
+ content
118
+ } of this.files) if (type === "md") {
119
+ chapters.push(`<a role="toc_link" id="${id}"></a>
120
+ ` + content);
121
+ yield {
122
+ type: "markdown file processed",
123
+ outputPath: (0, _nodePath.basename)(outputPath2)
124
+ };
125
+ } else {
126
+ yield {
127
+ type: "file processed",
128
+ outputPath: outputPath2,
129
+ content
130
+ };
131
+ }
132
+ const outputPath = (0, _nodePath.join)(this.outDir, this.options.mergedFilename || `${(0, _nodePath.basename)(this.outDir)}-merged.md`);
133
+ yield {
134
+ type: "markdown merged",
135
+ outputPath,
136
+ content: chapters.join("\n\n---\n\n")
137
+ };
138
+ }
139
+ }
140
+ exports.Converter = Converter;
141
+ function processManifest(epub, unzip, outDir) {
142
+ const structure = [];
143
+ const orderPrefix = new OrderPrefix({
144
+ maximum: epub.sections.length
145
+ });
146
+ for (const {
147
+ href: filepath,
148
+ id
149
+ } of epub.structure.opf.manifest) {
150
+ if (filepath.endsWith("ncx") || id === "titlepage") continue;
151
+ const {
152
+ type,
153
+ path: outpath
154
+ } = parseFileInfo(filepath, outDir);
155
+ if (type === "" && unzip) continue;
156
+ structure.push({
157
+ // current only label markdown file
158
+ orderPrefix: type === "md" ? orderPrefix.next() : "",
159
+ id,
160
+ type,
161
+ outpath,
162
+ filepath
163
+ });
164
+ }
165
+ return structure;
166
+ }
167
+ class OrderPrefix {
168
+ count = 0;
169
+ length;
170
+ constructor({
171
+ maximum
172
+ }) {
173
+ this.length = Math.floor(Math.log10(maximum)) + 1;
174
+ }
175
+ next() {
176
+ return (++this.count).toString().padStart(this.length, "0");
177
+ }
178
+ }
179
+ function clearOutpath({
180
+ id,
181
+ outpath,
182
+ orderPrefix
183
+ }, toc) {
184
+ const nav = toc?.getBySectionId(id);
185
+ const fileName = (0, _helper.sanitizeFileName)(nav ? nav.name + ".md" : (0, _nodePath.basename)(outpath));
186
+ const outDir = (0, _nodePath.dirname)(outpath);
187
+ return {
188
+ fileName,
189
+ outDir,
190
+ outPath: (0, _nodePath.join)(outDir, orderPrefix + "-" + fileName)
191
+ };
192
+ }
193
+ function parseFileInfo(filepath, outDir) {
194
+ const {
195
+ isImage,
196
+ isHTML
197
+ } = (0, _helper.checkFileType)(filepath);
198
+ const name = (0, _nodePath.basename)(filepath);
199
+ const path = (0, _nodePath.join)(outDir, isImage ? IMAGE_DIR : isHTML ? "" : "static", isHTML ? _utils.Path.fileStem(name) + ".md" : name);
200
+ return {
201
+ // html => md
202
+ type: isHTML ? "md" : isImage ? "img" : "",
203
+ path
204
+ };
205
+ }
@@ -0,0 +1,37 @@
1
+ import { Epub } from '../epub/parseEpub';
2
+ import { type CommandType } from '../bin/cli';
3
+ export interface RunOptions {
4
+ cmd: CommandType;
5
+ shouldMerge: boolean;
6
+ localize: boolean;
7
+ mergedFilename?: string;
8
+ }
9
+ export declare class Converter {
10
+ epub: Epub;
11
+ outDir: string;
12
+ files: FileData;
13
+ mergeProgress?: MergeProgress;
14
+ options: RunOptions;
15
+ /**
16
+ * Constructor
17
+ * @param epubPath - The path to the EPUB file
18
+ * @param RunOptions - Configuration options or boolean (backward compatibility)
19
+ */
20
+ constructor(epubPath: string, options?: Partial<RunOptions>);
21
+ private getFileData;
22
+ mergeFiles(): Generator<{
23
+ readonly type: "markdown file processed";
24
+ readonly outputPath: string;
25
+ readonly content?: undefined;
26
+ } | {
27
+ readonly type: "file processed";
28
+ readonly outputPath: any;
29
+ readonly content: any;
30
+ } | {
31
+ readonly type: "markdown merged";
32
+ readonly outputPath: string;
33
+ readonly content: string;
34
+ }, void, unknown>;
35
+ }
36
+ export type FileData = IteratorObject<ReturnType<Converter['getFileData']>>;
37
+ export type MergeProgress = ReturnType<Converter['mergeFiles']>;