epub2md 1.2.1 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,7 +2,18 @@
2
2
 
3
3
  Although the package is primarily intended as a CLI for converting EPUB to Markdown, it can also be used programmatically.
4
4
 
5
- 主要的目标是转换 epub markdown,当然了也可以当做 epub 的解析器库使用.
5
+ 主要的目标是转换 epub 多个 markdown 文件,或者合并为 单个 markdown 文件,可以处理其中的远程图片资源;当然了也可以当做 epub 的解析器库使用.
6
+
7
+ ## Main Functions
8
+
9
+ - **Convert EPUB to Markdown**: By default, convert and output numbered markdown files in sequence.
10
+ - **Autocorrection**: Optionally fix the spacing and punctuation between Chinese and English text.
11
+ - **Merge Chapters**: Optionally merge all markdown files into a single Markdown file, with support for jumping between internal links.
12
+ - **Image Processing**:
13
+ - Retain the original online image links.
14
+ - Download and localize online images (save remote images locally).
15
+ - **View Information**: Easily view the basic information, structure, and chapters of the EPUB.
16
+ - **Extraction Function**: Extract the useful contents of the EPUB file without converting them.
6
17
 
7
18
  ## Global Install for CLI
8
19
 
@@ -27,22 +38,41 @@ $ npm install @uxiew/epub2md
27
38
  ## CLI
28
39
 
29
40
  ```bash
30
- # show usage
41
+ # Show usage help
31
42
  $ epub2md -h
32
43
 
33
44
  # Convert directly to markdown format
34
45
  $ epub2md ../../fixtures/zhihu.epub
35
- # or -m
46
+ # or use -m
36
47
  $ epub2md -m ../../fixtures/zhihu.epub
37
48
 
38
- # Convert directly to markdown format with autocorrect to handle spacing between CJK and English words and Correct punctuations Only for command line use
49
+ # Convert to markdown and automatically correct spaces and punctuation between Chinese and English (CLI only)
39
50
  $ epub2md -M ../../fixtures/zhihu.epub
40
51
 
41
- # show other info
42
- $ epub2md -u ../../fixtures/zhihu.epub
43
- $ epub2md -i ../../fixtures/zhihu.epub
44
- $ epub2md -S ../../fixtures/zhihu.epub
45
- $ epub2md -s ../../fixtures/zhihu.epub
52
+ # Convert and directly generate a single merged markdown file (no intermediate files)
53
+ $ epub2md -m ../../fixtures/zhihu.epub --merge
54
+ # You can also use the epub file path as the first parameter directly
55
+ $ epub2md ../../fixtures/zhihu.epub --merge
56
+
57
+ # Use --merge=filename.md
58
+ $ epub2md ../../fixtures/zhihu.epub --merge="merged-book.md"
59
+
60
+ # By default, nothing is downloaded. Images are usually bundled inside the epub already, so there is no need to download them.
61
+ # However, some epubs link to remote images. You will see a warning in that case; those images may need to be downloaded.
62
+ # Download and localize online images (download remote images to local) (need node > 18.0)
63
+ $ epub2md ../../fixtures/zhihu.epub --localize
64
+
65
+ # Download and localize online images, while merging all chapters into a single file
66
+ $ epub2md ../../fixtures/zhihu.epub --merge --localize
67
+
68
+ # Merge existing markdown files in a directory
69
+ $ epub2md --merge ./path/to/markdown/dir
70
+
71
+ # Show additional information
72
+ $ epub2md -u ../../fixtures/zhihu.epub # Extract epub
73
+ $ epub2md -i ../../fixtures/zhihu.epub # Show basic information
74
+ $ epub2md -S ../../fixtures/zhihu.epub # Show structure information
75
+ $ epub2md -s ../../fixtures/zhihu.epub # Show chapter information
46
76
  ```
47
77
 
48
78
  ## Usage
package/lib/bin/cli.cjs CHANGED
@@ -6,59 +6,140 @@ Object.defineProperty(exports, "__esModule", {
6
6
  });
7
7
  exports.Commands = void 0;
8
8
  var _args = _interopRequireDefault(require("args"));
9
- var _chalk = _interopRequireDefault(require("chalk"));
10
9
  var _nodeProcess = _interopRequireDefault(require("node:process"));
10
+ var _nodeFs = _interopRequireDefault(require("node:fs"));
11
11
  var _parseEpub = _interopRequireDefault(require("../parseEpub.cjs"));
12
- var _convert = _interopRequireDefault(require("./convert.cjs"));
13
- var _beautyJson = require("beauty-json");
12
+ var _convert = require("./convert.cjs");
13
+ var _merge = require("./merge.cjs");
14
+ var _logger = _interopRequireDefault(require("../logger.cjs"));
14
15
  function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
15
16
  const name = "epub2md";
16
- var Commands = exports.Commands = /* @__PURE__ */(Commands2 => {
17
- Commands2["markdown"] = "md";
18
- Commands2["autocorrect"] = "ma";
19
- Commands2["unzip"] = "unzip";
20
- Commands2["info"] = "info";
21
- Commands2["structure"] = "structure";
22
- Commands2["sections"] = "sections";
23
- return Commands2;
24
- })(Commands || {});
25
- const commands = [["md" /* markdown */, "convert the epub file to markdown format"], ["ma" /* autocorrect */, "convert the epub file to markdown format with autocorrect"], ["unzip" /* unzip */, "unzip epub file"], ["info" /* info */, "get epub file basic info"], ["structure" /* structure */, "get epub file structure"], ["sections" /* sections */, "get epub file sections"]];
26
- const DEFAULT_COMMAND = "md" /* markdown */;
27
- commands.forEach(cmd => _args.default.option(cmd[0], cmd[1]));
17
+ const Commands = exports.Commands = {
18
+ convert: "convert",
19
+ autocorrect: "autocorrect",
20
+ unzip: "unzip",
21
+ info: "info",
22
+ structure: "structure",
23
+ sections: "sections",
24
+ merge: "merge",
25
+ localize: "localize"
26
+ };
27
+ const commands = [[Commands.convert, "convert the epub file to markdown format"], [Commands.autocorrect, "convert the epub file to markdown format with autocorrect"], [Commands.unzip, "unzip epub file"], [Commands.info, "get epub file basic info"], [Commands.structure, "get epub file structure"], [Commands.sections, "get epub file sections"], [Commands.merge, "merge all markdown files into a single file, can also specify output filename with --merge=filename.md"], [Commands.localize, "Retain the original online link and do not convert it to a local path", false]];
28
+ const DEFAULT_COMMAND = Commands.convert;
29
+ commands.forEach(cmd => _args.default.option(cmd[0], cmd[1], cmd[2]));
28
30
  const flags = _args.default.parse(_nodeProcess.default.argv, {
29
31
  name
30
32
  });
31
- commands.some(([cmd], i) => {
33
+ const unprocessedArgs = _nodeProcess.default.argv.slice(2).filter(arg => !arg.startsWith("--") && !arg.startsWith("-"));
34
+ if (unprocessedArgs.length > 0) {
35
+ flags[DEFAULT_COMMAND] = unprocessedArgs[0];
36
+ }
37
+ let hasRun = false;
38
+ for (const cmd of [Commands.info, Commands.structure, Commands.sections]) {
32
39
  if (flags[cmd]) {
33
- run(cmd);
34
- return true;
40
+ if (typeof flags[cmd] !== "string") {
41
+ if (unprocessedArgs.length > 0) {
42
+ flags[cmd] = unprocessedArgs[0];
43
+ }
44
+ }
45
+ if (typeof flags[cmd] === "string") {
46
+ run(cmd);
47
+ hasRun = true;
48
+ break;
49
+ }
50
+ }
51
+ }
52
+ if (!hasRun && flags[Commands.unzip]) {
53
+ const epubPath = typeof flags[Commands.unzip] === "string" ? flags[Commands.unzip] : unprocessedArgs.length > 0 ? unprocessedArgs[0] : null;
54
+ if (epubPath) {
55
+ _logger.default.info("unzipping...");
56
+ new _convert.Converter(epubPath).run({
57
+ cmd: Commands.unzip,
58
+ // Use cmd to indicate unzip only
59
+ mergedFilename: void 0,
60
+ shouldMerge: false,
61
+ localize: false
62
+ }).then(outDir => {
63
+ _logger.default.info(`Unzip successful! output: ${outDir}`);
64
+ }).catch(error => {
65
+ _logger.default.error(error);
66
+ });
67
+ hasRun = true;
35
68
  } else {
36
- if (i === commands.length - 1) {
37
- if (_nodeProcess.default.argv[2]) {
38
- flags[DEFAULT_COMMAND] = _nodeProcess.default.argv[2];
39
- run(DEFAULT_COMMAND);
40
- return true;
69
+ _logger.default.error("No valid epub file path provided for unzip command");
70
+ }
71
+ }
72
+ if (!hasRun) {
73
+ if (flags.merge && typeof flags.merge === "string" && flags.merge !== "") {
74
+ if (_nodeFs.default.existsSync(flags.merge) && _nodeFs.default.statSync(flags.merge).isDirectory()) {
75
+ _logger.default.info("merging markdown files in directory...");
76
+ (0, _merge.mergeMarkdowns)(flags.merge).then(outputPath => {
77
+ _logger.default.info(`Merging successful! Output file: ${outputPath}`);
78
+ }).catch(error => {
79
+ _logger.default.info(`Merging failed: ${error}`);
80
+ });
81
+ hasRun = true;
82
+ }
83
+ }
84
+ if (!hasRun) {
85
+ for (const cmd of [Commands.convert, Commands.autocorrect]) {
86
+ if (flags[cmd]) {
87
+ if (typeof flags[cmd] !== "string") {
88
+ if (unprocessedArgs.length > 0) {
89
+ flags[cmd] = unprocessedArgs[0];
90
+ }
91
+ }
92
+ run(cmd);
93
+ hasRun = true;
94
+ break;
41
95
  }
96
+ }
97
+ if (!hasRun && unprocessedArgs.length > 0) {
98
+ run(DEFAULT_COMMAND);
99
+ } else if (!hasRun) {
42
100
  _args.default.showHelp();
43
101
  }
44
102
  }
45
- });
103
+ }
46
104
  function run(cmd) {
47
- const epubPath = flags["md" /* markdown */] || flags["ma" /* autocorrect */] || flags["unzip" /* unzip */];
48
- if (epubPath) {
49
- console.log(_chalk.default.blueBright(`[${name}]: converting${cmd === "ma" /* autocorrect */ ? " with AutoCorrect" : ""}...`));
50
- new _convert.default({
51
- eubPath: epubPath,
52
- cmd
53
- }).run(flags["unzip" /* unzip */]).then(outDir => {
54
- console.log(_chalk.default.greenBright(`[${name}]: success! output: ${outDir}`));
105
+ if (cmd === Commands.convert || cmd === Commands.autocorrect) {
106
+ const epubPath = typeof flags[cmd] === "string" ? flags[cmd] : null;
107
+ if (!epubPath) {
108
+ _logger.default.error("No valid epub file path provided");
109
+ return;
110
+ }
111
+ _logger.default.info(`converting${cmd === Commands.autocorrect ? " with autocorrect" : ""}${flags[Commands.merge] ? " and merging" : ""}...`);
112
+ const shouldMerge = flags.merge === true || typeof flags.merge === "string" && flags.merge !== "";
113
+ let mergedFilename;
114
+ if (typeof flags.merge === "string" && flags.merge !== "") {
115
+ mergedFilename = flags.merge;
116
+ }
117
+ const localize = flags.localize === true;
118
+ new _convert.Converter(epubPath).run({
119
+ cmd,
120
+ mergedFilename,
121
+ shouldMerge,
122
+ localize
123
+ }).then(outDir => {
124
+ if (shouldMerge) {
125
+ _logger.default.info(`Merging successful! Output file: ${outDir}`);
126
+ } else {
127
+ _logger.default.info(`Conversion successful! output: ${outDir}`);
128
+ }
129
+ }).catch(error => {
130
+ _logger.default.error(error);
55
131
  });
56
132
  return;
57
133
  }
58
- (0, _parseEpub.default)(flags[cmd]).then(res => {
59
- console.log(_chalk.default.greenBright(`[${name}]: This book ${cmd}:`));
60
- _beautyJson.json.log(res[cmd]);
61
- }).catch(error => {
62
- console.log(_chalk.default.red(error));
63
- });
134
+ const cmdPath = flags[cmd];
135
+ if (typeof cmdPath === "string") {
136
+ (0, _parseEpub.default)(cmdPath).then(res => {
137
+ _logger.default.success(`This book ${cmd}:`);
138
+ _logger.default.json(res[cmd]);
139
+ }).catch(error => {
140
+ _logger.default.error(error);
141
+ });
142
+ } else {
143
+ _logger.default.error(`Path must be a string, got ${typeof cmdPath}`);
144
+ }
64
145
  }
@@ -3,11 +3,11 @@
3
3
  Object.defineProperty(exports, "__esModule", {
4
4
  value: true
5
5
  });
6
-
6
+ exports.Converter = void 0;
7
7
  var _nodePath = require("node:path");
8
8
  var _nodeFs = require("node:fs");
9
+ var _logger = _interopRequireDefault(require("../logger.cjs"));
9
10
  var _writeFileSafe = require("write-file-safe");
10
- var _chalk = _interopRequireDefault(require("chalk"));
11
11
  var _parseEpub = _interopRequireDefault(require("../parseEpub.cjs"));
12
12
  var _helper = require("./helper.cjs");
13
13
  var _utils = require("../utils.cjs");
@@ -16,165 +16,301 @@ var _cli = require("./cli.cjs");
16
16
  function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
17
17
  class Converter {
18
18
  epub;
19
- // epub parser result
19
+ // epub parser object
20
20
  epubFilePath;
21
21
  // current epub 's path
22
- cmd;
23
- // current using command flag
24
- MD_FILE_EXT = ".md";
25
- // out file extname
26
22
  outDir;
27
23
  // epub 's original directory to save markdown files
24
+ mergedFilename;
25
+ // The merged file name
26
+ // include images/html/css/js in the epub file
28
27
  structure = [];
29
28
  // epub dir structure
30
- constructor({
31
- eubPath,
32
- cmd
33
- }) {
34
- this.epubFilePath = eubPath;
35
- this.cmd = cmd;
36
- this.outDir = (0, _nodePath.dirname)(eubPath);
29
+ cmd = "convert";
30
+ // current using command flag
31
+ shouldMerge = false;
32
+ // Whether to directly generate the merged file
33
+ localize = false;
34
+ // Whether to retain the original online image link
35
+ IMAGE_DIR = "images";
36
+ // The directory to save images
37
+ MD_FILE_EXT = ".md";
38
+ // out file extname
39
+ /**
40
+ * Constructor
41
+ * @param epubPath - The path to the EPUB file
42
+ */
43
+ constructor(epubPath) {
44
+ this.epubFilePath = epubPath;
45
+ this.outDir = (0, _nodePath.dirname)(epubPath);
37
46
  if (!(0, _nodeFs.existsSync)(this.outDir)) (0, _nodeFs.mkdirSync)(this.outDir);
38
47
  }
39
- checkFileType(filepath) {
40
- let isImage,
41
- isCSS,
42
- isHTML = false;
43
- const ext = (0, _nodePath.extname)(filepath);
44
- if (",.jpg,.jpeg,.png,.gif,.webp,.svg".includes(ext)) isImage = true;
45
- if (",.css".includes(ext)) isCSS = true;
46
- if (".htm,.html,.xhtml".includes(ext)) isHTML = true;
48
+ clearOutpath({
49
+ id,
50
+ outpath,
51
+ orderLabel
52
+ }) {
53
+ function _matchNav(id2, tocItems) {
54
+ if (Array.isArray(tocItems)) for (let i = 0; i < tocItems.length; i++) {
55
+ const item = tocItems[i];
56
+ if (item.sectionId === id2) {
57
+ return item;
58
+ }
59
+ if (item.children) {
60
+ const childMatch = _matchNav(id2, item.children);
61
+ if (childMatch) {
62
+ return childMatch;
63
+ }
64
+ }
65
+ }
66
+ return void 0;
67
+ }
68
+ const nav = _matchNav(id, this.epub.structure);
69
+ const fileName = (0, _helper.getClearFilename)(nav ? nav.name + this.MD_FILE_EXT : (0, _nodePath.basename)(outpath));
70
+ const outDir = (0, _nodePath.dirname)(outpath);
47
71
  return {
48
- isImage,
49
- isCSS,
50
- isHTML
72
+ fileName,
73
+ outDir,
74
+ outPath: (0, _nodePath.join)(outDir, orderLabel + "-" + fileName)
51
75
  };
52
76
  }
53
- resolveHTMLId(fileName) {
54
- return fileName.replace(/\.x?html?(?:.*)/, "");
55
- }
56
- // 文件名处理
57
- getCleanFileName(fileName, ext = "") {
58
- return (0, _utils.sanitizeFileName)(fileName).trim().replace(/\s/g, "_") + ext;
59
- }
60
77
  /**
61
78
  * Make a path,and normalize assets's path. normally markdowns dont need those css/js files, So i skip them
62
79
  * @return these target file's path will be created,like "xxx/xxx.md","xxx/images"
63
80
  */
64
- _makePath(filepath) {
81
+ parseFileInfo(filepath) {
65
82
  const {
66
83
  isImage,
67
84
  isHTML
68
- } = this.checkFileType(filepath);
69
- if (!isImage && !isHTML) return "";
70
- const fileName = (0, _nodePath.basename)(filepath);
71
- return (0, _nodePath.join)(this.outDir, isImage ? "images" : "", isHTML ? this.resolveHTMLId(fileName) + this.MD_FILE_EXT : fileName);
85
+ } = (0, _helper.checkFileType)(filepath);
86
+ const name2 = (0, _nodePath.basename)(filepath);
87
+ const path = !isImage && !isHTML ? (0, _nodePath.join)(this.outDir, "static", isHTML ? (0, _helper.resolveHTMLId)(name2) + this.MD_FILE_EXT : name2) : (0, _nodePath.join)(this.outDir, isImage ? this.IMAGE_DIR : "", isHTML ? (0, _helper.resolveHTMLId)(name2) + this.MD_FILE_EXT : name2);
88
+ return {
89
+ // html => md
90
+ type: isHTML ? "md" : isImage ? "img" : "",
91
+ name: name2,
92
+ path
93
+ };
72
94
  }
95
+ /**
96
+ * Retrieves and processes the manifest of an EPUB file.
97
+ *
98
+ * @param unzip - Optional flag to indicate whether to simply unzip the file contents
99
+ * @returns Populates the structure array with manifest items, either unzipped or converted
100
+ *
101
+ * This method parses the EPUB file, extracts its manifest, and creates a structure
102
+ * representing the file contents. When unzip is false, it skips certain files like
103
+ * the NCX file and title page, and generates appropriate output paths for other files.
104
+ */
73
105
  async getManifest(unzip) {
74
106
  this.epub = await (0, _parseEpub.default)(this.epubFilePath, {
75
107
  convertToMarkdown: _helper.convertHTML
76
108
  });
77
109
  this.outDir = this.epubFilePath.replace(".epub", "");
78
- this.epub.getManifest().forEach(({
110
+ let num = 0;
111
+ const padding = Math.floor(Math.log10(this.epub?.sections?.length ?? 0));
112
+ for (const {
79
113
  href: filepath,
80
114
  id
81
- }) => {
82
- let outpath;
115
+ } of this.epub.getManifest()) {
116
+ let outpath = "",
117
+ type = "";
83
118
  if (unzip) outpath = (0, _nodePath.join)(this.outDir, filepath);else {
84
- if (filepath.endsWith("ncx") || id === "titlepage") return;
85
- outpath = this._makePath(filepath);
119
+ if (filepath.endsWith("ncx") || id === "titlepage") continue;
120
+ const file = this.parseFileInfo(filepath);
121
+ outpath = file.path;
122
+ type = file.type;
86
123
  }
87
- if (outpath !== "") this.structure.push({
88
- id,
89
- outpath,
90
- filepath
91
- });
124
+ if (type !== "") {
125
+ this.structure.push({
126
+ // current only label markdown file
127
+ orderLabel: type === "md" ? (num++, ("0".repeat(padding) + num).slice(-(padding + 1))) : "",
128
+ id,
129
+ type,
130
+ outpath,
131
+ filepath
132
+ });
133
+ }
134
+ }
135
+ }
136
+ /**
137
+ * Download remote images to the local images directory
138
+ */
139
+ async downloadImage(url, dest) {
140
+ if ((0, _nodeFs.existsSync)(dest)) return;
141
+ const res = await fetch(url);
142
+ if (!res.ok) throw new Error(`Failed to download image: ${url}`);
143
+ const arrayBuffer = await res.arrayBuffer();
144
+ const buffer = Buffer.from(arrayBuffer);
145
+ (0, _writeFileSafe.writeFileSync)(dest, buffer, {
146
+ overwrite: true
92
147
  });
93
148
  }
94
149
  /**
95
- * Try to obtain a friendly output filename.
96
- */
97
- _getFileData(structure) {
150
+ * 本地化 markdown 内容中的所有 http/https 图片链接
151
+ */
152
+ async localizeImages(links, outDir) {
153
+ if (!(0, _nodeFs.existsSync)(outDir)) (0, _nodeFs.mkdirSync)(outDir);
154
+ const downloadTasks = [];
155
+ for (const link of links) {
156
+ const imgName = (0, _nodePath.basename)(link.split("?")[0]);
157
+ const localPath = (0, _nodePath.join)(outDir, imgName);
158
+ downloadTasks.push(this.downloadImage(link, localPath));
159
+ }
160
+ if (downloadTasks.length) await Promise.all(downloadTasks);
161
+ }
162
+ async getFileDataAsync(structure, handleContent) {
98
163
  let {
99
164
  id,
165
+ type,
100
166
  filepath,
101
- outpath
167
+ outpath,
168
+ orderLabel
102
169
  } = structure;
103
- let content = "";
170
+ let content = "",
171
+ links = [];
104
172
  const needAutoCorrect = this.cmd === _cli.Commands.autocorrect;
105
- if ((0, _nodePath.extname)(outpath) === ".md") {
106
- let _matchNav = function (tocItems, id2) {
107
- if (Array.isArray(tocItems)) for (let i = 0; i < tocItems.length; i++) {
108
- const item = tocItems[i];
109
- if (item.sectionId === id2) {
110
- return item;
111
- }
112
- if (item.children) {
113
- const childMatch = _matchNav(item.children, id2);
114
- if (childMatch) {
115
- return childMatch;
116
- }
117
- }
118
- }
119
- return void 0;
120
- };
121
- content = this.epub?.getSection(id)?.toMarkdown();
122
- const nav = _matchNav(this.epub?.structure, id);
123
- const cleanFilename = this.getCleanFileName(nav ? nav.name + this.MD_FILE_EXT : (0, _nodePath.basename)(outpath));
124
- outpath = (0, _nodePath.join)((0, _nodePath.dirname)(outpath), cleanFilename);
125
- content = (0, _helper.fixLinkPath)(content, (link, text) => {
126
- if (text) {
173
+ if (type === "md") {
174
+ const section = this.epub?.getSection(id);
175
+ if (section) {
176
+ content = section.toMarkdown();
177
+ }
178
+ const {
179
+ outPath,
180
+ fileName
181
+ } = this.clearOutpath(structure);
182
+ outpath = outPath;
183
+ const resLinks = [];
184
+ const linkStartSep = this.shouldMerge ? "#" : "./";
185
+ content = (0, _helper.fixLinkPath)(content, (link, isText) => {
186
+ if (isText) {
127
187
  const {
128
- hash,
188
+ hash = "",
129
189
  url
130
- } = (0, _parseLink.default)(link);
190
+ } = (0, _parseLink.default)(link, true);
131
191
  if (link.startsWith("#")) {
132
- return "./" + cleanFilename + link;
192
+ return linkStartSep + this.shouldMerge ? id : fileName + link;
133
193
  }
134
- link = this.resolveHTMLId((0, _nodePath.basename)(url));
135
- const anav = (0, _utils.findRealPath)(link, this.epub?.structure) || {
136
- name: link
194
+ link = (0, _helper.resolveHTMLId)((0, _nodePath.basename)(url));
195
+ const sectionId = this.epub.getItemId(url);
196
+ const internalNav = (0, _utils.matchTOC)(sectionId, this.epub?.structure) || {
197
+ name: link,
198
+ sectionId: (0, _helper.getClearFilename)((0, _nodePath.basename)(link))
137
199
  };
138
- return "./" + this.getCleanFileName((0, _nodePath.extname)(anav.name) ? anav.name : anav.name + this.MD_FILE_EXT) + `${hash ? "#" + hash : ""}`;
200
+ let validPath = (0, _helper.getClearFilename)((0, _nodePath.extname)(internalNav.name) ? internalNav.name : internalNav.name + this.MD_FILE_EXT);
201
+ for (const sfile of this.structure) {
202
+ if (sectionId === sfile.id) {
203
+ validPath = (0, _nodePath.basename)(this.clearOutpath(sfile).outPath);
204
+ break;
205
+ }
206
+ }
207
+ const toId = this.epub.getItemId((0, _nodePath.join)((0, _nodePath.dirname)(filepath), url));
208
+ links.push({
209
+ url,
210
+ hash,
211
+ id: internalNav.sectionId,
212
+ toId
213
+ });
214
+ return this.shouldMerge ? linkStartSep + toId + (hash ? "#" + hash : "") : linkStartSep + validPath + `${hash ? "#" + hash : ""}`;
139
215
  } else {
140
- return "./images/" + (0, _nodePath.basename)(link);
216
+ if (link.startsWith("http")) {
217
+ resLinks.push(link);
218
+ }
219
+ return "./" + this.IMAGE_DIR + "/" + (0, _nodePath.basename)(link);
141
220
  }
142
221
  });
222
+ if (this.localize) {
223
+ try {
224
+ this.localizeImages(resLinks, (0, _nodePath.join)(this.outDir, this.IMAGE_DIR));
225
+ } catch (error) {
226
+ _logger.default.error("Failed to localize the image!", error);
227
+ }
228
+ } else if (resLinks.length > 0) {
229
+ _logger.default.warn("Remote images are detected, you can set --localize to true to localize the remote images");
230
+ }
143
231
  content = needAutoCorrect ? require("autocorrect-node").format(content) : content;
144
232
  } else {
145
233
  content = this.epub.resolve(filepath).asNodeBuffer();
146
234
  }
147
235
  return {
236
+ id,
237
+ type,
238
+ filepath,
148
239
  content,
240
+ links,
149
241
  outFilePath: outpath
150
242
  };
151
243
  }
152
- async run(unzip) {
153
- await this.getManifest(unzip);
154
- let num = 1,
155
- filterPool = {};
156
- const padding = Math.floor(Math.log10(this.structure.length));
157
- this.structure.forEach(s => {
244
+ /**
245
+ * Runs the conversion process for an EPUB file.
246
+ *
247
+ * @param RunOptions - Configuration options or boolean (backward compatibility)
248
+ * @returns A promise resolving to the output directory or the result of generating a merged file
249
+ */
250
+ async run(options) {
251
+ const isUnzipOnly = options?.cmd === "unzip";
252
+ if (options) {
253
+ this.cmd = options.cmd;
254
+ this.shouldMerge = options.shouldMerge;
255
+ this.localize = options.localize;
256
+ this.mergedFilename = options.mergedFilename;
257
+ }
258
+ await this.getManifest(isUnzipOnly);
259
+ if (this.shouldMerge && !isUnzipOnly) {
260
+ return this.generateMergedFile();
261
+ }
262
+ let num = 1;
263
+ for (const s of this.structure) {
158
264
  const {
265
+ type,
159
266
  outFilePath,
160
267
  content
161
- } = this._getFileData(s);
162
- let numberedOutFilePath = null;
163
- if (content.toString() === "") return;
164
- if (!filterPool[outFilePath] && (0, _nodePath.basename)(outFilePath).endsWith(".md")) {
165
- const parsedPath = (0, _nodePath.parse)(outFilePath);
166
- numberedOutFilePath = (0, _nodePath.format)({
167
- ...parsedPath,
168
- base: `${("0".repeat(padding) + num).slice(-(padding + 1))}-${parsedPath.base}`
169
- });
170
- console.log(_chalk.default.yellow(`${num++}: [${(0, _nodePath.basename)(numberedOutFilePath)}]`));
268
+ } = await this.getFileDataAsync(s);
269
+ if (content.toString() === "") continue;
270
+ if (type === "md") {
271
+ _logger.default.success(`${num++}: [${(0, _nodePath.basename)(outFilePath)}]`);
171
272
  }
172
- filterPool[outFilePath] = true;
173
- (0, _writeFileSafe.writeFileSync)(numberedOutFilePath ?? outFilePath, content, {
273
+ (0, _writeFileSafe.writeFileSync)(outFilePath, content, {
174
274
  overwrite: true
175
275
  });
176
- });
276
+ }
177
277
  return this.outDir;
178
278
  }
279
+ /**
280
+ * Directly generate a single merged Markdown file
281
+ */
282
+ async generateMergedFile() {
283
+ let num = 1,
284
+ mergedContent = "";
285
+ for (const s of this.structure) {
286
+ let {
287
+ id,
288
+ filepath,
289
+ outFilePath,
290
+ content
291
+ } = await this.getFileDataAsync(s);
292
+ const {
293
+ isHTML
294
+ } = (0, _helper.checkFileType)(filepath);
295
+ if (isHTML) {
296
+ content = `<a role="toc_link" id="${id}"></a>
297
+ ` + content;
298
+ }
299
+ if ((0, _nodePath.extname)(outFilePath) === ".md" && content.toString() !== "") {
300
+ num++;
301
+ mergedContent += content.toString() + "\n\n---\n\n";
302
+ _logger.default.success(`${num}: [${(0, _nodePath.basename)(outFilePath)}]`);
303
+ } else if ((0, _nodePath.extname)(outFilePath) !== ".md") {
304
+ (0, _writeFileSafe.writeFileSync)(outFilePath, content, {
305
+ overwrite: true
306
+ });
307
+ }
308
+ }
309
+ const outputPath = (0, _nodePath.join)(this.outDir, this.mergedFilename || `${(0, _nodePath.basename)(this.outDir)}-merged.md`);
310
+ (0, _writeFileSafe.writeFileSync)(outputPath, mergedContent, {
311
+ overwrite: true
312
+ });
313
+ return outputPath;
314
+ }
179
315
  }
180
- module.exports = Converter;
316
+ exports.Converter = Converter;