epub2md 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/bin/cli.cjs CHANGED
@@ -9,7 +9,7 @@ var _args = _interopRequireDefault(require("args"));
9
9
  var _chalk = _interopRequireDefault(require("chalk"));
10
10
  var _nodeProcess = _interopRequireDefault(require("node:process"));
11
11
  var _parseEpub = _interopRequireDefault(require("../parseEpub.cjs"));
12
- var _convert = _interopRequireDefault(require("./convert.cjs"));
12
+ var _convert = require("./convert.cjs");
13
13
  var _beautyJson = require("beauty-json");
14
14
  function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
15
15
  const name = "epub2md";
@@ -47,7 +47,7 @@ function run(cmd) {
47
47
  const epubPath = flags["md" /* markdown */] || flags["ma" /* autocorrect */] || flags["unzip" /* unzip */];
48
48
  if (epubPath) {
49
49
  console.log(_chalk.default.blueBright(`[${name}]: converting${cmd === "ma" /* autocorrect */ ? " with AutoCorrect" : ""}...`));
50
- new _convert.default({
50
+ new _convert.Converter({
51
51
  eubPath: epubPath,
52
52
  cmd
53
53
  }).run(flags["unzip" /* unzip */]).then(outDir => {
@@ -3,7 +3,7 @@
3
3
  Object.defineProperty(exports, "__esModule", {
4
4
  value: true
5
5
  });
6
-
6
+ exports.Converter = void 0;
7
7
  var _nodePath = require("node:path");
8
8
  var _nodeFs = require("node:fs");
9
9
  var _writeFileSafe = require("write-file-safe");
@@ -155,6 +155,7 @@ class Converter {
155
155
  filterPool = {};
156
156
  const padding = Math.floor(Math.log10(this.structure.length));
157
157
  this.structure.forEach(s => {
158
+ const numLabel = ("0".repeat(padding) + num).slice(-(padding + 1));
158
159
  const {
159
160
  outFilePath,
160
161
  content
@@ -165,7 +166,7 @@ class Converter {
165
166
  const parsedPath = (0, _nodePath.parse)(outFilePath);
166
167
  numberedOutFilePath = (0, _nodePath.format)({
167
168
  ...parsedPath,
168
- base: `${("0".repeat(padding) + num).slice(-(padding + 1))}-${parsedPath.base}`
169
+ base: `${numLabel}-${parsedPath.base}`
169
170
  });
170
171
  console.log(_chalk.default.yellow(`${num++}: [${(0, _nodePath.basename)(numberedOutFilePath)}]`));
171
172
  }
@@ -177,4 +178,4 @@ class Converter {
177
178
  return this.outDir;
178
179
  }
179
180
  }
180
- module.exports = Converter;
181
+ exports.Converter = Converter;
package/lib/parseEpub.cjs CHANGED
@@ -13,21 +13,21 @@ var _parseLink = _interopRequireDefault(require("./parseLink.cjs"));
13
13
  var _parseSection = _interopRequireDefault(require("./parseSection.cjs"));
14
14
  var _utils = require("./utils.cjs");
15
15
  function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
16
- const parseMetadata = metadata => {
17
- const meta = metadata[0];
16
+ const parseMetadata = (metadata = {}) => {
17
+ const meta = metadata;
18
18
  const info = {};
19
19
  ["title", "author", "description", "language", "publisher", "rights"].forEach(item => {
20
20
  if (item === "author") {
21
- info.author = _lodash.default.get(meta, ["dc:creator", 0]);
22
- if (_lodash.default.isString(info.author)) {
23
- info.author = [info.author];
21
+ const author = _lodash.default.get(meta, ["dc:creator"]);
22
+ if (_lodash.default.isArray(author)) {
23
+ info.author = author.map(a => a["#text"]);
24
24
  } else {
25
- info.author = [_lodash.default.get(info.author, ["_"])];
25
+ info.author = [author["#text"]];
26
26
  }
27
27
  } else if (item === "description") {
28
- info.description = _lodash.default.get(meta, ["description", 0, "_"]);
28
+ info.description = _lodash.default.get(meta, [item, "_"]);
29
29
  } else {
30
- info[item] = _lodash.default.get(meta, ["dc:" + item, 0]);
30
+ info[item] = _lodash.default.get(meta, ["dc:" + item]);
31
31
  }
32
32
  });
33
33
  return _lodash.default.pickBy(info, v => {
@@ -83,14 +83,15 @@ class Epub {
83
83
  throw new Error(`${_path} not found!`);
84
84
  }
85
85
  }
86
- async _resolveXMLAsJsObject(path) {
86
+ _resolveXMLAsJsObject(path) {
87
87
  const xml = this.resolve(path).asText();
88
- return (0, _utils.xmlToJs)(xml);
88
+ return (0, _utils.xmlToJson)(xml);
89
89
  }
90
- async _getOpfPath() {
91
- const container = await this._resolveXMLAsJsObject("/META-INF/container.xml");
92
- const opfPath = container.container.rootfiles[0].rootfile[0]["$"]["full-path"];
93
- return opfPath;
90
+ /**
91
+ * 获取EPUB文件中的OPF(Open Packaging Format)文件的路径。
92
+ */
93
+ _getOpfPath() {
94
+ return this._resolveXMLAsJsObject("/META-INF/container.xml").container.rootfiles.rootfile["@full-path"];
94
95
  }
95
96
  _resolveIdFromLink(href) {
96
97
  const {
@@ -105,13 +106,16 @@ class Epub {
105
106
  return _lodash.default.get(tarItem, "id");
106
107
  }
107
108
  getManifest(content) {
108
- return this._manifest || _lodash.default.get(content, ["package", "manifest", 0, "item"], []).map(item => item.$);
109
+ return this._manifest || _lodash.default.get(content, ["package", "manifest", "item"], []).map(item => ({
110
+ href: item["@href"],
111
+ id: item["@id"]
112
+ }));
109
113
  }
110
114
  getSpine() {
111
115
  const spine = {};
112
116
  this.getManifest();
113
- _lodash.default.get(this._content, ["package", "spine", 0, "itemref"], []).map((item, i) => {
114
- return spine[item.$.idref] = i;
117
+ _lodash.default.get(this._content, ["package", "spine", "itemref"], []).map((item, i) => {
118
+ return spine[item["@idref"]] = i;
115
119
  });
116
120
  return spine;
117
121
  }
@@ -155,11 +159,11 @@ class Epub {
155
159
  if (tocObj.html) {
156
160
  return this._genStructureForHTML(tocObj);
157
161
  }
158
- const rootNavPoints = _lodash.default.get(tocObj, ["ncx", "navMap", "0", "navPoint"], []);
162
+ const rootNavPoints = _lodash.default.get(tocObj, ["ncx", "navMap", "navPoint"], []);
159
163
  const parseNavPoint = navPoint => {
160
- const path = _lodash.default.get(navPoint, ["content", "0", "$", "src"], "");
161
- const name = _lodash.default.get(navPoint, ["navLabel", "0", "text", "0"]);
162
- const playOrder = _lodash.default.get(navPoint, ["$", "playOrder"]);
164
+ const path = _lodash.default.get(navPoint, ["content", "@src"], "");
165
+ const name = _lodash.default.get(navPoint, ["navLabel", "text"]);
166
+ const playOrder = _lodash.default.get(navPoint, ["@playOrder"]);
163
167
  const {
164
168
  hash
165
169
  } = (0, _parseLink.default)(path);
@@ -171,14 +175,14 @@ class Epub {
171
175
  return {
172
176
  name,
173
177
  sectionId,
174
- nodeId: hash || _lodash.default.get(navPoint, ["$", "id"]),
178
+ nodeId: hash || navPoint["@id"],
175
179
  path,
176
180
  playOrder,
177
181
  children
178
182
  };
179
183
  };
180
184
  const parseNavPoints = navPoints => {
181
- return navPoints.map(point => {
185
+ return (_lodash.default.isArray(navPoints) ? navPoints : [navPoints]).map(point => {
182
186
  return parseNavPoint(point);
183
187
  });
184
188
  };
@@ -216,16 +220,16 @@ class Epub {
216
220
  return this.sections ? sectionIndex != -1 ? this.sections[sectionIndex] : null : null;
217
221
  }
218
222
  async parse() {
219
- this._opfPath = await this._getOpfPath();
220
- this._content = await this._resolveXMLAsJsObject("/" + this._opfPath);
223
+ this._opfPath = this._getOpfPath();
224
+ this._content = this._resolveXMLAsJsObject("/" + this._opfPath);
221
225
  this._root = (0, _utils.determineRoot)(this._opfPath);
222
226
  this._manifest = this.getManifest(this._content);
223
- this._metadata = _lodash.default.get(this._content, ["package", "metadata"], []);
227
+ this._metadata = _lodash.default.get(this._content, ["package", "metadata"], {});
224
228
  this.tocFile = (_lodash.default.find(this._manifest, {
225
229
  id: "ncx"
226
230
  }) || {}).href;
227
231
  if (this.tocFile) {
228
- const toc = await this._resolveXMLAsJsObject(this.tocFile);
232
+ const toc = this._resolveXMLAsJsObject(this.tocFile);
229
233
  this._toc = toc;
230
234
  this.structure = this._genStructure(toc);
231
235
  }
@@ -45,7 +45,10 @@ export declare class Epub {
45
45
  asText: () => string;
46
46
  asNodeBuffer: () => Buffer;
47
47
  };
48
- _resolveXMLAsJsObject(path: string): Promise<GeneralObject>;
48
+ _resolveXMLAsJsObject(path: string): GeneralObject;
49
+ /**
50
+ * 获取EPUB文件中的OPF(Open Packaging Format)文件的路径。
51
+ */
49
52
  private _getOpfPath;
50
53
  _resolveIdFromLink(href: string): string;
51
54
  getManifest(content?: GeneralObject): Manifest[];
package/lib/parseEpub.mjs CHANGED
@@ -3,22 +3,22 @@ import _ from "lodash";
3
3
  import nodeZip from "node-zip";
4
4
  import parseLink from "./parseLink.mjs";
5
5
  import parseSection from "./parseSection.mjs";
6
- import { xmlToJs, determineRoot } from "./utils.mjs";
7
- const parseMetadata = (metadata) => {
8
- const meta = metadata[0];
6
+ import { xmlToJson, determineRoot } from "./utils.mjs";
7
+ const parseMetadata = (metadata = {}) => {
8
+ const meta = metadata;
9
9
  const info = {};
10
10
  ["title", "author", "description", "language", "publisher", "rights"].forEach((item) => {
11
11
  if (item === "author") {
12
- info.author = _.get(meta, ["dc:creator", 0]);
13
- if (_.isString(info.author)) {
14
- info.author = [info.author];
12
+ const author = _.get(meta, ["dc:creator"]);
13
+ if (_.isArray(author)) {
14
+ info.author = author.map((a) => a["#text"]);
15
15
  } else {
16
- info.author = [_.get(info.author, ["_"])];
16
+ info.author = [author["#text"]];
17
17
  }
18
18
  } else if (item === "description") {
19
- info.description = _.get(meta, ["description", 0, "_"]);
19
+ info.description = _.get(meta, [item, "_"]);
20
20
  } else {
21
- info[item] = _.get(meta, ["dc:" + item, 0]);
21
+ info[item] = _.get(meta, ["dc:" + item]);
22
22
  }
23
23
  });
24
24
  return _.pickBy(info, (v) => {
@@ -64,14 +64,15 @@ export class Epub {
64
64
  throw new Error(`${_path} not found!`);
65
65
  }
66
66
  }
67
- async _resolveXMLAsJsObject(path) {
67
+ _resolveXMLAsJsObject(path) {
68
68
  const xml = this.resolve(path).asText();
69
- return xmlToJs(xml);
69
+ return xmlToJson(xml);
70
70
  }
71
- async _getOpfPath() {
72
- const container = await this._resolveXMLAsJsObject("/META-INF/container.xml");
73
- const opfPath = container.container.rootfiles[0].rootfile[0]["$"]["full-path"];
74
- return opfPath;
71
+ /**
72
+ * 获取EPUB文件中的OPF(Open Packaging Format)文件的路径。
73
+ */
74
+ _getOpfPath() {
75
+ return this._resolveXMLAsJsObject("/META-INF/container.xml").container.rootfiles.rootfile["@full-path"];
75
76
  }
76
77
  _resolveIdFromLink(href) {
77
78
  const { name: tarName } = parseLink(href);
@@ -82,14 +83,19 @@ export class Epub {
82
83
  return _.get(tarItem, "id");
83
84
  }
84
85
  getManifest(content) {
85
- return this._manifest || _.get(content, ["package", "manifest", 0, "item"], []).map((item) => item.$);
86
+ return this._manifest || _.get(content, ["package", "manifest", "item"], []).map(
87
+ (item) => ({
88
+ href: item["@href"],
89
+ id: item["@id"]
90
+ })
91
+ );
86
92
  }
87
93
  getSpine() {
88
94
  const spine = {};
89
95
  this.getManifest();
90
- _.get(this._content, ["package", "spine", 0, "itemref"], []).map(
96
+ _.get(this._content, ["package", "spine", "itemref"], []).map(
91
97
  (item, i) => {
92
- return spine[item.$.idref] = i;
98
+ return spine[item["@idref"]] = i;
93
99
  }
94
100
  );
95
101
  return spine;
@@ -132,11 +138,11 @@ export class Epub {
132
138
  if (tocObj.html) {
133
139
  return this._genStructureForHTML(tocObj);
134
140
  }
135
- const rootNavPoints = _.get(tocObj, ["ncx", "navMap", "0", "navPoint"], []);
141
+ const rootNavPoints = _.get(tocObj, ["ncx", "navMap", "navPoint"], []);
136
142
  const parseNavPoint = (navPoint) => {
137
- const path = _.get(navPoint, ["content", "0", "$", "src"], "");
138
- const name = _.get(navPoint, ["navLabel", "0", "text", "0"]);
139
- const playOrder = _.get(navPoint, ["$", "playOrder"]);
143
+ const path = _.get(navPoint, ["content", "@src"], "");
144
+ const name = _.get(navPoint, ["navLabel", "text"]);
145
+ const playOrder = _.get(navPoint, ["@playOrder"]);
140
146
  const { hash } = parseLink(path);
141
147
  let children = navPoint.navPoint;
142
148
  if (children) {
@@ -146,14 +152,14 @@ export class Epub {
146
152
  return {
147
153
  name,
148
154
  sectionId,
149
- nodeId: hash || _.get(navPoint, ["$", "id"]),
155
+ nodeId: hash || navPoint["@id"],
150
156
  path,
151
157
  playOrder,
152
158
  children
153
159
  };
154
160
  };
155
161
  const parseNavPoints = (navPoints) => {
156
- return navPoints.map((point) => {
162
+ return (_.isArray(navPoints) ? navPoints : [navPoints]).map((point) => {
157
163
  return parseNavPoint(point);
158
164
  });
159
165
  };
@@ -189,14 +195,14 @@ export class Epub {
189
195
  return this.sections ? sectionIndex != -1 ? this.sections[sectionIndex] : null : null;
190
196
  }
191
197
  async parse() {
192
- this._opfPath = await this._getOpfPath();
193
- this._content = await this._resolveXMLAsJsObject("/" + this._opfPath);
198
+ this._opfPath = this._getOpfPath();
199
+ this._content = this._resolveXMLAsJsObject("/" + this._opfPath);
194
200
  this._root = determineRoot(this._opfPath);
195
201
  this._manifest = this.getManifest(this._content);
196
- this._metadata = _.get(this._content, ["package", "metadata"], []);
202
+ this._metadata = _.get(this._content, ["package", "metadata"], {});
197
203
  this.tocFile = (_.find(this._manifest, { id: "ncx" }) || {}).href;
198
204
  if (this.tocFile) {
199
- const toc = await this._resolveXMLAsJsObject(this.tocFile);
205
+ const toc = this._resolveXMLAsJsObject(this.tocFile);
200
206
  this._toc = toc;
201
207
  this.structure = this._genStructure(toc);
202
208
  }
package/lib/utils.cjs CHANGED
@@ -5,11 +5,15 @@ Object.defineProperty(exports, "__esModule", {
5
5
  });
6
6
  exports.determineRoot = void 0;
7
7
  exports.findRealPath = findRealPath;
8
- exports.xmlToJs = exports.traverseNestedObject = exports.sanitizeFileName = void 0;
8
+ exports.xmlToJson = exports.traverseNestedObject = exports.sanitizeFileName = void 0;
9
9
  var _lodash = _interopRequireDefault(require("lodash"));
10
- var _xml2js = _interopRequireDefault(require("xml2js"));
10
+ var _fastXmlParser = require("fast-xml-parser");
11
11
  function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
12
- const xmlParser = new _xml2js.default.Parser();
12
+ const xmlParser = new _fastXmlParser.XMLParser({
13
+ attributeNamePrefix: "@",
14
+ ignoreAttributes: false,
15
+ parseAttributeValue: true
16
+ });
13
17
  const cacheNavPool = {};
14
18
  function findRealPath(filePath, navs) {
15
19
  if (!navs) return;
@@ -37,18 +41,10 @@ const sanitizeFileName = (fileName, replacementChar = "_") => {
37
41
  return fileName.replace(invalidCharsPattern, replacementChar);
38
42
  };
39
43
  exports.sanitizeFileName = sanitizeFileName;
40
- const xmlToJs = xml => {
41
- return new Promise((resolve, reject) => {
42
- xmlParser.parseString(xml, (err, object) => {
43
- if (err) {
44
- reject(err);
45
- } else {
46
- resolve(object);
47
- }
48
- });
49
- });
44
+ const xmlToJson = xml => {
45
+ return xmlParser.parse(xml);
50
46
  };
51
- exports.xmlToJs = xmlToJs;
47
+ exports.xmlToJson = xmlToJson;
52
48
  const determineRoot = opfPath => {
53
49
  let root = "";
54
50
  if (opfPath.match(/\//)) {
package/lib/utils.d.ts CHANGED
@@ -12,7 +12,7 @@ export interface TraverseNestedObject {
12
12
  */
13
13
  export declare function findRealPath(filePath: string, navs?: TOCItem[]): TOCItem | undefined;
14
14
  export declare const sanitizeFileName: (fileName: string, replacementChar?: string) => string;
15
- export declare const xmlToJs: (xml: string) => Promise<any>;
15
+ export declare const xmlToJson: (xml: string) => any;
16
16
  export declare const determineRoot: (opfPath: string) => string;
17
17
  /**
18
18
  * traverseNestedObject
package/lib/utils.mjs CHANGED
@@ -1,6 +1,10 @@
1
1
  import _ from "lodash";
2
- import xml2js from "xml2js";
3
- const xmlParser = new xml2js.Parser();
2
+ import { XMLParser } from "fast-xml-parser";
3
+ const xmlParser = new XMLParser({
4
+ attributeNamePrefix: "@",
5
+ ignoreAttributes: false,
6
+ parseAttributeValue: true
7
+ });
4
8
  const cacheNavPool = {};
5
9
  export function findRealPath(filePath, navs) {
6
10
  if (!navs) return;
@@ -24,16 +28,8 @@ export const sanitizeFileName = (fileName, replacementChar = "_") => {
24
28
  const invalidCharsPattern = /[\\/:*?"<>|]/g;
25
29
  return fileName.replace(invalidCharsPattern, replacementChar);
26
30
  };
27
- export const xmlToJs = (xml) => {
28
- return new Promise((resolve, reject) => {
29
- xmlParser.parseString(xml, (err, object) => {
30
- if (err) {
31
- reject(err);
32
- } else {
33
- resolve(object);
34
- }
35
- });
36
- });
31
+ export const xmlToJson = (xml) => {
32
+ return xmlParser.parse(xml);
37
33
  };
38
34
  export const determineRoot = (opfPath) => {
39
35
  let root = "";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "epub2md",
3
- "version": "1.2.1",
3
+ "version": "1.3.0",
4
4
  "description": "A epub parser that also can convert epub to markdown using the command line",
5
5
  "type": "module",
6
6
  "main": "lib/index.cjs",
@@ -19,18 +19,6 @@
19
19
  }
20
20
  },
21
21
  "types": "./lib/index.d.ts",
22
- "scripts": {
23
- "prebuild": "pnpm clean",
24
- "build": "unbuild && pnpm run build:deno",
25
- "build:deno": "node ./deno-build.mjs",
26
- "watch": "tsc --watch",
27
- "clean": "rimraf lib dist",
28
- "format": "prettier --write \"src/**/*.{js,jsx,ts,tsx,json,md,css,scss}\"",
29
- "test": "jest",
30
- "test-debug": "node --inspect-brk -r ts-node/register node_modules/.bin/jest --runInBand",
31
- "v": "pnpm dlx commit-and-tag-version --preset angular --release-as",
32
- "postv": "git push --follow-tags origin main"
33
- },
34
22
  "repository": {
35
23
  "type": "git",
36
24
  "url": "git+https://github.com/uxiew/epub2MD.git"
@@ -55,20 +43,20 @@
55
43
  "args": "^5.0.3",
56
44
  "autocorrect-node": "^2.6.2",
57
45
  "beauty-json": "^1.0.0",
46
+ "fast-xml-parser": "^5.2.0",
58
47
  "jsdom": "^24.0.0",
59
48
  "lodash": "^4.17.15",
60
49
  "node-html-markdown": "^1.3.0",
61
50
  "node-zip": "^1.1.1",
62
- "write-file-safe": "^1.3.1",
63
- "xml2js": "^0.5.0"
51
+ "write-file-safe": "^1.3.1"
64
52
  },
65
53
  "devDependencies": {
54
+ "@jsdevtools/npm-publish": "^3.1.1",
66
55
  "@types/args": "^5.0.0",
67
56
  "@types/jest": "^24.0.18",
68
57
  "@types/jsdom": "^21.1.1",
69
58
  "@types/lodash": "^4.14.137",
70
59
  "@types/node": "^20.2.3",
71
- "@types/xml2js": "^0.4.4",
72
60
  "chalk": "^4.1.2",
73
61
  "cross-env": "^5.2.0",
74
62
  "dotenv": "^8.1.0",
@@ -83,5 +71,17 @@
83
71
  "typescript": "^5.5.4",
84
72
  "unbuild": "3.0.0-rc.7",
85
73
  "vrsource-tslint-rules": "^6.0.0"
74
+ },
75
+ "scripts": {
76
+ "prebuild": "pnpm clean",
77
+ "build": "unbuild && pnpm run build:deno",
78
+ "build:deno": "node ./deno-build.mjs",
79
+ "watch": "tsc --watch",
80
+ "clean": "rimraf lib dist",
81
+ "format": "prettier --write \"src/**/*.{js,jsx,ts,tsx,json,md,css,scss}\"",
82
+ "test": "jest",
83
+ "test-debug": "node --inspect-brk -r ts-node/register node_modules/.bin/jest --runInBand",
84
+ "v": "pnpm dlx commit-and-tag-version --preset angular --release-as",
85
+ "postv": "git push --follow-tags origin main"
86
86
  }
87
- }
87
+ }