epub2md 1.1.4 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/LICENSE +21 -0
  2. package/README.md +28 -27
  3. package/lib/bin/cli.cjs +64 -0
  4. package/lib/bin/convert.cjs +180 -0
  5. package/lib/bin/helper.cjs +49 -0
  6. package/lib/converter.cjs +11 -0
  7. package/lib/converter.d.ts +2 -1
  8. package/lib/converter.mjs +5 -0
  9. package/lib/index.cjs +34 -0
  10. package/lib/index.d.ts +1 -1
  11. package/lib/index.mjs +5 -0
  12. package/lib/mobi.cjs +160 -0
  13. package/lib/mobi.d.ts +1 -2
  14. package/lib/mobi.mjs +161 -0
  15. package/lib/parseEpub.cjs +249 -0
  16. package/lib/parseEpub.d.ts +10 -5
  17. package/lib/parseEpub.mjs +216 -0
  18. package/lib/parseHTML.cjs +99 -0
  19. package/lib/parseHTML.d.ts +2 -2
  20. package/lib/parseHTML.mjs +88 -0
  21. package/lib/parseLink.cjs +26 -0
  22. package/lib/parseLink.d.ts +3 -0
  23. package/lib/parseLink.mjs +13 -0
  24. package/lib/parseSection.cjs +75 -0
  25. package/lib/parseSection.d.ts +4 -4
  26. package/lib/parseSection.mjs +59 -0
  27. package/lib/types.cjs +1 -0
  28. package/lib/types.mjs +0 -0
  29. package/lib/utils.cjs +113 -0
  30. package/lib/utils.d.ts +1 -1
  31. package/lib/utils.mjs +91 -0
  32. package/package.json +24 -13
  33. package/CHANGELOG.md +0 -22
  34. package/lib/bin/cli.d.ts +0 -9
  35. package/lib/bin/cli.js +0 -77
  36. package/lib/bin/convert.d.ts +0 -37
  37. package/lib/bin/convert.js +0 -269
  38. package/lib/bin/parse.d.ts +0 -5
  39. package/lib/bin/parse.js +0 -71
  40. package/lib/converter.js +0 -14
  41. package/lib/index.js +0 -14
  42. package/lib/mobi.js +0 -168
  43. package/lib/parseEpub.js +0 -314
  44. package/lib/parseEpub.spec.d.ts +0 -1
  45. package/lib/parseEpub.spec.js +0 -121
  46. package/lib/parseHTML.js +0 -101
  47. package/lib/parseHTML.spec.d.ts +0 -1
  48. package/lib/parseHTML.spec.js +0 -12
  49. package/lib/parseLink.js +0 -19
  50. package/lib/parseSection.js +0 -69
  51. package/lib/types.js +0 -2
  52. package/lib/utils.js +0 -141
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2021 ChandlerVer5
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md CHANGED
@@ -1,13 +1,27 @@
1
- # 📖 epub2MD
1
+ # 📖 epub2md
2
2
 
3
- The main goal is to convert EPUB to Markdown, but it can also serve as an EPUB parser.
4
- 主要的目标是转换 epub 到 markdown,当然了也可以当做 epub 的解析器
3
+ Even though the package is primarily intended for CLI is to convert EPUB to Markdown, but it can be used programmatically.
5
4
 
6
- # Global Install for CLI
5
+ 主要的目标是转换 epub markdown,当然了也可以当做 epub 的解析器库使用.
7
6
 
8
- ```bash
9
- # cli
10
- npm install epub2md -g
7
+ ## Global Install for CLI
8
+
9
+ ```sh
10
+ # node global cli
11
+ $ npm install epub2md -g
12
+ ```
13
+
14
+ ## Development Install
15
+
16
+ ```sh
17
+ # for node
18
+ $ npm install epub2md
19
+
20
+ # for deno
21
+ $ deno add @xw/epub2md
22
+
23
+ # from GitHub Packages Registry
24
+ $ npm install @uxiew/epub2md
11
25
  ```
12
26
 
13
27
  ## CLI
@@ -31,20 +45,7 @@ $ epub2md -S ../../fixtures/zhihu.epub
31
45
  $ epub2md -s ../../fixtures/zhihu.epub
32
46
  ```
33
47
 
34
- # Development Install
35
-
36
- ```bash
37
- # npm
38
- npm install epub2md --save
39
-
40
- # pnpm
41
- pnpm add epub2md
42
-
43
- # yarn
44
- yarn add epub2md
45
- ```
46
-
47
- # Usage
48
+ ## Usage
48
49
 
49
50
  ```js
50
51
  import { parseEpub } from 'epub2md'
@@ -54,15 +55,15 @@ const epubObj = await parseEpub('/path/to/file.epub')
54
55
  console.log('epub content:', epubObj)
55
56
  ```
56
57
 
57
- ## parseEpub(target: string | buffer, options ?: ParserOptions): EpubObject
58
+ ### parseEpub(target: string | buffer, options ?: ParserOptions): EpubObject
58
59
 
59
- ### target
60
+ #### target
60
61
 
61
62
  type: `string` or`buffer`
62
63
 
63
64
  It can be the path to the file or file's binary string or buffer
64
65
 
65
- ### options: `ParserOptions`
66
+ #### options: `ParserOptions`
66
67
 
67
68
  - type ?: 'binaryString' | 'path' | 'buffer'
68
69
 
@@ -73,7 +74,7 @@ It forces the parser to treat supplied target as the defined type, if not define
73
74
 
74
75
  use custom convert function, you can use turndown or node-html-markdown.etc.
75
76
 
76
- ### EpubObject
77
+ #### EpubObject
77
78
 
78
79
  The return value is an object which contains`structure`, `sections`, `info`(private property names start with `_`.I don't recommend using them, since they are subscribed to change).
79
80
 
@@ -85,11 +86,11 @@ The return value is an object which contains`structure`, `sections`, `info`(priv
85
86
 
86
87
  - `Section.prototype.toHtmlObjects`: convert to html object. And a note about `src` and`href`, the`src` and`href` in raw html stay untouched, but the `toHtmlObjects` method resolves `src` to base64 string, and alters `href` so that they make sense in the parsed epub.And the parsed `href` is something like`#{sectionId},{hash}`.
87
88
 
88
- # How to contribute
89
+ ## How to contribute
89
90
 
90
91
  - Raise an issue in the issue section.
91
92
  - PRs are the best. ❤️
92
93
 
93
- # Credits
94
+ ## Credits
94
95
 
95
96
  [gaoxiaoliangz/epub-parser](https://github.com/gaoxiaoliangz/epub-parser)
@@ -0,0 +1,64 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+
4
+ Object.defineProperty(exports, "__esModule", {
5
+ value: true
6
+ });
7
+ exports.Commands = void 0;
8
+ var _args = _interopRequireDefault(require("args"));
9
+ var _chalk = _interopRequireDefault(require("chalk"));
10
+ var _nodeProcess = _interopRequireDefault(require("node:process"));
11
+ var _parseEpub = _interopRequireDefault(require("../parseEpub.cjs"));
12
+ var _convert = _interopRequireDefault(require("./convert.cjs"));
13
+ var _beautyJson = require("beauty-json");
14
+ function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
15
+ const name = "epub2md";
16
+ var Commands = exports.Commands = /* @__PURE__ */(Commands2 => {
17
+ Commands2["markdown"] = "md";
18
+ Commands2["autocorrect"] = "ma";
19
+ Commands2["unzip"] = "unzip";
20
+ Commands2["info"] = "info";
21
+ Commands2["structure"] = "structure";
22
+ Commands2["sections"] = "sections";
23
+ return Commands2;
24
+ })(Commands || {});
25
+ const commands = [["md" /* markdown */, "convert the epub file to markdown format"], ["ma" /* autocorrect */, "convert the epub file to markdown format with autocorrect"], ["unzip" /* unzip */, "unzip epub file"], ["info" /* info */, "get epub file basic info"], ["structure" /* structure */, "get epub file structure"], ["sections" /* sections */, "get epub file sections"]];
26
+ const DEFAULT_COMMAND = "md" /* markdown */;
27
+ commands.forEach(cmd => _args.default.option(cmd[0], cmd[1]));
28
+ const flags = _args.default.parse(_nodeProcess.default.argv, {
29
+ name
30
+ });
31
+ commands.some(([cmd], i) => {
32
+ if (flags[cmd]) {
33
+ run(cmd);
34
+ return true;
35
+ } else {
36
+ if (i === commands.length - 1) {
37
+ if (_nodeProcess.default.argv[2]) {
38
+ flags[DEFAULT_COMMAND] = _nodeProcess.default.argv[2];
39
+ run(DEFAULT_COMMAND);
40
+ return true;
41
+ }
42
+ _args.default.showHelp();
43
+ }
44
+ }
45
+ });
46
+ function run(cmd) {
47
+ const epubPath = flags["md" /* markdown */] || flags["ma" /* autocorrect */] || flags["unzip" /* unzip */];
48
+ if (epubPath) {
49
+ console.log(_chalk.default.blueBright(`[${name}]: converting${cmd === "ma" /* autocorrect */ ? " with AutoCorrect" : ""}...`));
50
+ new _convert.default({
51
+ eubPath: epubPath,
52
+ cmd
53
+ }).run(flags["unzip" /* unzip */]).then(outDir => {
54
+ console.log(_chalk.default.greenBright(`[${name}]: success! output: ${outDir}`));
55
+ });
56
+ return;
57
+ }
58
+ (0, _parseEpub.default)(flags[cmd]).then(res => {
59
+ console.log(_chalk.default.greenBright(`[${name}]: This book ${cmd}:`));
60
+ _beautyJson.json.log(res[cmd]);
61
+ }).catch(error => {
62
+ console.log(_chalk.default.red(error));
63
+ });
64
+ }
@@ -0,0 +1,180 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+
7
+ var _nodePath = require("node:path");
8
+ var _nodeFs = require("node:fs");
9
+ var _writeFileSafe = require("write-file-safe");
10
+ var _chalk = _interopRequireDefault(require("chalk"));
11
+ var _parseEpub = _interopRequireDefault(require("../parseEpub.cjs"));
12
+ var _helper = require("./helper.cjs");
13
+ var _utils = require("../utils.cjs");
14
+ var _parseLink = _interopRequireDefault(require("../parseLink.cjs"));
15
+ var _cli = require("./cli.cjs");
16
+ function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
17
+ class Converter {
18
+ epub;
19
+ // epub parser result
20
+ epubFilePath;
21
+ // current epub 's path
22
+ cmd;
23
+ // current using command flag
24
+ MD_FILE_EXT = ".md";
25
+ // out file extname
26
+ outDir;
27
+ // epub 's original directory to save markdown files
28
+ structure = [];
29
+ // epub dir structure
30
+ constructor({
31
+ eubPath,
32
+ cmd
33
+ }) {
34
+ this.epubFilePath = eubPath;
35
+ this.cmd = cmd;
36
+ this.outDir = (0, _nodePath.dirname)(eubPath);
37
+ if (!(0, _nodeFs.existsSync)(this.outDir)) (0, _nodeFs.mkdirSync)(this.outDir);
38
+ }
39
+ checkFileType(filepath) {
40
+ let isImage,
41
+ isCSS,
42
+ isHTML = false;
43
+ const ext = (0, _nodePath.extname)(filepath);
44
+ if (",.jpg,.jpeg,.png,.gif,.webp,.svg".includes(ext)) isImage = true;
45
+ if (",.css".includes(ext)) isCSS = true;
46
+ if (".htm,.html,.xhtml".includes(ext)) isHTML = true;
47
+ return {
48
+ isImage,
49
+ isCSS,
50
+ isHTML
51
+ };
52
+ }
53
+ resolveHTMLId(fileName) {
54
+ return fileName.replace(/\.x?html?(?:.*)/, "");
55
+ }
56
+ // 文件名处理
57
+ getCleanFileName(fileName, ext = "") {
58
+ return (0, _utils.sanitizeFileName)(fileName).trim().replace(/\s/g, "_") + ext;
59
+ }
60
+ /**
61
+ * Make a path,and normalize assets's path. normally markdowns dont need those css/js files, So i skip them
62
+ * @return these target file's path will be created,like "xxx/xxx.md","xxx/images"
63
+ */
64
+ _makePath(filepath) {
65
+ const {
66
+ isImage,
67
+ isHTML
68
+ } = this.checkFileType(filepath);
69
+ if (!isImage && !isHTML) return "";
70
+ const fileName = (0, _nodePath.basename)(filepath);
71
+ return (0, _nodePath.join)(this.outDir, isImage ? "images" : "", isHTML ? this.resolveHTMLId(fileName) + this.MD_FILE_EXT : fileName);
72
+ }
73
+ async getManifest(unzip) {
74
+ this.epub = await (0, _parseEpub.default)(this.epubFilePath, {
75
+ convertToMarkdown: _helper.convertHTML
76
+ });
77
+ this.outDir = this.epubFilePath.replace(".epub", "");
78
+ this.epub.getManifest().forEach(({
79
+ href: filepath,
80
+ id
81
+ }) => {
82
+ let outpath;
83
+ if (unzip) outpath = (0, _nodePath.join)(this.outDir, filepath);else {
84
+ if (filepath.endsWith("ncx") || id === "titlepage") return;
85
+ outpath = this._makePath(filepath);
86
+ }
87
+ if (outpath !== "") this.structure.push({
88
+ id,
89
+ outpath,
90
+ filepath
91
+ });
92
+ });
93
+ }
94
+ /**
95
+ * Try to obtain a friendly output filename.
96
+ */
97
+ _getFileData(structure) {
98
+ let {
99
+ id,
100
+ filepath,
101
+ outpath
102
+ } = structure;
103
+ let content = "";
104
+ const needAutoCorrect = this.cmd === _cli.Commands.autocorrect;
105
+ if ((0, _nodePath.extname)(outpath) === ".md") {
106
+ let _matchNav = function (tocItems, id2) {
107
+ if (Array.isArray(tocItems)) for (let i = 0; i < tocItems.length; i++) {
108
+ const item = tocItems[i];
109
+ if (item.sectionId === id2) {
110
+ return item;
111
+ }
112
+ if (item.children) {
113
+ const childMatch = _matchNav(item.children, id2);
114
+ if (childMatch) {
115
+ return childMatch;
116
+ }
117
+ }
118
+ }
119
+ return void 0;
120
+ };
121
+ content = this.epub?.getSection(id)?.toMarkdown();
122
+ const nav = _matchNav(this.epub?.structure, id);
123
+ const cleanFilename = this.getCleanFileName(nav ? nav.name + this.MD_FILE_EXT : (0, _nodePath.basename)(outpath));
124
+ outpath = (0, _nodePath.join)((0, _nodePath.dirname)(outpath), cleanFilename);
125
+ content = (0, _helper.fixLinkPath)(content, (link, text) => {
126
+ if (text) {
127
+ const {
128
+ hash,
129
+ url
130
+ } = (0, _parseLink.default)(link);
131
+ if (link.startsWith("#")) {
132
+ return "./" + cleanFilename + link;
133
+ }
134
+ link = this.resolveHTMLId((0, _nodePath.basename)(url));
135
+ const anav = (0, _utils.findRealPath)(link, this.epub?.structure) || {
136
+ name: link
137
+ };
138
+ return "./" + this.getCleanFileName((0, _nodePath.extname)(anav.name) ? anav.name : anav.name + this.MD_FILE_EXT) + `${hash ? "#" + hash : ""}`;
139
+ } else {
140
+ return "./images/" + (0, _nodePath.basename)(link);
141
+ }
142
+ });
143
+ content = needAutoCorrect ? require("autocorrect-node").format(content) : content;
144
+ } else {
145
+ content = this.epub.resolve(filepath).asNodeBuffer();
146
+ }
147
+ return {
148
+ content,
149
+ outFilePath: outpath
150
+ };
151
+ }
152
+ async run(unzip) {
153
+ await this.getManifest(unzip);
154
+ let num = 1,
155
+ filterPool = {};
156
+ const padding = Math.floor(Math.log10(this.structure.length));
157
+ this.structure.forEach(s => {
158
+ const {
159
+ outFilePath,
160
+ content
161
+ } = this._getFileData(s);
162
+ let numberedOutFilePath = null;
163
+ if (content.toString() === "") return;
164
+ if (!filterPool[outFilePath] && (0, _nodePath.basename)(outFilePath).endsWith(".md")) {
165
+ const parsedPath = (0, _nodePath.parse)(outFilePath);
166
+ numberedOutFilePath = (0, _nodePath.format)({
167
+ ...parsedPath,
168
+ base: `${("0".repeat(padding) + num).slice(-(padding + 1))}-${parsedPath.base}`
169
+ });
170
+ console.log(_chalk.default.yellow(`${num++}: [${(0, _nodePath.basename)(numberedOutFilePath)}]`));
171
+ }
172
+ filterPool[outFilePath] = true;
173
+ (0, _writeFileSafe.writeFileSync)(numberedOutFilePath ?? outFilePath, content, {
174
+ overwrite: true
175
+ });
176
+ });
177
+ return this.outDir;
178
+ }
179
+ }
180
+ module.exports = Converter;
@@ -0,0 +1,49 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.convertHTML = convertHTML;
7
+ exports.fixLinkPath = fixLinkPath;
8
+ var _converter = _interopRequireDefault(require("../converter.cjs"));
9
+ function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
10
+ function handleImagePath(markdownContent, replaceFn) {
11
+ const imgPattern = /!\[[^\]]*\]\(([^)]+)\)/g;
12
+ return markdownContent.replace(imgPattern, (match, imagePath) => {
13
+ const newImagePath = replaceFn(imagePath);
14
+ return `![${match.slice(2, match.indexOf("]"))}](${newImagePath})`;
15
+ });
16
+ }
17
+ function handleFileLinkPath(markdownContent, replaceFn) {
18
+ const inlineLinkPattern = /\[([^\]]*)]\(([^)]+)\)/g;
19
+ return markdownContent.replace(inlineLinkPattern, (match, linkText, linkUrl) => {
20
+ let newLinkUrl = linkUrl;
21
+ newLinkUrl = replaceFn(linkUrl, linkText);
22
+ return `[${linkText}](${newLinkUrl})`;
23
+ });
24
+ }
25
+ function fixLinkPath(markdownContent, replaceFn) {
26
+ const linkPattern = /(!?)\[(.*?)\](\(.*?\)\])?\((.*?)\)/g;
27
+ return markdownContent.replace(linkPattern, (match, imgMark, internalMatch1, internalMatch2, linkUrl) => {
28
+ const hasWrappedImg = internalMatch1.startsWith("![");
29
+ if (imgMark === "!") {
30
+ return handleImagePath(match, replaceFn);
31
+ } else if (hasWrappedImg) {
32
+ let wrappedImg = internalMatch1 + "]" + internalMatch2.replace(/\)\]$/, ")");
33
+ let m1 = "",
34
+ m2 = "";
35
+ const link = handleImagePath(wrappedImg, replaceFn).replace(/(!\[)(.*?)(\]\()/g, (m, mark1, mark, mark2) => {
36
+ m1 = mark1;
37
+ m2 = mark2;
38
+ return "$$" + mark + "@@";
39
+ });
40
+ return handleFileLinkPath(link, replaceFn).replace("$$", m1).replace("@@", m2);
41
+ } else {
42
+ return handleFileLinkPath(match, replaceFn);
43
+ }
44
+ });
45
+ }
46
+ function convertHTML(prunedHtml) {
47
+ const htmlString = prunedHtml.replace(/（）/g, "()").replace(/：：/g, "::").replace(/\s?<\?xml.*?>\s?/g, "").replace(/\s?<!DOC.*?>\s?/g, "").replace(/\n+\s?/g, "\n");
48
+ return (0, _converter.default)(htmlString);
49
+ }
@@ -0,0 +1,11 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+
7
+ var _nodeHtmlMarkdown = require("node-html-markdown");
8
+ const convert = str => _nodeHtmlMarkdown.NodeHtmlMarkdown.translate(str, {
9
+ useLinkReferenceDefinitions: false
10
+ });
11
+ module.exports = convert;
@@ -1 +1,2 @@
1
- export declare const convert: (str: string) => string;
1
+ declare const _default: (str: string) => string;
2
+ export default _default;
@@ -0,0 +1,5 @@
1
+ import { NodeHtmlMarkdown } from "node-html-markdown";
2
+ const convert = (str) => NodeHtmlMarkdown.translate(str, {
3
+ useLinkReferenceDefinitions: false
4
+ });
5
+ export default convert;
package/lib/index.cjs ADDED
@@ -0,0 +1,34 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ Object.defineProperty(exports, "convertToMarkdown", {
7
+ enumerable: true,
8
+ get: function () {
9
+ return _converter.default;
10
+ }
11
+ });
12
+ Object.defineProperty(exports, "parseEpub", {
13
+ enumerable: true,
14
+ get: function () {
15
+ return _parseEpub.default;
16
+ }
17
+ });
18
+ Object.defineProperty(exports, "parseHTML", {
19
+ enumerable: true,
20
+ get: function () {
21
+ return _parseHTML.default;
22
+ }
23
+ });
24
+ Object.defineProperty(exports, "parseLink", {
25
+ enumerable: true,
26
+ get: function () {
27
+ return _parseLink.default;
28
+ }
29
+ });
30
+ var _converter = _interopRequireDefault(require("./converter.cjs"));
31
+ var _parseEpub = _interopRequireDefault(require("./parseEpub.cjs"));
32
+ var _parseLink = _interopRequireDefault(require("./parseLink.cjs"));
33
+ var _parseHTML = _interopRequireDefault(require("./parseHTML.cjs"));
34
+ function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
package/lib/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { convert } from './converter';
1
+ import convert from './converter';
2
2
  import parseEpub from './parseEpub';
3
3
  import parseLink from './parseLink';
4
4
  import parseHTML from './parseHTML';
package/lib/index.mjs ADDED
@@ -0,0 +1,5 @@
1
+ import convert from "./converter.mjs";
2
+ import parseEpub from "./parseEpub.mjs";
3
+ import parseLink from "./parseLink.mjs";
4
+ import parseHTML from "./parseHTML.mjs";
5
+ export { convert as convertToMarkdown, parseLink, parseHTML, parseEpub };
package/lib/mobi.cjs ADDED
@@ -0,0 +1,160 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+
7
+ var _nodeFs = _interopRequireDefault(require("node:fs"));
8
+ var _pypacker = _interopRequireDefault(require("pypacker"));
9
+ function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
10
+ class Mobi {
11
+ filename;
12
+ info = {
13
+ content: "",
14
+ pdbHeader: {
15
+ records: []
16
+ },
17
+ mobiHeader: {}
18
+ };
19
+ constructor(filename) {
20
+ this.filename = filename;
21
+ this.parse();
22
+ }
23
+ parse() {
24
+ var bufIndex, buffer, data, fd, file_info, filename, flags, header, id, index, mobiHeader, multibyte, pdbHeader, position, startPosition, trailers, _i, _j, _ref, _ref1, _ref2, _ref3, _ref4, _ref5, _ref6, _ref7;
25
+ filename = this.filename;
26
+ file_info = _nodeFs.default.statSync(filename);
27
+ buffer = new Buffer(file_info.size);
28
+ fd = _nodeFs.default.openSync(filename, "r");
29
+ _nodeFs.default.readSync(fd, buffer, 0, file_info.size, 0);
30
+ pdbHeader = this.info.pdbHeader;
31
+ _ref = new _pypacker.default("31sxHH6I4s4s2IH").unpack_from(buffer);
32
+ this.info.name = _ref[0];
33
+ pdbHeader.attributes = _ref[1];
34
+ pdbHeader.version = _ref[2];
35
+ pdbHeader.created = _ref[3];
36
+ pdbHeader.modified = _ref[4];
37
+ pdbHeader.backedUp = _ref[5];
38
+ pdbHeader.modificationNumber = _ref[6];
39
+ pdbHeader.appInfoId = _ref[7];
40
+ pdbHeader.sortInfoID = _ref[8];
41
+ pdbHeader.type = _ref[9];
42
+ pdbHeader.creator = _ref[10];
43
+ pdbHeader.uniqueIDseed = _ref[11];
44
+ pdbHeader.nextRecordListID = _ref[12];
45
+ pdbHeader.recordCount = _ref[13];
46
+ this.info.name = this.info.name.replace(/\u0000/g, "");
47
+ pdbHeader.created = new Date(pdbHeader.created * 1e3);
48
+ pdbHeader.modified = new Date(pdbHeader.modified * 1e3);
49
+ pdbHeader.backedUp = new Date(pdbHeader.backedUp * 100);
50
+ bufIndex = 78;
51
+ for (index = _i = 0, _ref1 = pdbHeader.recordCount; 0 <= _ref1 ? _i < _ref1 : _i > _ref1; index = 0 <= _ref1 ? ++_i : --_i) {
52
+ startPosition = bufIndex + index * 8;
53
+ _ref2 = new _pypacker.default("II").unpack_from(buffer, startPosition), position = _ref2[0], id = _ref2[1];
54
+ id = id & 16777215;
55
+ pdbHeader.records.push({
56
+ position,
57
+ id
58
+ });
59
+ }
60
+ header = buffer.slice(pdbHeader.records[0].position, pdbHeader.records[1].position);
61
+ mobiHeader = this.info.mobiHeader;
62
+ _ref3 = new _pypacker.default("H2xI3H6x3I").unpack_from(header);
63
+ mobiHeader.compression = _ref3[0];
64
+ mobiHeader.text_length = _ref3[1];
65
+ mobiHeader.textRecordCount = _ref3[2];
66
+ mobiHeader.recordSize = _ref3[3];
67
+ mobiHeader.encryption = _ref3[4];
68
+ mobiHeader.headerLength = _ref3[5];
69
+ mobiHeader.mobiType = _ref3[6];
70
+ mobiHeader.encoding = _ref3[7];
71
+ _ref4 = new _pypacker.default("3I").unpack_from(header, 80);
72
+ mobiHeader.firstNonBookIndex = _ref4[0];
73
+ mobiHeader.fullNameOffset = _ref4[1];
74
+ mobiHeader.fullNameLength = _ref4[2];
75
+ mobiHeader.firstImageIndex = new _pypacker.default("I").unpack_from(header, 108)[0];
76
+ mobiHeader.exthFlags = new _pypacker.default("I").unpack_from(header, 128)[0];
77
+ mobiHeader.exthFlags = (mobiHeader.exthFlags & 64) === 64 ? true : false;
78
+ _ref5 = new _pypacker.default("2H").unpack_from(header, 194);
79
+ mobiHeader.firstContentRecord = _ref5[0];
80
+ mobiHeader.lastContentRecord = _ref5[1];
81
+ this.info.title = new _pypacker.default(mobiHeader.fullNameLength + "s").unpack_from(header, mobiHeader.fullNameOffset)[0];
82
+ multibyte = 0;
83
+ trailers = 0;
84
+ if (mobiHeader.headerLength >= 228) {
85
+ flags = (_ref6 = new _pypacker.default("H").unpack_from(header, 242), mobiHeader.flags = _ref6[0], _ref6);
86
+ multibyte = flags & 1;
87
+ while (flags > 1) {
88
+ trailers += 1;
89
+ flags = flags & flags - 2;
90
+ }
91
+ }
92
+ for (position = _j = 1, _ref7 = mobiHeader.textRecordCount; 1 <= _ref7 ? _j <= _ref7 : _j >= _ref7; position = 1 <= _ref7 ? ++_j : --_j) {
93
+ data = buffer.slice(pdbHeader.records[position].position, pdbHeader.records[position + 1].position);
94
+ data = this.trim(data, trailers, multibyte);
95
+ if (mobiHeader.compression === 1) {
96
+ this.info.content += data;
97
+ } else if (mobiHeader.compression === 2) {
98
+ this.info.content += this.palmdocReader(data);
99
+ } else {
100
+ throw new Error("LZ77 compression isn't supported... yet.");
101
+ }
102
+ }
103
+ this.info.content = this.info.content.replace(/<(head|HEAD)>/g, '<head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>');
104
+ return this;
105
+ }
106
+ trim(data, trailers, multibyte) {
107
+ var end_bytes, num, v, z, _i, _j;
108
+ for (z = _i = 0; 0 <= trailers ? _i < trailers : _i > trailers; z = 0 <= trailers ? ++_i : --_i) {
109
+ num = 0;
110
+ end_bytes = data.slice(data.length - 4);
111
+ for (v = _j = 0; _j < 4; v = ++_j) {
112
+ if (end_bytes[v] & 128) {
113
+ num = 0;
114
+ }
115
+ num = num << 7 | end_bytes[v] & 127;
116
+ }
117
+ data = data.slice(0, data.length - num);
118
+ }
119
+ if (multibyte) {
120
+ num = (data[data.length - 1] & 3) + 1;
121
+ data = data.slice(0, data.length - num);
122
+ }
123
+ return data;
124
+ }
125
+ palmdocReader(data) {
126
+ var concat, distance, frame, index, length, string, x, _i, _j, _ref;
127
+ string = "";
128
+ index = 0;
129
+ while (index < data.length) {
130
+ frame = data[index];
131
+ index += 1;
132
+ if (frame >= 1 && frame <= 8) {
133
+ string += data.toString("utf8", index, index + frame);
134
+ for (x = _i = 0, _ref = frame - 1; 0 <= _ref ? _i < _ref : _i > _ref; x = 0 <= _ref ? ++_i : --_i) {
135
+ string += String.fromCharCode(57568);
136
+ }
137
+ index += frame;
138
+ } else if (frame < 128) {
139
+ string += String.fromCharCode(frame);
140
+ } else if (frame >= 192) {
141
+ string += " " + String.fromCharCode(frame ^ 128);
142
+ } else {
143
+ concat = frame << 8 | data[index];
144
+ distance = concat >> 3 & 2047;
145
+ length = (concat & 7) + 3;
146
+ if (length < distance) {
147
+ string += string.slice(-distance, length - distance);
148
+ } else {
149
+ for (x = _j = 0; 0 <= length ? _j < length : _j > length; x = 0 <= length ? ++_j : --_j) {
150
+ string += string[string.length - distance];
151
+ }
152
+ }
153
+ index += 1;
154
+ }
155
+ }
156
+ string = string.replace(/\uE0E0/g, "");
157
+ return string;
158
+ }
159
+ }
160
+ module.exports = Mobi;
package/lib/mobi.d.ts CHANGED
@@ -1,4 +1,3 @@
1
- /// <reference types="node" />
2
1
  export default class Mobi {
3
2
  filename: string;
4
3
  info: {
@@ -11,5 +10,5 @@ export default class Mobi {
11
10
  constructor(filename: string);
12
11
  parse(): this;
13
12
  trim(data: Buffer, trailers: number, multibyte: number): Buffer;
14
- palmdocReader(data: Buffer): string;
13
+ palmdocReader(data: Buffer): any;
15
14
  }