epub2md 1.1.4 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +28 -27
- package/lib/bin/cli.cjs +64 -0
- package/lib/bin/convert.cjs +180 -0
- package/lib/bin/helper.cjs +49 -0
- package/lib/converter.cjs +11 -0
- package/lib/converter.d.ts +2 -1
- package/lib/converter.mjs +5 -0
- package/lib/index.cjs +34 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.mjs +5 -0
- package/lib/mobi.cjs +160 -0
- package/lib/mobi.d.ts +1 -2
- package/lib/mobi.mjs +161 -0
- package/lib/parseEpub.cjs +249 -0
- package/lib/parseEpub.d.ts +10 -5
- package/lib/parseEpub.mjs +216 -0
- package/lib/parseHTML.cjs +99 -0
- package/lib/parseHTML.d.ts +2 -2
- package/lib/parseHTML.mjs +88 -0
- package/lib/parseLink.cjs +26 -0
- package/lib/parseLink.d.ts +3 -0
- package/lib/parseLink.mjs +13 -0
- package/lib/parseSection.cjs +75 -0
- package/lib/parseSection.d.ts +4 -4
- package/lib/parseSection.mjs +59 -0
- package/lib/types.cjs +1 -0
- package/lib/types.mjs +0 -0
- package/lib/utils.cjs +113 -0
- package/lib/utils.d.ts +1 -1
- package/lib/utils.mjs +91 -0
- package/package.json +24 -13
- package/CHANGELOG.md +0 -22
- package/lib/bin/cli.d.ts +0 -9
- package/lib/bin/cli.js +0 -77
- package/lib/bin/convert.d.ts +0 -37
- package/lib/bin/convert.js +0 -269
- package/lib/bin/parse.d.ts +0 -5
- package/lib/bin/parse.js +0 -71
- package/lib/converter.js +0 -14
- package/lib/index.js +0 -14
- package/lib/mobi.js +0 -168
- package/lib/parseEpub.js +0 -314
- package/lib/parseEpub.spec.d.ts +0 -1
- package/lib/parseEpub.spec.js +0 -121
- package/lib/parseHTML.js +0 -101
- package/lib/parseHTML.spec.d.ts +0 -1
- package/lib/parseHTML.spec.js +0 -12
- package/lib/parseLink.js +0 -19
- package/lib/parseSection.js +0 -69
- package/lib/types.js +0 -2
- package/lib/utils.js +0 -141
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2021 ChandlerVer5
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -1,13 +1,27 @@
|
|
|
1
|
-
# 📖
|
|
1
|
+
# 📖 epub2md
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
主要的目标是转换 epub 到 markdown,当然了也可以当做 epub 的解析器
|
|
3
|
+
Although the package is primarily intended as a CLI for converting EPUB to Markdown, it can also be used programmatically.
|
|
5
4
|
|
|
6
|
-
|
|
5
|
+
主要的目标是转换 epub 到 markdown,当然了也可以当做 epub 的解析器库使用.
|
|
7
6
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
7
|
+
## Global Install for CLI
|
|
8
|
+
|
|
9
|
+
```sh
|
|
10
|
+
# node global cli
|
|
11
|
+
$ npm install epub2md -g
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Development Install
|
|
15
|
+
|
|
16
|
+
```sh
|
|
17
|
+
# for node
|
|
18
|
+
$ npm install epub2md
|
|
19
|
+
|
|
20
|
+
# for deno
|
|
21
|
+
$ deno add @xw/epub2md
|
|
22
|
+
|
|
23
|
+
# from GitHub Packages Registry
|
|
24
|
+
$ npm install @uxiew/epub2md
|
|
11
25
|
```
|
|
12
26
|
|
|
13
27
|
## CLI
|
|
@@ -31,20 +45,7 @@ $ epub2md -S ../../fixtures/zhihu.epub
|
|
|
31
45
|
$ epub2md -s ../../fixtures/zhihu.epub
|
|
32
46
|
```
|
|
33
47
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
```bash
|
|
37
|
-
# npm
|
|
38
|
-
npm install epub2md --save
|
|
39
|
-
|
|
40
|
-
# pnpm
|
|
41
|
-
pnpm add epub2md
|
|
42
|
-
|
|
43
|
-
# yarn
|
|
44
|
-
yarn add epub2md
|
|
45
|
-
```
|
|
46
|
-
|
|
47
|
-
# Usage
|
|
48
|
+
## Usage
|
|
48
49
|
|
|
49
50
|
```js
|
|
50
51
|
import { parseEpub } from 'epub2md'
|
|
@@ -54,15 +55,15 @@ const epubObj = await parseEpub('/path/to/file.epub')
|
|
|
54
55
|
console.log('epub content:', epubObj)
|
|
55
56
|
```
|
|
56
57
|
|
|
57
|
-
|
|
58
|
+
### parseEpub(target: string | buffer, options ?: ParserOptions): EpubObject
|
|
58
59
|
|
|
59
|
-
|
|
60
|
+
#### target
|
|
60
61
|
|
|
61
62
|
type: `string` or `buffer`
|
|
62
63
|
|
|
63
64
|
It can be the path to the file or file's binary string or buffer
|
|
64
65
|
|
|
65
|
-
|
|
66
|
+
#### options: `ParserOptions`
|
|
66
67
|
|
|
67
68
|
- type ?: 'binaryString' | 'path' | 'buffer'
|
|
68
69
|
|
|
@@ -73,7 +74,7 @@ It forces the parser to treat supplied target as the defined type, if not define
|
|
|
73
74
|
|
|
74
75
|
Use a custom convert function; for example, you can use turndown, node-html-markdown, etc.
|
|
75
76
|
|
|
76
|
-
|
|
77
|
+
#### EpubObject
|
|
77
78
|
|
|
78
79
|
The return value is an object which contains `structure`, `sections`, and `info` (private property names start with `_`; I don't recommend using them, since they are subject to change).
|
|
79
80
|
|
|
@@ -85,11 +86,11 @@ The return value is an object which contains`structure`, `sections`, `info`(priv
|
|
|
85
86
|
|
|
86
87
|
- `Section.prototype.toHtmlObjects`: convert to html object. A note about `src` and `href`: the `src` and `href` in raw html stay untouched, but the `toHtmlObjects` method resolves `src` to a base64 string, and alters `href` so that they make sense in the parsed epub. The parsed `href` is something like `#{sectionId},{hash}`.
|
|
87
88
|
|
|
88
|
-
|
|
89
|
+
## How to contribute
|
|
89
90
|
|
|
90
91
|
- Raise an issue in the issue section.
|
|
91
92
|
- PRs are the best. ❤️
|
|
92
93
|
|
|
93
|
-
|
|
94
|
+
## Credits
|
|
94
95
|
|
|
95
96
|
[gaoxiaoliangz/epub-parser](https://github.com/gaoxiaoliangz/epub-parser)
|
package/lib/bin/cli.cjs
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
"use strict";
|
|
3
|
+
|
|
4
|
+
Object.defineProperty(exports, "__esModule", {
|
|
5
|
+
value: true
|
|
6
|
+
});
|
|
7
|
+
exports.Commands = void 0;
|
|
8
|
+
var _args = _interopRequireDefault(require("args"));
|
|
9
|
+
var _chalk = _interopRequireDefault(require("chalk"));
|
|
10
|
+
var _nodeProcess = _interopRequireDefault(require("node:process"));
|
|
11
|
+
var _parseEpub = _interopRequireDefault(require("../parseEpub.cjs"));
|
|
12
|
+
var _convert = _interopRequireDefault(require("./convert.cjs"));
|
|
13
|
+
var _beautyJson = require("beauty-json");
|
|
14
|
+
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
|
|
15
|
+
const name = "epub2md";

/**
 * Names of the CLI flags understood by epub2md.
 * Exported because the converter compares its `cmd` against `Commands.autocorrect`.
 */
const Commands = exports.Commands = {
  markdown: "md",
  autocorrect: "ma",
  unzip: "unzip",
  info: "info",
  structure: "structure",
  sections: "sections"
};

// [flag, help text] pairs, in the priority order used when several flags are given.
const commands = [
  [Commands.markdown, "convert the epub file to markdown format"],
  [Commands.autocorrect, "convert the epub file to markdown format with autocorrect"],
  [Commands.unzip, "unzip epub file"],
  [Commands.info, "get epub file basic info"],
  [Commands.structure, "get epub file structure"],
  [Commands.sections, "get epub file sections"]
];

// Command used when the first CLI argument is given without any flag.
const DEFAULT_COMMAND = Commands.markdown;

commands.forEach(([flag, description]) => _args.default.option(flag, description));
const flags = _args.default.parse(_nodeProcess.default.argv, {
  name
});

// Dispatch: the first flag that was supplied wins; otherwise treat the first
// positional argument as the epub path for the default command; otherwise show help.
const selected = commands.find(([flag]) => flags[flag]);
if (selected) {
  run(selected[0]);
} else if (_nodeProcess.default.argv[2]) {
  flags[DEFAULT_COMMAND] = _nodeProcess.default.argv[2];
  run(DEFAULT_COMMAND);
} else {
  _args.default.showHelp();
}

/**
 * Print a failure in red on stderr and make the process exit with a non-zero code.
 * @param error the error (or message) to report
 */
function reportFailure(error) {
  console.error(_chalk.default.red(error));
  _nodeProcess.default.exitCode = 1;
}

/**
 * Execute one CLI command.
 *
 * For `md`, `ma` and `unzip` the epub is converted/unpacked through the Converter;
 * for `info`, `structure` and `sections` the epub is parsed and the matching
 * property of the result is pretty-printed.
 *
 * @param cmd one of the values of `Commands`
 */
function run(cmd) {
  const epubPath = flags[Commands.markdown] || flags[Commands.autocorrect] || flags[Commands.unzip];
  const target = epubPath || flags[cmd];
  // A flag passed without a value is parsed as `true`; there is nothing to open then.
  if (target === true) {
    reportFailure(`[${name}]: missing epub file path for "${cmd}"`);
    return;
  }
  if (epubPath) {
    console.log(_chalk.default.blueBright(`[${name}]: converting${cmd === Commands.autocorrect ? " with AutoCorrect" : ""}...`));
    // Construct inside the promise chain so that synchronous constructor errors
    // are reported through the same `.catch` as asynchronous conversion errors.
    Promise.resolve()
      .then(() => new _convert.default({
        // `eubPath` (sic) is the option name the Converter constructor expects.
        eubPath: epubPath,
        cmd
      }).run(flags[Commands.unzip]))
      .then(outDir => {
        console.log(_chalk.default.greenBright(`[${name}]: success! output: ${outDir}`));
      })
      .catch(reportFailure);
    return;
  }
  Promise.resolve()
    .then(() => (0, _parseEpub.default)(flags[cmd]))
    .then(res => {
      console.log(_chalk.default.greenBright(`[${name}]: This book ${cmd}:`));
      _beautyJson.json.log(res[cmd]);
    })
    .catch(reportFailure);
}
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
|
|
7
|
+
var _nodePath = require("node:path");
|
|
8
|
+
var _nodeFs = require("node:fs");
|
|
9
|
+
var _writeFileSafe = require("write-file-safe");
|
|
10
|
+
var _chalk = _interopRequireDefault(require("chalk"));
|
|
11
|
+
var _parseEpub = _interopRequireDefault(require("../parseEpub.cjs"));
|
|
12
|
+
var _helper = require("./helper.cjs");
|
|
13
|
+
var _utils = require("../utils.cjs");
|
|
14
|
+
var _parseLink = _interopRequireDefault(require("../parseLink.cjs"));
|
|
15
|
+
var _cli = require("./cli.cjs");
|
|
16
|
+
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
|
|
17
|
+
/**
 * Converts (or unzips) an epub file into a directory next to it.
 *
 * Usage: `new Converter({ eubPath, cmd }).run(unzip)` resolves with the output directory.
 */
class Converter {
  /** Result of parseEpub(); assigned by getManifest(). */
  epub;
  /** Path of the epub file being processed. */
  epubFilePath;
  /** CLI command flag in use (compared with `Commands.autocorrect`). */
  cmd;
  /** Extension given to generated Markdown files. */
  MD_FILE_EXT = ".md";
  /** Directory the output files are written to. */
  outDir;
  /** Manifest entries selected for output: `{ id, outpath, filepath }`. */
  structure = [];

  /**
   * @param options.eubPath path of the epub file (the misspelt key is part of the existing interface)
   * @param options.cmd CLI command flag
   */
  constructor({
    eubPath,
    cmd
  }) {
    this.epubFilePath = eubPath;
    this.cmd = cmd;
    this.outDir = (0, _nodePath.dirname)(eubPath);
    // recursive: also succeeds when intermediate directories are missing
    if (!(0, _nodeFs.existsSync)(this.outDir)) (0, _nodeFs.mkdirSync)(this.outDir, {
      recursive: true
    });
  }

  /**
   * Classify a manifest path by its extension (case-insensitive, exact match).
   * Files without an extension match none of the categories.
   * @param filepath path or href of the file
   * @returns `{ isImage, isCSS, isHTML }`, each a boolean
   */
  checkFileType(filepath) {
    const ext = (0, _nodePath.extname)(filepath).toLowerCase();
    return {
      isImage: [".jpg", ".jpeg", ".png", ".gif", ".webp", ".svg"].includes(ext),
      isCSS: ext === ".css",
      isHTML: [".htm", ".html", ".xhtml"].includes(ext)
    };
  }

  /**
   * Strip the (x)htm(l) extension and everything following it (hash, query, …)
   * from a file name, e.g. `ch1.xhtml#note` -> `ch1`.
   */
  resolveHTMLId(fileName) {
    return fileName.replace(/\.x?html?(?:.*)/i, "");
  }

  /**
   * File name handling: sanitize the name, trim it and replace whitespace with `_`.
   * @param fileName raw file name
   * @param ext optional suffix appended after cleaning
   */
  getCleanFileName(fileName, ext = "") {
    return (0, _utils.sanitizeFileName)(fileName).trim().replace(/\s/g, "_") + ext;
  }

  /**
   * Build the output path for a manifest file. Only images and HTML documents are
   * kept; Markdown output does not need CSS/JS files, so those are skipped.
   * @return the target path, like "xxx/xxx.md" or "xxx/images/a.png"; "" when the file is skipped
   */
  _makePath(filepath) {
    const {
      isImage,
      isHTML
    } = this.checkFileType(filepath);
    if (!isImage && !isHTML) return "";
    const fileName = (0, _nodePath.basename)(filepath);
    return (0, _nodePath.join)(this.outDir, isImage ? "images" : "", isHTML ? this.resolveHTMLId(fileName) + this.MD_FILE_EXT : fileName);
  }

  /**
   * Parse the epub and fill `this.structure` with the manifest entries to write.
   * @param unzip when truthy every manifest file is kept under its original relative path;
   *              otherwise only images and HTML documents are kept (ncx and the title page are skipped)
   */
  async getManifest(unzip) {
    this.epub = await (0, _parseEpub.default)(this.epubFilePath, {
      convertToMarkdown: _helper.convertHTML
    });
    // Anchor on the trailing extension so a ".epub" elsewhere in the path is left alone.
    this.outDir = this.epubFilePath.replace(/\.epub$/i, "");
    // Start from a clean list so a repeated run does not duplicate entries.
    this.structure = [];
    this.epub.getManifest().forEach(({
      href: filepath,
      id
    }) => {
      let outpath;
      if (unzip) {
        outpath = (0, _nodePath.join)(this.outDir, filepath);
      } else {
        if (filepath.endsWith("ncx") || id === "titlepage") return;
        outpath = this._makePath(filepath);
      }
      if (outpath !== "") this.structure.push({
        id,
        outpath,
        filepath
      });
    });
  }

  /**
   * Depth-first search of the table-of-contents tree for the item of a section.
   * @param tocItems array of toc items (each may have `children`)
   * @param sectionId id of the section to look for
   * @returns the matching item, or undefined
   */
  _findNavItem(tocItems, sectionId) {
    if (!Array.isArray(tocItems)) return void 0;
    for (const item of tocItems) {
      if (item.sectionId === sectionId) return item;
      if (item.children) {
        const childMatch = this._findNavItem(item.children, sectionId);
        if (childMatch) return childMatch;
      }
    }
    return void 0;
  }

  /**
   * Rewrite one link/image target found in converted Markdown.
   * @param link original target
   * @param text link text; falsy for images
   * @param currentFile cleaned file name of the Markdown file being written
   * @returns the target to put in the Markdown output
   */
  _rewriteLink(link, text, currentFile) {
    // Absolute URLs (http://, //host, mailto:, tel:, data:) do not point into the
    // book and must not be turned into local file names.
    if (/^(?:[a-z][a-z\d+.-]*:)?\/\/|^(?:mailto|tel|data):/i.test(link)) return link;
    // Images all end up flat inside ./images
    if (!text) return "./images/" + (0, _nodePath.basename)(link);
    // In-page anchor: point at the current output file
    if (link.startsWith("#")) return "./" + currentFile + link;
    const {
      hash,
      url
    } = (0, _parseLink.default)(link);
    const target = this.resolveHTMLId((0, _nodePath.basename)(url));
    const navItem = (0, _utils.findRealPath)(target, this.epub?.structure) || {
      name: target
    };
    const fileName = (0, _nodePath.extname)(navItem.name) ? navItem.name : navItem.name + this.MD_FILE_EXT;
    return "./" + this.getCleanFileName(fileName) + `${hash ? "#" + hash : ""}`;
  }

  /**
   * Produce the content and the final output path for one structure entry,
   * trying to obtain a friendly output filename from the table of contents.
   * @param structure `{ id, filepath, outpath }` entry built by getManifest()
   * @returns `{ content, outFilePath }`; content is a Markdown string or a Buffer of the raw file
   */
  _getFileData(structure) {
    const {
      id,
      filepath
    } = structure;
    let {
      outpath
    } = structure;
    if ((0, _nodePath.extname)(outpath) !== ".md") {
      // Non-Markdown output (images, unzipped files): copy the bytes as they are.
      return {
        content: this.epub.resolve(filepath).asNodeBuffer(),
        outFilePath: outpath
      };
    }
    const nav = this._findNavItem(this.epub?.structure, id);
    const cleanFilename = this.getCleanFileName(nav ? nav.name + this.MD_FILE_EXT : (0, _nodePath.basename)(outpath));
    outpath = (0, _nodePath.join)((0, _nodePath.dirname)(outpath), cleanFilename);
    // A manifest document may have no section; fall back to "" (run() skips
    // empty content) instead of passing undefined to fixLinkPath.
    let content = this.epub?.getSection(id)?.toMarkdown() ?? "";
    content = (0, _helper.fixLinkPath)(content, (link, text) => this._rewriteLink(link, text, cleanFilename));
    if (this.cmd === _cli.Commands.autocorrect) {
      // Loaded lazily: only needed for the autocorrect command.
      content = require("autocorrect-node").format(content);
    }
    return {
      content,
      outFilePath: outpath
    };
  }

  /**
   * Convert the epub: collect the manifest, then write every entry to disk.
   * Markdown files get a zero-padded running number prefix, e.g. "03-Title.md".
   * @param unzip forwarded to getManifest()
   * @returns the output directory
   */
  async run(unzip) {
    await this.getManifest(unzip);
    let num = 1;
    const seenPaths = new Set();
    // Number of digits needed to print the largest possible counter value.
    const width = String(this.structure.length).length;
    for (const entry of this.structure) {
      const {
        outFilePath,
        content
      } = this._getFileData(entry);
      if (content.toString() === "") continue;
      let numberedOutFilePath = null;
      // Only the first Markdown file for a given path gets a numbered name.
      if (!seenPaths.has(outFilePath) && (0, _nodePath.basename)(outFilePath).endsWith(".md")) {
        const parsedPath = (0, _nodePath.parse)(outFilePath);
        numberedOutFilePath = (0, _nodePath.format)({
          ...parsedPath,
          base: `${String(num).padStart(width, "0")}-${parsedPath.base}`
        });
        console.log(_chalk.default.yellow(`${num++}: [${(0, _nodePath.basename)(numberedOutFilePath)}]`));
      }
      seenPaths.add(outFilePath);
      (0, _writeFileSafe.writeFileSync)(numberedOutFilePath ?? outFilePath, content, {
        overwrite: true
      });
    }
    return this.outDir;
  }
}
|
|
180
|
+
module.exports = Converter;
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.convertHTML = convertHTML;
|
|
7
|
+
exports.fixLinkPath = fixLinkPath;
|
|
8
|
+
var _converter = _interopRequireDefault(require("../converter.cjs"));
|
|
9
|
+
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
|
|
10
|
+
/**
 * Rewrite the target of every Markdown image (`![alt](target)`) in a string.
 * @param markdownContent Markdown text
 * @param replaceFn called with the original image target; returns the new target
 * @returns the text with all image targets replaced (alt text is kept)
 */
function handleImagePath(markdownContent, replaceFn) {
  const imgPattern = /!\[([^\]]*)\]\(([^)]+)\)/g;
  return markdownContent.replace(imgPattern, (_match, altText, imagePath) => `![${altText}](${replaceFn(imagePath)})`);
}

/**
 * Rewrite the target of every inline Markdown link (`[text](target)`) in a string.
 * @param markdownContent Markdown text
 * @param replaceFn called with (target, linkText); returns the new target
 * @returns the text with all link targets replaced (link text is kept)
 */
function handleFileLinkPath(markdownContent, replaceFn) {
  const inlineLinkPattern = /\[([^\]]*)]\(([^)]+)\)/g;
  return markdownContent.replace(inlineLinkPattern, (_match, linkText, linkUrl) => `[${linkText}](${replaceFn(linkUrl, linkText)})`);
}

/**
 * Rewrite every link and image target in converted Markdown.
 *
 * `replaceFn(target, text)` receives the link text for links and no second
 * argument for images, and returns the replacement target.
 *
 * @param markdownContent Markdown text
 * @param replaceFn target rewriting callback
 * @returns the Markdown with rewritten targets
 */
function fixLinkPath(markdownContent, replaceFn) {
  const linkPattern = /(!?)\[(.*?)\](\(.*?\)\])?\((.*?)\)/g;
  return markdownContent.replace(linkPattern, (match, imgMark, internalMatch1, internalMatch2) => {
    if (imgMark === "!") {
      return handleImagePath(match, replaceFn);
    }
    // A link wrapping an image, `[![alt](img)](href)`: group 2 is "![alt" and the
    // optional group 3 is "(img)]". Group 3 is undefined when the text only looks
    // similar (e.g. `[![alt]](href)`), so it must be checked before use.
    const hasWrappedImg = internalMatch1.startsWith("![") && typeof internalMatch2 === "string";
    if (!hasWrappedImg) {
      return handleFileLinkPath(match, replaceFn);
    }
    const wrappedImg = internalMatch1 + "]" + internalMatch2.replace(/\)\]$/, ")");
    let m1 = "",
      m2 = "";
    // Temporarily mask the image's "![" and "](" so the link pass below cannot
    // mistake the image for a link; the masks are restored afterwards.
    const link = handleImagePath(wrappedImg, replaceFn).replace(/(!\[)(.*?)(\]\()/g, (m, mark1, mark, mark2) => {
      m1 = mark1;
      m2 = mark2;
      return "$$" + mark + "@@";
    });
    // NOTE(review): only the inner image is returned here; the outer link target
    // (group 4 of linkPattern) is not part of the result. Confirm this is intended.
    return handleFileLinkPath(link, replaceFn).replace("$$", m1).replace("@@", m2);
  });
}
|
|
46
|
+
/**
 * Clean up an HTML string and convert it to Markdown with the default converter.
 *
 * Cleaning removes `<?xml …>` declarations and `<!DOC…>` doctype declarations
 * (each with at most one whitespace character on either side) and collapses every
 * run of newlines, plus one following whitespace character, into a single newline.
 *
 * NOTE(review): the first two replacements substitute `()` and `::` with identical
 * text. They were presumably written against full-width punctuation that was
 * normalised somewhere along the way — confirm against the upstream source. As
 * written here, `/()/g` matches the empty string at every position.
 *
 * @param prunedHtml HTML source of one section
 * @returns whatever the default converter returns for the cleaned HTML
 */
function convertHTML(prunedHtml) {
|
|
47
|
+
const htmlString = prunedHtml.replace(/()/g, "()").replace(/::/g, "::").replace(/\s?<\?xml.*?>\s?/g, "").replace(/\s?<!DOC.*?>\s?/g, "").replace(/\n+\s?/g, "\n");
|
|
48
|
+
return (0, _converter.default)(htmlString);
|
|
49
|
+
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
|
|
7
|
+
var _nodeHtmlMarkdown = require("node-html-markdown");
|
|
8
|
+
/**
 * Default HTML -> Markdown converter.
 * Delegates to NodeHtmlMarkdown.translate with link reference definitions turned off.
 * @param str HTML source
 * @returns the value returned by NodeHtmlMarkdown.translate
 */
const convert = str => {
  const translateOptions = {
    useLinkReferenceDefinitions: false
  };
  return _nodeHtmlMarkdown.NodeHtmlMarkdown.translate(str, translateOptions);
};
|
|
11
|
+
module.exports = convert;
|
package/lib/converter.d.ts
CHANGED
|
@@ -1 +1,2 @@
|
|
|
1
|
-
|
|
1
|
+
/**
 * Default export of the converter module: takes a string and returns a string.
 * The accompanying converter.cjs in this diff implements it with
 * `NodeHtmlMarkdown.translate`.
 */
declare const _default: (str: string) => string;
|
|
2
|
+
export default _default;
|
package/lib/index.cjs
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
// Public entry point: re-exports `convertToMarkdown`, `parseEpub`, `parseHTML` and
// `parseLink`. Each one is an enumerable getter returning the `default` export of
// the module required at the bottom of this file. The `var` bindings are assigned
// after the getters are defined, which is fine because a getter only reads its
// binding when the property is accessed.
// NOTE(review): this literal `Object.defineProperty(exports, "name", { enumerable, get })`
// shape is emitted by the build tool; tools that statically detect CommonJS export
// names presumably rely on it, so keep the statements in this form — confirm before restyling.
Object.defineProperty(exports, "convertToMarkdown", {
|
|
7
|
+
enumerable: true,
|
|
8
|
+
get: function () {
|
|
9
|
+
return _converter.default;
|
|
10
|
+
}
|
|
11
|
+
});
|
|
12
|
+
Object.defineProperty(exports, "parseEpub", {
|
|
13
|
+
enumerable: true,
|
|
14
|
+
get: function () {
|
|
15
|
+
return _parseEpub.default;
|
|
16
|
+
}
|
|
17
|
+
});
|
|
18
|
+
Object.defineProperty(exports, "parseHTML", {
|
|
19
|
+
enumerable: true,
|
|
20
|
+
get: function () {
|
|
21
|
+
return _parseHTML.default;
|
|
22
|
+
}
|
|
23
|
+
});
|
|
24
|
+
Object.defineProperty(exports, "parseLink", {
|
|
25
|
+
enumerable: true,
|
|
26
|
+
get: function () {
|
|
27
|
+
return _parseLink.default;
|
|
28
|
+
}
|
|
29
|
+
});
|
|
30
|
+
var _converter = _interopRequireDefault(require("./converter.cjs"));
|
|
31
|
+
var _parseEpub = _interopRequireDefault(require("./parseEpub.cjs"));
|
|
32
|
+
var _parseLink = _interopRequireDefault(require("./parseLink.cjs"));
|
|
33
|
+
var _parseHTML = _interopRequireDefault(require("./parseHTML.cjs"));
|
|
34
|
+
// Babel interop helper: wraps a plain CommonJS export as `{ default: e }` unless
// the module is already flagged as an ES module.
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
|
package/lib/index.d.ts
CHANGED
package/lib/index.mjs
ADDED
package/lib/mobi.cjs
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
|
|
7
|
+
var _nodeFs = _interopRequireDefault(require("node:fs"));
|
|
8
|
+
var _pypacker = _interopRequireDefault(require("pypacker"));
|
|
9
|
+
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
|
|
10
|
+
/**
 * Reader for MOBI files stored in a Palm database (PDB) container.
 *
 * Constructing an instance parses the file synchronously. Afterwards `info` holds
 * the database name, the PDB header (including the record table), the MOBI header
 * fields, the book title and the decoded text `content`.
 */
class Mobi {
  /** Path of the file that is parsed. */
  filename;
  /** Parse result; filled in by parse(). */
  info = {
    content: "",
    pdbHeader: {
      records: []
    },
    mobiHeader: {}
  };

  /**
   * @param filename path of the .mobi file; it is read and parsed immediately
   */
  constructor(filename) {
    this.filename = filename;
    this.parse();
  }

  /**
   * Read the file and populate `this.info`.
   * @returns this
   * @throws Error when the record table is too short for the headers/text records,
   *         or when the text uses a compression other than none (1) or PalmDOC (2)
   */
  parse() {
    // readFileSync opens, reads and closes the file in one step (no leaked descriptor).
    const buffer = _nodeFs.default.readFileSync(this.filename);
    const unpack = (format, source, ...offset) => new _pypacker.default(format).unpack_from(source, ...offset);
    const {
      pdbHeader,
      mobiHeader
    } = this.info;
    // Start clean so that calling parse() again does not append to earlier results.
    pdbHeader.records = [];
    this.info.content = "";

    // --- PDB header -------------------------------------------------------
    const [name, attributes, version, created, modified, backedUp, modificationNumber, appInfoId, sortInfoID, type, creator, uniqueIDseed, nextRecordListID, recordCount] = unpack("31sxHH6I4s4s2IH", buffer);
    this.info.name = name.replace(/\u0000/g, "");
    Object.assign(pdbHeader, {
      attributes,
      version,
      // The three timestamps are scaled to milliseconds for Date. The backup
      // timestamp was previously scaled by 100, unlike its two siblings.
      created: new Date(created * 1e3),
      modified: new Date(modified * 1e3),
      backedUp: new Date(backedUp * 1e3),
      modificationNumber,
      appInfoId,
      sortInfoID,
      type,
      creator,
      uniqueIDseed,
      nextRecordListID,
      recordCount
    });

    // --- record table: 8 bytes per entry, starting at byte 78 -------------
    const RECORD_TABLE_OFFSET = 78;
    for (let index = 0; index < recordCount; index++) {
      const [position, rawId] = unpack("II", buffer, RECORD_TABLE_OFFSET + index * 8);
      pdbHeader.records.push({
        position,
        // keep the low 24 bits only
        id: rawId & 16777215
      });
    }
    const records = pdbHeader.records;
    // Byte range of record `i`: up to the next record, or to the end of the file for the last one.
    const recordData = i => {
      if (!records[i]) {
        throw new Error(`Invalid MOBI file: record ${i} is missing (record count: ${records.length})`);
      }
      const end = records[i + 1] ? records[i + 1].position : buffer.length;
      return buffer.subarray(records[i].position, end);
    };

    // --- MOBI header (record 0) -------------------------------------------
    const header = recordData(0);
    const [compression, textLength, textRecordCount, recordSize, encryption, headerLength, mobiType, encoding] = unpack("H2xI3H6x3I", header);
    Object.assign(mobiHeader, {
      compression,
      text_length: textLength,
      textRecordCount,
      recordSize,
      encryption,
      headerLength,
      mobiType,
      encoding
    });
    const [firstNonBookIndex, fullNameOffset, fullNameLength] = unpack("3I", header, 80);
    Object.assign(mobiHeader, {
      firstNonBookIndex,
      fullNameOffset,
      fullNameLength
    });
    mobiHeader.firstImageIndex = unpack("I", header, 108)[0];
    // Stored as a boolean: whether bit 0x40 of the flags word is set.
    mobiHeader.exthFlags = (unpack("I", header, 128)[0] & 64) === 64;
    const [firstContentRecord, lastContentRecord] = unpack("2H", header, 194);
    mobiHeader.firstContentRecord = firstContentRecord;
    mobiHeader.lastContentRecord = lastContentRecord;
    this.info.title = unpack(fullNameLength + "s", header, fullNameOffset)[0];

    // --- trailing-entry flags ---------------------------------------------
    let multibyte = 0;
    let trailers = 0;
    if (headerLength >= 228) {
      let flags = unpack("H", header, 242)[0];
      mobiHeader.flags = flags;
      multibyte = flags & 1;
      // Count the set bits above bit 0; each one means one trailing entry per record.
      while (flags > 1) {
        trailers += 1;
        flags = flags & flags - 2;
      }
    }

    // --- text records: 1 .. textRecordCount -------------------------------
    for (let position = 1; position <= textRecordCount; position++) {
      const data = this.trim(recordData(position), trailers, multibyte);
      if (compression === 1) {
        this.info.content += data;
      } else if (compression === 2) {
        this.info.content += this.palmdocReader(data);
      } else {
        throw new Error("LZ77 compression isn't supported... yet.");
      }
    }
    this.info.content = this.info.content.replace(/<(head|HEAD)>/g, '<head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>');
    return this;
  }

  /**
   * Remove the trailing entries from a text record.
   * @param data record bytes
   * @param trailers number of trailing entries; each one's size is decoded from the last four bytes
   * @param multibyte when truthy, additionally drop `(lastByte & 3) + 1` bytes
   * @returns a view of `data` without the trailing bytes
   */
  trim(data, trailers, multibyte) {
    for (let t = 0; t < trailers; t++) {
      let num = 0;
      const endBytes = data.subarray(data.length - 4);
      // 7 bits per byte; a byte with the high bit set restarts the number.
      for (let v = 0; v < 4; v++) {
        if (endBytes[v] & 128) {
          num = 0;
        }
        num = num << 7 | endBytes[v] & 127;
      }
      // Never let an oversized count wrap around to a negative end index.
      data = data.subarray(0, Math.max(0, data.length - num));
    }
    if (multibyte) {
      const num = (data[data.length - 1] & 3) + 1;
      data = data.subarray(0, Math.max(0, data.length - num));
    }
    return data;
  }

  /**
   * Decompress one PalmDOC-compressed text record.
   * @param data compressed record bytes
   * @returns the decoded text
   */
  palmdocReader(data) {
    // Placeholder appended after multi-byte literals and stripped at the end; it keeps
    // string offsets in step with byte offsets for back-references
    // (assumes each literal run decodes to a single character — TODO confirm).
    const PLACEHOLDER = String.fromCharCode(57568);
    let text = "";
    let index = 0;
    while (index < data.length) {
      const frame = data[index];
      index += 1;
      if (frame >= 1 && frame <= 8) {
        // literal run: copy the next `frame` bytes
        text += data.toString("utf8", index, index + frame);
        text += PLACEHOLDER.repeat(frame - 1);
        index += frame;
      } else if (frame < 128) {
        // single literal byte
        text += String.fromCharCode(frame);
      } else if (frame >= 192) {
        // space followed by the character with the high bit flipped
        text += " " + String.fromCharCode(frame ^ 128);
      } else {
        // two-byte back-reference: 11-bit distance, 3-bit length (+3)
        const concat = frame << 8 | data[index];
        const distance = concat >> 3 & 2047;
        const length = (concat & 7) + 3;
        if (length < distance) {
          text += text.slice(-distance, length - distance);
        } else {
          // Overlapping copy: must be done one character at a time.
          for (let i = 0; i < length; i++) {
            // A reference before the start of the output (corrupt data) yields
            // nothing instead of the literal text "undefined".
            text += text[text.length - distance] ?? "";
          }
        }
        index += 1;
      }
    }
    return text.replace(/\uE0E0/g, "");
  }
}
|
|
160
|
+
module.exports = Mobi;
|
package/lib/mobi.d.ts
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
/// <reference types="node" />
|
|
2
1
|
export default class Mobi {
|
|
3
2
|
filename: string;
|
|
4
3
|
info: {
|
|
@@ -11,5 +10,5 @@ export default class Mobi {
|
|
|
11
10
|
constructor(filename: string);
|
|
12
11
|
parse(): this;
|
|
13
12
|
trim(data: Buffer, trailers: number, multibyte: number): Buffer;
|
|
14
|
-
palmdocReader(data: Buffer):
|
|
13
|
+
palmdocReader(data: Buffer): any;
|
|
15
14
|
}
|