wp-epub-gen 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -1,59 +1,311 @@
1
- "use strict";
2
- Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
3
- const os = require("os");
4
- const path = require("path");
5
- const url = require("url");
6
- const uuid = require("uuid");
7
- const cheerio = require("cheerio");
8
- const diacritics = require("diacritics");
9
- const uslug = require("uslug");
10
- const archiver = require("archiver");
11
- const fs$1 = require("fs-extra");
12
- const request = require("superagent");
13
- const fs = require("fs");
14
- const util = require("util");
15
- const ejs = require("ejs");
16
- const entities = require("entities");
17
- var _documentCurrentScript = typeof document !== "undefined" ? document.currentScript : null;
18
- function _interopNamespaceDefault(e) {
19
- const n = Object.create(null, { [Symbol.toStringTag]: { value: "Module" } });
20
- if (e) {
21
- for (const k in e) {
22
- if (k !== "default") {
23
- const d = Object.getOwnPropertyDescriptor(e, k);
24
- Object.defineProperty(n, k, d.get ? d : {
25
- enumerable: true,
26
- get: () => e[k]
27
- });
28
- }
29
- }
30
- }
31
- n.default = e;
32
- return Object.freeze(n);
33
- }
34
- const cheerio__namespace = /* @__PURE__ */ _interopNamespaceDefault(cheerio);
35
- const entities__namespace = /* @__PURE__ */ _interopNamespaceDefault(entities);
1
+ import os from "os";
2
+ import path from "path";
3
+ import { fileURLToPath } from "url";
4
+ import { v4 } from "uuid";
5
+ import * as cheerio from "cheerio";
6
+ import { remove } from "diacritics";
7
+ import uslug from "uslug";
8
+ import archiver from "archiver";
9
+ import fs$1 from "fs-extra";
10
+ import request from "superagent";
11
+ import fs from "fs";
12
+ import { promisify } from "util";
13
+ import ejs from "ejs";
14
+ import * as entities from "entities";
36
15
  const errors = {
37
16
  no_output_path: "No output path!",
38
17
  no_title: "Title is required.",
39
18
  no_content: "Content is required."
40
19
  };
20
/**
 * Process-wide logging facade.
 *
 * Messages go to a caller-supplied logger when one has been installed with
 * `setLogger`, and to the matching `console` method otherwise. A custom logger
 * is not required to implement every level: any level it does not provide as a
 * function falls back to `console`, so a partial logger can never crash the
 * code that is merely trying to report something.
 */
class GlobalLogger {
  constructor() {
    // Caller-supplied logger; null means "use console".
    this.customLogger = null;
  }

  /**
   * Returns the shared GlobalLogger instance, creating it on first use.
   */
  static getInstance() {
    if (!GlobalLogger.instance) {
      GlobalLogger.instance = new GlobalLogger();
    }
    return GlobalLogger.instance;
  }

  /**
   * Installs a custom logger.
   * @param logger2 custom logger instance (pass null to restore console output)
   */
  setLogger(logger2) {
    this.customLogger = logger2;
  }

  /**
   * Returns the logger currently in use: the custom logger itself when one is
   * installed, otherwise a console-backed object exposing log/info/error/warn.
   */
  getLogger() {
    return this.customLogger || {
      log: (msg) => console.log(msg),
      info: (msg) => console.info(msg),
      error: (msg) => console.error(msg),
      warn: (msg) => console.warn(msg)
    };
  }

  /**
   * Sends `msg` to the active logger at `level`, falling back to the console
   * method of the same name when the active logger does not implement it.
   * @param level one of "log", "info", "error", "warn"
   * @param msg value to report
   */
  emit(level, msg) {
    const target = this.getLogger();
    if (typeof target[level] === "function") {
      target[level](msg);
      return;
    }
    console[level](msg);
  }

  /**
   * Writes a plain log message.
   */
  log(msg) {
    this.emit("log", msg);
  }

  /**
   * Writes an informational message.
   */
  info(msg) {
    this.emit("info", msg);
  }

  /**
   * Writes an error message.
   */
  error(msg) {
    this.emit("error", msg);
  }

  /**
   * Writes a warning message.
   */
  warn(msg) {
    this.emit("warn", msg);
  }
}
const logger = GlobalLogger.getInstance();
41
77
  function safeFineName(name) {
42
78
  return name.replace(/[/\\?%*:|"<>\t\r\n]/g, "_");
43
79
  }
44
80
  const mimeModule$1 = require("mime/lite");
45
81
  const mime$1 = mimeModule$1.default || mimeModule$1;
46
- function parseContent(content, index, epubConfigs) {
47
- let chapter = { ...content };
82
// Attribute names that are kept on chapter markup; anything else is stripped.
// The list is alphabetical and free of duplicates (the previous inline list
// repeated "content", "id" and "title").
const ALLOWED_ATTRIBUTES = [
  // General attributes
  "about", "accesskey", "alt",
  // WAI-ARIA
  "aria-activedescendant", "aria-atomic", "aria-autocomplete", "aria-busy",
  "aria-checked", "aria-controls", "aria-describedat", "aria-describedby",
  "aria-disabled", "aria-dropeffect", "aria-expanded", "aria-flowto",
  "aria-grabbed", "aria-haspopup", "aria-hidden", "aria-invalid",
  "aria-label", "aria-labelledby", "aria-level", "aria-live",
  "aria-multiline", "aria-multiselectable", "aria-orientation", "aria-owns",
  "aria-posinset", "aria-pressed", "aria-readonly", "aria-relevant",
  "aria-required", "aria-selected", "aria-setsize", "aria-sort",
  "aria-valuemax", "aria-valuemin", "aria-valuenow", "aria-valuetext",
  // c - n
  "class", "colspan", "content", "contenteditable", "contextmenu",
  "datatype", "dir", "draggable", "dropzone",
  "epub:prefix", "epub:type",
  "hidden", "href", "hreflang",
  "id", "inlist", "itemid", "itemref", "itemscope", "itemtype",
  "lang", "media",
  "ns1:type", "ns2:alphabet", "ns2:ph",
  // Event handlers
  "onabort", "onblur", "oncanplay", "oncanplaythrough", "onchange",
  "onclick", "oncontextmenu", "ondblclick", "ondrag", "ondragend",
  "ondragenter", "ondragleave", "ondragover", "ondragstart", "ondrop",
  "ondurationchange", "onemptied", "onended", "onerror", "onfocus",
  "oninput", "oninvalid", "onkeydown", "onkeypress", "onkeyup",
  "onload", "onloadeddata", "onloadedmetadata", "onloadstart",
  "onmousedown", "onmousemove", "onmouseout", "onmouseover", "onmouseup",
  "onmousewheel", "onpause", "onplay", "onplaying", "onprogress",
  "onratechange", "onreadystatechange", "onreset", "onscroll", "onseeked",
  "onseeking", "onselect", "onshow", "onstalled", "onsubmit",
  "onsuspend", "ontimeupdate", "onvolumechange", "onwaiting",
  // p - x
  "prefix", "property",
  "rel", "resource", "rev", "role", "rowspan",
  "spellcheck", "src", "style",
  "tabindex", "target", "title", "type", "typeof",
  "vocab",
  "xml:base", "xml:lang", "xml:space"
];
223
// Tag names accepted as-is when generating EPUB 2 (XHTML 1.1) output.
// The list is alphabetical and free of duplicates (the previous inline list
// repeated "img", "object", "table" and "tt").
const ALLOWED_XHTML11_TAGS = [
  "a", "abbr", "acronym", "address", "applet",
  "b", "bar", "basefont", "bdo", "big", "blockquote", "br",
  "caption", "center", "cite", "code", "col", "colgroup",
  "dd", "del", "dfn", "div", "dl", "dt",
  "em", "embed",
  "font",
  "h1", "h2", "h3", "h4", "h5", "h6", "hr",
  "i", "iframe", "img", "ins",
  "kbd",
  "li",
  "map",
  "noscript", "ns:svg",
  "object", "ol",
  "p", "param", "pre",
  "q",
  "s", "samp", "script", "small", "span", "strike", "strong", "sub", "sup",
  "table", "tbody", "td", "tfoot", "th", "thead", "tr", "tt",
  "u", "ul",
  "var"
];
295
+ const ALLOWED_ATTRIBUTES_SET = new Set(ALLOWED_ATTRIBUTES);
296
+ const ALLOWED_XHTML11_TAGS_SET = new Set(ALLOWED_XHTML11_TAGS);
297
+ const SELF_CLOSING_TAGS = /* @__PURE__ */ new Set(["img", "br", "hr"]);
298
+ function initializeChapterInfo(content, index2, epubConfigs) {
299
+ const chapter = { ...content };
48
300
  let { filename } = chapter;
49
301
  if (!filename) {
50
- let titleSlug = uslug(diacritics.remove(chapter.title || "no title"));
51
- titleSlug = titleSlug.replace(/[\/\\]/g, "_");
52
- chapter.href = `${index}_${titleSlug}.xhtml`;
302
+ let titleSlug = uslug(remove(chapter.title || "no title"));
303
+ titleSlug = titleSlug.replace(/[/\\]/g, "_");
304
+ chapter.href = `${index2}_${titleSlug}.xhtml`;
53
305
  chapter.filePath = path.join(epubConfigs.dir, "OEBPS", chapter.href);
54
306
  } else {
55
307
  filename = safeFineName(filename);
56
- let is_xhtml = filename.endsWith(".xhtml");
308
+ const is_xhtml = filename.endsWith(".xhtml");
57
309
  chapter.href = is_xhtml ? filename : `${filename}.xhtml`;
58
310
  if (is_xhtml) {
59
311
  chapter.filePath = path.join(epubConfigs.dir, "OEBPS", filename);
@@ -61,317 +313,261 @@ function parseContent(content, index, epubConfigs) {
61
313
  chapter.filePath = path.join(epubConfigs.dir, "OEBPS", `${filename}.xhtml`);
62
314
  }
63
315
  }
64
- chapter.id = `item_${index}`;
316
+ chapter.id = `item_${index2}`;
65
317
  chapter.dir = path.dirname(chapter.filePath);
66
318
  chapter.excludeFromToc = chapter.excludeFromToc || false;
67
319
  chapter.beforeToc = chapter.beforeToc || false;
320
+ return chapter;
321
+ }
322
/**
 * Coerces `chapter.author` into an array, in place.
 *
 * - a non-empty string becomes a one-element array;
 * - an existing array is left untouched;
 * - anything else (missing, empty string, other types) becomes `[]`.
 *
 * @param chapter chapter object whose `author` field is normalised
 */
function normalizeAuthorInfo(chapter) {
  const { author } = chapter;
  if (typeof author === "string" && author) {
    chapter.author = [author];
    return;
  }
  if (!Array.isArray(author)) {
    chapter.author = [];
  }
}
329
/**
 * Returns the module-level ALLOWED_ATTRIBUTES array itself (not a copy), so
 * callers must not mutate the result.
 */
+ function getAllowedAttributes() {
330
+ return ALLOWED_ATTRIBUTES;
331
+ }
332
/**
 * Returns the module-level ALLOWED_XHTML11_TAGS array itself (not a copy), so
 * callers must not mutate the result.
 */
+ function getAllowedXhtml11Tags() {
333
+ return ALLOWED_XHTML11_TAGS;
334
+ }
335
/**
 * Parses chapter markup with cheerio in XML mode and returns the cheerio root.
 *
 * When the parsed document contains a `<body>` with non-empty inner markup,
 * that inner markup is parsed again and the second document is returned, so
 * callers work on the body content only.
 *
 * @param data chapter markup
 * @returns cheerio root function for the parsed markup
 * @throws Error when `data` is not a string, is empty, or is whitespace only
 * @throws Error wrapping any failure raised while parsing
 */
function loadAndProcessHtml(data) {
  const isNonEmptyString = typeof data === "string" && data.length > 0;
  if (!isNonEmptyString) {
    throw new Error("Invalid HTML data: data must be a non-empty string");
  }

  const markup = data.trim();
  if (markup === "") {
    throw new Error("Invalid HTML data: data cannot be empty or whitespace only");
  }

  // A fresh options object per load call, mirroring the two literals used originally.
  const xmlOptions = () => ({
    xml: {
      lowerCaseTags: true,
      recognizeSelfClosing: true
    }
  });

  try {
    const $document = cheerio.load(markup, xmlOptions());
    const bodyNodes = $document("body");
    if (!bodyNodes.length) {
      return $document;
    }
    const bodyMarkup = bodyNodes.html();
    return bodyMarkup ? cheerio.load(bodyMarkup, xmlOptions()) : $document;
  } catch (error) {
    const reason = error instanceof Error ? error.message : "Unknown error";
    throw new Error(`Failed to parse HTML content: ${reason}`);
  }
}
369
/**
 * Sanitises every element of the parsed chapter, visiting elements in reverse
 * document order.
 *
 * For each element it:
 *  1. gives `<img>` elements without an `alt` the value "image-placeholder";
 *  2. removes attributes that are not in ALLOWED_ATTRIBUTES_SET, and removes
 *     `type` from everything except `<script>`;
 *  3. for EPUB 2 output only, replaces elements whose tag is not in
 *     ALLOWED_XHTML11_TAGS_SET with a `<div>` wrapping their inner markup
 *     (logging a warning first when `epubConfigs.verbose` is set).
 *
 * @param $ cheerio root of the chapter
 * @param allowedAttributes unused; the module-level set is consulted instead
 * @param allowedXhtml11Tags unused; the module-level set is consulted instead
 * @param epubConfigs generation options (`version`, `verbose` are read)
 * @param index2 chapter index, used only in the warning text
 */
function processHtmlElements($, allowedAttributes, allowedXhtml11Tags, epubConfigs, index2) {
  const elementsLastFirst = $("*").get().reverse();

  $(elementsLastFirst).each((position, node) => {
    const tag = node.name;
    const $node = $(node);
    const attributes = node.attribs || {};

    const needsAltText = SELF_CLOSING_TAGS.has(tag) && tag === "img" && !$node.attr("alt");
    if (needsAltText) {
      $node.attr("alt", "image-placeholder");
    }

    // Collect first, remove afterwards, so the attribute map is not edited while being read.
    const rejected = Object.keys(attributes).filter((attribute) => {
      if (!ALLOWED_ATTRIBUTES_SET.has(attribute)) {
        return true;
      }
      return attribute === "type" && tag !== "script";
    });
    rejected.forEach((attribute) => {
      $node.removeAttr(attribute);
    });

    if (epubConfigs.version !== 2 || ALLOWED_XHTML11_TAGS_SET.has(tag)) {
      return;
    }
    if (epubConfigs.verbose) {
      logger.warn(
        `Warning (content[${index2}]): ${tag} tag isn't allowed on EPUB 2/XHTML 1.1 DTD.`
      );
    }
    const innerMarkup = $node.html();
    $node.replaceWith($(`<div>${innerMarkup}</div>`));
  });
}
408
/**
 * Rewrites every `<img>` in the chapter to point at `images/<id>.<extension>`
 * and registers images not seen before in `epubConfigs.images`.
 *
 * Images whose `src` is missing or blank are removed from the document. A
 * `src` that does not start with http(s)://, data:, ./ or / only triggers a
 * warning. For a new image the media type comes from the `mime` lookup, then
 * from a short list of known extensions, and finally defaults to image/jpeg.
 *
 * @param $ cheerio root of the chapter
 * @param chapter chapter being processed (`dir` is stored on new image records)
 * @param epubConfigs generation options (`images` is read and appended to, `verbose` is read)
 */
function processImages($, chapter, epubConfigs) {
  const inferableExtensions = ["jpg", "jpeg", "png", "gif", "webp", "svg"];

  // Determines the media type for a URL, never throwing.
  const detectMediaType = (sourceUrl) => {
    try {
      const withoutQuery = sourceUrl.replace(/\?.*/, "");
      const looked = mime$1.getType(withoutQuery) || "";
      if (looked) {
        return looked;
      }
      const suffix = withoutQuery.split(".").pop()?.toLowerCase();
      if (suffix && inferableExtensions.includes(suffix)) {
        const inferred = `image/${suffix === "jpg" ? "jpeg" : suffix}`;
        logger.warn(
          `Could not determine MIME type for "${sourceUrl}", inferred as "${inferred}"`
        );
        return inferred;
      }
      logger.warn(
        `Could not determine MIME type for "${sourceUrl}", defaulting to image/jpeg`
      );
      return "image/jpeg";
    } catch (error) {
      logger.error(`Error determining MIME type for "${sourceUrl}": ${error}`);
      return "image/jpeg";
    }
  };

  // Determines the file extension for a media type, never throwing.
  const detectExtension = (mediaType) => {
    try {
      return mime$1.getExtension(mediaType) || "jpg";
    } catch (error) {
      logger.error(`Error getting extension for MIME type "${mediaType}": ${error}`);
      return "jpg";
    }
  };

  $("img").each((index2, elem) => {
    const rawSrc = $(elem).attr("src") || "";
    const trimmedUrl = rawSrc.trim();
    if (trimmedUrl.length === 0) {
      logger.warn(`Image at index ${index2} in chapter has empty src attribute, removing element`);
      $(elem).remove();
      return;
    }

    try {
      const looksFamiliar = /^(https?:\/\/|data:|\.\/|\/)/.test(trimmedUrl);
      if (!looksFamiliar) {
        logger.warn(`Image URL "${trimmedUrl}" appears to be invalid, but processing anyway`);
      }
    } catch (error) {
      logger.error(`Error validating image URL "${trimmedUrl}": ${error}`);
    }

    let id;
    let extension;
    const known = epubConfigs.images.find((entry) => entry.url === trimmedUrl);
    if (known) {
      ({ id, extension } = known);
    } else {
      id = v4();
      const mediaType = detectMediaType(trimmedUrl);
      extension = detectExtension(mediaType);
      epubConfigs.images.push({
        id,
        url: trimmedUrl,
        dir: chapter.dir || "",
        mediaType,
        extension
      });
      if (epubConfigs.verbose) {
        logger.info(`Added image: ${trimmedUrl} -> images/${id}.${extension} (${mediaType})`);
      }
    }

    try {
      $(elem).attr("src", `images/${id}.${extension}`);
    } catch (error) {
      logger.error(`Error setting src attribute for image ${id}: ${error}`);
      $(elem).remove();
    }
  });
}
475
/**
 * Rewrites HTML void elements in XHTML self-closing form.
 *
 * Handles both an empty open/close pair (`<br></br>`) and an unclosed tag
 * (`<br>`). The tag name must be followed by whitespace, "/" or ">", so
 * longer names that merely start with a void tag name (`<colgroup>`,
 * `<basefont>`) are left alone.
 *
 * @param html serialised markup
 * @returns markup with void elements written as `<tag .../>`
 */
function selfCloseVoidElements(html) {
  const voidTags = "br|hr|img|input|meta|area|base|col|embed|link|source|track|wbr";
  const emptyPair = new RegExp(`<(${voidTags})(?=[\\s/>])([^>]*?)><\\/\\1>`, "gi");
  // The lookbehind skips tags that already end in "/>".
  const unclosed = new RegExp(`<(${voidTags})(?=[\\s/>])([^>]*?)(?<!\\/)>`, "gi");
  return html.replace(emptyPair, "<$1$2/>").replace(unclosed, "<$1$2/>");
}

/**
 * Serialises chapter markup and normalises void elements to self-closing form.
 *
 * Without `originalData`, the markup is taken from `$` (the `<body>` content
 * when a body exists, otherwise the document root).
 *
 * With `originalData`, the original text is re-parsed in HTML mode after every
 * character entity has been swapped for a unique placeholder; the entities are
 * restored verbatim after serialisation so they survive the round trip.
 * NOTE(review): on this path `$` is not consulted at all, so edits made to `$`
 * by earlier steps do not appear in the result - confirm this is intended.
 *
 * @param $ cheerio root of the processed chapter
 * @param originalData original chapter markup, or a falsy value to serialise `$`
 * @returns serialised markup
 */
function extractAndCleanHtmlContent($, originalData) {
  if (!originalData) {
    const serialised = $("body").length ? $("body").html() || "" : $.root().html() || "";
    return selfCloseVoidElements(serialised);
  }

  const entityRegex = /&[a-zA-Z][a-zA-Z0-9]*;|&#[0-9]+;|&#x[0-9a-fA-F]+;/g;
  const timestamp = Date.now();
  const randomId = Math.random().toString(36).substring(2, 8);
  const placeholderPrefix = `__ENTITY_${timestamp}_${randomId}_`;

  // Mask each entity with a numbered placeholder (the trailing "__" keeps
  // "..._1__" from being a prefix of "..._10__").
  const maskedEntities = [];
  const processedOriginal = originalData.replace(entityRegex, (entity) => {
    const placeholder = `${placeholderPrefix}${maskedEntities.length}__`;
    maskedEntities.push([placeholder, entity]);
    return placeholder;
  });

  const $temp = cheerio.load(processedOriginal, {
    xmlMode: false
  });
  let tempData = $temp("body").length ? $temp("body").html() || "" : $temp.root().html() || "";

  for (const [placeholder, entity] of maskedEntities) {
    // Literal replacement: no regex or "$" substitution semantics involved.
    tempData = tempData.split(placeholder).join(entity);
  }
  return selfCloseVoidElements(tempData);
}
524
/**
 * Parses nested chapters in place.
 *
 * When `chapter.children` is an array, each child is run through
 * `parseContent` with the index `<parent index>_<child position>` and the
 * array is replaced by the parsed results. Any other value is left untouched.
 *
 * @param chapter parent chapter
 * @param index2 index of the parent chapter
 * @param epubConfigs generation options, forwarded to `parseContent`
 */
function processChildrenChapters(chapter, index2, epubConfigs) {
  const { children } = chapter;
  if (!Array.isArray(children)) {
    return;
  }
  chapter.children = children.map((child, position) => {
    const childIndex = `${index2}_${position}`;
    return parseContent(child, childIndex, epubConfigs);
  });
}
531
/**
 * Turns one content entry into a chapter record ready to be written out.
 *
 * Steps: derive file name/id fields, normalise the author list, sanitise the
 * markup and rewrite image sources, then recurse into `children`.
 *
 * Side effect: when `content.data` is falsy it is set to "" on the caller's
 * object before the chapter is derived from it.
 *
 * @param content content entry supplied by the caller
 * @param index2 chapter index (number, or "<parent>_<n>" for nested chapters)
 * @param epubConfigs generation options
 * @returns the chapter record
 * @throws Error when `content` is falsy
 */
function parseContent(content, index2, epubConfigs) {
  if (!content) {
    throw new Error("Content cannot be null or undefined");
  }
  if (!content.data) {
    logger.warn(`Chapter at index ${index2} has no data, using empty string`);
    content.data = "";
  }

  const chapter = initializeChapterInfo(content, index2, epubConfigs);
  normalizeAuthorInfo(chapter);
  const allowedAttributes = getAllowedAttributes();
  const allowedXhtml11Tags = getAllowedXhtml11Tags();

  // Parse strictly first; if that fails, fall back to a lenient parse of the
  // markup wrapped in a <div>.
  const parseChapterMarkup = () => {
    try {
      return loadAndProcessHtml(chapter.data);
    } catch (error) {
      logger.error(`Failed to process HTML for chapter ${index2}: ${error}`);
      return cheerio.load(`<div>${chapter.data}</div>`);
    }
  };

  const hasMarkup = Boolean(chapter.data) && chapter.data.trim().length > 0;
  if (hasMarkup) {
    const $ = parseChapterMarkup();
    processHtmlElements($, allowedAttributes, allowedXhtml11Tags, epubConfigs, index2);
    processImages($, chapter, epubConfigs);
    chapter.data = extractAndCleanHtmlContent($, content.data);
  } else {
    logger.warn(`Chapter at index ${index2} has empty data, setting empty content`);
    chapter.data = "";
  }

  processChildrenChapters(chapter, index2, epubConfigs);
  return chapter;
}
365
- util.promisify(fs.readFile);
366
- const writeFile = util.promisify(fs.writeFile);
561
+ promisify(fs.readFile);
562
+ const writeFile = promisify(fs.writeFile);
367
563
  const USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36";
368
564
  const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
369
565
  async function fileIsStable(filename, max_wait = 3e4) {
370
- let start_time = (/* @__PURE__ */ new Date()).getTime();
566
+ const start_time = (/* @__PURE__ */ new Date()).getTime();
371
567
  let last_size = fs.statSync(filename).size;
372
568
  while ((/* @__PURE__ */ new Date()).getTime() - start_time <= max_wait) {
373
569
  await wait(1e3);
374
- let size = fs.statSync(filename).size;
570
+ const size = fs.statSync(filename).size;
375
571
  if (size === last_size) return true;
376
572
  last_size = size;
377
573
  }
@@ -382,20 +578,20 @@ function simpleMinifier(xhtml) {
382
578
  return xhtml;
383
579
  }
384
580
  const downloadImage = async (epubData, options) => {
385
- let { url: url2 } = options;
386
- let { log } = epubData;
387
- let epub_dir = epubData.dir;
388
- if (!url2) {
581
+ const { url } = options;
582
+ const { log } = epubData;
583
+ const epub_dir = epubData.dir;
584
+ if (!url) {
389
585
  return;
390
586
  }
391
- let image_dir = path.join(epub_dir, "OEBPS", "images");
587
+ const image_dir = path.join(epub_dir, "OEBPS", "images");
392
588
  fs$1.ensureDirSync(image_dir);
393
- let filename = path.join(image_dir, options.id + "." + options.extension);
394
- if (url2.startsWith("file://") || url2.startsWith("/")) {
395
- let aux_path = url2.replace(/^file:\/\//i, "");
589
+ const filename = path.join(image_dir, options.id + "." + options.extension);
590
+ if (url.startsWith("file://") || url.startsWith("/")) {
591
+ let aux_path = url.replace(/^file:\/\//i, "");
396
592
  try {
397
593
  aux_path = decodeURIComponent(aux_path);
398
- } catch (e) {
594
+ } catch {
399
595
  log(`[URL Decode Warning] Failed to decode path: ${aux_path}`);
400
596
  }
401
597
  if (process.platform === "win32") {
@@ -411,37 +607,37 @@ const downloadImage = async (epubData, options) => {
411
607
  log("[Copy 1 Error] " + e.message);
412
608
  }
413
609
  } else {
414
- log(`[Copy 1 Fail] '${url2}' not exists!`);
610
+ log(`[Copy 1 Fail] '${url}' not exists!`);
415
611
  }
416
612
  return;
417
613
  }
418
614
  let requestAction;
419
- if (url2.startsWith("http")) {
420
- requestAction = request.get(url2).set({ "User-Agent": USER_AGENT });
615
+ if (url.startsWith("http")) {
616
+ requestAction = request.get(url).set({ "User-Agent": USER_AGENT });
421
617
  requestAction.pipe(fs$1.createWriteStream(filename));
422
618
  } else {
423
- log(`[Copy 2] '${url2}' to '${filename}'`);
424
- requestAction = fs$1.createReadStream(path.join(options.dir || "", url2));
619
+ log(`[Copy 2] '${url}' to '${filename}'`);
620
+ requestAction = fs$1.createReadStream(path.join(options.dir || "", url));
425
621
  requestAction.pipe(fs$1.createWriteStream(filename));
426
622
  }
427
- return new Promise((resolve, reject) => {
623
+ return new Promise((resolve, _reject) => {
428
624
  requestAction.on("error", (err) => {
429
- log("[Download Error] Error while downloading: " + url2);
625
+ log("[Download Error] Error while downloading: " + url);
430
626
  log(err);
431
627
  fs$1.unlinkSync(filename);
432
628
  resolve();
433
629
  });
434
630
  requestAction.on("end", () => {
435
- log("[Download Success] " + url2);
631
+ log("[Download Success] " + url);
436
632
  resolve();
437
633
  });
438
634
  });
439
635
  };
440
636
  const downloadAllImages = async (epubData) => {
441
- let { images } = epubData;
637
+ const { images } = epubData;
442
638
  if (images.length === 0) return;
443
639
  fs$1.ensureDirSync(path.join(epubData.dir, "OEBPS", "images"));
444
- for (let image of images) {
640
+ for (const image of images) {
445
641
  await downloadImage(epubData, image);
446
642
  }
447
643
  };
@@ -615,7 +811,7 @@ const epub3_content_opf_ejs = `<?xml version="1.0" encoding="UTF-8"?>
615
811
  </package>
616
812
  `;
617
813
  const epub3_toc_xhtml_ejs = `<?xml version="1.0" encoding="UTF-8"?>
618
- <!DOCTYPE html>
814
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
619
815
  <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="<%- lang %>"
620
816
  lang="<%- lang %>">
621
817
  <head>
@@ -747,16 +943,16 @@ const toc_ncx_ejs = `<?xml version="1.0" encoding="UTF-8"?>
747
943
  </ncx>
748
944
  `;
749
945
  const generateTempFile = async (epubData) => {
750
- let { log } = epubData;
751
- let oebps_dir = path.join(epubData.dir, "OEBPS");
946
+ const { log } = epubData;
947
+ const oebps_dir = path.join(epubData.dir, "OEBPS");
752
948
  await fs$1.ensureDir(oebps_dir);
753
949
  epubData.css = epubData.css || template_css;
754
950
  await writeFile(path.join(oebps_dir, "style.css"), epubData.css, "utf-8");
755
951
  if (epubData.fonts?.length) {
756
- let fonts_dir = path.join(oebps_dir, "fonts");
952
+ const fonts_dir = path.join(oebps_dir, "fonts");
757
953
  await fs$1.ensureDir(fonts_dir);
758
954
  epubData.fonts = epubData.fonts.map((font) => {
759
- let filename = path.basename(font);
955
+ const filename = path.basename(font);
760
956
  if (!fs$1.existsSync(font)) {
761
957
  log(`Custom font not found at '${font}'.`);
762
958
  } else {
@@ -770,7 +966,7 @@ const generateTempFile = async (epubData) => {
770
966
  return !!global_append;
771
967
  };
772
968
  const saveContentToFile = (content) => {
773
- let title = entities__namespace.encodeXML(content.title || "");
969
+ const title = entities.encodeXML(content.title || "");
774
970
  let html = `${epubData.docHeader}
775
971
  <head>
776
972
  <meta charset="UTF-8" />
@@ -782,7 +978,7 @@ const generateTempFile = async (epubData) => {
782
978
  if (content.title && isAppendTitle(epubData.appendChapterTitles, content.appendChapterTitle)) {
783
979
  html += `<h1>${title}</h1>`;
784
980
  }
785
- html += content.title && content.author && content.author?.length ? `<p class='epub-author'>${entities__namespace.encodeXML(content.author.join(", "))}</p>` : "";
981
+ html += content.title && content.author && content.author?.length ? `<p class='epub-author'>${entities.encodeXML(content.author.join(", "))}</p>` : "";
786
982
  html += content.title && content.url ? `<p class="epub-link"><a href="${content.url}">${content.url}</a></p>` : "";
787
983
  html += `${content.data}`;
788
984
  html += "\n</body>\n</html>";
@@ -793,7 +989,7 @@ const generateTempFile = async (epubData) => {
793
989
  }
794
990
  };
795
991
  epubData.content.map(saveContentToFile);
796
- let metainf_dir = path.join(epubData.dir, "META-INF");
992
+ const metainf_dir = path.join(epubData.dir, "META-INF");
797
993
  fs$1.ensureDirSync(metainf_dir);
798
994
  fs$1.writeFileSync(
799
995
  path.join(metainf_dir, "container.xml"),
@@ -804,7 +1000,7 @@ const generateTempFile = async (epubData) => {
804
1000
  "utf-8"
805
1001
  );
806
1002
  if (epubData.version === 2) {
807
- let fn = path.join(metainf_dir, "com.apple.ibooks.display-options.xml");
1003
+ const fn = path.join(metainf_dir, "com.apple.ibooks.display-options.xml");
808
1004
  fs$1.writeFileSync(
809
1005
  fn,
810
1006
  `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
@@ -838,7 +1034,7 @@ const generateTempFile = async (epubData) => {
838
1034
  } else {
839
1035
  htmlTocTemplate = epubData.version === 2 ? epub2_toc_xhtml_ejs : epub3_toc_xhtml_ejs;
840
1036
  }
841
- let toc_depth = 1;
1037
+ const toc_depth = 1;
842
1038
  fs$1.writeFileSync(path.join(oebps_dir, "content.opf"), ejs.render(opfTemplate, epubData), "utf-8");
843
1039
  fs$1.writeFileSync(
844
1040
  path.join(oebps_dir, "toc.ncx"),
@@ -852,9 +1048,9 @@ const generateTempFile = async (epubData) => {
852
1048
  );
853
1049
  };
854
1050
  async function makeCover(data) {
855
- let { cover, _coverExtension, log } = data;
1051
+ const { cover, _coverExtension, log } = data;
856
1052
  if (!cover) return;
857
- let destPath = path.join(data.dir, "OEBPS", `cover.${_coverExtension}`);
1053
+ const destPath = path.join(data.dir, "OEBPS", `cover.${_coverExtension}`);
858
1054
  let writeStream = null;
859
1055
  if (cover.startsWith("http")) {
860
1056
  writeStream = request.get(cover).set({ "User-Agent": USER_AGENT });
@@ -886,7 +1082,7 @@ async function makeCover(data) {
886
1082
  });
887
1083
  }
888
1084
  async function render(data) {
889
- let { log } = data;
1085
+ const { log } = data;
890
1086
  log("Generating Template Files...");
891
1087
  await generateTempFile(data);
892
1088
  log("Downloading Images...");
@@ -903,14 +1099,14 @@ async function render(data) {
903
1099
  }
904
1100
  }
905
1101
  async function genEpub(epubData) {
906
- let { log, dir, output } = epubData;
907
- let archive = archiver("zip", { zlib: { level: 9 } });
908
- let outputStream = fs$1.createWriteStream(epubData.output);
1102
+ const { log, dir, output } = epubData;
1103
+ const archive = archiver("zip", { zlib: { level: 9 } });
1104
+ const outputStream = fs$1.createWriteStream(epubData.output);
909
1105
  log("Zipping temp dir to " + output);
910
1106
  return new Promise((resolve, reject) => {
911
1107
  archive.on("end", async () => {
912
1108
  log("Done zipping, clearing temp dir...");
913
- let stable = await fileIsStable(epubData.output);
1109
+ const stable = await fileIsStable(epubData.output);
914
1110
  if (!stable) {
915
1111
  log("Output epub file is not stable!");
916
1112
  }
@@ -941,16 +1137,16 @@ async function genEpub(epubData) {
941
1137
  }
942
1138
  const mimeModule = require("mime/lite");
943
1139
  const mime = mimeModule.default || mimeModule;
944
- const __filename$1 = url.fileURLToPath(typeof document === "undefined" ? require("url").pathToFileURL(__filename).href : _documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === "SCRIPT" && _documentCurrentScript.src || new URL("index.js", document.baseURI).href);
945
- const __dirname$1 = path.dirname(__filename$1);
946
- const baseDir = __dirname$1;
1140
+ const __filename = fileURLToPath(import.meta.url);
1141
+ const __dirname = path.dirname(__filename);
1142
+ const baseDir = __dirname;
947
1143
  function result(success, message, options) {
948
1144
  if (options && options.verbose) {
949
1145
  if (!success) {
950
- console.error(new Error(message));
1146
+ logger.error(new Error(message));
951
1147
  }
952
1148
  }
953
- let out = {
1149
+ const out = {
954
1150
  success
955
1151
  };
956
1152
  if (typeof message === "string") {
@@ -974,9 +1170,9 @@ function check(options) {
974
1170
  return result(true, void 0, options);
975
1171
  }
976
1172
  function parseOptions(options) {
977
- let tmpDir = options.tmpDir || os.tmpdir();
978
- let id = uuid.v4();
979
- let data = {
1173
+ const tmpDir = options.tmpDir || os.tmpdir();
1174
+ const id = v4();
1175
+ const data = {
980
1176
  description: options.title,
981
1177
  publisher: "anonymous",
982
1178
  author: ["anonymous"],
@@ -998,7 +1194,7 @@ function parseOptions(options) {
998
1194
  docHeader: "",
999
1195
  images: [],
1000
1196
  content: [],
1001
- log: (msg) => options.verbose && console.log(msg)
1197
+ log: (msg) => options.verbose && logger.log(msg)
1002
1198
  };
1003
1199
  if (data.version === 2) {
1004
1200
  data.docHeader = `<?xml version="1.0" encoding="UTF-8"?>
@@ -1007,7 +1203,7 @@ function parseOptions(options) {
1007
1203
  `;
1008
1204
  } else {
1009
1205
  data.docHeader = `<?xml version="1.0" encoding="UTF-8"?>
1010
- <!DOCTYPE html>
1206
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
1011
1207
  <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="#{self.options.lang}">
1012
1208
  `;
1013
1209
  }
@@ -1017,46 +1213,54 @@ function parseOptions(options) {
1017
1213
  if (typeof data.author === "string") {
1018
1214
  data.author = [data.author];
1019
1215
  }
1020
- data.content = options.content.map((content, index) => parseContent(content, index, data));
1216
+ data.content = options.content.map((content, index2) => parseContent(content, index2, data));
1021
1217
  if (data.cover) {
1022
1218
  data._coverMediaType = mime.getType(data.cover) || "";
1023
1219
  data._coverExtension = mime.getExtension(data._coverMediaType) || "";
1024
1220
  }
1025
1221
  return data;
1026
1222
  }
1027
- async function epubGen(options, output) {
1028
- options = { ...options };
1029
- if (output) {
1030
- options.output = output;
1223
+ async function epubGen(options, configs) {
1224
+ if (configs?.logger) {
1225
+ logger.setLogger(configs.logger);
1031
1226
  }
1032
- let o = check(options);
1033
- let verbose = options.verbose !== false;
1227
+ options = { ...options };
1228
+ const o = check(options);
1229
+ const verbose = options.verbose !== false;
1034
1230
  if (!o.success) {
1035
- if (verbose) console.error(o.message);
1231
+ if (verbose) logger.error(o.message);
1036
1232
  return o;
1037
1233
  }
1038
1234
  let t;
1039
1235
  try {
1040
- let data = parseOptions(options);
1041
- let timeoutSeconds = data.timeoutSeconds || 0;
1236
+ const data = parseOptions(options);
1237
+ const timeoutSeconds = data.timeoutSeconds || 0;
1042
1238
  if (timeoutSeconds > 0) {
1043
- if (verbose) console.log(`TIMEOUT: ${timeoutSeconds}s`);
1239
+ if (verbose) logger.log(`TIMEOUT: ${timeoutSeconds}s`);
1044
1240
  t = setTimeout(() => {
1045
1241
  throw new Error("timeout!");
1046
1242
  }, timeoutSeconds * 1e3);
1047
1243
  } else {
1048
- if (verbose) console.log(`TIMEOUT: N/A`);
1244
+ if (verbose) logger.log(`TIMEOUT: N/A`);
1049
1245
  }
1050
1246
  await render(data);
1051
1247
  return result(true, void 0, data);
1052
1248
  } catch (e) {
1053
- if (verbose) console.error(e);
1249
+ if (verbose) logger.error(e);
1054
1250
  return result(false, e.message, options);
1055
1251
  } finally {
1056
1252
  clearTimeout(t);
1057
1253
  }
1058
1254
  }
1059
1255
  const gen = epubGen;
1060
- exports.epubGen = epubGen;
1061
- exports.errors = errors;
1062
- exports.gen = gen;
1256
+ const index = {
1257
+ epubGen,
1258
+ gen,
1259
+ errors
1260
+ };
1261
+ export {
1262
+ index as default,
1263
+ epubGen,
1264
+ errors,
1265
+ gen
1266
+ };