wp-epub-gen 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,38 +1,332 @@
1
- import os from "os";
2
- import path from "path";
3
- import { fileURLToPath } from "url";
4
- import { v4 } from "uuid";
5
- import * as cheerio from "cheerio";
6
- import { remove } from "diacritics";
7
- import uslug from "uslug";
8
- import archiver from "archiver";
9
- import fs$1 from "fs-extra";
10
- import request from "superagent";
11
- import fs from "fs";
12
- import { promisify } from "util";
13
- import ejs from "ejs";
14
- import * as entities from "entities";
1
+ "use strict";
2
+ Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
3
+ const os = require("os");
4
+ const path = require("path");
5
+ const url = require("url");
6
+ const uuid = require("uuid");
7
+ const cheerio = require("cheerio");
8
+ const diacritics = require("diacritics");
9
+ const uslug = require("uslug");
10
+ const archiver = require("archiver");
11
+ const fs$1 = require("fs-extra");
12
+ const request = require("superagent");
13
+ const fs = require("fs");
14
+ const util = require("util");
15
+ const ejs = require("ejs");
16
+ const entities = require("entities");
17
+ var _documentCurrentScript = typeof document !== "undefined" ? document.currentScript : null;
18
/**
 * Wraps a CommonJS export object in a frozen, module-namespace-like object.
 *
 * Every enumerable key of `e` except "default" is exposed on the result:
 * through the original descriptor when the key is an own accessor, otherwise
 * through a getter that reads `e[k]` on each access. The `default` property
 * holds `e` itself.
 *
 * @param {*} e - Value returned by `require()`; falsy values yield a namespace
 *   that only carries `default`.
 * @returns {object} Frozen, null-prototype object tagged "Module".
 */
function _interopNamespaceDefault(e) {
  const n = Object.create(null, { [Symbol.toStringTag]: { value: "Module" } });
  if (e) {
    for (const k in e) {
      if (k === "default") {
        continue;
      }
      // `for...in` also visits inherited enumerable keys, which have no own
      // descriptor. Guard the lookup so those keys fall through to the live
      // getter instead of throwing on `undefined.get`.
      const d = Object.getOwnPropertyDescriptor(e, k);
      Object.defineProperty(n, k, d && d.get ? d : {
        enumerable: true,
        get: () => e[k]
      });
    }
  }
  n.default = e;
  return Object.freeze(n);
}
34
+ const cheerio__namespace = /* @__PURE__ */ _interopNamespaceDefault(cheerio);
35
+ const entities__namespace = /* @__PURE__ */ _interopNamespaceDefault(entities);
15
36
  const errors = {
16
37
  no_output_path: "No output path!",
17
38
  no_title: "Title is required.",
18
39
  no_content: "Content is required."
19
40
  };
41
/**
 * Builds the console-backed logger that is used while no custom logger is set.
 *
 * @returns {{log: Function, info: Function, error: Function, warn: Function}}
 */
function createConsoleLogger() {
  return {
    log: (msg) => console.log(msg),
    info: (msg) => console.info(msg),
    error: (msg) => console.error(msg),
    warn: (msg) => console.warn(msg)
  };
}

/**
 * Sends one message to `target[level]`, or to the matching console method
 * when the target does not implement that level.
 *
 * @param {*} target - Logger returned by `GlobalLogger#getLogger`.
 * @param {"log"|"info"|"error"|"warn"} level - Method name to invoke.
 * @param {*} msg - Message forwarded unchanged.
 */
function emitLogMessage(target, level, msg) {
  if (target && typeof target[level] === "function") {
    // Invoke as a method so loggers that rely on `this` keep working.
    target[level](msg);
    return;
  }
  // A partial custom logger (e.g. only `log`) must not turn a diagnostic
  // message into a TypeError; fall back to the console for missing levels.
  console[level](msg);
}

/**
 * Logging facade with a single shared instance. Messages go to the logger
 * installed through `setLogger`, or to the console when none is installed.
 */
class GlobalLogger {
  constructor() {
    this.customLogger = null;
  }

  /**
   * Returns the shared GlobalLogger, creating it on first use.
   *
   * @returns {GlobalLogger}
   */
  static getInstance() {
    if (!GlobalLogger.instance) {
      GlobalLogger.instance = new GlobalLogger();
    }
    return GlobalLogger.instance;
  }

  /**
   * Installs a custom logger. Passing a falsy value restores console output.
   *
   * @param {object|null} logger2 - Custom logger instance.
   */
  setLogger(logger2) {
    this.customLogger = logger2;
  }

  /**
   * Returns the logger currently in use: the custom logger when one is set,
   * otherwise a console-backed logger.
   *
   * @returns {object}
   */
  getLogger() {
    return this.customLogger || createConsoleLogger();
  }

  /** Writes a general log message. */
  log(msg) {
    emitLogMessage(this.getLogger(), "log", msg);
  }

  /** Writes an informational message. */
  info(msg) {
    emitLogMessage(this.getLogger(), "info", msg);
  }

  /** Writes an error message. */
  error(msg) {
    emitLogMessage(this.getLogger(), "error", msg);
  }

  /** Writes a warning message. */
  warn(msg) {
    emitLogMessage(this.getLogger(), "warn", msg);
  }
}
97
+ const logger = GlobalLogger.getInstance();
20
98
  function safeFineName(name) {
21
99
  return name.replace(/[/\\?%*:|"<>\t\r\n]/g, "_");
22
100
  }
23
101
  const mimeModule$1 = require("mime/lite");
24
102
  const mime$1 = mimeModule$1.default || mimeModule$1;
25
- function parseContent(content, index, epubConfigs) {
26
- let chapter = { ...content };
103
+ const ALLOWED_ATTRIBUTES = [
104
+ "about",
105
+ "accesskey",
106
+ "alt",
107
+ "aria-activedescendant",
108
+ "aria-atomic",
109
+ "aria-autocomplete",
110
+ "aria-busy",
111
+ "aria-checked",
112
+ "aria-controls",
113
+ "aria-describedat",
114
+ "aria-describedby",
115
+ "aria-disabled",
116
+ "aria-dropeffect",
117
+ "aria-expanded",
118
+ "aria-flowto",
119
+ "aria-grabbed",
120
+ "aria-haspopup",
121
+ "aria-hidden",
122
+ "aria-invalid",
123
+ "aria-label",
124
+ "aria-labelledby",
125
+ "aria-level",
126
+ "aria-live",
127
+ "aria-multiline",
128
+ "aria-multiselectable",
129
+ "aria-orientation",
130
+ "aria-owns",
131
+ "aria-posinset",
132
+ "aria-pressed",
133
+ "aria-readonly",
134
+ "aria-relevant",
135
+ "aria-required",
136
+ "aria-selected",
137
+ "aria-setsize",
138
+ "aria-sort",
139
+ "aria-valuemax",
140
+ "aria-valuemin",
141
+ "aria-valuenow",
142
+ "aria-valuetext",
143
+ "class",
144
+ "colspan",
145
+ "content",
146
+ // duplicate entry removed
147
+ "contenteditable",
148
+ "contextmenu",
149
+ "datatype",
150
+ "dir",
151
+ "draggable",
152
+ "dropzone",
153
+ "epub:prefix",
154
+ "epub:type",
155
+ "hidden",
156
+ "href",
157
+ "hreflang",
158
+ "id",
159
+ // duplicate entry removed
160
+ "inlist",
161
+ "itemid",
162
+ "itemref",
163
+ "itemscope",
164
+ "itemtype",
165
+ "lang",
166
+ "media",
167
+ "ns1:type",
168
+ "ns2:alphabet",
169
+ "ns2:ph",
170
+ "onabort",
171
+ "onblur",
172
+ "oncanplay",
173
+ "oncanplaythrough",
174
+ "onchange",
175
+ "onclick",
176
+ "oncontextmenu",
177
+ "ondblclick",
178
+ "ondrag",
179
+ "ondragend",
180
+ "ondragenter",
181
+ "ondragleave",
182
+ "ondragover",
183
+ "ondragstart",
184
+ "ondrop",
185
+ "ondurationchange",
186
+ "onemptied",
187
+ "onended",
188
+ "onerror",
189
+ "onfocus",
190
+ "oninput",
191
+ "oninvalid",
192
+ "onkeydown",
193
+ "onkeypress",
194
+ "onkeyup",
195
+ "onload",
196
+ "onloadeddata",
197
+ "onloadedmetadata",
198
+ "onloadstart",
199
+ "onmousedown",
200
+ "onmousemove",
201
+ "onmouseout",
202
+ "onmouseover",
203
+ "onmouseup",
204
+ "onmousewheel",
205
+ "onpause",
206
+ "onplay",
207
+ "onplaying",
208
+ "onprogress",
209
+ "onratechange",
210
+ "onreadystatechange",
211
+ "onreset",
212
+ "onscroll",
213
+ "onseeked",
214
+ "onseeking",
215
+ "onselect",
216
+ "onshow",
217
+ "onstalled",
218
+ "onsubmit",
219
+ "onsuspend",
220
+ "ontimeupdate",
221
+ "onvolumechange",
222
+ "onwaiting",
223
+ "prefix",
224
+ "property",
225
+ "rel",
226
+ "resource",
227
+ "rev",
228
+ "role",
229
+ "rowspan",
230
+ "spellcheck",
231
+ "src",
232
+ "style",
233
+ "tabindex",
234
+ "target",
235
+ "title",
236
+ // duplicate entry removed
237
+ "type",
238
+ "typeof",
239
+ "vocab",
240
+ "xml:base",
241
+ "xml:lang",
242
+ "xml:space"
243
+ ];
244
+ const ALLOWED_XHTML11_TAGS = [
245
+ "a",
246
+ "abbr",
247
+ "acronym",
248
+ "address",
249
+ "applet",
250
+ "b",
251
+ "bar",
252
+ "basefont",
253
+ "bdo",
254
+ "big",
255
+ "blockquote",
256
+ "br",
257
+ "caption",
258
+ "center",
259
+ "cite",
260
+ "code",
261
+ "col",
262
+ "colgroup",
263
+ "dd",
264
+ "del",
265
+ "dfn",
266
+ "div",
267
+ "dl",
268
+ "dt",
269
+ "em",
270
+ "embed",
271
+ "font",
272
+ "h1",
273
+ "h2",
274
+ "h3",
275
+ "h4",
276
+ "h5",
277
+ "h6",
278
+ "hr",
279
+ "i",
280
+ "iframe",
281
+ "img",
282
+ "ins",
283
+ "kbd",
284
+ "li",
285
+ "map",
286
+ "noscript",
287
+ "ns:svg",
288
+ "object",
289
+ // duplicate entry removed
290
+ "ol",
291
+ "p",
292
+ "param",
293
+ "pre",
294
+ "q",
295
+ "s",
296
+ "samp",
297
+ "script",
298
+ "small",
299
+ "span",
300
+ "strike",
301
+ "strong",
302
+ "sub",
303
+ "sup",
304
+ "table",
305
+ "tbody",
306
+ "td",
307
+ "tfoot",
308
+ "th",
309
+ "thead",
310
+ "tr",
311
+ "tt",
312
+ "u",
313
+ "ul",
314
+ "var"
315
+ ];
316
+ const ALLOWED_ATTRIBUTES_SET = new Set(ALLOWED_ATTRIBUTES);
317
+ const ALLOWED_XHTML11_TAGS_SET = new Set(ALLOWED_XHTML11_TAGS);
318
+ const SELF_CLOSING_TAGS = /* @__PURE__ */ new Set(["img", "br", "hr"]);
319
+ function initializeChapterInfo(content, index, epubConfigs) {
320
+ const chapter = { ...content };
27
321
  let { filename } = chapter;
28
322
  if (!filename) {
29
- let titleSlug = uslug(remove(chapter.title || "no title"));
30
- titleSlug = titleSlug.replace(/[\/\\]/g, "_");
323
+ let titleSlug = uslug(diacritics.remove(chapter.title || "no title"));
324
+ titleSlug = titleSlug.replace(/[/\\]/g, "_");
31
325
  chapter.href = `${index}_${titleSlug}.xhtml`;
32
326
  chapter.filePath = path.join(epubConfigs.dir, "OEBPS", chapter.href);
33
327
  } else {
34
328
  filename = safeFineName(filename);
35
- let is_xhtml = filename.endsWith(".xhtml");
329
+ const is_xhtml = filename.endsWith(".xhtml");
36
330
  chapter.href = is_xhtml ? filename : `${filename}.xhtml`;
37
331
  if (is_xhtml) {
38
332
  chapter.filePath = path.join(epubConfigs.dir, "OEBPS", filename);
@@ -44,313 +338,257 @@ function parseContent(content, index, epubConfigs) {
44
338
  chapter.dir = path.dirname(chapter.filePath);
45
339
  chapter.excludeFromToc = chapter.excludeFromToc || false;
46
340
  chapter.beforeToc = chapter.beforeToc || false;
341
+ return chapter;
342
+ }
343
+ function normalizeAuthorInfo(chapter) {
47
344
  if (chapter.author && typeof chapter.author === "string") {
48
345
  chapter.author = [chapter.author];
49
346
  } else if (!chapter.author || !Array.isArray(chapter.author)) {
50
347
  chapter.author = [];
51
348
  }
52
- let allowedAttributes = [
53
- "content",
54
- "alt",
55
- "id",
56
- "title",
57
- "src",
58
- "href",
59
- "about",
60
- "accesskey",
61
- "aria-activedescendant",
62
- "aria-atomic",
63
- "aria-autocomplete",
64
- "aria-busy",
65
- "aria-checked",
66
- "aria-controls",
67
- "aria-describedat",
68
- "aria-describedby",
69
- "aria-disabled",
70
- "aria-dropeffect",
71
- "aria-expanded",
72
- "aria-flowto",
73
- "aria-grabbed",
74
- "aria-haspopup",
75
- "aria-hidden",
76
- "aria-invalid",
77
- "aria-label",
78
- "aria-labelledby",
79
- "aria-level",
80
- "aria-live",
81
- "aria-multiline",
82
- "aria-multiselectable",
83
- "aria-orientation",
84
- "aria-owns",
85
- "aria-posinset",
86
- "aria-pressed",
87
- "aria-readonly",
88
- "aria-relevant",
89
- "aria-required",
90
- "aria-selected",
91
- "aria-setsize",
92
- "aria-sort",
93
- "aria-valuemax",
94
- "aria-valuemin",
95
- "aria-valuenow",
96
- "aria-valuetext",
97
- "class",
98
- "content",
99
- "contenteditable",
100
- "contextmenu",
101
- "datatype",
102
- "dir",
103
- "draggable",
104
- "dropzone",
105
- "hidden",
106
- "hreflang",
107
- "id",
108
- "inlist",
109
- "itemid",
110
- "itemref",
111
- "itemscope",
112
- "itemtype",
113
- "lang",
114
- "media",
115
- "ns1:type",
116
- "ns2:alphabet",
117
- "ns2:ph",
118
- "onabort",
119
- "onblur",
120
- "oncanplay",
121
- "oncanplaythrough",
122
- "onchange",
123
- "onclick",
124
- "oncontextmenu",
125
- "ondblclick",
126
- "ondrag",
127
- "ondragend",
128
- "ondragenter",
129
- "ondragleave",
130
- "ondragover",
131
- "ondragstart",
132
- "ondrop",
133
- "ondurationchange",
134
- "onemptied",
135
- "onended",
136
- "onerror",
137
- "onfocus",
138
- "oninput",
139
- "oninvalid",
140
- "onkeydown",
141
- "onkeypress",
142
- "onkeyup",
143
- "onload",
144
- "onloadeddata",
145
- "onloadedmetadata",
146
- "onloadstart",
147
- "onmousedown",
148
- "onmousemove",
149
- "onmouseout",
150
- "onmouseover",
151
- "onmouseup",
152
- "onmousewheel",
153
- "onpause",
154
- "onplay",
155
- "onplaying",
156
- "onprogress",
157
- "onratechange",
158
- "onreadystatechange",
159
- "onreset",
160
- "onscroll",
161
- "onseeked",
162
- "onseeking",
163
- "onselect",
164
- "onshow",
165
- "onstalled",
166
- "onsubmit",
167
- "onsuspend",
168
- "ontimeupdate",
169
- "onvolumechange",
170
- "onwaiting",
171
- "prefix",
172
- "property",
173
- "rel",
174
- "resource",
175
- "rev",
176
- "role",
177
- "spellcheck",
178
- "style",
179
- "tabindex",
180
- "target",
181
- "title",
182
- "type",
183
- "typeof",
184
- "vocab",
185
- "xml:base",
186
- "xml:lang",
187
- "xml:space",
188
- "colspan",
189
- "rowspan",
190
- "epub:type",
191
- "epub:prefix"
192
- ];
193
- let allowedXhtml11Tags = [
194
- "div",
195
- "p",
196
- "h1",
197
- "h2",
198
- "h3",
199
- "h4",
200
- "h5",
201
- "h6",
202
- "ul",
203
- "ol",
204
- "li",
205
- "dl",
206
- "dt",
207
- "dd",
208
- "address",
209
- "hr",
210
- "pre",
211
- "blockquote",
212
- "center",
213
- "ins",
214
- "del",
215
- "a",
216
- "span",
217
- "bdo",
218
- "br",
219
- "em",
220
- "strong",
221
- "dfn",
222
- "code",
223
- "samp",
224
- "kbd",
225
- "bar",
226
- "cite",
227
- "abbr",
228
- "acronym",
229
- "q",
230
- "sub",
231
- "sup",
232
- "tt",
233
- "i",
234
- "b",
235
- "big",
236
- "small",
237
- "u",
238
- "s",
239
- "strike",
240
- "basefont",
241
- "font",
242
- "object",
243
- "param",
244
- "img",
245
- "table",
246
- "caption",
247
- "colgroup",
248
- "col",
249
- "thead",
250
- "tfoot",
251
- "tbody",
252
- "tr",
253
- "th",
254
- "td",
255
- "embed",
256
- "applet",
257
- "iframe",
258
- "img",
259
- "map",
260
- "noscript",
261
- "ns:svg",
262
- "object",
263
- "script",
264
- "table",
265
- "tt",
266
- "var"
267
- ];
268
- let $ = cheerio.load(chapter.data, {
269
- xml: {
270
- lowerCaseTags: true,
271
- recognizeSelfClosing: true
272
- }
273
- });
274
- let body = $("body");
275
- if (body.length) {
276
- let html = body.html();
277
- if (html) {
278
- $ = cheerio.load(html, {
279
- xml: {
280
- lowerCaseTags: true,
281
- recognizeSelfClosing: true
282
- }
283
- });
349
+ }
350
/**
 * Returns the module-level attribute allow-list.
 *
 * @returns {string[]} The ALLOWED_ATTRIBUTES array itself (not a copy).
 */
function getAllowedAttributes() {
  const attributeAllowList = ALLOWED_ATTRIBUTES;
  return attributeAllowList;
}
353
/**
 * Returns the module-level tag allow-list.
 *
 * @returns {string[]} The ALLOWED_XHTML11_TAGS array itself (not a copy).
 */
function getAllowedXhtml11Tags() {
  const tagAllowList = ALLOWED_XHTML11_TAGS;
  return tagAllowList;
}
356
/**
 * Builds a fresh options object for each cheerio load so the two loads below
 * never share (and cannot cross-contaminate) the same object.
 */
function createXmlParserOptions() {
  return {
    xml: {
      lowerCaseTags: true,
      recognizeSelfClosing: true
    }
  };
}

/**
 * Parses chapter markup with cheerio using the `xml` parser options.
 *
 * The input is trimmed and loaded once. If the result contains a `body`
 * element with non-empty inner markup, that inner markup is loaded again and
 * the second document is returned; otherwise the first document is returned.
 *
 * @param {string} data - Chapter markup.
 * @returns {Function} The cheerio root function for the parsed content.
 * @throws {Error} When `data` is not a non-empty string, is whitespace only,
 *   or when loading fails. In the last case the original exception is
 *   attached as `cause`.
 */
function loadAndProcessHtml(data) {
  if (!data || typeof data !== "string") {
    throw new Error("Invalid HTML data: data must be a non-empty string");
  }
  const trimmedData = data.trim();
  if (trimmedData.length === 0) {
    throw new Error("Invalid HTML data: data cannot be empty or whitespace only");
  }
  try {
    const $document = cheerio__namespace.load(trimmedData, createXmlParserOptions());
    const body = $document("body");
    const bodyHtml = body.length ? body.html() : null;
    if (!bodyHtml) {
      return $document;
    }
    return cheerio__namespace.load(bodyHtml, createXmlParserOptions());
  } catch (error) {
    const reason = error instanceof Error ? error.message : "Unknown error";
    // Keep the original exception reachable; the message alone loses its
    // type and stack.
    throw new Error(`Failed to parse HTML content: ${reason}`, { cause: error });
  }
}
390
/**
 * Picks the Set to test names against for one sanitizing pass.
 *
 * @param {*} list - Allow-list supplied by the caller (array or Set).
 * @param {string[]} defaultList - Module-level allow-list array.
 * @param {Set<string>} defaultSet - Set prebuilt from `defaultList`.
 * @returns {Set<string>} `defaultSet` when the caller passed the module list
 *   (or nothing usable); otherwise a Set of the caller's entries.
 */
function resolveAllowSet(list, defaultList, defaultSet) {
  if (list instanceof Set) {
    return list;
  }
  if (!Array.isArray(list) || list === defaultList) {
    // Reuse the prebuilt Set on the common path, not a new one per chapter.
    return defaultSet;
  }
  return new Set(list);
}

/**
 * Sanitizes every element of a parsed chapter in place.
 *
 * Elements are visited in reverse of the order returned by `$("*")`. For each
 * element:
 *  - an `img` without a truthy `alt` gets `alt="image-placeholder"`;
 *  - attributes outside the allow-list are removed, and `type` is removed
 *    from every element except `script`;
 *  - when `epubConfigs.version === 2`, an element whose tag is outside the
 *    tag allow-list is replaced by a `<div>` wrapping its inner markup
 *    (with a warning when `epubConfigs.verbose` is set).
 *
 * @param {Function} $ - Cheerio root function for the chapter.
 * @param {string[]|Set<string>} allowedAttributes - Attribute allow-list;
 *   falls back to the module-level list when not an array or Set.
 * @param {string[]|Set<string>} allowedXhtml11Tags - Tag allow-list; same
 *   fallback rule.
 * @param {{version: number, verbose?: boolean}} epubConfigs - EPUB options.
 * @param {number|string} index - Chapter index, used only in the warning.
 */
function processHtmlElements($, allowedAttributes, allowedXhtml11Tags, epubConfigs, index) {
  const allowedAttrsSet = resolveAllowSet(allowedAttributes, ALLOWED_ATTRIBUTES, ALLOWED_ATTRIBUTES_SET);
  const allowedTagsSet = resolveAllowSet(allowedXhtml11Tags, ALLOWED_XHTML11_TAGS, ALLOWED_XHTML11_TAGS_SET);

  $($("*").get().reverse()).each((elemIndex, elem) => {
    const attrs = elem.attribs || {};
    const $elem = $(elem);
    const tagName = elem.name;

    if (tagName === "img" && !$elem.attr("alt")) {
      $elem.attr("alt", "image-placeholder");
    }

    // Collect first, remove afterwards, so the attribute map is not modified
    // while its keys are being read.
    const attrsToRemove = Object.keys(attrs).filter(
      (attrName) => !allowedAttrsSet.has(attrName) || (attrName === "type" && tagName !== "script")
    );
    for (const attrName of attrsToRemove) {
      $elem.removeAttr(attrName);
    }

    if (epubConfigs.version === 2 && !allowedTagsSet.has(tagName)) {
      if (epubConfigs.verbose) {
        logger.warn(
          `Warning (content[${index}]): ${tagName} tag isn't allowed on EPUB 2/XHTML 1.1 DTD.`
        );
      }
      const child = $elem.html();
      $elem.replaceWith($(`<div>${child}</div>`));
    }
  });
}
429
/**
 * Source prefixes that do not trigger the "appears to be invalid" warning:
 * http(s), file:// (handled as a local copy by downloadImage), data:, and
 * paths starting with "/", "./" or "../".
 */
const RECOGNIZED_IMAGE_SOURCE_PREFIX = /^(https?:\/\/|file:\/\/|data:|\.\.?\/|\/)/;

/** Media types inferred from a URL's trailing extension when the MIME lookup fails. */
const INFERRED_IMAGE_MEDIA_TYPES = new Map([
  ["jpg", "image/jpeg"],
  ["jpeg", "image/jpeg"],
  ["png", "image/png"],
  ["gif", "image/gif"],
  ["webp", "image/webp"],
  // The registered SVG type is "image/svg+xml"; "image/svg" is not a media type.
  ["svg", "image/svg+xml"]
]);

/**
 * Determines the media type for an image URL: MIME lookup on the URL without
 * its query string, then inference from the trailing extension, then
 * "image/jpeg" as the last resort. Never throws; failures are logged.
 *
 * @param {string} trimmedUrl - Trimmed `src` value.
 * @returns {string} Media type to record for the image.
 */
function resolveImageMediaType(trimmedUrl) {
  try {
    const cleanUrl = trimmedUrl.replace(/\?.*/, "");
    const detected = mime$1.getType(cleanUrl) || "";
    if (detected) {
      return detected;
    }
    const urlExtension = cleanUrl.split(".").pop().toLowerCase();
    const inferred = INFERRED_IMAGE_MEDIA_TYPES.get(urlExtension);
    if (inferred) {
      logger.warn(`Could not determine MIME type for "${trimmedUrl}", inferred as "${inferred}"`);
      return inferred;
    }
    logger.warn(`Could not determine MIME type for "${trimmedUrl}", defaulting to image/jpeg`);
    return "image/jpeg";
  } catch (error) {
    logger.error(`Error determining MIME type for "${trimmedUrl}": ${error}`);
    return "image/jpeg";
  }
}

/**
 * Rewrites every `img` in the chapter to point at `images/<id>.<extension>`
 * and records images not seen before in `epubConfigs.images`.
 *
 * An `img` whose `src` is missing or blank is removed. A source that is
 * already in `epubConfigs.images` (matched on the trimmed URL) reuses that
 * entry's id and extension; otherwise a new entry with a fresh UUID is pushed.
 *
 * @param {Function} $ - Cheerio root function for the chapter.
 * @param {{dir?: string}} chapter - Chapter whose `dir` is stored on new entries.
 * @param {{images: object[], verbose?: boolean}} epubConfigs - EPUB options;
 *   `images` is appended to in place.
 */
function processImages($, chapter, epubConfigs) {
  $("img").each((index, elem) => {
    const $img = $(elem);
    const trimmedUrl = ($img.attr("src") || "").trim();
    if (!trimmedUrl) {
      logger.warn(`Image at index ${index} in chapter has empty src attribute, removing element`);
      $img.remove();
      return;
    }

    // Advisory only: the image is processed whatever the outcome.
    if (!RECOGNIZED_IMAGE_SOURCE_PREFIX.test(trimmedUrl)) {
      logger.warn(`Image URL "${trimmedUrl}" appears to be invalid, but processing anyway`);
    }

    const image = epubConfigs.images.find((el) => el.url === trimmedUrl);
    let id;
    let extension;
    if (image) {
      id = image.id;
      extension = image.extension;
    } else {
      id = uuid.v4();
      const mediaType = resolveImageMediaType(trimmedUrl);
      try {
        extension = mime$1.getExtension(mediaType) || "jpg";
      } catch (error) {
        logger.error(`Error getting extension for MIME type "${mediaType}": ${error}`);
        extension = "jpg";
      }
      const dir = chapter.dir || "";
      epubConfigs.images.push({ id, url: trimmedUrl, dir, mediaType, extension });
      if (epubConfigs.verbose) {
        logger.info(`Added image: ${trimmedUrl} -> images/${id}.${extension} (${mediaType})`);
      }
    }

    try {
      $img.attr("src", `images/${id}.${extension}`);
    } catch (error) {
      logger.error(`Error setting src attribute for image ${id}: ${error}`);
      $img.remove();
    }
  });
}
496
/** Tag names that are rewritten to the self-closing `<tag .../>` form. */
const VOID_ELEMENT_NAMES = "br|hr|img|input|meta|area|base|col|embed|link|source|track|wbr";

// `(?=[\s/>])` pins the end of the tag name. Without it `<colgroup>` matches
// as `col` followed by "group" and is wrongly rewritten to `<colgroup/>`.
const EMPTY_VOID_PAIR = new RegExp(`<(${VOID_ELEMENT_NAMES})(?=[\\s/>])([^>]*?)><\\/\\1>`, "gi");
const UNCLOSED_VOID_TAG = new RegExp(`<(${VOID_ELEMENT_NAMES})(?=[\\s/>])([^>]*?)(?<!\\/)>`, "gi");

/** Named (`&nbsp;`), decimal (`&#169;`) and hexadecimal (`&#xA9;`) entity references. */
const ENTITY_REFERENCE = /&[a-zA-Z][a-zA-Z0-9]*;|&#[0-9]+;|&#x[0-9a-fA-F]+;/g;

/**
 * Rewrites `<br></br>`-style pairs and bare `<br>`-style tags of the listed
 * void elements to `<br/>`; tags already ending in `/>` are left untouched.
 */
function selfCloseVoidElements(html) {
  return html.replace(EMPTY_VOID_PAIR, "<$1$2/>").replace(UNCLOSED_VOID_TAG, "<$1$2/>");
}

/** Serializes the inner markup of `body` when present, else of the document root. */
function serializeInnerMarkup($) {
  const body = $("body");
  return (body.length ? body.html() : $.root().html()) || "";
}

/**
 * Produces the final chapter markup with void elements in self-closing form.
 *
 * Without `originalData`, the markup is serialized from `$`. With
 * `originalData`, every entity reference in it is swapped for a unique
 * placeholder, the text is loaded and serialized by cheerio in HTML mode, and
 * the placeholders are swapped back so the entities come out as written.
 *
 * NOTE(review): when `originalData` is non-empty the result is rebuilt from
 * `originalData` alone and `$` is never read, so changes the caller made
 * through `$` are not reflected in the returned markup. Confirm this is
 * intended.
 *
 * @param {Function} $ - Cheerio root function for the processed chapter.
 * @param {string} [originalData] - Chapter markup as originally supplied.
 * @returns {string} Markup for the chapter body.
 */
function extractAndCleanHtmlContent($, originalData) {
  if (!originalData) {
    return selfCloseVoidElements(serializeInnerMarkup($));
  }

  // Timestamp plus random suffix keeps the placeholder from clashing with
  // text in the chapter; the trailing "__" stops index 1 matching inside 10.
  const timestamp = Date.now();
  const randomId = Math.random().toString(36).substring(2, 8);
  const placeholderPrefix = `__ENTITY_${timestamp}_${randomId}_`;

  const protectedEntities = [];
  const processedOriginal = originalData.replace(ENTITY_REFERENCE, (entity) => {
    const placeholder = `${placeholderPrefix}${protectedEntities.length}__`;
    protectedEntities.push(entity);
    return placeholder;
  });

  const $temp = cheerio__namespace.load(processedOriginal, {
    xmlMode: false
  });

  // Restore all placeholders in one pass via a replacer function, so the
  // entity text is never interpreted as a replacement pattern.
  const restored = serializeInnerMarkup($temp).replace(
    new RegExp(`${placeholderPrefix}(\\d+)__`, "g"),
    (placeholder, position) => protectedEntities[Number(position)]
  );
  return selfCloseVoidElements(restored);
}
545
/**
 * Parses the nested chapters of `chapter`, if any, replacing
 * `chapter.children` with the parsed results.
 *
 * Each child is parsed with the index `"<index>_<position>"`, where
 * `position` is the child's position in the array. Nothing happens when
 * `chapter.children` is not an array.
 *
 * @param {{children?: object[]}} chapter - Chapter to update in place.
 * @param {number|string} index - Index of `chapter`, used as the prefix.
 * @param {object} epubConfigs - EPUB options forwarded to `parseContent`.
 */
function processChildrenChapters(chapter, index, epubConfigs) {
  if (!Array.isArray(chapter.children)) {
    return;
  }
  const parseChild = (child, position) => parseContent(child, `${index}_${position}`, epubConfigs);
  chapter.children = chapter.children.map(parseChild);
}
552
/**
 * Turns one content entry into a chapter ready to be written.
 *
 * Steps: derive chapter info, normalize the author field, then (for non-blank
 * data) parse the markup, sanitize elements, register images, and serialize
 * the result into `chapter.data`. Child chapters are parsed recursively.
 *
 * The `content` object passed in is not modified; a missing `data` field is
 * defaulted on a shallow copy.
 *
 * @param {object} content - Content entry; `data` holds the chapter markup.
 * @param {number|string} index - Position of the entry, used for naming and
 *   in log messages.
 * @param {object} epubConfigs - EPUB options shared across chapters.
 * @returns {object} The chapter built from `content`.
 * @throws {Error} When `content` is null or undefined.
 */
function parseContent(content, index, epubConfigs) {
  if (!content) {
    throw new Error("Content cannot be null or undefined");
  }

  // Default missing data on a copy so the caller's object is left untouched.
  let source = content;
  if (!content.data) {
    logger.warn(`Chapter at index ${index} has no data, using empty string`);
    source = { ...content, data: "" };
  }

  const chapter = initializeChapterInfo(source, index, epubConfigs);
  normalizeAuthorInfo(chapter);
  const allowedAttributes = getAllowedAttributes();
  const allowedXhtml11Tags = getAllowedXhtml11Tags();

  if (!chapter.data || chapter.data.trim().length === 0) {
    logger.warn(`Chapter at index ${index} has empty data, setting empty content`);
    chapter.data = "";
  } else {
    let $;
    try {
      $ = loadAndProcessHtml(chapter.data);
    } catch (error) {
      logger.error(`Failed to process HTML for chapter ${index}: ${error}`);
      // Best effort: retry with the markup wrapped in a <div> rather than
      // failing the whole book.
      $ = cheerio__namespace.load(`<div>${chapter.data}</div>`);
    }
    processHtmlElements($, allowedAttributes, allowedXhtml11Tags, epubConfigs, index);
    processImages($, chapter, epubConfigs);
    chapter.data = extractAndCleanHtmlContent($, source.data);
  }

  processChildrenChapters(chapter, index, epubConfigs);
  return chapter;
}
344
- promisify(fs.readFile);
345
- const writeFile = promisify(fs.writeFile);
582
+ util.promisify(fs.readFile);
583
+ const writeFile = util.promisify(fs.writeFile);
346
584
  const USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36";
347
585
  const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
348
586
  async function fileIsStable(filename, max_wait = 3e4) {
349
- let start_time = (/* @__PURE__ */ new Date()).getTime();
587
+ const start_time = (/* @__PURE__ */ new Date()).getTime();
350
588
  let last_size = fs.statSync(filename).size;
351
589
  while ((/* @__PURE__ */ new Date()).getTime() - start_time <= max_wait) {
352
590
  await wait(1e3);
353
- let size = fs.statSync(filename).size;
591
+ const size = fs.statSync(filename).size;
354
592
  if (size === last_size) return true;
355
593
  last_size = size;
356
594
  }
@@ -361,20 +599,20 @@ function simpleMinifier(xhtml) {
361
599
  return xhtml;
362
600
  }
363
601
  const downloadImage = async (epubData, options) => {
364
- let { url } = options;
365
- let { log } = epubData;
366
- let epub_dir = epubData.dir;
367
- if (!url) {
602
+ const { url: url2 } = options;
603
+ const { log } = epubData;
604
+ const epub_dir = epubData.dir;
605
+ if (!url2) {
368
606
  return;
369
607
  }
370
- let image_dir = path.join(epub_dir, "OEBPS", "images");
608
+ const image_dir = path.join(epub_dir, "OEBPS", "images");
371
609
  fs$1.ensureDirSync(image_dir);
372
- let filename = path.join(image_dir, options.id + "." + options.extension);
373
- if (url.startsWith("file://") || url.startsWith("/")) {
374
- let aux_path = url.replace(/^file:\/\//i, "");
610
+ const filename = path.join(image_dir, options.id + "." + options.extension);
611
+ if (url2.startsWith("file://") || url2.startsWith("/")) {
612
+ let aux_path = url2.replace(/^file:\/\//i, "");
375
613
  try {
376
614
  aux_path = decodeURIComponent(aux_path);
377
- } catch (e) {
615
+ } catch {
378
616
  log(`[URL Decode Warning] Failed to decode path: ${aux_path}`);
379
617
  }
380
618
  if (process.platform === "win32") {
@@ -390,37 +628,37 @@ const downloadImage = async (epubData, options) => {
390
628
  log("[Copy 1 Error] " + e.message);
391
629
  }
392
630
  } else {
393
- log(`[Copy 1 Fail] '${url}' not exists!`);
631
+ log(`[Copy 1 Fail] '${url2}' not exists!`);
394
632
  }
395
633
  return;
396
634
  }
397
635
  let requestAction;
398
- if (url.startsWith("http")) {
399
- requestAction = request.get(url).set({ "User-Agent": USER_AGENT });
636
+ if (url2.startsWith("http")) {
637
+ requestAction = request.get(url2).set({ "User-Agent": USER_AGENT });
400
638
  requestAction.pipe(fs$1.createWriteStream(filename));
401
639
  } else {
402
- log(`[Copy 2] '${url}' to '${filename}'`);
403
- requestAction = fs$1.createReadStream(path.join(options.dir || "", url));
640
+ log(`[Copy 2] '${url2}' to '${filename}'`);
641
+ requestAction = fs$1.createReadStream(path.join(options.dir || "", url2));
404
642
  requestAction.pipe(fs$1.createWriteStream(filename));
405
643
  }
406
- return new Promise((resolve, reject) => {
644
+ return new Promise((resolve, _reject) => {
407
645
  requestAction.on("error", (err) => {
408
- log("[Download Error] Error while downloading: " + url);
646
+ log("[Download Error] Error while downloading: " + url2);
409
647
  log(err);
410
648
  fs$1.unlinkSync(filename);
411
649
  resolve();
412
650
  });
413
651
  requestAction.on("end", () => {
414
- log("[Download Success] " + url);
652
+ log("[Download Success] " + url2);
415
653
  resolve();
416
654
  });
417
655
  });
418
656
  };
419
657
  const downloadAllImages = async (epubData) => {
420
- let { images } = epubData;
658
+ const { images } = epubData;
421
659
  if (images.length === 0) return;
422
660
  fs$1.ensureDirSync(path.join(epubData.dir, "OEBPS", "images"));
423
- for (let image of images) {
661
+ for (const image of images) {
424
662
  await downloadImage(epubData, image);
425
663
  }
426
664
  };
@@ -594,7 +832,7 @@ const epub3_content_opf_ejs = `<?xml version="1.0" encoding="UTF-8"?>
594
832
  </package>
595
833
  `;
596
834
  const epub3_toc_xhtml_ejs = `<?xml version="1.0" encoding="UTF-8"?>
597
- <!DOCTYPE html>
835
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
598
836
  <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="<%- lang %>"
599
837
  lang="<%- lang %>">
600
838
  <head>
@@ -726,16 +964,16 @@ const toc_ncx_ejs = `<?xml version="1.0" encoding="UTF-8"?>
726
964
  </ncx>
727
965
  `;
728
966
  const generateTempFile = async (epubData) => {
729
- let { log } = epubData;
730
- let oebps_dir = path.join(epubData.dir, "OEBPS");
967
+ const { log } = epubData;
968
+ const oebps_dir = path.join(epubData.dir, "OEBPS");
731
969
  await fs$1.ensureDir(oebps_dir);
732
970
  epubData.css = epubData.css || template_css;
733
971
  await writeFile(path.join(oebps_dir, "style.css"), epubData.css, "utf-8");
734
972
  if (epubData.fonts?.length) {
735
- let fonts_dir = path.join(oebps_dir, "fonts");
973
+ const fonts_dir = path.join(oebps_dir, "fonts");
736
974
  await fs$1.ensureDir(fonts_dir);
737
975
  epubData.fonts = epubData.fonts.map((font) => {
738
- let filename = path.basename(font);
976
+ const filename = path.basename(font);
739
977
  if (!fs$1.existsSync(font)) {
740
978
  log(`Custom font not found at '${font}'.`);
741
979
  } else {
@@ -749,7 +987,7 @@ const generateTempFile = async (epubData) => {
749
987
  return !!global_append;
750
988
  };
751
989
  const saveContentToFile = (content) => {
752
- let title = entities.encodeXML(content.title || "");
990
+ const title = entities__namespace.encodeXML(content.title || "");
753
991
  let html = `${epubData.docHeader}
754
992
  <head>
755
993
  <meta charset="UTF-8" />
@@ -761,7 +999,7 @@ const generateTempFile = async (epubData) => {
761
999
  if (content.title && isAppendTitle(epubData.appendChapterTitles, content.appendChapterTitle)) {
762
1000
  html += `<h1>${title}</h1>`;
763
1001
  }
764
- html += content.title && content.author && content.author?.length ? `<p class='epub-author'>${entities.encodeXML(content.author.join(", "))}</p>` : "";
1002
+ html += content.title && content.author && content.author?.length ? `<p class='epub-author'>${entities__namespace.encodeXML(content.author.join(", "))}</p>` : "";
765
1003
  html += content.title && content.url ? `<p class="epub-link"><a href="${content.url}">${content.url}</a></p>` : "";
766
1004
  html += `${content.data}`;
767
1005
  html += "\n</body>\n</html>";
@@ -772,7 +1010,7 @@ const generateTempFile = async (epubData) => {
772
1010
  }
773
1011
  };
774
1012
  epubData.content.map(saveContentToFile);
775
- let metainf_dir = path.join(epubData.dir, "META-INF");
1013
+ const metainf_dir = path.join(epubData.dir, "META-INF");
776
1014
  fs$1.ensureDirSync(metainf_dir);
777
1015
  fs$1.writeFileSync(
778
1016
  path.join(metainf_dir, "container.xml"),
@@ -783,7 +1021,7 @@ const generateTempFile = async (epubData) => {
783
1021
  "utf-8"
784
1022
  );
785
1023
  if (epubData.version === 2) {
786
- let fn = path.join(metainf_dir, "com.apple.ibooks.display-options.xml");
1024
+ const fn = path.join(metainf_dir, "com.apple.ibooks.display-options.xml");
787
1025
  fs$1.writeFileSync(
788
1026
  fn,
789
1027
  `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
@@ -817,7 +1055,7 @@ const generateTempFile = async (epubData) => {
817
1055
  } else {
818
1056
  htmlTocTemplate = epubData.version === 2 ? epub2_toc_xhtml_ejs : epub3_toc_xhtml_ejs;
819
1057
  }
820
- let toc_depth = 1;
1058
+ const toc_depth = 1;
821
1059
  fs$1.writeFileSync(path.join(oebps_dir, "content.opf"), ejs.render(opfTemplate, epubData), "utf-8");
822
1060
  fs$1.writeFileSync(
823
1061
  path.join(oebps_dir, "toc.ncx"),
@@ -831,9 +1069,9 @@ const generateTempFile = async (epubData) => {
831
1069
  );
832
1070
  };
833
1071
  async function makeCover(data) {
834
- let { cover, _coverExtension, log } = data;
1072
+ const { cover, _coverExtension, log } = data;
835
1073
  if (!cover) return;
836
- let destPath = path.join(data.dir, "OEBPS", `cover.${_coverExtension}`);
1074
+ const destPath = path.join(data.dir, "OEBPS", `cover.${_coverExtension}`);
837
1075
  let writeStream = null;
838
1076
  if (cover.startsWith("http")) {
839
1077
  writeStream = request.get(cover).set({ "User-Agent": USER_AGENT });
@@ -865,7 +1103,7 @@ async function makeCover(data) {
865
1103
  });
866
1104
  }
867
1105
  async function render(data) {
868
- let { log } = data;
1106
+ const { log } = data;
869
1107
  log("Generating Template Files...");
870
1108
  await generateTempFile(data);
871
1109
  log("Downloading Images...");
@@ -882,14 +1120,14 @@ async function render(data) {
882
1120
  }
883
1121
  }
884
1122
  async function genEpub(epubData) {
885
- let { log, dir, output } = epubData;
886
- let archive = archiver("zip", { zlib: { level: 9 } });
887
- let outputStream = fs$1.createWriteStream(epubData.output);
1123
+ const { log, dir, output } = epubData;
1124
+ const archive = archiver("zip", { zlib: { level: 9 } });
1125
+ const outputStream = fs$1.createWriteStream(epubData.output);
888
1126
  log("Zipping temp dir to " + output);
889
1127
  return new Promise((resolve, reject) => {
890
1128
  archive.on("end", async () => {
891
1129
  log("Done zipping, clearing temp dir...");
892
- let stable = await fileIsStable(epubData.output);
1130
+ const stable = await fileIsStable(epubData.output);
893
1131
  if (!stable) {
894
1132
  log("Output epub file is not stable!");
895
1133
  }
@@ -920,16 +1158,16 @@ async function genEpub(epubData) {
920
1158
  }
921
1159
  const mimeModule = require("mime/lite");
922
1160
  const mime = mimeModule.default || mimeModule;
923
- const __filename = fileURLToPath(import.meta.url);
924
- const __dirname = path.dirname(__filename);
925
- const baseDir = __dirname;
1161
+ const __filename$1 = url.fileURLToPath(typeof document === "undefined" ? require("url").pathToFileURL(__filename).href : _documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === "SCRIPT" && _documentCurrentScript.src || new URL("index.cjs", document.baseURI).href);
1162
+ const __dirname$1 = path.dirname(__filename$1);
1163
+ const baseDir = __dirname$1;
926
1164
  function result(success, message, options) {
927
1165
  if (options && options.verbose) {
928
1166
  if (!success) {
929
- console.error(new Error(message));
1167
+ logger.error(new Error(message));
930
1168
  }
931
1169
  }
932
- let out = {
1170
+ const out = {
933
1171
  success
934
1172
  };
935
1173
  if (typeof message === "string") {
@@ -953,9 +1191,9 @@ function check(options) {
953
1191
  return result(true, void 0, options);
954
1192
  }
955
1193
  function parseOptions(options) {
956
- let tmpDir = options.tmpDir || os.tmpdir();
957
- let id = v4();
958
- let data = {
1194
+ const tmpDir = options.tmpDir || os.tmpdir();
1195
+ const id = uuid.v4();
1196
+ const data = {
959
1197
  description: options.title,
960
1198
  publisher: "anonymous",
961
1199
  author: ["anonymous"],
@@ -977,7 +1215,7 @@ function parseOptions(options) {
977
1215
  docHeader: "",
978
1216
  images: [],
979
1217
  content: [],
980
- log: (msg) => options.verbose && console.log(msg)
1218
+ log: (msg) => options.verbose && logger.log(msg)
981
1219
  };
982
1220
  if (data.version === 2) {
983
1221
  data.docHeader = `<?xml version="1.0" encoding="UTF-8"?>
@@ -986,7 +1224,7 @@ function parseOptions(options) {
986
1224
  `;
987
1225
  } else {
988
1226
  data.docHeader = `<?xml version="1.0" encoding="UTF-8"?>
989
- <!DOCTYPE html>
1227
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
990
1228
  <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="#{self.options.lang}">
991
1229
  `;
992
1230
  }
@@ -1003,41 +1241,39 @@ function parseOptions(options) {
1003
1241
  }
1004
1242
  return data;
1005
1243
  }
1006
- async function epubGen(options, output) {
1007
- options = { ...options };
1008
- if (output) {
1009
- options.output = output;
1244
+ async function epubGen(options, configs) {
1245
+ if (configs?.logger) {
1246
+ logger.setLogger(configs.logger);
1010
1247
  }
1011
- let o = check(options);
1012
- let verbose = options.verbose !== false;
1248
+ options = { ...options };
1249
+ const o = check(options);
1250
+ const verbose = options.verbose !== false;
1013
1251
  if (!o.success) {
1014
- if (verbose) console.error(o.message);
1252
+ if (verbose) logger.error(o.message);
1015
1253
  return o;
1016
1254
  }
1017
1255
  let t;
1018
1256
  try {
1019
- let data = parseOptions(options);
1020
- let timeoutSeconds = data.timeoutSeconds || 0;
1257
+ const data = parseOptions(options);
1258
+ const timeoutSeconds = data.timeoutSeconds || 0;
1021
1259
  if (timeoutSeconds > 0) {
1022
- if (verbose) console.log(`TIMEOUT: ${timeoutSeconds}s`);
1260
+ if (verbose) logger.log(`TIMEOUT: ${timeoutSeconds}s`);
1023
1261
  t = setTimeout(() => {
1024
1262
  throw new Error("timeout!");
1025
1263
  }, timeoutSeconds * 1e3);
1026
1264
  } else {
1027
- if (verbose) console.log(`TIMEOUT: N/A`);
1265
+ if (verbose) logger.log(`TIMEOUT: N/A`);
1028
1266
  }
1029
1267
  await render(data);
1030
1268
  return result(true, void 0, data);
1031
1269
  } catch (e) {
1032
- if (verbose) console.error(e);
1270
+ if (verbose) logger.error(e);
1033
1271
  return result(false, e.message, options);
1034
1272
  } finally {
1035
1273
  clearTimeout(t);
1036
1274
  }
1037
1275
  }
1038
1276
  const gen = epubGen;
1039
- export {
1040
- epubGen,
1041
- errors,
1042
- gen
1043
- };
1277
+ exports.epubGen = epubGen;
1278
+ exports.errors = errors;
1279
+ exports.gen = gen;