wp-epub-gen 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.cjs +18 -47
- package/build/index.js +18 -47
- package/package.json +1 -1
package/build/index.cjs
CHANGED
|
@@ -363,20 +363,24 @@ function loadAndProcessHtml(data) {
|
|
|
363
363
|
}
|
|
364
364
|
try {
|
|
365
365
|
let $ = cheerio__namespace.load(trimmedData, {
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
366
|
+
xmlMode: true,
|
|
367
|
+
// @ts-ignore
|
|
368
|
+
decodeEntities: false,
|
|
369
|
+
lowerCaseTags: true,
|
|
370
|
+
recognizeSelfClosing: true,
|
|
371
|
+
lowerCaseAttributeNames: true
|
|
370
372
|
});
|
|
371
373
|
const body = $("body");
|
|
372
374
|
if (body.length) {
|
|
373
375
|
const html = body.html();
|
|
374
376
|
if (html) {
|
|
375
377
|
$ = cheerio__namespace.load(html, {
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
378
|
+
xmlMode: true,
|
|
379
|
+
// @ts-ignore
|
|
380
|
+
decodeEntities: false,
|
|
381
|
+
lowerCaseTags: true,
|
|
382
|
+
recognizeSelfClosing: true,
|
|
383
|
+
lowerCaseAttributeNames: true
|
|
380
384
|
});
|
|
381
385
|
}
|
|
382
386
|
}
|
|
@@ -436,7 +440,7 @@ function processImages($, chapter, epubConfigs) {
|
|
|
436
440
|
}
|
|
437
441
|
const trimmedUrl = url2.trim();
|
|
438
442
|
try {
|
|
439
|
-
if (!trimmedUrl.match(/^(https?:\/\/|data:|\.\/|\/)/)) {
|
|
443
|
+
if (!trimmedUrl.match(/^(https?:\/\/|file:\/\/|data:|\.\/|\/)/)) {
|
|
440
444
|
logger.warn(`Image URL "${trimmedUrl}" appears to be invalid, but processing anyway`);
|
|
441
445
|
}
|
|
442
446
|
} catch (error) {
|
|
@@ -493,54 +497,20 @@ function processImages($, chapter, epubConfigs) {
|
|
|
493
497
|
}
|
|
494
498
|
});
|
|
495
499
|
}
|
|
496
|
-
function extractAndCleanHtmlContent(
|
|
500
|
+
function extractAndCleanHtmlContent($) {
|
|
497
501
|
let data;
|
|
498
502
|
if ($("body").length) {
|
|
499
503
|
data = $("body").html() || "";
|
|
500
504
|
} else {
|
|
501
505
|
data = $.root().html() || "";
|
|
502
506
|
}
|
|
503
|
-
|
|
504
|
-
return data.replace(
|
|
505
|
-
/<(br|hr|img|input|meta|area|base|col|embed|link|source|track|wbr)([^>]*?)><\/\1>/gi,
|
|
506
|
-
"<$1$2/>"
|
|
507
|
-
).replace(
|
|
508
|
-
new RegExp("<(br|hr|img|input|meta|area|base|col|embed|link|source|track|wbr)([^>]*?)(?<!\\/)>", "gi"),
|
|
509
|
-
"<$1$2/>"
|
|
510
|
-
);
|
|
511
|
-
}
|
|
512
|
-
const entityMap = /* @__PURE__ */ new Map();
|
|
513
|
-
const entityRegex = /&[a-zA-Z][a-zA-Z0-9]*;|&#[0-9]+;|&#x[0-9a-fA-F]+;/g;
|
|
514
|
-
const matches = Array.from(originalData.matchAll(entityRegex));
|
|
515
|
-
let processedOriginal = originalData;
|
|
516
|
-
const timestamp = Date.now();
|
|
517
|
-
const randomId = Math.random().toString(36).substring(2, 8);
|
|
518
|
-
const placeholderPrefix = `__ENTITY_${timestamp}_${randomId}_`;
|
|
519
|
-
for (let i = matches.length - 1; i >= 0; i--) {
|
|
520
|
-
const match = matches[i];
|
|
521
|
-
const placeholder = `${placeholderPrefix}${i}__`;
|
|
522
|
-
entityMap.set(placeholder, match[0]);
|
|
523
|
-
processedOriginal = processedOriginal.substring(0, match.index) + placeholder + processedOriginal.substring(match.index + match[0].length);
|
|
524
|
-
}
|
|
525
|
-
const $temp = cheerio__namespace.load(processedOriginal, {
|
|
526
|
-
xmlMode: false
|
|
527
|
-
});
|
|
528
|
-
let tempData;
|
|
529
|
-
if ($temp("body").length) {
|
|
530
|
-
tempData = $temp("body").html() || "";
|
|
531
|
-
} else {
|
|
532
|
-
tempData = $temp.root().html() || "";
|
|
533
|
-
}
|
|
534
|
-
for (const [placeholder, entity] of entityMap) {
|
|
535
|
-
tempData = tempData.replace(new RegExp(placeholder, "g"), entity);
|
|
536
|
-
}
|
|
537
|
-
return tempData.replace(
|
|
507
|
+
return data.replace(
|
|
538
508
|
/<(br|hr|img|input|meta|area|base|col|embed|link|source|track|wbr)([^>]*?)><\/\1>/gi,
|
|
539
509
|
"<$1$2/>"
|
|
540
510
|
).replace(
|
|
541
511
|
new RegExp("<(br|hr|img|input|meta|area|base|col|embed|link|source|track|wbr)([^>]*?)(?<!\\/)>", "gi"),
|
|
542
512
|
"<$1$2/>"
|
|
543
|
-
);
|
|
513
|
+
).replace(/<\/img\s*>/gi, "");
|
|
544
514
|
}
|
|
545
515
|
function processChildrenChapters(chapter, index2, epubConfigs) {
|
|
546
516
|
if (Array.isArray(chapter.children)) {
|
|
@@ -574,7 +544,7 @@ function parseContent(content, index2, epubConfigs) {
|
|
|
574
544
|
}
|
|
575
545
|
processHtmlElements($, allowedAttributes, allowedXhtml11Tags, epubConfigs, index2);
|
|
576
546
|
processImages($, chapter, epubConfigs);
|
|
577
|
-
chapter.data = extractAndCleanHtmlContent(
|
|
547
|
+
chapter.data = extractAndCleanHtmlContent($);
|
|
578
548
|
}
|
|
579
549
|
processChildrenChapters(chapter, index2, epubConfigs);
|
|
580
550
|
return chapter;
|
|
@@ -1245,6 +1215,7 @@ async function epubGen(options, configs) {
|
|
|
1245
1215
|
if (configs?.logger) {
|
|
1246
1216
|
logger.setLogger(configs.logger);
|
|
1247
1217
|
}
|
|
1218
|
+
logger.info("EpubGen started 101...");
|
|
1248
1219
|
options = { ...options };
|
|
1249
1220
|
const o = check(options);
|
|
1250
1221
|
const verbose = options.verbose !== false;
|
package/build/index.js
CHANGED
|
@@ -342,20 +342,24 @@ function loadAndProcessHtml(data) {
|
|
|
342
342
|
}
|
|
343
343
|
try {
|
|
344
344
|
let $ = cheerio.load(trimmedData, {
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
345
|
+
xmlMode: true,
|
|
346
|
+
// @ts-ignore
|
|
347
|
+
decodeEntities: false,
|
|
348
|
+
lowerCaseTags: true,
|
|
349
|
+
recognizeSelfClosing: true,
|
|
350
|
+
lowerCaseAttributeNames: true
|
|
349
351
|
});
|
|
350
352
|
const body = $("body");
|
|
351
353
|
if (body.length) {
|
|
352
354
|
const html = body.html();
|
|
353
355
|
if (html) {
|
|
354
356
|
$ = cheerio.load(html, {
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
357
|
+
xmlMode: true,
|
|
358
|
+
// @ts-ignore
|
|
359
|
+
decodeEntities: false,
|
|
360
|
+
lowerCaseTags: true,
|
|
361
|
+
recognizeSelfClosing: true,
|
|
362
|
+
lowerCaseAttributeNames: true
|
|
359
363
|
});
|
|
360
364
|
}
|
|
361
365
|
}
|
|
@@ -415,7 +419,7 @@ function processImages($, chapter, epubConfigs) {
|
|
|
415
419
|
}
|
|
416
420
|
const trimmedUrl = url.trim();
|
|
417
421
|
try {
|
|
418
|
-
if (!trimmedUrl.match(/^(https?:\/\/|data:|\.\/|\/)/)) {
|
|
422
|
+
if (!trimmedUrl.match(/^(https?:\/\/|file:\/\/|data:|\.\/|\/)/)) {
|
|
419
423
|
logger.warn(`Image URL "${trimmedUrl}" appears to be invalid, but processing anyway`);
|
|
420
424
|
}
|
|
421
425
|
} catch (error) {
|
|
@@ -472,54 +476,20 @@ function processImages($, chapter, epubConfigs) {
|
|
|
472
476
|
}
|
|
473
477
|
});
|
|
474
478
|
}
|
|
475
|
-
function extractAndCleanHtmlContent(
|
|
479
|
+
function extractAndCleanHtmlContent($) {
|
|
476
480
|
let data;
|
|
477
481
|
if ($("body").length) {
|
|
478
482
|
data = $("body").html() || "";
|
|
479
483
|
} else {
|
|
480
484
|
data = $.root().html() || "";
|
|
481
485
|
}
|
|
482
|
-
|
|
483
|
-
return data.replace(
|
|
484
|
-
/<(br|hr|img|input|meta|area|base|col|embed|link|source|track|wbr)([^>]*?)><\/\1>/gi,
|
|
485
|
-
"<$1$2/>"
|
|
486
|
-
).replace(
|
|
487
|
-
new RegExp("<(br|hr|img|input|meta|area|base|col|embed|link|source|track|wbr)([^>]*?)(?<!\\/)>", "gi"),
|
|
488
|
-
"<$1$2/>"
|
|
489
|
-
);
|
|
490
|
-
}
|
|
491
|
-
const entityMap = /* @__PURE__ */ new Map();
|
|
492
|
-
const entityRegex = /&[a-zA-Z][a-zA-Z0-9]*;|&#[0-9]+;|&#x[0-9a-fA-F]+;/g;
|
|
493
|
-
const matches = Array.from(originalData.matchAll(entityRegex));
|
|
494
|
-
let processedOriginal = originalData;
|
|
495
|
-
const timestamp = Date.now();
|
|
496
|
-
const randomId = Math.random().toString(36).substring(2, 8);
|
|
497
|
-
const placeholderPrefix = `__ENTITY_${timestamp}_${randomId}_`;
|
|
498
|
-
for (let i = matches.length - 1; i >= 0; i--) {
|
|
499
|
-
const match = matches[i];
|
|
500
|
-
const placeholder = `${placeholderPrefix}${i}__`;
|
|
501
|
-
entityMap.set(placeholder, match[0]);
|
|
502
|
-
processedOriginal = processedOriginal.substring(0, match.index) + placeholder + processedOriginal.substring(match.index + match[0].length);
|
|
503
|
-
}
|
|
504
|
-
const $temp = cheerio.load(processedOriginal, {
|
|
505
|
-
xmlMode: false
|
|
506
|
-
});
|
|
507
|
-
let tempData;
|
|
508
|
-
if ($temp("body").length) {
|
|
509
|
-
tempData = $temp("body").html() || "";
|
|
510
|
-
} else {
|
|
511
|
-
tempData = $temp.root().html() || "";
|
|
512
|
-
}
|
|
513
|
-
for (const [placeholder, entity] of entityMap) {
|
|
514
|
-
tempData = tempData.replace(new RegExp(placeholder, "g"), entity);
|
|
515
|
-
}
|
|
516
|
-
return tempData.replace(
|
|
486
|
+
return data.replace(
|
|
517
487
|
/<(br|hr|img|input|meta|area|base|col|embed|link|source|track|wbr)([^>]*?)><\/\1>/gi,
|
|
518
488
|
"<$1$2/>"
|
|
519
489
|
).replace(
|
|
520
490
|
new RegExp("<(br|hr|img|input|meta|area|base|col|embed|link|source|track|wbr)([^>]*?)(?<!\\/)>", "gi"),
|
|
521
491
|
"<$1$2/>"
|
|
522
|
-
);
|
|
492
|
+
).replace(/<\/img\s*>/gi, "");
|
|
523
493
|
}
|
|
524
494
|
function processChildrenChapters(chapter, index2, epubConfigs) {
|
|
525
495
|
if (Array.isArray(chapter.children)) {
|
|
@@ -553,7 +523,7 @@ function parseContent(content, index2, epubConfigs) {
|
|
|
553
523
|
}
|
|
554
524
|
processHtmlElements($, allowedAttributes, allowedXhtml11Tags, epubConfigs, index2);
|
|
555
525
|
processImages($, chapter, epubConfigs);
|
|
556
|
-
chapter.data = extractAndCleanHtmlContent(
|
|
526
|
+
chapter.data = extractAndCleanHtmlContent($);
|
|
557
527
|
}
|
|
558
528
|
processChildrenChapters(chapter, index2, epubConfigs);
|
|
559
529
|
return chapter;
|
|
@@ -1224,6 +1194,7 @@ async function epubGen(options, configs) {
|
|
|
1224
1194
|
if (configs?.logger) {
|
|
1225
1195
|
logger.setLogger(configs.logger);
|
|
1226
1196
|
}
|
|
1197
|
+
logger.info("EpubGen started 101...");
|
|
1227
1198
|
options = { ...options };
|
|
1228
1199
|
const o = check(options);
|
|
1229
1200
|
const verbose = options.verbose !== false;
|