wp-epub-gen 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.cjs CHANGED
@@ -363,20 +363,24 @@ function loadAndProcessHtml(data) {
363
363
  }
364
364
  try {
365
365
  let $ = cheerio__namespace.load(trimmedData, {
366
- xml: {
367
- lowerCaseTags: true,
368
- recognizeSelfClosing: true
369
- }
366
+ xmlMode: true,
367
+ // @ts-ignore
368
+ decodeEntities: false,
369
+ lowerCaseTags: true,
370
+ recognizeSelfClosing: true,
371
+ lowerCaseAttributeNames: true
370
372
  });
371
373
  const body = $("body");
372
374
  if (body.length) {
373
375
  const html = body.html();
374
376
  if (html) {
375
377
  $ = cheerio__namespace.load(html, {
376
- xml: {
377
- lowerCaseTags: true,
378
- recognizeSelfClosing: true
379
- }
378
+ xmlMode: true,
379
+ // @ts-ignore
380
+ decodeEntities: false,
381
+ lowerCaseTags: true,
382
+ recognizeSelfClosing: true,
383
+ lowerCaseAttributeNames: true
380
384
  });
381
385
  }
382
386
  }
@@ -436,7 +440,7 @@ function processImages($, chapter, epubConfigs) {
436
440
  }
437
441
  const trimmedUrl = url2.trim();
438
442
  try {
439
- if (!trimmedUrl.match(/^(https?:\/\/|data:|\.\/|\/)/)) {
443
+ if (!trimmedUrl.match(/^(https?:\/\/|file:\/\/|data:|\.\/|\/)/)) {
440
444
  logger.warn(`Image URL "${trimmedUrl}" appears to be invalid, but processing anyway`);
441
445
  }
442
446
  } catch (error) {
@@ -493,54 +497,20 @@ function processImages($, chapter, epubConfigs) {
493
497
  }
494
498
  });
495
499
  }
496
- function extractAndCleanHtmlContent($, originalData) {
500
+ function extractAndCleanHtmlContent($) {
497
501
  let data;
498
502
  if ($("body").length) {
499
503
  data = $("body").html() || "";
500
504
  } else {
501
505
  data = $.root().html() || "";
502
506
  }
503
- if (!originalData) {
504
- return data.replace(
505
- /<(br|hr|img|input|meta|area|base|col|embed|link|source|track|wbr)([^>]*?)><\/\1>/gi,
506
- "<$1$2/>"
507
- ).replace(
508
- new RegExp("<(br|hr|img|input|meta|area|base|col|embed|link|source|track|wbr)([^>]*?)(?<!\\/)>", "gi"),
509
- "<$1$2/>"
510
- );
511
- }
512
- const entityMap = /* @__PURE__ */ new Map();
513
- const entityRegex = /&[a-zA-Z][a-zA-Z0-9]*;|&#[0-9]+;|&#x[0-9a-fA-F]+;/g;
514
- const matches = Array.from(originalData.matchAll(entityRegex));
515
- let processedOriginal = originalData;
516
- const timestamp = Date.now();
517
- const randomId = Math.random().toString(36).substring(2, 8);
518
- const placeholderPrefix = `__ENTITY_${timestamp}_${randomId}_`;
519
- for (let i = matches.length - 1; i >= 0; i--) {
520
- const match = matches[i];
521
- const placeholder = `${placeholderPrefix}${i}__`;
522
- entityMap.set(placeholder, match[0]);
523
- processedOriginal = processedOriginal.substring(0, match.index) + placeholder + processedOriginal.substring(match.index + match[0].length);
524
- }
525
- const $temp = cheerio__namespace.load(processedOriginal, {
526
- xmlMode: false
527
- });
528
- let tempData;
529
- if ($temp("body").length) {
530
- tempData = $temp("body").html() || "";
531
- } else {
532
- tempData = $temp.root().html() || "";
533
- }
534
- for (const [placeholder, entity] of entityMap) {
535
- tempData = tempData.replace(new RegExp(placeholder, "g"), entity);
536
- }
537
- return tempData.replace(
507
+ return data.replace(
538
508
  /<(br|hr|img|input|meta|area|base|col|embed|link|source|track|wbr)([^>]*?)><\/\1>/gi,
539
509
  "<$1$2/>"
540
510
  ).replace(
541
511
  new RegExp("<(br|hr|img|input|meta|area|base|col|embed|link|source|track|wbr)([^>]*?)(?<!\\/)>", "gi"),
542
512
  "<$1$2/>"
543
- );
513
+ ).replace(/<\/img\s*>/gi, "");
544
514
  }
545
515
  function processChildrenChapters(chapter, index2, epubConfigs) {
546
516
  if (Array.isArray(chapter.children)) {
@@ -574,7 +544,7 @@ function parseContent(content, index2, epubConfigs) {
574
544
  }
575
545
  processHtmlElements($, allowedAttributes, allowedXhtml11Tags, epubConfigs, index2);
576
546
  processImages($, chapter, epubConfigs);
577
- chapter.data = extractAndCleanHtmlContent($, content.data);
547
+ chapter.data = extractAndCleanHtmlContent($);
578
548
  }
579
549
  processChildrenChapters(chapter, index2, epubConfigs);
580
550
  return chapter;
@@ -1245,6 +1215,7 @@ async function epubGen(options, configs) {
1245
1215
  if (configs?.logger) {
1246
1216
  logger.setLogger(configs.logger);
1247
1217
  }
1218
+ logger.info("EpubGen started 101...");
1248
1219
  options = { ...options };
1249
1220
  const o = check(options);
1250
1221
  const verbose = options.verbose !== false;
package/build/index.js CHANGED
@@ -342,20 +342,24 @@ function loadAndProcessHtml(data) {
342
342
  }
343
343
  try {
344
344
  let $ = cheerio.load(trimmedData, {
345
- xml: {
346
- lowerCaseTags: true,
347
- recognizeSelfClosing: true
348
- }
345
+ xmlMode: true,
346
+ // @ts-ignore
347
+ decodeEntities: false,
348
+ lowerCaseTags: true,
349
+ recognizeSelfClosing: true,
350
+ lowerCaseAttributeNames: true
349
351
  });
350
352
  const body = $("body");
351
353
  if (body.length) {
352
354
  const html = body.html();
353
355
  if (html) {
354
356
  $ = cheerio.load(html, {
355
- xml: {
356
- lowerCaseTags: true,
357
- recognizeSelfClosing: true
358
- }
357
+ xmlMode: true,
358
+ // @ts-ignore
359
+ decodeEntities: false,
360
+ lowerCaseTags: true,
361
+ recognizeSelfClosing: true,
362
+ lowerCaseAttributeNames: true
359
363
  });
360
364
  }
361
365
  }
@@ -415,7 +419,7 @@ function processImages($, chapter, epubConfigs) {
415
419
  }
416
420
  const trimmedUrl = url.trim();
417
421
  try {
418
- if (!trimmedUrl.match(/^(https?:\/\/|data:|\.\/|\/)/)) {
422
+ if (!trimmedUrl.match(/^(https?:\/\/|file:\/\/|data:|\.\/|\/)/)) {
419
423
  logger.warn(`Image URL "${trimmedUrl}" appears to be invalid, but processing anyway`);
420
424
  }
421
425
  } catch (error) {
@@ -472,54 +476,20 @@ function processImages($, chapter, epubConfigs) {
472
476
  }
473
477
  });
474
478
  }
475
- function extractAndCleanHtmlContent($, originalData) {
479
+ function extractAndCleanHtmlContent($) {
476
480
  let data;
477
481
  if ($("body").length) {
478
482
  data = $("body").html() || "";
479
483
  } else {
480
484
  data = $.root().html() || "";
481
485
  }
482
- if (!originalData) {
483
- return data.replace(
484
- /<(br|hr|img|input|meta|area|base|col|embed|link|source|track|wbr)([^>]*?)><\/\1>/gi,
485
- "<$1$2/>"
486
- ).replace(
487
- new RegExp("<(br|hr|img|input|meta|area|base|col|embed|link|source|track|wbr)([^>]*?)(?<!\\/)>", "gi"),
488
- "<$1$2/>"
489
- );
490
- }
491
- const entityMap = /* @__PURE__ */ new Map();
492
- const entityRegex = /&[a-zA-Z][a-zA-Z0-9]*;|&#[0-9]+;|&#x[0-9a-fA-F]+;/g;
493
- const matches = Array.from(originalData.matchAll(entityRegex));
494
- let processedOriginal = originalData;
495
- const timestamp = Date.now();
496
- const randomId = Math.random().toString(36).substring(2, 8);
497
- const placeholderPrefix = `__ENTITY_${timestamp}_${randomId}_`;
498
- for (let i = matches.length - 1; i >= 0; i--) {
499
- const match = matches[i];
500
- const placeholder = `${placeholderPrefix}${i}__`;
501
- entityMap.set(placeholder, match[0]);
502
- processedOriginal = processedOriginal.substring(0, match.index) + placeholder + processedOriginal.substring(match.index + match[0].length);
503
- }
504
- const $temp = cheerio.load(processedOriginal, {
505
- xmlMode: false
506
- });
507
- let tempData;
508
- if ($temp("body").length) {
509
- tempData = $temp("body").html() || "";
510
- } else {
511
- tempData = $temp.root().html() || "";
512
- }
513
- for (const [placeholder, entity] of entityMap) {
514
- tempData = tempData.replace(new RegExp(placeholder, "g"), entity);
515
- }
516
- return tempData.replace(
486
+ return data.replace(
517
487
  /<(br|hr|img|input|meta|area|base|col|embed|link|source|track|wbr)([^>]*?)><\/\1>/gi,
518
488
  "<$1$2/>"
519
489
  ).replace(
520
490
  new RegExp("<(br|hr|img|input|meta|area|base|col|embed|link|source|track|wbr)([^>]*?)(?<!\\/)>", "gi"),
521
491
  "<$1$2/>"
522
- );
492
+ ).replace(/<\/img\s*>/gi, "");
523
493
  }
524
494
  function processChildrenChapters(chapter, index2, epubConfigs) {
525
495
  if (Array.isArray(chapter.children)) {
@@ -553,7 +523,7 @@ function parseContent(content, index2, epubConfigs) {
553
523
  }
554
524
  processHtmlElements($, allowedAttributes, allowedXhtml11Tags, epubConfigs, index2);
555
525
  processImages($, chapter, epubConfigs);
556
- chapter.data = extractAndCleanHtmlContent($, content.data);
526
+ chapter.data = extractAndCleanHtmlContent($);
557
527
  }
558
528
  processChildrenChapters(chapter, index2, epubConfigs);
559
529
  return chapter;
@@ -1224,6 +1194,7 @@ async function epubGen(options, configs) {
1224
1194
  if (configs?.logger) {
1225
1195
  logger.setLogger(configs.logger);
1226
1196
  }
1197
+ logger.info("EpubGen started 101...");
1227
1198
  options = { ...options };
1228
1199
  const o = check(options);
1229
1200
  const verbose = options.verbose !== false;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wp-epub-gen",
3
- "version": "0.4.1",
3
+ "version": "0.4.2",
4
4
  "description": "Epub generator.",
5
5
  "type": "module",
6
6
  "main": "build/index.cjs",