npm - @marvalt/wparser - Versions diffs - 0.1.66 → 0.1.68 - Mend

@marvalt/wparser 0.1.66 → 0.1.68

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/dist/index.cjs +54 -17
package/dist/index.cjs.map +1 -1
package/dist/index.d.ts +1 -0
package/dist/index.esm.js +54 -17
package/dist/index.esm.js.map +1 -1
package/dist/utils/contentExtractor.d.ts +1 -0
package/dist/utils/contentExtractor.d.ts.map +1 -1
package/package.json +1 -1

package/dist/index.d.ts CHANGED

@@ -264,6 +264,7 @@ declare function getBlockTextContent(block: WordPressBlock): string;
 declare function getImageUrl(block: WordPressBlock): string | null;
 /**
  * Extract image attributes (alt, width, height, alignment) from block
+ * Prioritizes Cloudflare URLs over WordPress URLs
  */
 declare function getImageAttributes(block: WordPressBlock): {
     url: string | null;

package/dist/index.esm.js CHANGED

@@ -1517,29 +1517,64 @@ function renderTextWithShortcodes(text, registry) {
  * Content extraction utilities for WordPress blocks
  * Extracts text content from various block formats
  */
/**
 * Decode HTML entities in a string.
 *
 * Handles decimal numeric entities (&#039;, &#8217;), hexadecimal numeric
 * entities (&#x27;, &#X27;) and a fixed table of common named entities
 * (&amp;, &quot;, &rsquo;, ...).
 *
 * When a global `document` exists, decoding is delegated to a detached
 * <textarea> element. Otherwise a regex-based fallback is used.
 *
 * @param text - String that may contain HTML entities.
 * @returns The decoded string, or '' when `text` is falsy. In the fallback
 *   path, named entities that are not in the table are left unchanged.
 */
function decodeHtmlEntities(text) {
    if (!text)
        return '';
    // Use browser's built-in decoder if available (most efficient)
    if (typeof document !== 'undefined') {
        const textarea = document.createElement('textarea');
        textarea.innerHTML = text;
        return textarea.value;
    }
    // Fallback for server-side or when document is not available.
    // Names are stored without the surrounding '&' and ';'.
    const namedEntities = new Map([
        ['amp', '&'],
        ['lt', '<'],
        ['gt', '>'],
        ['quot', '"'],
        ['apos', "'"],
        ['nbsp', ' '],
        ['copy', '©'],
        ['reg', '®'],
        ['trade', '™'],
        ['hellip', '…'],
        ['mdash', '—'],
        ['ndash', '–'],
        ['lsquo', '\u2018'], // Left single quotation mark
        ['rsquo', '\u2019'], // Right single quotation mark
        ['ldquo', '\u201C'], // Left double quotation mark
        ['rdquo', '\u201D'], // Right double quotation mark
    ]);
    // Every entity is decoded in ONE pass. Decoding in several sequential
    // passes would re-scan already-decoded output, so "&amp;lt;" would be
    // decoded twice and come out as "<" instead of the correct "&lt;".
    return text.replace(/&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z][a-zA-Z0-9]*));/g, (match, dec, hex, name) => {
        if (name !== undefined) {
            const char = namedEntities.get(name);
            // Unknown named entity: keep the original text untouched
            return char !== undefined ? char : match;
        }
        const codePoint = dec !== undefined ? parseInt(dec, 10) : parseInt(hex, 16);
        // NUL, lone surrogates and values beyond U+10FFFF are not valid
        // characters; emit U+FFFD (as HTML parsers do) rather than letting
        // String.fromCodePoint throw a RangeError.
        const isInvalid = codePoint === 0 ||
            codePoint > 0x10FFFF ||
            (codePoint >= 0xD800 && codePoint <= 0xDFFF);
        // fromCodePoint (not fromCharCode) so astral characters such as
        // emoji (&#128512;) are not truncated to 16 bits.
        return isInvalid ? '\uFFFD' : String.fromCodePoint(codePoint);
    });
}
 /**
  * Extract text content from a block's innerHTML by stripping HTML tags
  */
 function extractTextFromHTML(html) {
     if (!html)
         return '';
-    // Remove HTML tags and decode entities
-    let text = html
-        .replace(/<[^>]*>/g, '') // Remove HTML tags
-        .replace(/&nbsp;/g, ' ') // Replace &nbsp; with space
-        .replace(/&#8217;/g, "'") // Replace apostrophe entity
-        .replace(/&#8220;/g, '"') // Replace left double quote
-        .replace(/&#8221;/g, '"') // Replace right double quote
-        .replace(/&#8230;/g, '...') // Replace ellipsis
-        .replace(/&amp;/g, '&') // Replace &amp;
-        .replace(/&lt;/g, '<') // Replace &lt;
-        .replace(/&gt;/g, '>') // Replace &gt;
-        .replace(/&quot;/g, '"') // Replace &quot;
-        .replace(/&#8211;/g, '–') // Replace en dash
-        .replace(/&#8212;/g, '—') // Replace em dash
-        .trim();
+    // Remove HTML tags first
+    let text = html.replace(/<[^>]*>/g, '');
+    // Decode all HTML entities (comprehensive)
+    text = decodeHtmlEntities(text);
     // Clean up extra whitespace
-    text = text.replace(/\s+/g, ' ');
+    text = text.replace(/\s+/g, ' ').trim();
     return text;
 }
 /**
@@ -1591,10 +1626,12 @@ function getImageUrl(block) {
 }
 /**
  * Extract image attributes (alt, width, height, alignment) from block
+ * Prioritizes Cloudflare URLs over WordPress URLs
  */
 function getImageAttributes(block) {
     const attrs = block.attributes || {};
-    const url = getImageUrl(block);
+    // Use extractImageUrlWithFallback to prioritize Cloudflare URLs
+    const url = extractImageUrlWithFallback(block);
     // Extract width - can be number or string like "640px"
     let width;
     const widthAttr = attrs['width'];