@marvalt/wparser 0.1.65 → 0.1.67
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +89 -23
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +4 -1
- package/dist/index.esm.js +89 -23
- package/dist/index.esm.js.map +1 -1
- package/dist/utils/blockExtractors.d.ts +4 -1
- package/dist/utils/blockExtractors.d.ts.map +1 -1
- package/dist/utils/contentExtractor.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -1519,29 +1519,64 @@ function renderTextWithShortcodes(text, registry) {
|
|
|
1519
1519
|
* Content extraction utilities for WordPress blocks
|
|
1520
1520
|
* Extracts text content from various block formats
|
|
1521
1521
|
*/
|
|
1522
|
+
/**
 * Decode HTML entities in a string.
 *
 * Handles named entities (e.g. `&amp;`, `&quot;`), decimal numeric entities
 * (e.g. `&#039;`, `&#8217;`) and hexadecimal numeric entities (e.g. `&#x27;`).
 *
 * @param {string} text - Text that may contain HTML entities.
 * @returns {string} The decoded text, or '' when `text` is falsy.
 */
function decodeHtmlEntities(text) {
    if (!text)
        return '';
    // Use the browser's built-in decoder when a DOM is available.
    if (typeof document !== 'undefined') {
        const textarea = document.createElement('textarea');
        textarea.innerHTML = text;
        return textarea.value;
    }
    // Fallback for server-side rendering, where `document` does not exist.
    // Only this subset of named entities is recognised; unknown names are
    // left in the output untouched.
    const namedEntities = {
        amp: '&',
        lt: '<',
        gt: '>',
        quot: '"',
        // NOTE(review): apostrophe key assumed to be `&apos;`; numeric forms
        // such as `&#039;` are covered by the numeric branch below.
        apos: "'",
        // NOTE(review): mapped to a plain space here, whereas the browser
        // branch yields U+00A0 - confirm which one callers expect.
        nbsp: ' ',
        copy: '\u00A9',
        reg: '\u00AE',
        trade: '\u2122',
        hellip: '\u2026',
        mdash: '\u2014',
        ndash: '\u2013',
        lsquo: '\u2018', // Left single quotation mark
        rsquo: '\u2019', // Right single quotation mark
        ldquo: '\u201C', // Left double quotation mark
        rdquo: '\u201D', // Right double quotation mark
    };
    // Decode everything in ONE pass. Running separate passes (with `&amp;`
    // handled first) double-decodes escaped text: `&amp;lt;` would become
    // `&lt;` and then `<`.
    const entityPattern = /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z][a-zA-Z0-9]*));/g;
    return text.replace(entityPattern, (match, dec, hex, name) => {
        if (name !== undefined) {
            // Own-property check so names like `constructor` are not
            // resolved through Object.prototype.
            return Object.prototype.hasOwnProperty.call(namedEntities, name)
                ? namedEntities[name]
                : match;
        }
        const codePoint = dec !== undefined ? parseInt(dec, 10) : parseInt(hex, 16);
        // String.fromCodePoint handles characters above U+FFFF (e.g. emoji),
        // which String.fromCharCode silently truncates. It throws for values
        // beyond U+10FFFF, so leave such references as-is.
        return codePoint <= 0x10FFFF ? String.fromCodePoint(codePoint) : match;
    });
}
|
|
1522
1568
|
/**
 * Produce plain text from a block's innerHTML.
 *
 * Tags are stripped first, then HTML entities are decoded via
 * decodeHtmlEntities, and finally runs of whitespace are collapsed to a
 * single space and the result is trimmed.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {string} The extracted text, or '' when `html` is falsy.
 */
function extractTextFromHTML(html) {
    if (!html) {
        return '';
    }
    // Strip tags before decoding so that decoded `<` / `>` characters are
    // not mistaken for markup.
    const withoutTags = html.replace(/<[^>]*>/g, '');
    const decoded = decodeHtmlEntities(withoutTags);
    return decoded.replace(/\s+/g, ' ').trim();
}
|
|
1547
1582
|
/**
|
|
@@ -2069,19 +2104,50 @@ function extractTextAlignFromInnerBlocks(block) {
|
|
|
2069
2104
|
}
|
|
2070
2105
|
/**
 * Parse a contentPosition string into horizontal and vertical alignment.
 *
 * Both word orders are accepted:
 * - "vertical horizontal" (e.g. "bottom left", "top center", "center right")
 * - "horizontal vertical" (e.g. "left bottom", "center top")
 *
 * "center" is ambiguous on its own, so when it comes first the second word
 * decides which axis it belongs to.
 *
 * @param {string} contentPosition - Position string, two whitespace-separated words.
 * @returns {{horizontal: string, vertical: string}} Parsed alignment; falls back
 *   to `{ horizontal: 'left', vertical: 'center' }` for empty or unrecognised input.
 */
function parseContentPosition(contentPosition) {
    if (!contentPosition) {
        return { horizontal: 'left', vertical: 'center' };
    }
    const [part1 = '', part2 = ''] = contentPosition.trim().split(/\s+/);
    const isVertical = (word) => word === 'top' || word === 'bottom';
    const isHorizontal = (word) => word === 'left' || word === 'right';
    if (isVertical(part1)) {
        // "bottom left" -> vertical=bottom, horizontal=left
        const horizontal = part2 === 'center' || part2 === 'right' ? part2 : 'left';
        return { horizontal, vertical: part1 };
    }
    if (isHorizontal(part1)) {
        // "left bottom" -> horizontal=left, vertical=bottom
        return { horizontal: part1, vertical: isVertical(part2) ? part2 : 'center' };
    }
    if (part1 === 'center') {
        if (isVertical(part2)) {
            // "center bottom" -> horizontal=center, vertical=bottom
            return { horizontal: 'center', vertical: part2 };
        }
        if (isHorizontal(part2)) {
            // "center left" -> vertical=center, horizontal=left. "left" and
            // "right" can only be horizontal, so "center" must be vertical.
            return { horizontal: part2, vertical: 'center' };
        }
        // "center center", a lone "center", or an unknown second word
        return { horizontal: 'center', vertical: 'center' };
    }
    // Unknown format, use defaults
    return { horizontal: 'left', vertical: 'center' };
}
|
|
2086
2152
|
/**
|
|
2087
2153
|
* Extract video iframe HTML from innerBlocks (finds HTML block with iframe)
|