@emdash-cms/gutenberg-to-portable-text 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs ADDED
@@ -0,0 +1,1466 @@
1
+ import { t as __exportAll } from "./chunk-DQk6qfdC.mjs";
2
+ import { parse } from "@wordpress/block-serialization-default-parser";
3
+ import { parseFragment } from "parse5";
4
+
5
+ //#region src/url.ts
6
+ /**
7
+ * URL scheme validation for the converter pipeline (defense-in-depth).
8
+ *
9
+ * This mirrors the canonical sanitizeHref in packages/core/src/utils/url.ts.
10
+ * The converter is a standalone zero-dependency package, so it carries its own
11
+ * copy. The render layer in core is the primary defense; this is secondary.
12
+ */
13
const SAFE_URL_SCHEME_RE = /^(https?:|mailto:|tel:|\/(?!\/)|#)/i;
/**
 * Allow-list filter for link targets.
 *
 * A URL is kept only when it starts with `http:`, `https:`, `mailto:`,
 * `tel:`, a single `/` (root-relative, but not protocol-relative `//`), or
 * `#`. Anything else, including a missing or empty value, becomes "".
 *
 * @param {string | undefined | null} url - Candidate URL.
 * @returns {string} The unchanged URL when allowed, otherwise "".
 */
function sanitizeHref(url) {
  if (url && SAFE_URL_SCHEME_RE.test(url)) {
    return url;
  }
  return "";
}
25
+
26
+ //#endregion
27
+ //#region src/inline.ts
28
+ /**
29
+ * Inline HTML to Portable Text spans converter
30
+ *
31
+ * Parses inline HTML elements (strong, em, a, code, etc.) and converts
32
+ * them to Portable Text spans with marks.
33
+ */
34
// Despite its name, this matches any NON-whitespace character; callers negate
// the test to detect whitespace-only input.
const WHITESPACE_PATTERN = /\S/;
// For each wrapper tag: `open` matches the tag at the very start of a string
// and `close` matches its end tag at the very end (both case-insensitive).
const BLOCK_TAG_PATTERNS = Object.fromEntries(
  ["p", "h1", "h2", "h3", "h4", "h5", "h6", "li", "blockquote", "figcaption"].map((tag) => [
    tag,
    {
      open: new RegExp(`^<${tag}[^>]*>`, "i"),
      close: new RegExp(`</${tag}>$`, "i"),
    },
  ]),
);
77
// Attribute patterns for <img>. Capture group 1 is the attribute value. The
// lookbehinds tie the value's character class to the quote that opened it, so
// a value such as alt="Bob's photo" is no longer cut off at the apostrophe.
const IMG_ALT_PATTERN = /<img[^>]+alt=["']((?<=")[^"]*|(?<=')[^']*)["']/i;
// Inner HTML of the first <figcaption> element (group 1).
const FIGCAPTION_PATTERN = /<figcaption[^>]*>([\s\S]*?)<\/figcaption>/i;
const IMG_SRC_PATTERN = /<img[^>]+src=["']((?<=")[^"]*|(?<=')[^']*)["']/i;
// The three spellings of an encoded ampersand that are decoded inside URLs.
const URL_AMP_ENTITY_PATTERN = /&amp;/g;
const URL_NUMERIC_AMP_ENTITY_PATTERN = /&#0?38;/g;
const URL_HEX_AMP_ENTITY_PATTERN = /&#x26;/gi;
83
/**
 * Convert a snippet of inline HTML into Portable Text spans plus the mark
 * definitions those spans refer to.
 *
 * - Non-empty input made only of whitespace is returned verbatim as one span.
 * - Otherwise one outer block wrapper is stripped, the remainder is parsed as
 *   an HTML fragment and walked into spans.
 * - The result always holds at least one span (an empty one when the walk
 *   produced nothing).
 *
 * @param {string} html - Inline HTML to convert.
 * @param {() => string} generateKey - Supplies a `_key` for every created node.
 * @returns {{ children: object[], markDefs: object[] }}
 */
function parseInlineContent(html, generateKey) {
  const isWhitespaceOnly = html.length > 0 && !WHITESPACE_PATTERN.test(html);
  if (isWhitespaceOnly) {
    const verbatim = { _type: "span", _key: generateKey(), text: html };
    return { children: [verbatim], markDefs: [] };
  }

  const children = [];
  const markDefs = [];
  const markDefMap = /* @__PURE__ */ new Map();
  const fragment = parseFragment(stripBlockTags(html));
  walkNodes(fragment.childNodes, [], children, markDefs, markDefMap, generateKey);

  if (children.length === 0) {
    children.push({ _type: "span", _key: generateKey(), text: "" });
  }
  return { children, markDefs };
}
109
/**
 * Remove one enclosing block-level wrapper from an HTML snippet.
 *
 * The input is trimmed, then the wrapper tags are tried in a fixed order. The
 * first tag whose opening tag starts the string AND whose closing tag ends it
 * is removed (both ends) and the remainder is trimmed again. At most one
 * wrapper is removed.
 *
 * @param {string} html - HTML that may be wrapped in a block element.
 * @returns {string} The trimmed HTML without that wrapper.
 */
function stripBlockTags(html) {
  const trimmed = html.trim();
  const wrapperTags = ["p", "h1", "h2", "h3", "h4", "h5", "h6", "li", "blockquote", "figcaption"];
  const wrapper = wrapperTags
    .map((tag) => BLOCK_TAG_PATTERNS[tag])
    .find((patterns) => patterns && patterns.open.test(trimmed) && patterns.close.test(trimmed));
  if (!wrapper) {
    return trimmed;
  }
  return trimmed.replace(wrapper.open, "").replace(wrapper.close, "").trim();
}
134
/**
 * Depth-first walk over parsed HTML nodes, appending Portable Text spans to
 * `children`.
 *
 * - Text nodes are split on "\n": every non-empty segment becomes a span
 *   carrying a copy of the active marks, and every newline between segments
 *   is recorded as a line break. A newline at the very start of a text node
 *   is dropped.
 * - `<br>` is recorded as a line break.
 * - Any other element contributes its mark (if it maps to one) to its
 *   descendants and is then walked recursively.
 *
 * @param {Iterable<object>} nodes - Nodes to visit.
 * @param {string[]} currentMarks - Marks inherited from ancestor elements.
 * @param {object[]} children - Output span list (mutated).
 * @param {object[]} markDefs - Output mark definitions (mutated by the mark lookup).
 * @param {Map<string, string>} markDefMap - De-duplication map for link marks.
 * @param {() => string} generateKey - Supplies `_key` values.
 */
function walkNodes(nodes, currentMarks, children, markDefs, markDefMap, generateKey) {
  // A line break is glued onto the previous span; when there is no span yet
  // it becomes a standalone "\n" span.
  const appendLineBreak = () => {
    if (children.length === 0) {
      children.push({ _type: "span", _key: generateKey(), text: "\n" });
      return;
    }
    const previous = children.at(-1);
    if (previous) previous.text += "\n";
  };

  for (const node of nodes) {
    if (isTextNode(node)) {
      const value = node.value;
      if (!value) continue;
      const segments = value.split("\n");
      const finalIndex = segments.length - 1;
      segments.forEach((segment, index) => {
        // An empty first segment means the text began with "\n": skip it.
        if (!segment && index === 0) return;
        if (segment) {
          children.push({
            _type: "span",
            _key: generateKey(),
            text: segment,
            marks: currentMarks.length > 0 ? [...currentMarks] : void 0,
          });
        }
        if (index < finalIndex) appendLineBreak();
      });
      continue;
    }

    if (!isElement(node)) continue;

    if (node.tagName.toLowerCase() === "br") {
      appendLineBreak();
      continue;
    }

    const mark = getMarkForElement(node, markDefs, markDefMap, generateKey);
    const marksForChildren = mark ? [...currentMarks, mark] : currentMarks;
    walkNodes(node.childNodes, marksForChildren, children, markDefs, markDefMap, generateKey);
  }
}
179
/**
 * Map an inline HTML element to the Portable Text mark it represents.
 *
 * Decorator elements (`strong`/`b`, `em`/`i`, `u`, `s`/`strike`/`del`,
 * `code`, `sup`, `sub`) map to fixed mark names. An `<a>` maps to the `_key`
 * of a link mark definition, which is created on first use and pushed onto
 * `markDefs`.
 *
 * Link definitions are de-duplicated per (href, opens-in-new-tab) pair.
 * Keying on the href alone made a `target="_blank"` link and a plain link to
 * the same URL share one definition, so one of them got the wrong `blank`
 * flag.
 *
 * @param {object} element - Parsed element exposing `tagName` and `attrs`.
 * @param {object[]} markDefs - Mark definition list (mutated for new links).
 * @param {Map<string, string>} markDefMap - De-duplication map (mutated).
 * @param {() => string} generateKey - Supplies `_key` values.
 * @returns {string | null} Mark name, link mark key, or null when the element
 *   carries no mark.
 */
function getMarkForElement(element, markDefs, markDefMap, generateKey) {
  switch (element.tagName.toLowerCase()) {
    case "strong":
    case "b":
      return "strong";
    case "em":
    case "i":
      return "em";
    case "u":
      return "underline";
    case "s":
    case "strike":
    case "del":
      return "strike-through";
    case "code":
      return "code";
    case "sup":
      return "superscript";
    case "sub":
      return "subscript";
    case "a": {
      const href = sanitizeHref(getAttr(element, "href"));
      const opensInNewTab = getAttr(element, "target") === "_blank";
      // NUL cannot occur in a parsed attribute value, so the suffix never
      // collides with a real href.
      const dedupeKey = opensInNewTab ? `${href}\u0000_blank` : href;
      const existingKey = markDefMap.get(dedupeKey);
      if (existingKey) return existingKey;
      const key = generateKey();
      const markDef = {
        _type: "link",
        _key: key,
        href,
      };
      if (opensInNewTab) markDef.blank = true;
      markDefs.push(markDef);
      markDefMap.set(dedupeKey, key);
      return key;
    }
    default:
      return null;
  }
}
214
/**
 * Look up an attribute on a parsed element.
 *
 * Attribute names are lower-cased before comparison; `name` is compared as
 * given, so callers pass it in lower case.
 *
 * @param {{ attrs: Array<{ name: string, value: string }> }} element
 * @param {string} name - Lower-case attribute name.
 * @returns {string | undefined} Value of the first matching attribute.
 */
function getAttr(element, name) {
  for (const attribute of element.attrs) {
    if (attribute.name.toLowerCase() === name) {
      return attribute.value;
    }
  }
  return undefined;
}
220
/**
 * Tell whether a parsed node is a text node, identified by the node name
 * "#text".
 *
 * @param {{ nodeName: string }} node
 * @returns {boolean}
 */
function isTextNode(node) {
  const { nodeName } = node;
  return nodeName === "#text";
}
226
/**
 * Tell whether a parsed node is an element, i.e. whether it exposes a
 * `tagName` property (own or inherited).
 *
 * @param {object} node
 * @returns {boolean}
 */
function isElement(node) {
  return Reflect.has(node, "tagName");
}
232
/**
 * Reduce an HTML snippet to its plain text (used for alt text and captions).
 *
 * The snippet is parsed as an HTML fragment and the text of its nodes is
 * collected.
 *
 * @param {string} html
 * @returns {string}
 */
function extractText(html) {
  const { childNodes } = parseFragment(html);
  return getTextContent(childNodes);
}
238
/**
 * Concatenate the text of a node list and all of its descendants.
 *
 * Only the final result is trimmed. Trimming at every recursion level removed
 * the whitespace at the edges of inline elements and glued neighbouring words
 * together (`Click <strong>here </strong>now` became "Click herenow").
 *
 * @param {Iterable<object>} nodes - Parsed nodes (text nodes and elements).
 * @returns {string} Combined text without leading or trailing whitespace.
 */
function getTextContent(nodes) {
  const collect = (list) => {
    let text = "";
    for (const node of list) {
      if (isTextNode(node)) text += node.value;
      else if (isElement(node)) text += collect(node.childNodes);
    }
    return text;
  };
  return collect(nodes).trim();
}
244
/**
 * Read the `alt` attribute of an `<img>` tag found in raw HTML.
 *
 * The value is returned exactly as written in the markup; entities are not
 * decoded.
 *
 * @param {string} html
 * @returns {string | undefined} The alt text, or undefined when the pattern
 *   does not match.
 */
function extractAlt(html) {
  const found = IMG_ALT_PATTERN.exec(html);
  return found ? found[1] : undefined;
}
251
/**
 * Return the plain text of the first `<figcaption>` element in the HTML.
 *
 * @param {string} html
 * @returns {string | undefined} Caption text; undefined when there is no
 *   figcaption or it is empty.
 */
function extractCaption(html) {
  const captionHtml = html.match(FIGCAPTION_PATTERN)?.[1];
  return captionHtml ? extractText(captionHtml) : undefined;
}
258
/**
 * Read the `src` attribute of an `<img>` tag found in raw HTML, with encoded
 * ampersands decoded.
 *
 * @param {string} html
 * @returns {string | undefined} The image URL; undefined when no non-empty
 *   src is found.
 */
function extractSrc(html) {
  const rawSrc = html.match(IMG_SRC_PATTERN)?.[1];
  return rawSrc ? decodeUrlEntities$1(rawSrc) : undefined;
}
266
/**
 * Decode the ampersand entities commonly found in URLs.
 *
 * The named, decimal and hexadecimal forms are replaced in that order, each
 * pass working on the output of the previous one.
 *
 * @param {string} url
 * @returns {string}
 */
function decodeUrlEntities$1(url) {
  const ampersandForms = [
    URL_AMP_ENTITY_PATTERN,
    URL_NUMERIC_AMP_ENTITY_PATTERN,
    URL_HEX_AMP_ENTITY_PATTERN,
  ];
  return ampersandForms.reduce((decoded, pattern) => decoded.replace(pattern, "&"), url);
}
272
+
273
+ //#endregion
274
+ //#region src/types.ts
275
/**
 * Read `attrs[key]` when it is a string.
 *
 * @param {object} attrs - Block attribute bag.
 * @param {string} key - Attribute name.
 * @returns {string | undefined} The value, or undefined when it is missing or
 *   not a string.
 */
function attrString(attrs, key) {
  const value = attrs[key];
  if (typeof value !== "string") {
    return undefined;
  }
  return value;
}
280
/**
 * Read `attrs[key]` when it is a number.
 *
 * @param {object} attrs - Block attribute bag.
 * @param {string} key - Attribute name.
 * @returns {number | undefined} The value, or undefined when it is missing or
 *   not a number.
 */
function attrNumber(attrs, key) {
  const value = attrs[key];
  if (typeof value !== "number") {
    return undefined;
  }
  return value;
}
285
/**
 * Read `attrs[key]` when it is a boolean.
 *
 * @param {object} attrs - Block attribute bag.
 * @param {string} key - Attribute name.
 * @returns {boolean | undefined} The value, or undefined when it is missing
 *   or not a boolean.
 */
function attrBoolean(attrs, key) {
  const value = attrs[key];
  if (typeof value !== "boolean") {
    return undefined;
  }
  return value;
}
290
/**
 * Tell whether a value is a non-null object that is not an array.
 *
 * @param {unknown} v
 * @returns {boolean}
 */
function isRecord(v) {
  if (v === null || Array.isArray(v)) {
    return false;
  }
  return typeof v === "object";
}
293
/**
 * Read `attrs[key]` when it is a plain record (non-null, non-array object).
 *
 * @param {object} attrs - Block attribute bag.
 * @param {string} key - Attribute name.
 * @returns {object | undefined} The value, or undefined when it is missing or
 *   not a record.
 */
function attrObject(attrs, key) {
  const value = attrs[key];
  if (!isRecord(value)) {
    return undefined;
  }
  return value;
}
298
+
299
+ //#endregion
300
+ //#region src/transformers/core.ts
301
/**
 * Transformers for WordPress core/* blocks.
 *
 * Namespace object for this region. Each entry is a lazy getter function
 * because the transformer bindings are declared further down in the module.
 */
var core_exports = /* @__PURE__ */ __exportAll({
  button: () => button,
  buttons: () => buttons,
  code: () => code,
  columns: () => columns,
  cover: () => cover,
  file: () => file,
  gallery: () => gallery,
  group: () => group,
  heading: () => heading,
  html: () => html,
  image: () => image,
  list: () => list,
  mediaText: () => mediaText,
  more: () => more,
  nextpage: () => nextpage,
  paragraph: () => paragraph,
  preformatted: () => preformatted,
  pullquote: () => pullquote,
  quote: () => quote,
  separator: () => separator,
  shortcode: () => shortcode,
  table: () => table,
  verse: () => verse,
});
329
// --- List markup ---
// Greedy: captures everything between the first list opener and the last closer.
const UOL_TAG_PATTERN = /<[uo]l[^>]*>([\s\S]*)<\/[uo]l>/i;
const LI_TAG_PATTERN = /<li[^>]*>([\s\S]*?)<\/li>/i;
const UL_TAG_PATTERN = /<ul[^>]*>([\s\S]*)<\/ul>/i;
const OL_TAG_PATTERN = /<ol[^>]*>([\s\S]*)<\/ol>/i;
const NESTED_LIST_PATTERN = /<[uo]l[^>]*>[\s\S]*<\/[uo]l>/gi;
// --- Paragraphs, links, media, code ---
// NOTE: patterns carrying the `g` flag keep `lastIndex` state between calls.
const P_TAG_PATTERN = /<p[^>]*>([\s\S]*?)<\/p>/gi;
const P_TAG_SINGLE_PATTERN = /<p[^>]*>([\s\S]*?)<\/p>/i;
// Group 1 is the href value. Both quote styles are accepted, matching the
// other attribute patterns in this module; the lookbehinds end the value at
// the quote character that opened it.
const HREF_PATTERN = /href=["']((?<=")[^"]*|(?<=')[^']*)["']/i;
const DATA_ID_PATTERN = /data-id=["'](\d+)["']/i;
const CODE_TAG_PATTERN_SINGLE = /<code[^>]*>([\s\S]*?)<\/code>/i;
// --- Tables ---
const TABLE_TAG_PATTERN = /<table[^>]*>([\s\S]*?)<\/table>/i;
const THEAD_TAG_PATTERN = /<thead[^>]*>([\s\S]*?)<\/thead>/i;
const IMG_TAG_GLOBAL = /<img[^>]+>/gi;
const TABLE_ROW_PATTERN = /<tr[^>]*>([\s\S]*?)<\/tr>/gi;
const TABLE_CELL_PATTERN = /<(th|td)[^>]*>([\s\S]*?)<\/\1>/gi;
const TBODY_TAG_PATTERN = /<tbody[^>]*>([\s\S]*?)<\/tbody>/i;
const CITE_TAG_PATTERN = /<cite[^>]*>([\s\S]*?)<\/cite>/i;
// --- HTML entities ---
const LT_ENTITY_PATTERN = /&lt;/g;
const GT_ENTITY_PATTERN = /&gt;/g;
const AMP_ENTITY_PATTERN$1 = /&amp;/g;
const QUOT_ENTITY_PATTERN = /&quot;/g;
const APOS_ENTITY_PATTERN$1 = /&#039;/g;
const NBSP_ENTITY_PATTERN$1 = /&nbsp;/g;
352
/**
 * core/paragraph → text block with style "normal".
 *
 * A paragraph whose inline content is a single empty span yields no output.
 *
 * @param {{ innerHTML: string }} block - Parsed paragraph block.
 * @param {object} _options - Unused.
 * @param {object} context - Provides `parseInlineContent` and `generateKey`.
 * @returns {object[]} Zero or one block.
 */
const paragraph = (block, _options, context) => {
  const parsed = context.parseInlineContent(block.innerHTML);
  const spans = parsed.children;
  const isEmpty = spans.length === 1 && spans[0]?.text === "";
  if (isEmpty) {
    return [];
  }
  const textBlock = {
    _type: "block",
    _key: context.generateKey(),
    style: "normal",
    children: spans,
  };
  if (parsed.markDefs.length > 0) {
    textBlock.markDefs = parsed.markDefs;
  }
  return [textBlock];
};
367
/**
 * core/heading → text block with style "h1" to "h6".
 *
 * The level comes from the numeric `level` attribute and defaults to 2.
 *
 * @param {{ attrs: object, innerHTML: string }} block - Parsed heading block.
 * @param {object} _options - Unused.
 * @param {object} context - Provides `parseInlineContent` and `generateKey`.
 * @returns {object[]} Exactly one block.
 */
const heading = (block, _options, context) => {
  const level = attrNumber(block.attrs, "level") ?? 2;
  const parsed = context.parseInlineContent(block.innerHTML);
  const headingBlock = {
    _type: "block",
    _key: context.generateKey(),
    style: toHeadingStyle(level),
    children: parsed.children,
  };
  if (parsed.markDefs.length > 0) {
    headingBlock.markDefs = parsed.markDefs;
  }
  return [headingBlock];
};
382
/**
 * core/list → one text block per list item, tagged with `listItem`/`level`.
 *
 * Two serializations are handled: lists whose items are inner
 * `core/list-item` blocks, and lists stored as plain `<ul>`/`<ol>` markup in
 * `innerHTML`. `ordered: true` selects "number" items, anything else
 * "bullet".
 *
 * @param {{ attrs: object, innerBlocks: object[], innerHTML: string }} block
 * @param {object} _options - Unused.
 * @param {object} context - Conversion context passed on to the item parsers.
 * @returns {object[]}
 */
const list = (block, _options, context) => {
  const listItem = block.attrs.ordered === true ? "number" : "bullet";
  if (block.innerBlocks.length > 0) {
    return parseListItemBlocks(block.innerBlocks, listItem, 1, context);
  }
  // Markup path: use the content inside the list element when it can be
  // found, otherwise the raw HTML.
  const listBody = block.innerHTML.match(UOL_TAG_PATTERN)?.[1] || block.innerHTML;
  return parseListItems(listBody, listItem, 1, context);
};
392
/**
 * Convert `core/list-item` inner blocks into list text blocks.
 *
 * Blocks of any other type are ignored. For each item, the content of its
 * first `<li>` becomes a block at `level` (skipped when empty); every nested
 * `core/list` inner block is then converted recursively at `level + 1`, with
 * its own ordered/bullet setting.
 *
 * @param {object[]} innerBlocks - Inner blocks of a list block.
 * @param {"bullet" | "number"} listItem - Item kind for this nesting level.
 * @param {number} level - 1-based nesting depth.
 * @param {object} context - Provides `parseInlineContent` and `generateKey`.
 * @returns {object[]} Blocks in document order.
 */
function parseListItemBlocks(innerBlocks, listItem, level, context) {
  const result = [];
  const listItems = innerBlocks.filter((candidate) => candidate.blockName === "core/list-item");
  for (const item of listItems) {
    const inlineHtml = item.innerHTML.match(LI_TAG_PATTERN)?.[1]?.trim() || "";
    if (inlineHtml) {
      const { children, markDefs } = context.parseInlineContent(inlineHtml);
      const entry = {
        _type: "block",
        _key: context.generateKey(),
        style: "normal",
        listItem,
        level,
        children,
      };
      if (markDefs.length > 0) entry.markDefs = markDefs;
      result.push(entry);
    }
    const sublists = item.innerBlocks.filter((child) => child.blockName === "core/list");
    for (const sublist of sublists) {
      const sublistKind = sublist.attrs.ordered === true ? "number" : "bullet";
      result.push(...parseListItemBlocks(sublist.innerBlocks, sublistKind, level + 1, context));
    }
  }
  return result;
}
422
/**
 * Convert the markup inside a `<ul>`/`<ol>` into list text blocks.
 *
 * For every top-level `<li>`: the item's own text (with any nested list
 * markup removed) becomes a block at `level`; a nested `<ul>` is then parsed
 * as bullet items and a nested `<ol>` as numbered items, both at
 * `level + 1`, in that order.
 *
 * @param {string} html - Inner HTML of a list element.
 * @param {"bullet" | "number"} listItem - Item kind for this nesting level.
 * @param {number} level - 1-based nesting depth.
 * @param {object} context - Provides `parseInlineContent` and `generateKey`.
 * @returns {object[]} Blocks for this list and its nested lists.
 */
function parseListItems(html, listItem, level, context) {
  const output = [];
  for (const itemHtml of extractTopLevelListItems(html)) {
    const bulletSublist = itemHtml.match(UL_TAG_PATTERN)?.[1];
    const numberSublist = itemHtml.match(OL_TAG_PATTERN)?.[1];
    const ownText = itemHtml.replace(NESTED_LIST_PATTERN, "").trim();
    if (ownText) {
      const { children, markDefs } = context.parseInlineContent(ownText);
      const entry = {
        _type: "block",
        _key: context.generateKey(),
        style: "normal",
        listItem,
        level,
        children,
      };
      if (markDefs.length > 0) entry.markDefs = markDefs;
      output.push(entry);
    }
    if (bulletSublist) {
      output.push(...parseListItems(bulletSublist, "bullet", level + 1, context));
    }
    if (numberSublist) {
      output.push(...parseListItems(numberSublist, "number", level + 1, context));
    }
  }
  return output;
}
450
/**
 * Split list markup into the inner HTML of its top-level `<li>` items.
 *
 * A single left-to-right scan tracks how deep inside nested `<li>` elements
 * the cursor is. Nested `<li>`, `<ul>` and `<ol>` tags are copied into the
 * current item, so each returned string still contains its nested lists.
 * Tag names are matched case-insensitively. Text outside any `<li>` is
 * ignored, and items that are empty after trimming are dropped.
 *
 * @param {string} html - Inner HTML of a list element.
 * @returns {string[]} One entry per top-level item, in document order.
 */
function extractTopLevelListItems(html) {
  const collected = [];
  let nesting = 0;
  let buffer = "";
  let insideItem = false;
  // Lower-cased lookahead of `length` characters starting at `position`.
  const peek = (position, length) => html.substring(position, position + length).toLowerCase();

  let pos = 0;
  while (pos < html.length) {
    const three = peek(pos, 3);
    const five = peek(pos, 5);

    if (three === "<li") {
      const tagClose = html.indexOf(">", pos);
      if (tagClose === -1) break;
      if (insideItem) {
        // A nested item: keep its opening tag as part of the current item.
        buffer += html.substring(pos, tagClose + 1);
        nesting++;
      } else {
        insideItem = true;
      }
      pos = tagClose + 1;
      continue;
    }

    if (five === "</li>") {
      if (nesting === 0) {
        collected.push(buffer);
        buffer = "";
        insideItem = false;
      } else {
        buffer += "</li>";
        nesting--;
      }
      pos += 5;
      continue;
    }

    if (three === "<ul" || three === "<ol") {
      const tagClose = html.indexOf(">", pos);
      if (tagClose !== -1) {
        buffer += html.substring(pos, tagClose + 1);
        pos = tagClose + 1;
        continue;
      }
      // Unterminated tag: fall through and treat it as ordinary characters.
    }

    if (five === "</ul>" || five === "</ol>") {
      buffer += html.substring(pos, pos + 5);
      pos += 5;
      continue;
    }

    if (insideItem) buffer += html[pos];
    pos++;
  }

  // Keep whatever an unclosed final item accumulated.
  if (buffer.trim()) collected.push(buffer);
  return collected.filter((entry) => entry.trim().length > 0);
}
505
/**
 * core/quote → one "blockquote" text block per paragraph, optionally followed
 * by a citation line.
 *
 * Every `<p>` in the block's HTML becomes its own blockquote block; when the
 * HTML contains no `<p>`, the whole HTML is converted as one blockquote. A
 * non-empty string `citation` attribute is appended as a "normal" block
 * prefixed with "— ".
 *
 * Paragraphs are enumerated with `matchAll`, which iterates over a copy of
 * the shared global regex. The previous `exec` loop advanced the shared
 * regex's `lastIndex`, so an exception thrown inside the loop left it
 * mid-string and the next conversion silently skipped leading paragraphs.
 *
 * @param {{ attrs: object, innerHTML: string }} block - Parsed quote block.
 * @param {object} _options - Unused.
 * @param {object} context - Provides `parseInlineContent` and `generateKey`.
 * @returns {object[]} Blockquote blocks, plus the citation block if any.
 */
const quote = (block, _options, context) => {
  // Builds one blockquote text block from a piece of inline HTML.
  const toQuoteBlock = (inlineHtml) => {
    const { children, markDefs } = context.parseInlineContent(inlineHtml);
    const quoteBlock = {
      _type: "block",
      _key: context.generateKey(),
      style: "blockquote",
      children,
    };
    if (markDefs.length > 0) quoteBlock.markDefs = markDefs;
    return quoteBlock;
  };

  const blocks = [];
  for (const match of block.innerHTML.matchAll(P_TAG_PATTERN)) {
    blocks.push(toQuoteBlock(match[1] || ""));
  }
  if (blocks.length === 0) {
    blocks.push(toQuoteBlock(block.innerHTML));
  }

  const citation = attrString(block.attrs, "citation");
  if (citation) {
    const { children, markDefs } = context.parseInlineContent(citation);
    const citationBlock = {
      _type: "block",
      _key: context.generateKey(),
      style: "normal",
      children: [
        {
          _type: "span",
          _key: context.generateKey(),
          text: "— ",
        },
        ...children,
      ],
    };
    if (markDefs.length > 0) citationBlock.markDefs = markDefs;
    blocks.push(citationBlock);
  }
  return blocks;
};
552
/**
 * core/image → image block.
 *
 * URL and alt text come from the block attributes when present, otherwise
 * from the `<img>` tag in the block HTML. The asset reference is the
 * `options.mediaMap` entry for the attachment id when one exists; otherwise
 * the id, then the URL, then "" is used (as a string).
 *
 * @param {{ attrs: object, innerHTML: string }} block - Parsed image block.
 * @param {{ mediaMap?: Map<number, string> }} options - Conversion options.
 * @param {object} context - Provides `generateKey`.
 * @returns {object[]} Exactly one image block.
 */
const image = (block, options, context) => {
  const { attrs, innerHTML } = block;
  const wpId = attrNumber(attrs, "id");
  const src = attrString(attrs, "url") ?? extractSrc(innerHTML);
  const alt = attrString(attrs, "alt") ?? extractAlt(innerHTML);
  const caption = extractCaption(innerHTML);
  const alignment = mapAlignment(attrString(attrs, "align"));
  const mappedRef = wpId && options.mediaMap?.get(wpId);
  const asset = {
    _type: "reference",
    _ref: mappedRef || String(wpId || src || ""),
    url: src,
  };
  return [{ _type: "image", _key: context.generateKey(), asset, alt, caption, alignment }];
};
575
/**
 * core/code → code block.
 *
 * The content of the first `<code>` element is entity-decoded and emitted as
 * the code text; when there is no `<code>` element the whole block HTML is
 * used.
 *
 * The fallback uses `??`, not `||`: an empty `<code></code>` captures "",
 * which `||` treated as "no match" and replaced with the entire wrapper
 * markup.
 *
 * @param {{ attrs: object, innerHTML: string }} block - Parsed code block.
 * @param {object} _options - Unused.
 * @param {object} context - Provides `generateKey`.
 * @returns {object[]} Exactly one code block.
 */
const code = (block, _options, context) => {
  const rawCode = block.innerHTML.match(CODE_TAG_PATTERN_SINGLE)?.[1] ?? block.innerHTML;
  return [
    {
      _type: "code",
      _key: context.generateKey(),
      code: decodeHtmlEntities$1(rawCode),
      language: attrString(block.attrs, "language"),
    },
  ];
};
587
/**
 * core/preformatted → code block without a language.
 *
 * The block HTML is reduced to plain text and emitted as the code content.
 *
 * @param {{ innerHTML: string }} block - Parsed preformatted block.
 * @param {object} _options - Unused.
 * @param {object} context - Provides `generateKey`.
 * @returns {object[]} Exactly one code block.
 */
const preformatted = (block, _options, context) => {
  const plainText = extractText(block.innerHTML);
  const codeBlock = { _type: "code", _key: context.generateKey(), code: plainText };
  return [codeBlock];
};
598
/**
 * core/separator / core/spacer → break block with style "lineBreak".
 *
 * The source block's content and attributes are not consulted.
 *
 * @param {object} _block - Unused.
 * @param {object} _options - Unused.
 * @param {object} context - Provides `generateKey`.
 * @returns {object[]} Exactly one break block.
 */
const separator = (_block, _options, context) => [
  { _type: "break", _key: context.generateKey(), style: "lineBreak" },
];
608
/**
 * core/gallery → gallery block holding image nodes.
 *
 * Two serializations are handled:
 * - Inner `core/image` blocks: URL and alt come from the attributes or the
 *   inner HTML, and the caption from `<figcaption>`.
 * - Plain markup (no inner blocks): every `<img>` tag in the gallery HTML
 *   becomes an image, its attachment id read from `data-id`. These images
 *   carry no caption.
 *
 * Asset references resolve through `options.mediaMap` by attachment id,
 * falling back to the id, then the URL, then "".
 *
 * `<img>` tags are enumerated with `matchAll`, so the shared global regex's
 * `lastIndex` is never advanced. With the previous `exec` loop, an exception
 * inside the loop left `lastIndex` mid-string and the next gallery lost its
 * leading images.
 *
 * @param {{ attrs: object, innerBlocks: object[], innerHTML: string }} block
 * @param {{ mediaMap?: Map<number, string> }} options - Conversion options.
 * @param {object} context - Provides `generateKey`.
 * @returns {object[]} Exactly one gallery block.
 */
const gallery = (block, options, context) => {
  // Shared image-node construction for both serializations.
  const buildImage = (wpId, src, alt) => {
    const ref = wpId && options.mediaMap?.get(wpId);
    return {
      _type: "image",
      _key: context.generateKey(),
      asset: {
        _type: "reference",
        _ref: ref || String(wpId || src || ""),
        url: src,
      },
      alt,
    };
  };

  const images = [];
  if (block.innerBlocks.length > 0) {
    for (const innerBlock of block.innerBlocks) {
      if (innerBlock.blockName !== "core/image") continue;
      const imageNode = buildImage(
        attrNumber(innerBlock.attrs, "id"),
        attrString(innerBlock.attrs, "url") ?? extractSrc(innerBlock.innerHTML),
        attrString(innerBlock.attrs, "alt") ?? extractAlt(innerBlock.innerHTML),
      );
      imageNode.caption = extractCaption(innerBlock.innerHTML);
      images.push(imageNode);
    }
  } else {
    for (const [imgHtml] of block.innerHTML.matchAll(IMG_TAG_GLOBAL)) {
      const idText = imgHtml.match(DATA_ID_PATTERN)?.[1];
      const wpId = idText ? Number.parseInt(idText, 10) : void 0;
      images.push(buildImage(wpId, extractSrc(imgHtml), extractAlt(imgHtml)));
    }
  }

  return [
    {
      _type: "gallery",
      _key: context.generateKey(),
      images,
      columns: attrNumber(block.attrs, "columns"),
    },
  ];
};
660
/**
 * core/columns → columns block.
 *
 * Every inner block is treated as one column whose own inner blocks are
 * converted recursively.
 *
 * @param {{ innerBlocks: object[] }} block - Parsed columns block.
 * @param {object} _options - Unused.
 * @param {object} context - Provides `generateKey` and `transformBlocks`.
 * @returns {object[]} Exactly one columns block.
 */
const columns = (block, _options, context) => {
  const columnNodes = [];
  for (const columnBlock of block.innerBlocks) {
    columnNodes.push({
      _type: "column",
      _key: context.generateKey(),
      content: context.transformBlocks(columnBlock.innerBlocks),
    });
  }
  return [{ _type: "columns", _key: context.generateKey(), columns: columnNodes }];
};
675
/**
 * core/group → the converted inner blocks, with no container of their own.
 *
 * @param {{ innerBlocks: object[] }} block - Parsed group block.
 * @param {object} _options - Unused.
 * @param {object} context - Provides `transformBlocks`.
 * @returns {object[]} Whatever `transformBlocks` returns for the inner blocks.
 */
const group = (block, _options, context) => context.transformBlocks(block.innerBlocks);
681
/**
 * core/table → table block.
 *
 * Rows are gathered in document order:
 * - `<thead>` rows, flagged as header rows;
 * - `<tbody>` rows; when there is neither `<thead>` nor a non-empty
 *   `<tbody>`, every `<tr>` found anywhere in the table is used instead;
 * - `<tfoot>` rows, appended as ordinary rows. They were previously dropped
 *   whenever the table had a `<thead>` or `<tbody>`.
 *
 * Nothing is emitted when there is no `<table>` element or no rows.
 *
 * @param {{ innerHTML: string }} block - Parsed table block.
 * @param {object} _options - Unused.
 * @param {object} context - Provides `generateKey` and is passed to the row parser.
 * @returns {object[]} Zero or one table block.
 */
const table = (block, _options, context) => {
  const tableMatch = block.innerHTML.match(TABLE_TAG_PATTERN);
  if (!tableMatch) return [];
  const tableContent = tableMatch[1];
  const theadMatch = tableContent.match(THEAD_TAG_PATTERN);
  const tbodyMatch = tableContent.match(TBODY_TAG_PATTERN);
  const tfootMatch = tableContent.match(/<tfoot[^>]*>([\s\S]*?)<\/tfoot>/i);

  const rows = [];
  if (theadMatch?.[1]) {
    rows.push(...parseTableRows(theadMatch[1], context, true));
  }
  // Without any section markup the rows sit directly inside <table>.
  const scanWholeTable = !tbodyMatch?.[1] && !theadMatch;
  if (tbodyMatch?.[1]) {
    rows.push(...parseTableRows(tbodyMatch[1], context, false));
  } else if (scanWholeTable) {
    rows.push(...parseTableRows(tableContent, context, false));
  }
  // The whole-table scan already covers rows inside <tfoot>; add them
  // separately only when the sections were parsed individually.
  if (!scanWholeTable && tfootMatch?.[1]) {
    rows.push(...parseTableRows(tfootMatch[1], context, false));
  }

  if (rows.length === 0) return [];
  return [
    {
      _type: "table",
      _key: context.generateKey(),
      rows,
      hasHeaderRow: !!theadMatch,
    },
  ];
};
711
/**
 * Convert the `<tr>` rows in a piece of table markup into tableRow nodes.
 *
 * Every `<th>`/`<td>` becomes a tableCell whose inline content is parsed
 * into spans. A cell is flagged as header when it is a `<th>` or when
 * `isHeader` is set; otherwise `isHeader` is left undefined. Rows without
 * cells are skipped.
 *
 * Rows and cells are enumerated with `matchAll`, which works on copies of the
 * shared global regexes. The previous nested `exec` loops advanced the shared
 * `lastIndex` values, so an exception thrown while parsing a cell made the
 * next call skip rows and cells.
 *
 * @param {string} html - Markup containing `<tr>` elements.
 * @param {object} context - Provides `parseInlineContent` and `generateKey`.
 * @param {boolean} isHeader - Treat every cell as a header cell.
 * @returns {object[]} tableRow nodes in document order.
 */
function parseTableRows(html, context, isHeader) {
  const rows = [];
  for (const rowMatch of html.matchAll(TABLE_ROW_PATTERN)) {
    const cells = [];
    for (const cellMatch of rowMatch[1].matchAll(TABLE_CELL_PATTERN)) {
      const isHeaderCell = cellMatch[1].toLowerCase() === "th" || isHeader;
      const { children, markDefs } = context.parseInlineContent(cellMatch[2]);
      cells.push({
        _type: "tableCell",
        _key: context.generateKey(),
        content: children,
        markDefs: markDefs.length > 0 ? markDefs : void 0,
        isHeader: isHeaderCell || void 0,
      });
    }
    if (cells.length > 0) {
      rows.push({
        _type: "tableRow",
        _key: context.generateKey(),
        cells,
      });
    }
  }
  return rows;
}
741
/**
 * Translate a heading level into the matching text-block style.
 *
 * @param {number} level - Heading level.
 * @returns {"h1" | "h2" | "h3" | "h4" | "h5" | "h6"} "h1" to "h6" for the
 *   numbers 1 to 6, and "h2" for anything else.
 */
function toHeadingStyle(level) {
  const styleByLevel = new Map([
    [1, "h1"],
    [2, "h2"],
    [3, "h3"],
    [4, "h4"],
    [5, "h5"],
    [6, "h6"],
  ]);
  return styleByLevel.get(level) ?? "h2";
}
755
/**
 * Pass through the alignment values that are supported and drop the rest.
 *
 * @param {string | undefined} align - Alignment attribute value.
 * @returns {string | undefined} `align` when it is "left", "center", "right",
 *   "wide" or "full"; otherwise undefined.
 */
function mapAlignment(align) {
  const supported = ["left", "center", "right", "wide", "full"];
  return supported.includes(align) ? align : undefined;
}
768
/**
 * Decode the HTML entities handled for code content: `&lt;`, `&gt;`,
 * `&amp;`, `&quot;`, `&#039;` and `&nbsp;` (decoded to a regular space).
 *
 * All entities are decoded in a single pass. The earlier chain of replaces
 * decoded `&amp;` before `&quot;`, `&#039;` and `&nbsp;`, so escaped entity
 * text was decoded twice: `&amp;quot;` (the literal text `&quot;`) came out
 * as `"`.
 *
 * @param {string} html - Entity-encoded text.
 * @returns {string} Text with each supported entity decoded exactly once.
 */
function decodeHtmlEntities$1(html) {
  const replacements = {
    "&lt;": "<",
    "&gt;": ">",
    "&amp;": "&",
    "&quot;": "\"",
    "&#039;": "'",
    "&nbsp;": " ",
  };
  return html.replace(/&(?:lt|gt|amp|quot|#039|nbsp);/g, (entity) => replacements[entity]);
}
774
/**
 * core/button → button block.
 *
 * - `url`: the `url` attribute after scheme sanitization ("" when missing or
 *   rejected).
 * - `text`: plain text of the block HTML, or "Button" when that is empty.
 * - `style`: "outline" or "fill" when `className` contains the matching
 *   `is-style-*` class (outline checked first), otherwise "default".
 *
 * @param {{ attrs: object, innerHTML: string }} block - Parsed button block.
 * @param {object} _options - Unused.
 * @param {object} context - Provides `generateKey`.
 * @returns {object[]} Exactly one button block.
 */
const button = (block, _options, context) => {
  const url = sanitizeHref(attrString(block.attrs, "url"));
  const text = extractText(block.innerHTML).trim() || "Button";
  const className = attrString(block.attrs, "className");
  const styleFromClass = () => {
    if (className?.includes("is-style-outline")) return "outline";
    if (className?.includes("is-style-fill")) return "fill";
    return "default";
  };
  return [{ _type: "button", _key: context.generateKey(), text, url, style: styleFromClass() }];
};
792
/**
 * core/buttons → buttons container block.
 *
 * Every inner `core/button` becomes a button node; other inner blocks are
 * ignored. The container layout is "horizontal" when the `layout` attribute
 * is an object with `type: "flex"`, otherwise "vertical".
 *
 * Inner button URLs now go through `sanitizeHref`, exactly like the
 * standalone `core/button` transformer. Previously the raw attribute was
 * copied, so a `javascript:` URL on a button inside a buttons container
 * bypassed the scheme check. As with `core/button`, a missing or rejected URL
 * is emitted as "".
 *
 * @param {{ attrs: object, innerBlocks: object[] }} block - Parsed buttons block.
 * @param {object} _options - Unused.
 * @param {object} context - Provides `generateKey`.
 * @returns {object[]} Exactly one buttons block.
 */
const buttons = (block, _options, context) => {
  const buttonBlocks = [];
  for (const innerBlock of block.innerBlocks) {
    if (innerBlock.blockName !== "core/button") continue;
    const url = sanitizeHref(attrString(innerBlock.attrs, "url"));
    const text = extractText(innerBlock.innerHTML).trim() || "Button";
    let style = "default";
    const className = attrString(innerBlock.attrs, "className");
    if (className?.includes("is-style-outline")) style = "outline";
    else if (className?.includes("is-style-fill")) style = "fill";
    buttonBlocks.push({
      _type: "button",
      _key: context.generateKey(),
      text,
      url,
      style,
    });
  }
  const layoutObj = attrObject(block.attrs, "layout");
  const layout = layoutObj?.["type"] === "flex" ? "horizontal" : "vertical";
  return [
    {
      _type: "buttons",
      _key: context.generateKey(),
      buttons: buttonBlocks,
      layout,
    },
  ];
};
821
/**
 * core/cover → cover block.
 *
 * - `backgroundImage`: the `url` attribute.
 * - `overlayColor`: `customOverlayColor` when set, else `overlayColor`.
 * - `overlayOpacity`: `dimRatio / 100` when `dimRatio` is a number.
 * - `alignment`: derived from `contentPosition`, checking "left", then
 *   "right", then "center".
 * - `minHeight`: the `minHeight` attribute followed by `minHeightUnit`
 *   (default unit "px").
 * - `content`: the converted inner blocks.
 *
 * `minHeight` is read as a number first, with a string accepted as a
 * fallback. The WordPress cover block declares this attribute as a number,
 * so reading it only as a string always produced undefined.
 *
 * @param {{ attrs: object, innerBlocks: object[] }} block - Parsed cover block.
 * @param {object} _options - Unused.
 * @param {object} context - Provides `generateKey` and `transformBlocks`.
 * @returns {object[]} Exactly one cover block.
 */
const cover = (block, _options, context) => {
  const url = attrString(block.attrs, "url");
  const overlayColor = attrString(block.attrs, "overlayColor");
  const customOverlayColor = attrString(block.attrs, "customOverlayColor");
  const dimRatio = attrNumber(block.attrs, "dimRatio");
  const minHeight = attrNumber(block.attrs, "minHeight") ?? attrString(block.attrs, "minHeight");
  const minHeightUnit = attrString(block.attrs, "minHeightUnit");
  const contentPosition = attrString(block.attrs, "contentPosition");
  const content = context.transformBlocks(block.innerBlocks);

  let alignment;
  if (contentPosition?.includes("left")) alignment = "left";
  else if (contentPosition?.includes("right")) alignment = "right";
  else if (contentPosition?.includes("center")) alignment = "center";

  // A minimum height of 0 or "" counts as "not set".
  let minHeightStr;
  if (minHeight) minHeightStr = `${minHeight}${minHeightUnit || "px"}`;

  return [
    {
      _type: "cover",
      _key: context.generateKey(),
      backgroundImage: url,
      overlayColor: customOverlayColor || overlayColor,
      overlayOpacity: dimRatio !== void 0 ? dimRatio / 100 : void 0,
      content,
      minHeight: minHeightStr,
      alignment,
    },
  ];
};
850
/**
 * core/file → file block.
 *
 * - `url`: the sanitized `href` attribute, or, when that is empty, the
 *   sanitized first `href="…"` found in the block HTML ("" when neither
 *   passes).
 * - `filename`: the `fileName` attribute, or, when that is empty and a URL
 *   exists, the last path segment of the URL up to the first "?".
 * - `showDownloadButton`: true unless the attribute is explicitly `false`.
 *
 * @param {{ attrs: object, innerHTML: string }} block - Parsed file block.
 * @param {object} _options - Unused.
 * @param {object} context - Provides `generateKey`.
 * @returns {object[]} Exactly one file block.
 */
const file = (block, _options, context) => {
  const attrHref = sanitizeHref(attrString(block.attrs, "href"));
  const fileName = attrString(block.attrs, "fileName");
  const showDownloadButton = attrBoolean(block.attrs, "showDownloadButton");

  const url = attrHref || sanitizeHref(block.innerHTML.match(HREF_PATTERN)?.[1]);

  let filename = fileName;
  if (!filename && url) {
    const lastSegment = url.split("/").pop();
    filename = lastSegment?.split("?")[0];
  }

  return [
    {
      _type: "file",
      _key: context.generateKey(),
      url: url || "",
      filename,
      showDownloadButton: showDownloadButton !== false,
    },
  ];
};
869
/**
 * core/pullquote → pullquote block.
 *
 * The quote text is the plain text of the first `<p>` (or of the whole block
 * HTML when there is no `<p>`). The citation is the plain text of the first
 * `<cite>`, falling back to the `citation` attribute. Both are trimmed.
 *
 * @param {{ attrs: object, innerHTML: string }} block - Parsed pullquote block.
 * @param {object} _options - Unused.
 * @param {object} context - Provides `generateKey`.
 * @returns {object[]} Exactly one pullquote block.
 */
const pullquote = (block, _options, context) => {
  const markup = block.innerHTML;
  const paragraphMatch = markup.match(P_TAG_SINGLE_PATTERN);
  const text = extractText(paragraphMatch ? paragraphMatch[1] : markup);
  const citeMatch = markup.match(CITE_TAG_PATTERN);
  let citation;
  if (citeMatch) {
    citation = extractText(citeMatch[1]);
  } else {
    citation = attrString(block.attrs, "citation");
  }
  return [
    {
      _type: "pullquote",
      _key: context.generateKey(),
      text: text.trim(),
      citation: citation?.trim(),
    },
  ];
};
884
/**
 * core/html → htmlBlock carrying the trimmed block HTML unchanged.
 *
 * @param {{ innerHTML: string }} block - Parsed custom-HTML block.
 * @param {object} _options - Unused.
 * @param {object} context - Provides `generateKey`.
 * @returns {object[]} Exactly one htmlBlock tagged with its origin "core/html".
 */
const html = (block, _options, context) => [
  {
    _type: "htmlBlock",
    _key: context.generateKey(),
    html: block.innerHTML.trim(),
    originalBlockName: "core/html",
  },
];
895
/**
 * core/verse → code block with language "text".
 *
 * The block HTML is reduced to plain text and emitted as the code content.
 *
 * @param {{ innerHTML: string }} block - Parsed verse block.
 * @param {object} _options - Unused.
 * @param {object} context - Provides `generateKey`.
 * @returns {object[]} Exactly one code block.
 */
const verse = (block, _options, context) => {
  const plainText = extractText(block.innerHTML);
  const codeBlock = {
    _type: "code",
    _key: context.generateKey(),
    code: plainText,
    language: "text",
  };
  return [codeBlock];
};
907
/**
 * core/more → break block.
 *
 * The emitted style is "lineBreak", the same as for separators; the block
 * content is not consulted and no distinct "read more" marker is produced.
 *
 * @param {object} _block - Unused.
 * @param {object} _options - Unused.
 * @param {object} context - Provides `generateKey`.
 * @returns {object[]} Exactly one break block.
 */
const more = (_block, _options, context) => [
  { _type: "break", _key: context.generateKey(), style: "lineBreak" },
];
917
/**
 * core/nextpage → break block.
 *
 * The emitted style is "lineBreak"; the block content is not consulted and
 * the output does not distinguish a page break from any other break.
 *
 * @param {object} _block - Unused.
 * @param {object} _options - Unused.
 * @param {object} context - Provides `generateKey`.
 * @returns {object[]} Exactly one break block.
 */
const nextpage = (_block, _options, context) => [
  { _type: "break", _key: context.generateKey(), style: "lineBreak" },
];
927
/**
 * core/shortcode → htmlBlock carrying the trimmed block content unchanged,
 * so the shortcode can be dealt with manually later.
 *
 * @param {{ innerHTML: string }} block - Parsed shortcode block.
 * @param {object} _options - Unused.
 * @param {object} context - Provides `generateKey`.
 * @returns {object[]} Exactly one htmlBlock tagged with its origin
 *   "core/shortcode".
 */
const shortcode = (block, _options, context) => [
  {
    _type: "htmlBlock",
    _key: context.generateKey(),
    html: block.innerHTML.trim(),
    originalBlockName: "core/shortcode",
  },
];
938
/**
 * core/media-text → columns block with two columns (media and content).
 *
 * The media column holds an embed node (provider "video") when `mediaType`
 * is "video", otherwise an image node. The content column holds the
 * converted inner blocks. The media column comes first unless
 * `mediaPosition` is "right".
 *
 * The image asset reference now resolves through `options.mediaMap` by
 * attachment id, consistent with the `core/image` and `core/gallery`
 * transformers; previously the map was ignored here. Without a mapping it
 * falls back to the id, then the URL, then "". A missing `options` object is
 * tolerated.
 *
 * @param {{ attrs: object, innerBlocks: object[] }} block - Parsed media-text block.
 * @param {{ mediaMap?: Map<number, string> } | undefined} options - Conversion options.
 * @param {object} context - Provides `generateKey` and `transformBlocks`.
 * @returns {object[]} Exactly one columns block.
 */
const mediaText = (block, options, context) => {
  const mediaId = attrNumber(block.attrs, "mediaId");
  const mediaUrl = attrString(block.attrs, "mediaUrl");
  const mediaType = attrString(block.attrs, "mediaType");
  const mediaPosition = attrString(block.attrs, "mediaPosition");
  const mediaAlt = attrString(block.attrs, "mediaAlt");
  const mappedRef = mediaId && options?.mediaMap?.get(mediaId);

  const mediaBlock = mediaType === "video"
    ? [
        {
          _type: "embed",
          _key: context.generateKey(),
          url: mediaUrl || "",
          provider: "video",
        },
      ]
    : [
        {
          _type: "image",
          _key: context.generateKey(),
          asset: {
            _type: "reference",
            _ref: mappedRef || String(mediaId || mediaUrl || ""),
            url: mediaUrl,
          },
          alt: mediaAlt,
        },
      ];

  const contentBlocks = context.transformBlocks(block.innerBlocks);
  // Column keys are generated in visual (left-to-right) order.
  const orderedContents = mediaPosition === "right"
    ? [contentBlocks, mediaBlock]
    : [mediaBlock, contentBlocks];
  const mediaTextColumns = orderedContents.map((content) => ({
    _type: "column",
    _key: context.generateKey(),
    content,
  }));

  return [
    {
      _type: "columns",
      _key: context.generateKey(),
      columns: mediaTextColumns,
    },
  ];
};
986
+
987
+ //#endregion
988
+ //#region src/transformers/embed.ts
989
+ var embed_exports = /* @__PURE__ */ __exportAll({ // Embed transformers, each exposed through a getter; re-exported at the end of the file as `embedTransformers`.
990
+ audio: () => audio,
991
+ embed: () => embed,
992
+ twitter: () => twitter,
993
+ video: () => video,
994
+ vimeo: () => vimeo,
995
+ youtube: () => youtube
996
+ });
997
+ const IFRAME_SRC_PATTERN = /<iframe[^>]+src=["']([^"']+)["']/i; // Group 1: quoted src value inside an <iframe> tag.
998
+ const VIDEO_SRC_PATTERN = /<video[^>]+src=["']([^"']+)["']/i; // Group 1: quoted src value set directly on a <video> tag.
999
+ const VIDEO_SOURCE_PATTERN = /<source[^>]+src=["']([^"']+)["']/i; // Group 1: quoted src value of a <source> tag (video fallback).
1000
+ const AUDIO_SRC_PATTERN = /<audio[^>]+src=["']([^"']+)["']/i; // Group 1: quoted src value set directly on an <audio> tag.
1001
+ const AUDIO_SOURCE_PATTERN = /<source[^>]+src=["']([^"']+)["']/i; // Same expression as VIDEO_SOURCE_PATTERN; group 1: quoted src value of a <source> tag.
1002
/**
 * core/embed and variants → embed block
 *
 * The URL comes from the `url` attribute, falling back to the src of an
 * <iframe> in the inner HTML. The provider comes from `providerNameSlug`,
 * falling back to detection from that URL. The trimmed inner HTML is kept in
 * `html` (undefined when empty).
 */
const embed = (block, _options, context) => {
  const attrUrl = attrString(block.attrs, "url");
  const slug = attrString(block.attrs, "providerNameSlug");
  const iframeMatch = block.innerHTML.match(IFRAME_SRC_PATTERN);
  const iframeSrc = iframeMatch?.[1];
  const resolvedUrl = attrUrl || iframeSrc || "";

  const key = context.generateKey();
  const provider = slug || detectProvider(resolvedUrl);
  const markup = block.innerHTML.trim() || void 0;

  return [{
    _type: "embed",
    _key: key,
    url: resolvedUrl,
    provider,
    html: markup
  }];
};
1017
/**
 * core-embed/youtube → embed block (delegates to the generic embed transformer)
 */
const youtube = (block, options, context) => embed(block, options, context);
1023
/**
 * core-embed/twitter → embed block (delegates to the generic embed transformer)
 */
const twitter = (block, options, context) => embed(block, options, context);
1029
/**
 * core-embed/vimeo → embed block (delegates to the generic embed transformer)
 */
const vimeo = (block, options, context) => embed(block, options, context);
1035
/**
 * core/video → embed block (self-hosted video)
 *
 * Source lookup order: the `src` attribute, then a src on the <video> tag,
 * then a src on a <source> tag. The provider is always "video".
 */
const video = (block, _options, context) => {
  const markup = block.innerHTML;
  const candidates = [
    attrString(block.attrs, "src"),
    markup.match(VIDEO_SRC_PATTERN)?.[1],
    markup.match(VIDEO_SOURCE_PATTERN)?.[1]
  ];
  const videoSrc = candidates.find(Boolean);

  const embedBlock = {
    _type: "embed",
    _key: context.generateKey(),
    url: videoSrc || "",
    provider: "video",
    html: block.innerHTML.trim() || void 0
  };
  return [embedBlock];
};
1051
/**
 * core/audio → embed block (self-hosted audio)
 *
 * Source lookup order: the `src` attribute, then a src on the <audio> tag,
 * then a src on a <source> tag. The provider is always "audio".
 */
const audio = (block, _options, context) => {
  const markup = block.innerHTML;
  const candidates = [
    attrString(block.attrs, "src"),
    markup.match(AUDIO_SRC_PATTERN)?.[1],
    markup.match(AUDIO_SOURCE_PATTERN)?.[1]
  ];
  const audioSrc = candidates.find(Boolean);

  const embedBlock = {
    _type: "embed",
    _key: context.generateKey(),
    url: audioSrc || "",
    provider: "audio",
    html: block.innerHTML.trim() || void 0
  };
  return [embedBlock];
};
1067
/**
 * Detect embed provider from URL
 *
 * Matching is done on the URL's hostname (exact domain or any subdomain),
 * not on the raw string: a plain substring test would classify
 * "dropbox.com" or "netflix.com" as Twitter because they contain "x.com",
 * and would match provider names appearing in paths or query strings.
 *
 * Absolute, protocol-relative ("//host/...") and scheme-less ("host/...")
 * URLs are accepted.
 *
 * @param url - Embed URL (may be empty or undefined)
 * @returns Provider slug, or undefined when the URL is empty, cannot be
 *   parsed, or belongs to no known provider
 */
function detectProvider(url) {
  if (!url) return void 0;

  // Normalise to an absolute URL so the WHATWG parser can extract the host.
  const trimmed = url.trim().toLowerCase();
  let absolute;
  if (/^[a-z][a-z0-9+.-]*:\/\//.test(trimmed)) absolute = trimmed;
  else if (trimmed.startsWith("//")) absolute = `https:${trimmed}`;
  else absolute = `https://${trimmed}`;

  let hostname;
  try {
    hostname = new URL(absolute).hostname;
  } catch {
    return void 0;
  }

  // Checked in order; first match wins.
  const providerDomains = [
    ["youtube.com", "youtube"],
    ["youtu.be", "youtube"],
    ["vimeo.com", "vimeo"],
    ["twitter.com", "twitter"],
    ["x.com", "twitter"],
    ["instagram.com", "instagram"],
    ["facebook.com", "facebook"],
    ["tiktok.com", "tiktok"],
    ["spotify.com", "spotify"],
    ["soundcloud.com", "soundcloud"],
    ["codepen.io", "codepen"],
    ["gist.github.com", "gist"]
  ];
  for (const [domain, provider] of providerDomains) {
    // The leading dot keeps "netflix.com" from matching "x.com".
    if (hostname === domain || hostname.endsWith(`.${domain}`)) return provider;
  }
  return void 0;
}
1084
+
1085
+ //#endregion
1086
+ //#region src/transformers/index.ts
1087
+ /**
1088
+ * Default block transformers for core WordPress blocks
+ *
+ * Keys are Gutenberg block names, values are transformer functions.
+ * getTransformer consults this table after any caller-supplied custom
+ * transformers; names missing from both fall through to fallbackTransformer.
1089
+ */
1090
+ const defaultTransformers = {
1091
+ "core/paragraph": paragraph,
1092
+ "core/heading": heading,
1093
+ "core/list": list,
1094
+ "core/quote": quote,
1095
+ "core/code": code,
1096
+ "core/preformatted": preformatted,
1097
+ "core/pullquote": pullquote,
1098
+ "core/verse": verse,
1099
+ "core/image": image,
1100
+ "core/gallery": gallery,
1101
+ "core/file": file,
1102
+ "core/media-text": mediaText,
1103
+ "core/cover": cover,
1104
+ "core/columns": columns,
1105
+ "core/group": group,
1106
+ "core/separator": separator,
1107
+ "core/spacer": separator, // spacer blocks reuse the separator transformer
1108
+ "core/table": table,
1109
+ "core/buttons": buttons,
1110
+ "core/button": button,
1111
+ "core/more": more,
1112
+ "core/nextpage": nextpage,
1113
+ "core/html": html,
1114
+ "core/shortcode": shortcode,
1115
+ "core/embed": embed,
1116
+ "core/video": video,
1117
+ "core/audio": audio,
1118
+ "core-embed/youtube": youtube, // youtube/twitter/vimeo are wrappers that delegate to embed
1119
+ "core-embed/twitter": twitter,
1120
+ "core-embed/vimeo": vimeo,
1121
+ "core-embed/facebook": embed, // remaining core-embed/* names map to embed directly
1122
+ "core-embed/instagram": embed,
1123
+ "core-embed/soundcloud": embed,
1124
+ "core-embed/spotify": embed
1125
+ };
1126
/**
 * Fallback transformer for unknown blocks
 *
 * - Blocks with inner blocks are unwrapped: only the transformed children
 *   are returned.
 * - Blocks with neither inner blocks nor non-whitespace HTML yield nothing.
 * - Anything else is stored as an htmlBlock carrying the original HTML,
 *   block name and (when non-empty) attributes for manual review.
 */
const fallbackTransformer = (block, _options, context) => {
  const hasMarkup = block.innerHTML.trim() !== "";
  const hasChildren = block.innerBlocks.length > 0;

  if (hasChildren) return context.transformBlocks(block.innerBlocks);
  if (!hasMarkup) return [];

  const key = context.generateKey();
  const hasAttrs = Object.keys(block.attrs).length > 0;
  return [{
    _type: "htmlBlock",
    _key: key,
    html: block.innerHTML,
    originalBlockName: block.blockName,
    originalAttrs: hasAttrs ? block.attrs : void 0
  }];
};
1141
/**
 * Get transformer for a block
 *
 * Resolution order: caller-supplied custom transformers, then the default
 * table, then the fallback. A missing block name goes straight to the
 * fallback.
 */
function getTransformer(blockName, customTransformers) {
  if (!blockName) return fallbackTransformer;
  return customTransformers?.[blockName]
    || defaultTransformers[blockName]
    || fallbackTransformer;
}
1150
+
1151
+ //#endregion
1152
+ //#region src/index.ts
1153
+ /**
1154
+ * Gutenberg to Portable Text Converter
1155
+ *
1156
+ * Converts WordPress Gutenberg block content to Portable Text format.
1157
+ * Uses @wordpress/block-serialization-default-parser to parse the hybrid
1158
+ * HTML+JSON format that WordPress uses.
1159
+ */
1160
+ const BLOCK_ELEMENT_PATTERN = /<(p|h[1-6]|blockquote|pre|ul|ol|figure|div|hr)[^>]*>([\s\S]*?)<\/\1>|<(hr|br)\s*\/?>|<img\s+[^>]+\/?>/gu; // Paired block tag (group 1: tag, group 2: inner HTML), void hr/br (group 3), or a bare <img>. Global flag: keeps lastIndex state across exec() calls.
1161
+ const LINKED_IMAGE_PATTERN = /<a\s+[^>]*href=["']([^"']+)["'][^>]*>\s*<img\s+([^>]+)\/?>\s*<\/a>/gu; // <a> wrapping only an <img>. Group 1: href value, group 2: the img attribute string.
1162
+ const STANDALONE_IMAGE_PATTERN = /<img\s+[^>]+\/?>/gu; // Any <img> tag that has at least one attribute.
1163
+ const IMG_TAG_PATTERN = /<img[^>]+>/i; // Non-global, case-insensitive <img> tag match.
1164
+ const SRC_ATTR_PATTERN = /src=["']([^"']+)["']/i; // Group 1: non-empty quoted src value.
1165
+ const ALT_ATTR_PATTERN = /alt=["']([^"']*)["']/i; // Group 1: quoted alt value (may be empty); stops at the first quote character of either kind.
1166
+ const LIST_ITEM_PATTERN = /<li[^>]*>([\s\S]*?)<\/li>/gu; // Group 1: inner HTML of each <li>.
1167
+ const CODE_TAG_PATTERN = /<code[^>]*>([\s\S]*?)<\/code>/i; // Group 1: inner HTML of a <code> element.
1168
+ const HTML_TAG_PATTERN = /<[^>]+>/g; // Any HTML tag.
1169
+ const FIGCAPTION_TAG_PATTERN = /<figcaption[^>]*>([\s\S]*?)<\/figcaption>/i; // Group 1: inner HTML of a <figcaption>.
1170
+ const AMP_ENTITY_PATTERN = /&amp;/g; // Named ampersand entity.
1171
+ const LESS_THAN_ENTITY_PATTERN = /&lt;/g; // Named less-than entity.
1172
+ const GREATER_THAN_ENTITY_PATTERN = /&gt;/g; // Named greater-than entity.
1173
+ const QUOTE_ENTITY_PATTERN = /&quot;/g; // Named double-quote entity.
1174
+ const APOS_ENTITY_PATTERN = /&#039;/g; // Zero-padded decimal apostrophe entity only.
1175
+ const NUMERIC_AMP_ENTITY_PATTERN = /&#0?38;/g; // Decimal ampersand entity, with or without one leading zero.
1176
+ const HEX_AMP_ENTITY_PATTERN = /&#x26;/gi; // Hexadecimal ampersand entity, case-insensitive.
1177
+ const NBSP_ENTITY_PATTERN = /&nbsp;/g; // Named non-breaking-space entity.
1178
/**
 * Default key generator
 *
 * Returns a function producing keys of the form `key-<n>-<suffix>`, where
 * <n> is a per-generator counter starting at 1 and <suffix> is a short
 * base-36 string derived from Math.random().
 */
function createKeyGenerator() {
  let issued = 0;
  return function nextKey() {
    issued += 1;
    const suffix = Math.random().toString(36).substring(2, 7);
    return `key-${issued}-${suffix}`;
  };
}
1188
/**
 * Normalize parsed blocks from the WP parser into our GutenbergBlock type.
 *
 * Only the five known fields are copied. Null/undefined `attrs` become an
 * empty object, and `innerBlocks` are normalized recursively.
 */
function normalizeBlocks(blocks) {
  const normalizeOne = ({ blockName, attrs, innerHTML, innerBlocks, innerContent }) => ({
    blockName,
    attrs: attrs ?? {},
    innerHTML,
    innerBlocks: normalizeBlocks(innerBlocks),
    innerContent
  });
  return blocks.map((block) => normalizeOne(block));
}
1202
/**
 * Convert WordPress Gutenberg content to Portable Text
 *
 * Content without any `<!-- wp:` block comment is treated as classic-editor
 * HTML and routed through htmlToPortableText instead of the block parser.
 *
 * @param content - WordPress post content (HTML with Gutenberg block comments)
 * @param options - Conversion options
 * @returns Array of Portable Text blocks (empty for blank content)
 *
 * @example
 * ```ts
 * const portableText = gutenbergToPortableText(`
 *   <!-- wp:paragraph -->
 *   <p>Hello <strong>world</strong>!</p>
 *   <!-- /wp:paragraph -->
 * `);
 * // → [{ _type: "block", style: "normal", children: [...] }]
 * ```
 */
function gutenbergToPortableText(content, options = {}) {
  const isBlank = !content || !content.trim();
  if (isBlank) return [];

  const hasBlockComments = content.includes("<!-- wp:");
  if (!hasBlockComments) return htmlToPortableText(content, options);

  const parsedBlocks = normalizeBlocks(parse(content));
  const generateKey = options.keyGenerator || createKeyGenerator();
  const context = createTransformContext(options, generateKey);
  return parsedBlocks.flatMap((block) => transformBlock(block, options, context));
}
1226
/**
 * Build a Portable Text image block from an <img> tag or its attribute string.
 * Returns null (without consuming a key) when no src attribute is found.
 */
function createHtmlImageBlock(imgMarkup, generateKey, link) {
  const srcMatch = imgMarkup.match(SRC_ATTR_PATTERN);
  const altMatch = imgMarkup.match(ALT_ATTR_PATTERN);
  if (!srcMatch?.[1]) return null;
  const imgUrl = decodeUrlEntities(srcMatch[1]);
  const imageBlock = {
    _type: "image",
    _key: generateKey(),
    asset: {
      _type: "reference",
      _ref: imgUrl,
      url: imgUrl
    },
    alt: altMatch?.[1]
  };
  if (link !== void 0) imageBlock.link = link;
  return imageBlock;
}

/**
 * Build a Portable Text text block from inline HTML.
 * With `skipIfBlank`, returns null (without consuming a block key) when no
 * child span contains non-whitespace text.
 */
function createHtmlTextBlock(inlineHtml, generateKey, { style = "normal", listItem, skipIfBlank = false } = {}) {
  const { children, markDefs } = parseInlineContent(inlineHtml, generateKey);
  if (skipIfBlank && !children.some((child) => child.text.trim())) return null;
  return {
    _type: "block",
    _key: generateKey(),
    style,
    ...(listItem ? { listItem, level: 1 } : {}),
    children,
    markDefs: markDefs.length > 0 ? markDefs : void 0
  };
}

/**
 * Convert plain HTML (classic editor) to Portable Text
 *
 * The HTML is scanned for top-level block elements (p, div, headings,
 * blockquote, pre, ul/ol, figure, hr, bare img). Text found between or after
 * those elements becomes a "normal" block when it contains visible text.
 * Inside p/div, images are emitted before the remaining text.
 *
 * Scanning uses String.prototype.matchAll rather than exec() loops, so the
 * shared module-level global regexes are never mutated. An exception thrown
 * mid-conversion can therefore no longer leave a stale lastIndex that would
 * make the next call skip content.
 *
 * @param html - Classic-editor HTML
 * @param options - Conversion options; `keyGenerator` overrides the default key generator
 * @returns Array of Portable Text blocks
 */
function htmlToPortableText(html, options = {}) {
  const generateKey = options.keyGenerator || createKeyGenerator();
  const blocks = [];
  const pushIfPresent = (block) => {
    if (block) blocks.push(block);
  };
  let lastIndex = 0;

  for (const match of html.matchAll(BLOCK_ELEMENT_PATTERN)) {
    const fullMatch = match[0];
    const tag = (match[1] || match[3] || "").toLowerCase();
    const content = match[2] || "";

    // Loose text between the previous block element and this one.
    const between = html.slice(lastIndex, match.index).trim();
    if (between) pushIfPresent(createHtmlTextBlock(between, generateKey, { skipIfBlank: true }));
    lastIndex = match.index + fullMatch.length;

    if (fullMatch.toLowerCase().startsWith("<img")) {
      pushIfPresent(createHtmlImageBlock(fullMatch, generateKey));
      continue;
    }

    switch (tag) {
      case "p":
      case "div": {
        // Linked images first; remember their spans so the standalone pass
        // does not emit the same <img> twice.
        const linkedRanges = [];
        for (const linked of content.matchAll(LINKED_IMAGE_PATTERN)) {
          const linkUrl = decodeUrlEntities(linked[1]);
          pushIfPresent(createHtmlImageBlock(linked[2], generateKey, linkUrl));
          linkedRanges.push({
            start: linked.index,
            end: linked.index + linked[0].length
          });
        }
        for (const img of content.matchAll(STANDALONE_IMAGE_PATTERN)) {
          const insideLink = linkedRanges.some((range) => img.index >= range.start && img.index < range.end);
          if (insideLink) continue;
          pushIfPresent(createHtmlImageBlock(img[0], generateKey));
        }
        const textContent = content.replace(LINKED_IMAGE_PATTERN, "").replace(STANDALONE_IMAGE_PATTERN, "").trim();
        if (textContent) pushIfPresent(createHtmlTextBlock(textContent, generateKey, { skipIfBlank: true }));
        break;
      }
      case "h1":
      case "h2":
      case "h3":
      case "h4":
      case "h5":
      case "h6":
      case "blockquote":
        // The tag name doubles as the Portable Text style.
        blocks.push(createHtmlTextBlock(content, generateKey, { style: tag }));
        break;
      case "pre": {
        const code = content.match(CODE_TAG_PATTERN)?.[1] || content;
        blocks.push({
          _type: "code",
          _key: generateKey(),
          code: decodeHtmlEntities(code)
        });
        break;
      }
      case "ul":
      case "ol": {
        const listItem = tag === "ol" ? "number" : "bullet";
        for (const item of content.matchAll(LIST_ITEM_PATTERN)) {
          blocks.push(createHtmlTextBlock(item[1] || "", generateKey, { listItem }));
        }
        break;
      }
      case "hr":
        blocks.push({
          _type: "break",
          _key: generateKey(),
          style: "lineBreak"
        });
        break;
      case "figure": {
        const imgMatch = content.match(IMG_TAG_PATTERN);
        if (imgMatch) {
          const srcMatch = imgMatch[0].match(SRC_ATTR_PATTERN);
          const altMatch = imgMatch[0].match(ALT_ATTR_PATTERN);
          const captionMatch = content.match(FIGCAPTION_TAG_PATTERN);
          // Unlike bare images, a figure is kept even without a src.
          const imgUrl = srcMatch?.[1] ? decodeUrlEntities(srcMatch[1]) : "";
          blocks.push({
            _type: "image",
            _key: generateKey(),
            asset: {
              _type: "reference",
              _ref: imgUrl,
              url: imgUrl || void 0
            },
            alt: altMatch?.[1],
            caption: captionMatch?.[1]?.replace(HTML_TAG_PATTERN, "").trim()
          });
        }
        break;
      }
    }
  }

  // Trailing text after the last block element.
  const remaining = html.slice(lastIndex).trim();
  if (remaining) pushIfPresent(createHtmlTextBlock(remaining, generateKey, { skipIfBlank: true }));
  return blocks;
}
1426
/**
 * Create transform context for recursive block transformation
 *
 * The context exposes the key generator, an inline-HTML parser bound to that
 * generator, and `transformBlocks`, which re-enters transformBlock with this
 * same context so nested blocks share one key sequence.
 */
function createTransformContext(options, generateKey) {
  const context = { generateKey };
  context.parseInlineContent = (html) => parseInlineContent(html, generateKey);
  context.transformBlocks = (blocks) => {
    return blocks.flatMap((block) => transformBlock(block, options, context));
  };
  return context;
}
1437
/**
 * Transform a single block
 *
 * Looks up the transformer for the block's name (custom transformers take
 * precedence) and returns whatever it produces.
 */
function transformBlock(block, options, context) {
  const transformer = getTransformer(block.blockName, options.customTransformers);
  return transformer(block, options, context);
}
1443
/**
 * Decode HTML entities
 *
 * Decodes &lt; &gt; &amp; &quot; &#039; &#38; &#038; &#x26; and &nbsp; in a
 * single pass. A single pass matters: chained replaces decode twice, turning
 * escaped entity text such as "&amp;quot;" into a quote character instead of
 * the literal text "&quot;".
 *
 * @param html - Text containing HTML entities
 * @returns The text with the supported entities decoded exactly once
 */
function decodeHtmlEntities(html) {
  const decodedByEntity = {
    "&lt;": "<",
    "&gt;": ">",
    "&amp;": "&",
    "&quot;": "\"",
    "&#039;": "'",
    "&#38;": "&",
    "&#038;": "&",
    "&#x26;": "&",
    "&nbsp;": " "
  };
  // Only the hex form is case-insensitive, so lower-casing the match is
  // enough to normalise "&#X26;" for the table lookup.
  return html.replace(
    /&(?:lt|gt|amp|quot|nbsp|#039|#0?38|#[xX]26);/g,
    (entity) => decodedByEntity[entity.toLowerCase()]
  );
}
1449
/**
 * Decode HTML entities in URLs (used for image src attributes)
 *
 * Only ampersand entities are decoded: &amp; &#38; &#038; and &#x26;. This
 * is done in a single pass so an escaped entity such as "&amp;#38;" is
 * decoded once (to "&#38;") rather than twice.
 *
 * @param url - URL as it appears in an HTML attribute
 * @returns The URL with ampersand entities decoded
 */
function decodeUrlEntities(url) {
  return url.replace(/&(?:amp|#0?38|#[xX]26);/g, "&");
}
1455
/**
 * Parse Gutenberg blocks without converting to Portable Text
 * Useful for inspection and debugging
 *
 * Blank or missing content yields an empty array; otherwise the parser
 * output is returned after normalization.
 */
function parseGutenbergBlocks(content) {
  const isBlank = !content || !content.trim();
  if (isBlank) return [];
  const parsed = parse(content);
  return normalizeBlocks(parsed);
}
1463
+
1464
+ //#endregion
1465
+ export { core_exports as coreTransformers, defaultTransformers, embed_exports as embedTransformers, extractAlt, extractCaption, extractSrc, extractText, fallbackTransformer, gutenbergToPortableText, htmlToPortableText, parseGutenbergBlocks, parseInlineContent };
1466
+ //# sourceMappingURL=index.mjs.map