@wdprlib/parser 0.1.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -77,6 +77,7 @@ var import_ast = require("@wdprlib/ast");
77
77
  class Lexer {
78
78
  state;
79
79
  options;
80
+ splitBlockClosePositions = new Set;
80
81
  constructor(source, options = {}) {
81
82
  this.options = {
82
83
  trackPositions: options.trackPositions ?? true
@@ -103,6 +104,37 @@ class Lexer {
103
104
  current() {
104
105
  return this.state.source[this.state.pos] ?? "";
105
106
  }
107
+ findInvalidAnchorNameEnd() {
108
+ const src = this.state.source;
109
+ const pos = this.state.pos;
110
+ if (src[pos] !== "[" || src[pos + 1] !== "[" || src[pos + 2] !== "#") {
111
+ return null;
112
+ }
113
+ if (src[pos + 3] !== " ") {
114
+ return null;
115
+ }
116
+ let i = pos + 4;
117
+ while (i < src.length && src[i] === " ") {
118
+ i++;
119
+ }
120
+ let foundInvalid = false;
121
+ while (i < src.length) {
122
+ const ch = src[i];
123
+ if (ch === `
124
+ `)
125
+ return null;
126
+ if (ch === "]" && src[i + 1] === "]") {
127
+ return foundInvalid ? i : null;
128
+ }
129
+ const code = ch.charCodeAt(0);
130
+ const isValid = code >= 48 && code <= 57 || code >= 65 && code <= 90 || code >= 97 && code <= 122 || code === 45 || code === 95 || code === 46 || code === 37;
131
+ if (!isValid) {
132
+ foundInvalid = true;
133
+ }
134
+ i++;
135
+ }
136
+ return null;
137
+ }
106
138
  match(pattern) {
107
139
  for (let i = 0;i < pattern.length; i++) {
108
140
  if (this.state.source[this.state.pos + i] !== pattern[i]) {
@@ -172,6 +204,13 @@ class Lexer {
172
204
  return;
173
205
  }
174
206
  if (this.match("[[")) {
207
+ const invalidEnd = this.findInvalidAnchorNameEnd();
208
+ if (invalidEnd !== null) {
209
+ this.splitBlockClosePositions.add(invalidEnd);
210
+ this.advance(1);
211
+ this.addToken("TEXT", "[");
212
+ return;
213
+ }
175
214
  this.advance(2);
176
215
  this.addToken("BLOCK_OPEN", "[[");
177
216
  return;
@@ -182,6 +221,14 @@ class Lexer {
182
221
  return;
183
222
  }
184
223
  if (this.match("]]")) {
224
+ if (this.splitBlockClosePositions.has(this.state.pos)) {
225
+ this.splitBlockClosePositions.delete(this.state.pos);
226
+ this.advance(1);
227
+ this.addToken("BRACKET_CLOSE", "]");
228
+ this.advance(1);
229
+ this.addToken("TEXT", "]");
230
+ return;
231
+ }
185
232
  this.advance(2);
186
233
  this.addToken("BLOCK_CLOSE", "]]");
187
234
  return;
@@ -216,7 +263,7 @@ class Lexer {
216
263
  this.addToken("BOLD_MARKER", "**");
217
264
  return;
218
265
  }
219
- if (isLineStart && this.match("---")) {
266
+ if (isLineStart && this.match("----")) {
220
267
  let dashes = "";
221
268
  while (this.current() === "-") {
222
269
  dashes += this.advance();
@@ -431,6 +478,11 @@ class Lexer {
431
478
  this.addToken("BACKSLASH", "\\");
432
479
  return;
433
480
  }
481
+ if (char.charCodeAt(0) === 57344) {
482
+ this.advance();
483
+ this.addToken("BACKSLASH_BREAK", char);
484
+ return;
485
+ }
434
486
  if (this.isAlphanumeric(char)) {
435
487
  let ident = "";
436
488
  while (!this.isAtEnd() && this.isAlphanumeric(this.current())) {
@@ -470,7 +522,7 @@ function substitute(text) {
470
522
  `);
471
523
  result = replaceLeadingSpaces(result);
472
524
  result = result.replace(WHITESPACE_ONLY_LINE, "");
473
- result = result.replace(CONCAT_LINES, "");
525
+ result = result.replace(CONCAT_LINES, String.fromCharCode(57344));
474
526
  result = result.replace(TABS, " ");
475
527
  result = result.replace(NULL_CHARS, " ");
476
528
  result = result.replace(LEADING_NEWLINES, "");
@@ -640,9 +692,42 @@ function parseInlineUntil(ctx, endType) {
640
692
  isInvalidBlockOpen = true;
641
693
  }
642
694
  }
695
+ let skipWhitespace = 0;
696
+ while (ctx.tokens[afterOpen + skipWhitespace]?.type === "WHITESPACE") {
697
+ skipWhitespace++;
698
+ }
699
+ const blockNameToken = ctx.tokens[afterOpen + skipWhitespace];
700
+ if (blockNameToken && (blockNameToken.type === "TEXT" || blockNameToken.type === "IDENTIFIER") && blockNameToken.value.toLowerCase() === "footnoteblock" && ctx.footnoteBlockParsed) {
701
+ isInvalidBlockOpen = true;
702
+ }
703
+ }
704
+ let isInvalidHeading = false;
705
+ if (nextMeaningfulToken?.type === "HEADING_MARKER") {
706
+ const markerLen = nextMeaningfulToken.value.length;
707
+ const afterMarkerPos = pos + lookAhead + 1;
708
+ const afterMarker = ctx.tokens[afterMarkerPos];
709
+ if (markerLen > 6) {
710
+ isInvalidHeading = true;
711
+ } else if (afterMarker?.type === "STAR") {
712
+ const afterStar = ctx.tokens[afterMarkerPos + 1];
713
+ if (afterStar?.type !== "WHITESPACE") {
714
+ isInvalidHeading = true;
715
+ }
716
+ } else if (afterMarker?.type !== "WHITESPACE") {
717
+ isInvalidHeading = true;
718
+ }
643
719
  }
644
- const isBlockStart = nextMeaningfulToken && BLOCK_START_TOKENS.includes(nextMeaningfulToken.type) && nextMeaningfulToken.lineStart && !isOrphanCloseSpan && !isAnchorName && !isInvalidBlockOpen;
720
+ const isBlockStart = nextMeaningfulToken && BLOCK_START_TOKENS.includes(nextMeaningfulToken.type) && nextMeaningfulToken.lineStart && !isOrphanCloseSpan && !isAnchorName && !isInvalidBlockOpen && !isInvalidHeading;
645
721
  if (!nextMeaningfulToken || nextMeaningfulToken.type === "NEWLINE" || nextMeaningfulToken.type === "EOF" || isBlockStart) {
722
+ if (isBlockStart && nodes.length > 0) {
723
+ const nextPos = pos + lookAhead;
724
+ const shouldPreserve = ctx.blockRules.some((rule) => rule.preservesPrecedingLineBreak && rule.isStartPattern?.(ctx, nextPos));
725
+ if (shouldPreserve) {
726
+ const lb = { element: "line-break" };
727
+ lb._preservedTrailingBreak = true;
728
+ nodes.push(lb);
729
+ }
730
+ }
646
731
  consumed++;
647
732
  if (nextMeaningfulToken?.type === "NEWLINE") {
648
733
  consumed++;
@@ -703,7 +788,10 @@ var headingRule = {
703
788
  if (ctx.tokens[pos]?.type !== "WHITESPACE") {
704
789
  return { success: false };
705
790
  }
706
- const depth = Math.min(marker.value.length, 6);
791
+ if (marker.value.length > 6) {
792
+ return { success: false };
793
+ }
794
+ const depth = marker.value.length;
707
795
  while (ctx.tokens[pos]?.type === "WHITESPACE") {
708
796
  pos++;
709
797
  consumed++;
@@ -1076,10 +1164,6 @@ function filterUnsafeAttributes(attrs) {
1076
1164
  function parseBlockName(ctx, startPos) {
1077
1165
  let pos = startPos;
1078
1166
  let consumed = 0;
1079
- while (ctx.tokens[pos]?.type === "WHITESPACE") {
1080
- pos++;
1081
- consumed++;
1082
- }
1083
1167
  const token = ctx.tokens[pos];
1084
1168
  if (!token || token.type !== "TEXT" && token.type !== "IDENTIFIER") {
1085
1169
  return null;
@@ -1156,79 +1240,6 @@ function parseBlocksUntil(ctx, closeCondition) {
1156
1240
  }
1157
1241
  return { elements, consumed };
1158
1242
  }
1159
- function parseInlineContentUntil(ctx, closeCondition) {
1160
- const elements = [];
1161
- let consumed = 0;
1162
- let pos = ctx.pos;
1163
- const { blockRules, inlineRules } = ctx;
1164
- while (pos < ctx.tokens.length) {
1165
- const token = ctx.tokens[pos];
1166
- if (!token || token.type === "EOF") {
1167
- break;
1168
- }
1169
- const checkCtx = { ...ctx, pos };
1170
- if (closeCondition(checkCtx)) {
1171
- break;
1172
- }
1173
- if (token.type === "WHITESPACE" && token.lineStart) {
1174
- pos++;
1175
- consumed++;
1176
- continue;
1177
- }
1178
- if (token.type === "NEWLINE") {
1179
- pos++;
1180
- consumed++;
1181
- while (ctx.tokens[pos]?.type === "NEWLINE") {
1182
- pos++;
1183
- consumed++;
1184
- }
1185
- const nextToken = ctx.tokens[pos];
1186
- if (nextToken?.type === "BLOCK_OPEN" || nextToken?.type === "BLOCK_END_OPEN" || nextToken?.type === "EOF" || !nextToken) {
1187
- continue;
1188
- }
1189
- elements.push({ element: "line-break" });
1190
- continue;
1191
- }
1192
- let matched = false;
1193
- const blockCtx = { ...ctx, pos };
1194
- for (const rule of blockRules) {
1195
- if (canApplyBlockRule(rule, token)) {
1196
- const result = rule.parse(blockCtx);
1197
- if (result.success) {
1198
- elements.push(...result.elements);
1199
- consumed += result.consumed;
1200
- pos += result.consumed;
1201
- matched = true;
1202
- break;
1203
- }
1204
- }
1205
- }
1206
- if (matched)
1207
- continue;
1208
- const inlineCtx = { ...ctx, pos };
1209
- for (const rule of inlineRules) {
1210
- if (canApplyInlineRule(rule, token)) {
1211
- const result = rule.parse(inlineCtx);
1212
- if (result.success) {
1213
- elements.push(...result.elements);
1214
- consumed += result.consumed;
1215
- pos += result.consumed;
1216
- matched = true;
1217
- break;
1218
- }
1219
- }
1220
- }
1221
- if (!matched) {
1222
- elements.push({ element: "text", data: token.value });
1223
- consumed++;
1224
- pos++;
1225
- }
1226
- }
1227
- while (elements.length > 0 && elements[elements.length - 1]?.element === "line-break") {
1228
- elements.pop();
1229
- }
1230
- return { elements, consumed };
1231
- }
1232
1243
  function parseAttributes(ctx, startPos) {
1233
1244
  const attrs = {};
1234
1245
  let pos = startPos;
@@ -1377,7 +1388,7 @@ function isLiOpen(ctx, pos) {
1377
1388
  const nameResult = parseBlockName(ctx, pos + 1);
1378
1389
  if (!nameResult)
1379
1390
  return null;
1380
- if (nameResult.name === "li" || nameResult.name === "li_") {
1391
+ if (nameResult.name === "li") {
1381
1392
  return { name: nameResult.name, consumed: 1 + nameResult.consumed };
1382
1393
  }
1383
1394
  return null;
@@ -1410,7 +1421,6 @@ function parseLiItem(ctx, startPos, listType) {
1410
1421
  const liOpen = isLiOpen(ctx, pos);
1411
1422
  if (!liOpen)
1412
1423
  return null;
1413
- const isParagraphStrip = liOpen.name === "li_";
1414
1424
  pos += liOpen.consumed;
1415
1425
  consumed += liOpen.consumed;
1416
1426
  const attrResult = parseAttributes(ctx, pos);
@@ -1456,13 +1466,23 @@ function parseLiItem(ctx, startPos, listType) {
1456
1466
  if (token.type === "NEWLINE") {
1457
1467
  pos++;
1458
1468
  consumed++;
1469
+ let consecutiveNewlines = 1;
1459
1470
  while (ctx.tokens[pos]?.type === "NEWLINE") {
1460
1471
  pos++;
1461
1472
  consumed++;
1473
+ consecutiveNewlines++;
1462
1474
  }
1463
- if (isParagraphStrip && !isLiClose(ctx, pos) && !isListClose(ctx, pos, listType) && !isNestedListOpen(ctx, pos) && ctx.tokens[pos]?.type !== "EOF") {
1475
+ while (ctx.tokens[pos]?.type === "WHITESPACE" && ctx.tokens[pos]?.lineStart) {
1476
+ pos++;
1477
+ consumed++;
1478
+ }
1479
+ const atCloseTag = isLiClose(ctx, pos) || isListClose(ctx, pos, listType) || ctx.tokens[pos]?.type === "EOF";
1480
+ if (consecutiveNewlines === 1 && contentElements.length > 0) {
1464
1481
  contentElements.push({ element: "line-break" });
1465
1482
  }
1483
+ if (atCloseTag) {
1484
+ continue;
1485
+ }
1466
1486
  continue;
1467
1487
  }
1468
1488
  let matched = false;
@@ -1505,6 +1525,57 @@ function parseLiItem(ctx, startPos, listType) {
1505
1525
  const closeConsumed = consumeCloseTag(ctx, pos);
1506
1526
  consumed += closeConsumed;
1507
1527
  pos += closeConsumed;
1528
+ while (ctx.tokens[pos]?.type === "NEWLINE") {
1529
+ pos++;
1530
+ consumed++;
1531
+ }
1532
+ while (ctx.tokens[pos]?.type === "WHITESPACE") {
1533
+ pos++;
1534
+ consumed++;
1535
+ }
1536
+ while (pos < ctx.tokens.length) {
1537
+ const tok = ctx.tokens[pos];
1538
+ if (!tok || tok.type === "EOF")
1539
+ break;
1540
+ if (tok.type === "NEWLINE") {
1541
+ pos++;
1542
+ consumed++;
1543
+ while (ctx.tokens[pos]?.type === "NEWLINE") {
1544
+ pos++;
1545
+ consumed++;
1546
+ }
1547
+ while (ctx.tokens[pos]?.type === "WHITESPACE" && ctx.tokens[pos]?.lineStart) {
1548
+ pos++;
1549
+ consumed++;
1550
+ }
1551
+ if (isLiOpen(ctx, pos) || isListClose(ctx, pos, listType) || isNestedListOpen(ctx, pos) || ctx.tokens[pos]?.type === "EOF") {
1552
+ break;
1553
+ }
1554
+ continue;
1555
+ }
1556
+ if (isLiOpen(ctx, pos) || isListClose(ctx, pos, listType) || isNestedListOpen(ctx, pos)) {
1557
+ break;
1558
+ }
1559
+ let matched = false;
1560
+ const inlineCtx = { ...ctx, pos };
1561
+ for (const rule of ctx.inlineRules) {
1562
+ if (rule.startTokens.includes(tok.type)) {
1563
+ const result = rule.parse(inlineCtx);
1564
+ if (result.success) {
1565
+ contentElements.push(...result.elements);
1566
+ consumed += result.consumed;
1567
+ pos += result.consumed;
1568
+ matched = true;
1569
+ break;
1570
+ }
1571
+ }
1572
+ }
1573
+ if (!matched) {
1574
+ contentElements.push({ element: "text", data: tok.value });
1575
+ consumed++;
1576
+ pos++;
1577
+ }
1578
+ }
1508
1579
  }
1509
1580
  return {
1510
1581
  item: {
@@ -1585,8 +1656,90 @@ function parseListBlock(ctx, startPos, listType) {
1585
1656
  pos += liResult.consumed;
1586
1657
  continue;
1587
1658
  }
1588
- pos++;
1589
- consumed++;
1659
+ const bareContent = [];
1660
+ let currentParagraph = [];
1661
+ const flushParagraph = () => {
1662
+ if (currentParagraph.length > 0) {
1663
+ while (currentParagraph.length > 0 && currentParagraph[currentParagraph.length - 1]?.element === "line-break") {
1664
+ currentParagraph.pop();
1665
+ }
1666
+ if (currentParagraph.length > 0) {
1667
+ bareContent.push({
1668
+ element: "container",
1669
+ data: {
1670
+ type: "paragraph",
1671
+ attributes: {},
1672
+ elements: currentParagraph
1673
+ }
1674
+ });
1675
+ }
1676
+ currentParagraph = [];
1677
+ }
1678
+ };
1679
+ while (pos < ctx.tokens.length) {
1680
+ const tok = ctx.tokens[pos];
1681
+ if (!tok || tok.type === "EOF")
1682
+ break;
1683
+ if (tok.type === "NEWLINE") {
1684
+ pos++;
1685
+ consumed++;
1686
+ let consecutiveNewlines = 1;
1687
+ while (ctx.tokens[pos]?.type === "NEWLINE") {
1688
+ pos++;
1689
+ consumed++;
1690
+ consecutiveNewlines++;
1691
+ }
1692
+ while (ctx.tokens[pos]?.type === "WHITESPACE" && ctx.tokens[pos]?.lineStart) {
1693
+ pos++;
1694
+ consumed++;
1695
+ }
1696
+ if (isListClose(ctx, pos, listType) || isLiOpen(ctx, pos) || isNestedListOpen(ctx, pos)) {
1697
+ break;
1698
+ }
1699
+ if (consecutiveNewlines >= 2) {
1700
+ flushParagraph();
1701
+ } else if (currentParagraph.length > 0) {
1702
+ currentParagraph.push({ element: "line-break" });
1703
+ }
1704
+ continue;
1705
+ }
1706
+ if (isListClose(ctx, pos, listType) || isLiOpen(ctx, pos) || isNestedListOpen(ctx, pos)) {
1707
+ break;
1708
+ }
1709
+ let matched = false;
1710
+ const inlineCtx = { ...ctx, pos };
1711
+ for (const rule of ctx.inlineRules) {
1712
+ if (rule.startTokens.includes(tok.type)) {
1713
+ const result = rule.parse(inlineCtx);
1714
+ if (result.success) {
1715
+ currentParagraph.push(...result.elements);
1716
+ consumed += result.consumed;
1717
+ pos += result.consumed;
1718
+ matched = true;
1719
+ break;
1720
+ }
1721
+ }
1722
+ }
1723
+ if (!matched) {
1724
+ currentParagraph.push({ element: "text", data: tok.value });
1725
+ consumed++;
1726
+ pos++;
1727
+ }
1728
+ }
1729
+ flushParagraph();
1730
+ if (bareContent.length > 0) {
1731
+ let finalElements;
1732
+ if (bareContent.length === 1 && bareContent[0]?.element === "container" && bareContent[0].data?.type === "paragraph") {
1733
+ finalElements = bareContent[0].data.elements;
1734
+ } else {
1735
+ finalElements = bareContent;
1736
+ }
1737
+ items.push({
1738
+ "item-type": "elements",
1739
+ attributes: { _noMarker: "true" },
1740
+ elements: finalElements
1741
+ });
1742
+ }
1590
1743
  }
1591
1744
  const listData = {
1592
1745
  type: listType === "ol" ? "numbered" : "bullet",
@@ -1642,21 +1795,6 @@ var blockquoteRule = {
1642
1795
  let pos = ctx.pos;
1643
1796
  let consumed = 0;
1644
1797
  while (pos < ctx.tokens.length) {
1645
- while (ctx.tokens[pos]?.type === "NEWLINE" && ctx.tokens[pos]?.lineStart) {
1646
- const nextPos = pos + 1;
1647
- const nextToken = ctx.tokens[nextPos];
1648
- if (nextToken?.type === "BLOCKQUOTE_MARKER" && nextToken.lineStart) {
1649
- depths.push({
1650
- depth: 0,
1651
- ltype: null,
1652
- value: { elements: [], hasLineBreak: false }
1653
- });
1654
- pos++;
1655
- consumed++;
1656
- } else {
1657
- break;
1658
- }
1659
- }
1660
1798
  const markerToken = ctx.tokens[pos];
1661
1799
  if (!markerToken || !markerToken.lineStart || markerToken.type !== "BLOCKQUOTE_MARKER") {
1662
1800
  break;
@@ -1667,6 +1805,17 @@ var blockquoteRule = {
1667
1805
  }
1668
1806
  pos++;
1669
1807
  consumed++;
1808
+ if (ctx.tokens[pos]?.type !== "WHITESPACE") {
1809
+ while (pos < ctx.tokens.length && ctx.tokens[pos]?.type !== "NEWLINE") {
1810
+ pos++;
1811
+ consumed++;
1812
+ }
1813
+ if (ctx.tokens[pos]?.type === "NEWLINE") {
1814
+ pos++;
1815
+ consumed++;
1816
+ }
1817
+ continue;
1818
+ }
1670
1819
  while (ctx.tokens[pos]?.type === "WHITESPACE") {
1671
1820
  pos++;
1672
1821
  consumed++;
@@ -1689,6 +1838,9 @@ var blockquoteRule = {
1689
1838
  });
1690
1839
  }
1691
1840
  if (depths.length === 0) {
1841
+ if (consumed > 0) {
1842
+ return { success: true, elements: [], consumed };
1843
+ }
1692
1844
  return { success: false };
1693
1845
  }
1694
1846
  const depthTrees = processDepths(null, depths);
@@ -1726,6 +1878,10 @@ function buildBlockquoteElement(list) {
1726
1878
  }
1727
1879
  for (const item of list) {
1728
1880
  if (item.kind === "item") {
1881
+ if (item.value.elements.length === 0) {
1882
+ flushParagraph();
1883
+ continue;
1884
+ }
1729
1885
  currentParagraphChildren.push(...item.value.elements);
1730
1886
  if (item.value.hasLineBreak) {
1731
1887
  currentParagraphChildren.push({ element: "line-break" });
@@ -1936,14 +2092,36 @@ var paragraphRule = {
1936
2092
  }
1937
2093
  let elements = processCloseSpanMarkers(result.elements);
1938
2094
  while (elements.length > 0 && elements[elements.length - 1]?.element === "line-break") {
2095
+ const lastEl = elements[elements.length - 1];
2096
+ if (lastEl._preservedTrailingBreak) {
2097
+ delete lastEl._preservedTrailingBreak;
2098
+ break;
2099
+ }
1939
2100
  elements.pop();
1940
2101
  }
2102
+ while (elements.length > 0) {
2103
+ const last = elements[elements.length - 1];
2104
+ if (last?.element === "text" && "data" in last && typeof last.data === "string" && last.data.trim() === "") {
2105
+ elements.pop();
2106
+ } else {
2107
+ break;
2108
+ }
2109
+ }
1941
2110
  while (elements.length > 0 && elements[0]?.element === "line-break") {
1942
2111
  elements.shift();
1943
2112
  }
1944
2113
  if (elements.length === 0) {
1945
2114
  return { success: false };
1946
2115
  }
2116
+ const nextPos = ctx.pos + result.consumed;
2117
+ const nextToken = ctx.tokens[nextPos];
2118
+ if (nextToken?.type === "COLON" && nextToken.lineStart) {
2119
+ return {
2120
+ success: true,
2121
+ elements: [...elements, { element: "line-break" }],
2122
+ consumed: result.consumed
2123
+ };
2124
+ }
1947
2125
  return {
1948
2126
  success: true,
1949
2127
  elements: [
@@ -1996,7 +2174,7 @@ var divRule = {
1996
2174
  pos++;
1997
2175
  consumed++;
1998
2176
  if (ctx.tokens[pos]?.type !== "NEWLINE") {
1999
- return { success: false };
2177
+ return consumeFailedDiv(ctx);
2000
2178
  }
2001
2179
  pos++;
2002
2180
  consumed++;
@@ -2013,10 +2191,10 @@ var divRule = {
2013
2191
  const bodyCtx = { ...ctx, pos };
2014
2192
  let children;
2015
2193
  if (paragraphStrip) {
2016
- const bodyResult = parseInlineContentUntil(bodyCtx, closeCondition);
2194
+ const bodyResult = parseBlocksUntil(bodyCtx, closeCondition);
2017
2195
  consumed += bodyResult.consumed;
2018
2196
  pos += bodyResult.consumed;
2019
- children = bodyResult.elements;
2197
+ children = unwrapEdgeParagraphs(bodyResult.elements);
2020
2198
  } else {
2021
2199
  const bodyResult = parseBlocksUntil(bodyCtx, closeCondition);
2022
2200
  consumed += bodyResult.consumed;
@@ -2056,28 +2234,117 @@ var divRule = {
2056
2234
  };
2057
2235
  }
2058
2236
  };
2059
-
2060
- // packages/parser/src/parser/rules/block/code.ts
2061
- var codeBlockRule = {
2062
- name: "code",
2063
- startTokens: ["BLOCK_OPEN"],
2064
- requiresLineStart: false,
2065
- parse(ctx) {
2066
- const openToken = currentToken(ctx);
2067
- if (openToken.type !== "BLOCK_OPEN") {
2068
- return { success: false };
2069
- }
2070
- let pos = ctx.pos + 1;
2071
- let consumed = 1;
2072
- const nameResult = parseBlockName(ctx, pos);
2073
- if (!nameResult) {
2074
- return { success: false };
2075
- }
2076
- if (nameResult.name !== "code") {
2077
- return { success: false };
2237
+ function consumeFailedDiv(ctx) {
2238
+ const elements = [];
2239
+ let pos = ctx.pos;
2240
+ let consumed = 0;
2241
+ let lastClosePos = -1;
2242
+ let lastCloseConsumed = 0;
2243
+ let scanPos = pos;
2244
+ while (scanPos < ctx.tokens.length) {
2245
+ const t = ctx.tokens[scanPos];
2246
+ if (!t || t.type === "EOF")
2247
+ break;
2248
+ if (t.type === "BLOCK_END_OPEN") {
2249
+ const nameResult = parseBlockName(ctx, scanPos + 1);
2250
+ if (nameResult?.name === "div") {
2251
+ lastClosePos = scanPos;
2252
+ lastCloseConsumed = 1 + nameResult.consumed;
2253
+ const closeToken = ctx.tokens[scanPos + 1 + nameResult.consumed];
2254
+ if (closeToken?.type === "BLOCK_CLOSE") {
2255
+ lastCloseConsumed++;
2256
+ }
2257
+ }
2078
2258
  }
2079
- pos += nameResult.consumed;
2080
- consumed += nameResult.consumed;
2259
+ scanPos++;
2260
+ }
2261
+ if (lastClosePos === -1) {
2262
+ return { success: false };
2263
+ }
2264
+ const endPos = lastClosePos + lastCloseConsumed;
2265
+ while (pos < endPos && pos < ctx.tokens.length) {
2266
+ const t = ctx.tokens[pos];
2267
+ if (!t || t.type === "EOF")
2268
+ break;
2269
+ if (t.type === "NEWLINE") {
2270
+ let peekPos = pos + 1;
2271
+ while (ctx.tokens[peekPos]?.type === "WHITESPACE")
2272
+ peekPos++;
2273
+ if (ctx.tokens[peekPos]?.type === "NEWLINE") {
2274
+ while (ctx.tokens[pos]?.type === "NEWLINE" || ctx.tokens[pos]?.type === "WHITESPACE") {
2275
+ pos++;
2276
+ consumed++;
2277
+ }
2278
+ continue;
2279
+ }
2280
+ elements.push({ element: "line-break" });
2281
+ pos++;
2282
+ consumed++;
2283
+ continue;
2284
+ }
2285
+ elements.push({ element: "text", data: t.value });
2286
+ pos++;
2287
+ consumed++;
2288
+ }
2289
+ if (ctx.tokens[pos]?.type === "NEWLINE") {
2290
+ pos++;
2291
+ consumed++;
2292
+ }
2293
+ return {
2294
+ success: true,
2295
+ elements: [
2296
+ {
2297
+ element: "container",
2298
+ data: {
2299
+ type: "paragraph",
2300
+ attributes: {},
2301
+ elements
2302
+ }
2303
+ }
2304
+ ],
2305
+ consumed
2306
+ };
2307
+ }
2308
+ function unwrapEdgeParagraphs(elements) {
2309
+ if (elements.length === 0)
2310
+ return elements;
2311
+ const result = [...elements];
2312
+ if (isParagraphContainer(result[0])) {
2313
+ const inner = result[0].data.elements;
2314
+ result.splice(0, 1, ...inner);
2315
+ }
2316
+ const lastIdx = result.length - 1;
2317
+ if (lastIdx >= 0 && isParagraphContainer(result[lastIdx])) {
2318
+ const inner = result[lastIdx].data.elements;
2319
+ result.splice(lastIdx, 1, ...inner);
2320
+ }
2321
+ return result;
2322
+ }
2323
+ function isParagraphContainer(el) {
2324
+ return el !== undefined && el.element === "container" && typeof el.data === "object" && el.data !== null && "type" in el.data && el.data.type === "paragraph";
2325
+ }
2326
+
2327
+ // packages/parser/src/parser/rules/block/code.ts
2328
+ var codeBlockRule = {
2329
+ name: "code",
2330
+ startTokens: ["BLOCK_OPEN"],
2331
+ requiresLineStart: false,
2332
+ parse(ctx) {
2333
+ const openToken = currentToken(ctx);
2334
+ if (openToken.type !== "BLOCK_OPEN") {
2335
+ return { success: false };
2336
+ }
2337
+ let pos = ctx.pos + 1;
2338
+ let consumed = 1;
2339
+ const nameResult = parseBlockName(ctx, pos);
2340
+ if (!nameResult) {
2341
+ return { success: false };
2342
+ }
2343
+ if (nameResult.name !== "code") {
2344
+ return { success: false };
2345
+ }
2346
+ pos += nameResult.consumed;
2347
+ consumed += nameResult.consumed;
2081
2348
  const attrResult = parseAttributesRaw(ctx, pos);
2082
2349
  pos += attrResult.consumed;
2083
2350
  consumed += attrResult.consumed;
@@ -2862,9 +3129,10 @@ function parseCell(ctx, startPos) {
2862
3129
  return false;
2863
3130
  };
2864
3131
  const bodyCtx = { ...ctx, pos };
2865
- const bodyResult = parseBlocksUntil(bodyCtx, closeCondition);
3132
+ const bodyResult = parseCellContent(bodyCtx, closeCondition);
2866
3133
  consumed += bodyResult.consumed;
2867
3134
  pos += bodyResult.consumed;
3135
+ const hadParagraphBreaks = bodyResult.hadParagraphBreaks;
2868
3136
  if (ctx.tokens[pos]?.type === "BLOCK_END_OPEN") {
2869
3137
  pos++;
2870
3138
  consumed++;
@@ -2882,7 +3150,7 @@ function parseCell(ctx, startPos) {
2882
3150
  consumed++;
2883
3151
  }
2884
3152
  }
2885
- const processedElements = bodyResult.elements;
3153
+ const processedElements = hadParagraphBreaks ? bodyResult.elements : unwrapSingleInlineParagraph(bodyResult.elements);
2886
3154
  return {
2887
3155
  cell: {
2888
3156
  header: isHeader,
@@ -2894,6 +3162,170 @@ function parseCell(ctx, startPos) {
2894
3162
  consumed
2895
3163
  };
2896
3164
  }
3165
+ function unwrapSingleInlineParagraph(elements) {
3166
+ if (elements.length !== 1) {
3167
+ return elements;
3168
+ }
3169
+ const first = elements[0];
3170
+ if (first?.element !== "container" || typeof first.data !== "object" || first.data === null || !("type" in first.data) || first.data.type !== "paragraph") {
3171
+ return elements;
3172
+ }
3173
+ const paragraphData = first.data;
3174
+ const innerElements = paragraphData.elements ?? [];
3175
+ const hasBlockElement = innerElements.some((el) => isBlockElement(el));
3176
+ if (hasBlockElement) {
3177
+ return elements;
3178
+ }
3179
+ return innerElements;
3180
+ }
3181
+ function isBlockElement(el) {
3182
+ const blockTypes = ["table", "div", "blockquote", "code", "list", "iframe", "image-block"];
3183
+ if (blockTypes.includes(el.element)) {
3184
+ return true;
3185
+ }
3186
+ if (el.element === "container" && typeof el.data === "object" && el.data !== null) {
3187
+ const data = el.data;
3188
+ if (data.type === "paragraph" || data.type === "div" || data.type === "blockquote") {
3189
+ return true;
3190
+ }
3191
+ }
3192
+ return false;
3193
+ }
3194
+ function parseCellContent(ctx, closeCondition) {
3195
+ const elements = [];
3196
+ let consumed = 0;
3197
+ let pos = ctx.pos;
3198
+ let currentSegment = [];
3199
+ let hasMultipleParts = false;
3200
+ let hasBlockElement = false;
3201
+ let hadParagraphBreaks = false;
3202
+ const flushSegment = (wrapInParagraph) => {
3203
+ if (currentSegment.length === 0)
3204
+ return;
3205
+ while (currentSegment.length > 0) {
3206
+ const last = currentSegment[currentSegment.length - 1];
3207
+ if (last?.element === "text" && typeof last.data === "string" && last.data.trim() === "") {
3208
+ currentSegment.pop();
3209
+ } else if (last?.element === "line-break") {
3210
+ currentSegment.pop();
3211
+ } else {
3212
+ break;
3213
+ }
3214
+ }
3215
+ while (currentSegment.length > 0) {
3216
+ const first = currentSegment[0];
3217
+ if (first?.element === "text" && typeof first.data === "string" && first.data.trim() === "") {
3218
+ currentSegment.shift();
3219
+ } else {
3220
+ break;
3221
+ }
3222
+ }
3223
+ if (currentSegment.length === 0)
3224
+ return;
3225
+ if (wrapInParagraph) {
3226
+ elements.push({
3227
+ element: "container",
3228
+ data: {
3229
+ type: "paragraph",
3230
+ attributes: {},
3231
+ elements: [...currentSegment]
3232
+ }
3233
+ });
3234
+ } else {
3235
+ elements.push(...currentSegment);
3236
+ }
3237
+ currentSegment = [];
3238
+ };
3239
+ while (pos < ctx.tokens.length) {
3240
+ const token = ctx.tokens[pos];
3241
+ if (!token || token.type === "EOF") {
3242
+ break;
3243
+ }
3244
+ const checkCtx = { ...ctx, pos };
3245
+ if (closeCondition(checkCtx)) {
3246
+ break;
3247
+ }
3248
+ if (token.type === "NEWLINE") {
3249
+ pos++;
3250
+ consumed++;
3251
+ if (ctx.tokens[pos]?.type === "NEWLINE") {
3252
+ while (ctx.tokens[pos]?.type === "NEWLINE") {
3253
+ pos++;
3254
+ consumed++;
3255
+ }
3256
+ flushSegment(true);
3257
+ hasMultipleParts = true;
3258
+ hadParagraphBreaks = true;
3259
+ while (ctx.tokens[pos]?.type === "WHITESPACE") {
3260
+ pos++;
3261
+ consumed++;
3262
+ }
3263
+ continue;
3264
+ }
3265
+ const nextToken = ctx.tokens[pos];
3266
+ if (!nextToken || nextToken.type === "BLOCK_END_OPEN" || nextToken.type === "EOF") {
3267
+ continue;
3268
+ }
3269
+ if (nextToken.type === "BLOCK_OPEN") {
3270
+ flushSegment(true);
3271
+ hasMultipleParts = true;
3272
+ continue;
3273
+ }
3274
+ if (currentSegment.length === 0 && elements.length === 0) {
3275
+ continue;
3276
+ }
3277
+ currentSegment.push({ element: "line-break" });
3278
+ continue;
3279
+ }
3280
+ if (token.type === "WHITESPACE" && token.lineStart) {
3281
+ pos++;
3282
+ consumed++;
3283
+ continue;
3284
+ }
3285
+ let matched = false;
3286
+ const blockCtx = { ...ctx, pos };
3287
+ for (const rule of ctx.blockRules) {
3288
+ if (canApplyBlockRule(rule, token)) {
3289
+ const result = rule.parse(blockCtx);
3290
+ if (result.success) {
3291
+ if (currentSegment.length > 0) {
3292
+ flushSegment(true);
3293
+ hasMultipleParts = true;
3294
+ }
3295
+ elements.push(...result.elements);
3296
+ hasBlockElement = true;
3297
+ hasMultipleParts = true;
3298
+ consumed += result.consumed;
3299
+ pos += result.consumed;
3300
+ matched = true;
3301
+ break;
3302
+ }
3303
+ }
3304
+ }
3305
+ if (matched)
3306
+ continue;
3307
+ const inlineCtx = { ...ctx, pos };
3308
+ for (const rule of ctx.inlineRules) {
3309
+ if (canApplyInlineRule(rule, token)) {
3310
+ const result = rule.parse(inlineCtx);
3311
+ if (result.success) {
3312
+ currentSegment.push(...result.elements);
3313
+ consumed += result.consumed;
3314
+ pos += result.consumed;
3315
+ matched = true;
3316
+ break;
3317
+ }
3318
+ }
3319
+ }
3320
+ if (!matched) {
3321
+ currentSegment.push({ element: "text", data: token.value });
3322
+ consumed++;
3323
+ pos++;
3324
+ }
3325
+ }
3326
+ flushSegment(hasMultipleParts || hasBlockElement);
3327
+ return { elements, consumed, hadParagraphBreaks };
3328
+ }
2897
3329
 
2898
3330
  // packages/parser/src/parser/rules/block/module/rate/index.ts
2899
3331
  var rateModuleRule = {
@@ -3308,6 +3740,10 @@ var footnoteBlockRule = {
3308
3740
  }
3309
3741
  pos++;
3310
3742
  consumed++;
3743
+ if (ctx.footnoteBlockParsed) {
3744
+ return { success: false };
3745
+ }
3746
+ ctx.footnoteBlockParsed = true;
3311
3747
  const title = attrs.title !== undefined ? attrs.title : null;
3312
3748
  const hide = attrs.hide === "true" || attrs.hide === "yes";
3313
3749
  return {
@@ -3539,6 +3975,12 @@ var alignRule = {
3539
3975
  name: "align",
3540
3976
  startTokens: ["BLOCK_OPEN"],
3541
3977
  requiresLineStart: true,
3978
+ preservesPrecedingLineBreak: true,
3979
+ isStartPattern(ctx, pos) {
3980
+ if (ctx.tokens[pos]?.type !== "BLOCK_OPEN")
3981
+ return false;
3982
+ return parseAlignOpen(ctx, pos + 1) !== null;
3983
+ },
3542
3984
  parse(ctx) {
3543
3985
  const openToken = currentToken(ctx);
3544
3986
  if (openToken.type !== "BLOCK_OPEN") {
@@ -3964,7 +4406,11 @@ var mathBlockRule = {
3964
4406
  break;
3965
4407
  }
3966
4408
  }
3967
- latexSource += token.value;
4409
+ if (token.type === "BACKSLASH_BREAK") {
4410
+ latexSource += "\\\n";
4411
+ } else {
4412
+ latexSource += token.value;
4413
+ }
3968
4414
  pos++;
3969
4415
  consumed++;
3970
4416
  }
@@ -4023,23 +4469,25 @@ var htmlBlockRule = {
4023
4469
  }
4024
4470
  pos += nameResult.consumed;
4025
4471
  consumed += nameResult.consumed;
4026
- while (ctx.tokens[pos]?.type === "WHITESPACE") {
4027
- pos++;
4028
- consumed++;
4029
- }
4472
+ const attrResult = parseAttributesRaw(ctx, pos);
4473
+ pos += attrResult.consumed;
4474
+ consumed += attrResult.consumed;
4475
+ const style = attrResult.attrs.style;
4030
4476
  if (ctx.tokens[pos]?.type !== "BLOCK_CLOSE") {
4031
4477
  return { success: false };
4032
4478
  }
4033
4479
  pos++;
4034
4480
  consumed++;
4035
4481
  let contents = "";
4482
+ let foundClose = false;
4036
4483
  while (pos < ctx.tokens.length) {
4037
4484
  const token = ctx.tokens[pos];
4038
- if (!token)
4485
+ if (!token || token.type === "EOF")
4039
4486
  break;
4040
4487
  if (token.type === "BLOCK_END_OPEN") {
4041
4488
  const closeNameResult = parseBlockName(ctx, pos + 1);
4042
4489
  if (closeNameResult?.name.toLowerCase() === "html") {
4490
+ foundClose = true;
4043
4491
  break;
4044
4492
  }
4045
4493
  }
@@ -4047,6 +4495,9 @@ var htmlBlockRule = {
4047
4495
  pos++;
4048
4496
  consumed++;
4049
4497
  }
4498
+ if (!foundClose) {
4499
+ return { success: false };
4500
+ }
4050
4501
  if (ctx.tokens[pos]?.type === "BLOCK_END_OPEN") {
4051
4502
  pos++;
4052
4503
  consumed++;
@@ -4072,7 +4523,8 @@ var htmlBlockRule = {
4072
4523
  {
4073
4524
  element: "html",
4074
4525
  data: {
4075
- contents
4526
+ contents,
4527
+ ...style && { style }
4076
4528
  }
4077
4529
  }
4078
4530
  ],
@@ -4081,9 +4533,9 @@ var htmlBlockRule = {
4081
4533
  }
4082
4534
  };
4083
4535
 
4084
- // packages/parser/src/parser/rules/block/iframe.ts
4085
- var iframeRule = {
4086
- name: "iframe",
4536
+ // packages/parser/src/parser/rules/block/embed-block.ts
4537
+ var embedBlockRule = {
4538
+ name: "embed-block",
4087
4539
  startTokens: ["BLOCK_OPEN"],
4088
4540
  requiresLineStart: false,
4089
4541
  parse(ctx) {
@@ -4094,7 +4546,11 @@ var iframeRule = {
4094
4546
  let pos = ctx.pos + 1;
4095
4547
  let consumed = 1;
4096
4548
  const nameResult = parseBlockName(ctx, pos);
4097
- if (!nameResult || nameResult.name.toLowerCase() !== "iframe") {
4549
+ if (!nameResult) {
4550
+ return { success: false };
4551
+ }
4552
+ const blockName = nameResult.name.toLowerCase();
4553
+ if (blockName !== "embed" && blockName !== "embedvideo" && blockName !== "embedaudio") {
4098
4554
  return { success: false };
4099
4555
  }
4100
4556
  pos += nameResult.consumed;
@@ -4103,50 +4559,156 @@ var iframeRule = {
4103
4559
  pos++;
4104
4560
  consumed++;
4105
4561
  }
4106
- let url = "";
4562
+ if (ctx.tokens[pos]?.type !== "BLOCK_CLOSE") {
4563
+ return { success: false };
4564
+ }
4565
+ pos++;
4566
+ consumed++;
4567
+ let contents = "";
4568
+ let foundClose = false;
4107
4569
  while (pos < ctx.tokens.length) {
4108
4570
  const token = ctx.tokens[pos];
4109
4571
  if (!token)
4110
4572
  break;
4111
- if (token.type === "BLOCK_CLOSE" || token.type === "WHITESPACE" || token.type === "NEWLINE") {
4112
- break;
4573
+ if (token.type === "BLOCK_END_OPEN") {
4574
+ const closeNameResult = parseBlockName(ctx, pos + 1);
4575
+ if (closeNameResult) {
4576
+ const closeName = closeNameResult.name.toLowerCase();
4577
+ if (closeName === "embed" || closeName === "embedvideo" || closeName === "embedaudio") {
4578
+ foundClose = true;
4579
+ break;
4580
+ }
4581
+ }
4113
4582
  }
4114
- url += token.value;
4583
+ contents += token.value;
4115
4584
  pos++;
4116
4585
  consumed++;
4117
4586
  }
4118
- if (!url) {
4587
+ if (!foundClose) {
4119
4588
  return { success: false };
4120
4589
  }
4121
- const attributes = {};
4122
- while (pos < ctx.tokens.length) {
4123
- const token = ctx.tokens[pos];
4124
- if (!token || token.type === "BLOCK_CLOSE")
4125
- break;
4126
- if (token.type === "NEWLINE") {
4127
- break;
4590
+ if (ctx.tokens[pos]?.type === "BLOCK_END_OPEN") {
4591
+ pos++;
4592
+ consumed++;
4593
+ const closeNameResult = parseBlockName(ctx, pos);
4594
+ if (closeNameResult) {
4595
+ pos += closeNameResult.consumed;
4596
+ consumed += closeNameResult.consumed;
4128
4597
  }
4129
- if (token.type === "WHITESPACE") {
4598
+ if (ctx.tokens[pos]?.type === "BLOCK_CLOSE") {
4130
4599
  pos++;
4131
4600
  consumed++;
4132
- continue;
4133
4601
  }
4134
- if (token.type === "IDENTIFIER" || token.type === "TEXT") {
4135
- const key = token.value;
4602
+ if (ctx.tokens[pos]?.type === "NEWLINE") {
4136
4603
  pos++;
4137
4604
  consumed++;
4138
- while (ctx.tokens[pos]?.type === "WHITESPACE") {
4139
- pos++;
4140
- consumed++;
4141
- }
4142
- if (ctx.tokens[pos]?.type === "EQUALS") {
4143
- pos++;
4144
- consumed++;
4145
- while (ctx.tokens[pos]?.type === "WHITESPACE") {
4146
- pos++;
4147
- consumed++;
4605
+ }
4606
+ }
4607
+ contents = contents.trim();
4608
+ return {
4609
+ success: true,
4610
+ elements: [
4611
+ {
4612
+ element: "container",
4613
+ data: {
4614
+ type: "paragraph",
4615
+ attributes: {},
4616
+ elements: [
4617
+ {
4618
+ element: "embed-block",
4619
+ data: {
4620
+ contents
4621
+ }
4622
+ }
4623
+ ]
4148
4624
  }
4149
- let value = "";
4625
+ }
4626
+ ],
4627
+ consumed
4628
+ };
4629
+ }
4630
+ };
4631
+
4632
+ // packages/parser/src/parser/rules/block/iframe.ts
4633
+ var ALLOWED_IFRAME_ATTRS = new Set(["width", "height", "style", "scrolling", "frameborder"]);
4634
+ function normalizeUrl(url) {
4635
+ return url.replace(/[\s\u0000-\u001f\u007f-\u009f]/g, "").toLowerCase();
4636
+ }
4637
+ function isDangerousUrl(normalizedUrl) {
4638
+ return /^(javascript|data|vbscript):/i.test(normalizedUrl);
4639
+ }
4640
+ var iframeRule = {
4641
+ name: "iframe",
4642
+ startTokens: ["BLOCK_OPEN"],
4643
+ requiresLineStart: false,
4644
+ parse(ctx) {
4645
+ const openToken = currentToken(ctx);
4646
+ if (openToken.type !== "BLOCK_OPEN") {
4647
+ return { success: false };
4648
+ }
4649
+ let pos = ctx.pos + 1;
4650
+ let consumed = 1;
4651
+ const nameResult = parseBlockName(ctx, pos);
4652
+ if (!nameResult || nameResult.name.toLowerCase() !== "iframe") {
4653
+ return { success: false };
4654
+ }
4655
+ pos += nameResult.consumed;
4656
+ consumed += nameResult.consumed;
4657
+ while (ctx.tokens[pos]?.type === "WHITESPACE") {
4658
+ pos++;
4659
+ consumed++;
4660
+ }
4661
+ let url = "";
4662
+ while (pos < ctx.tokens.length) {
4663
+ const token = ctx.tokens[pos];
4664
+ if (!token)
4665
+ break;
4666
+ if (token.type === "BLOCK_CLOSE" || token.type === "WHITESPACE" || token.type === "NEWLINE") {
4667
+ break;
4668
+ }
4669
+ url += token.value;
4670
+ pos++;
4671
+ consumed++;
4672
+ }
4673
+ if (!url) {
4674
+ return { success: false };
4675
+ }
4676
+ const normalizedUrl = normalizeUrl(url);
4677
+ if (isDangerousUrl(normalizedUrl)) {
4678
+ return { success: false };
4679
+ }
4680
+ if (!/^https?:\/\//i.test(normalizedUrl)) {
4681
+ return { success: false };
4682
+ }
4683
+ const attributes = {};
4684
+ while (pos < ctx.tokens.length) {
4685
+ const token = ctx.tokens[pos];
4686
+ if (!token || token.type === "BLOCK_CLOSE")
4687
+ break;
4688
+ if (token.type === "NEWLINE") {
4689
+ break;
4690
+ }
4691
+ if (token.type === "WHITESPACE") {
4692
+ pos++;
4693
+ consumed++;
4694
+ continue;
4695
+ }
4696
+ if (token.type === "IDENTIFIER" || token.type === "TEXT") {
4697
+ const key = token.value;
4698
+ pos++;
4699
+ consumed++;
4700
+ while (ctx.tokens[pos]?.type === "WHITESPACE") {
4701
+ pos++;
4702
+ consumed++;
4703
+ }
4704
+ if (ctx.tokens[pos]?.type === "EQUALS") {
4705
+ pos++;
4706
+ consumed++;
4707
+ while (ctx.tokens[pos]?.type === "WHITESPACE") {
4708
+ pos++;
4709
+ consumed++;
4710
+ }
4711
+ let value = "";
4150
4712
  const valueToken = ctx.tokens[pos];
4151
4713
  if (valueToken?.type === "QUOTED_STRING") {
4152
4714
  value = valueToken.value.slice(1, -1);
@@ -4163,7 +4725,9 @@ var iframeRule = {
4163
4725
  consumed++;
4164
4726
  }
4165
4727
  }
4166
- attributes[key] = value;
4728
+ if (ALLOWED_IFRAME_ATTRS.has(key.toLowerCase())) {
4729
+ attributes[key.toLowerCase()] = value;
4730
+ }
4167
4731
  }
4168
4732
  } else {
4169
4733
  pos++;
@@ -4365,6 +4929,300 @@ var tocRule = {
4365
4929
  }
4366
4930
  };
4367
4931
 
4932
+ // packages/parser/src/parser/rules/block/orphan-li.ts
4933
+ function isLiOpen2(ctx, pos) {
4934
+ if (ctx.tokens[pos]?.type !== "BLOCK_OPEN")
4935
+ return null;
4936
+ const nameResult = parseBlockName(ctx, pos + 1);
4937
+ if (!nameResult)
4938
+ return null;
4939
+ if (nameResult.name === "li") {
4940
+ return { consumed: 1 + nameResult.consumed };
4941
+ }
4942
+ return null;
4943
+ }
4944
+ function isLiClose2(ctx, pos) {
4945
+ if (ctx.tokens[pos]?.type !== "BLOCK_END_OPEN")
4946
+ return null;
4947
+ const nameResult = parseBlockName(ctx, pos + 1);
4948
+ if (!nameResult || nameResult.name !== "li")
4949
+ return null;
4950
+ let consumed = 1 + nameResult.consumed;
4951
+ if (ctx.tokens[pos + consumed]?.type === "BLOCK_CLOSE")
4952
+ consumed++;
4953
+ return { consumed };
4954
+ }
4955
+ var orphanLiRule = {
4956
+ name: "orphan-li",
4957
+ startTokens: ["BLOCK_OPEN"],
4958
+ requiresLineStart: false,
4959
+ parse(ctx) {
4960
+ const openToken = currentToken(ctx);
4961
+ if (openToken.type !== "BLOCK_OPEN") {
4962
+ return { success: false };
4963
+ }
4964
+ const liOpen = isLiOpen2(ctx, ctx.pos);
4965
+ if (!liOpen) {
4966
+ return { success: false };
4967
+ }
4968
+ let pos = ctx.pos + liOpen.consumed;
4969
+ let consumed = liOpen.consumed;
4970
+ if (ctx.tokens[pos]?.type !== "BLOCK_CLOSE") {
4971
+ return { success: false };
4972
+ }
4973
+ pos++;
4974
+ consumed++;
4975
+ const elements = [];
4976
+ let foundClose = false;
4977
+ elements.push({ element: "text", data: "[[" });
4978
+ elements.push({ element: "text", data: "li" });
4979
+ elements.push({ element: "text", data: "]]" });
4980
+ while (pos < ctx.tokens.length) {
4981
+ const token = ctx.tokens[pos];
4982
+ if (!token || token.type === "EOF")
4983
+ break;
4984
+ const liClose = isLiClose2(ctx, pos);
4985
+ if (liClose) {
4986
+ foundClose = true;
4987
+ elements.push({ element: "text", data: "[[/" });
4988
+ elements.push({ element: "text", data: "li" });
4989
+ elements.push({ element: "text", data: "]]" });
4990
+ consumed += liClose.consumed;
4991
+ pos += liClose.consumed;
4992
+ if (ctx.tokens[pos]?.type === "NEWLINE") {
4993
+ pos++;
4994
+ consumed++;
4995
+ }
4996
+ break;
4997
+ }
4998
+ if (token.type === "NEWLINE") {
4999
+ elements.push({ element: "line-break" });
5000
+ pos++;
5001
+ consumed++;
5002
+ continue;
5003
+ }
5004
+ if (token.type === "WHITESPACE" && token.lineStart) {
5005
+ pos++;
5006
+ consumed++;
5007
+ continue;
5008
+ }
5009
+ elements.push({ element: "text", data: token.value });
5010
+ pos++;
5011
+ consumed++;
5012
+ }
5013
+ if (!foundClose) {
5014
+ return { success: false };
5015
+ }
5016
+ return {
5017
+ success: true,
5018
+ elements,
5019
+ consumed
5020
+ };
5021
+ }
5022
+ };
5023
+
5024
+ // packages/parser/src/parser/rules/block/bibliography.ts
5025
+ function parseBibliographyEntry(ctx, startPos) {
5026
+ let pos = startPos;
5027
+ let consumed = 0;
5028
+ const colonToken = ctx.tokens[pos];
5029
+ if (!colonToken || colonToken.type !== "COLON" || !colonToken.lineStart) {
5030
+ return null;
5031
+ }
5032
+ pos++;
5033
+ consumed++;
5034
+ const whitespaceAfterColon = ctx.tokens[pos];
5035
+ if (!whitespaceAfterColon || whitespaceAfterColon.type !== "WHITESPACE") {
5036
+ return null;
5037
+ }
5038
+ while (ctx.tokens[pos]?.type === "WHITESPACE") {
5039
+ pos++;
5040
+ consumed++;
5041
+ }
5042
+ let label = "";
5043
+ let foundSecondColon = false;
5044
+ const keyNodes = [];
5045
+ while (pos < ctx.tokens.length) {
5046
+ const token = ctx.tokens[pos];
5047
+ if (!token || token.type === "NEWLINE" || token.type === "EOF") {
5048
+ break;
5049
+ }
5050
+ if (token.type === "COLON") {
5051
+ foundSecondColon = true;
5052
+ pos++;
5053
+ consumed++;
5054
+ break;
5055
+ }
5056
+ label += token.value;
5057
+ keyNodes.push({ element: "text", data: token.value });
5058
+ pos++;
5059
+ consumed++;
5060
+ }
5061
+ if (!foundSecondColon) {
5062
+ return null;
5063
+ }
5064
+ label = label.trim();
5065
+ while (ctx.tokens[pos]?.type === "WHITESPACE") {
5066
+ pos++;
5067
+ consumed++;
5068
+ }
5069
+ const contentNodes = [];
5070
+ while (pos < ctx.tokens.length) {
5071
+ const token = ctx.tokens[pos];
5072
+ if (!token || token.type === "EOF") {
5073
+ break;
5074
+ }
5075
+ if (token.type === "BLOCK_END_OPEN") {
5076
+ const closeNameResult = parseBlockName(ctx, pos + 1);
5077
+ if (closeNameResult?.name === "bibliography") {
5078
+ break;
5079
+ }
5080
+ }
5081
+ if (token.type === "NEWLINE") {
5082
+ const nextToken = ctx.tokens[pos + 1];
5083
+ if (nextToken?.type === "COLON" && nextToken.lineStart) {
5084
+ pos++;
5085
+ consumed++;
5086
+ break;
5087
+ }
5088
+ if (nextToken?.type === "BLOCK_END_OPEN") {
5089
+ pos++;
5090
+ consumed++;
5091
+ break;
5092
+ }
5093
+ if (nextToken?.type === "NEWLINE" || !nextToken || nextToken.type === "EOF") {
5094
+ pos++;
5095
+ consumed++;
5096
+ break;
5097
+ }
5098
+ }
5099
+ const inlineCtx = { ...ctx, pos };
5100
+ const result = parseInlineUntil(inlineCtx, "NEWLINE");
5101
+ if (result.elements.length > 0) {
5102
+ contentNodes.push(...result.elements);
5103
+ pos += result.consumed;
5104
+ consumed += result.consumed;
5105
+ } else {
5106
+ pos++;
5107
+ consumed++;
5108
+ }
5109
+ }
5110
+ while (keyNodes.length > 0) {
5111
+ const lastNode = keyNodes[keyNodes.length - 1];
5112
+ if (lastNode && lastNode.element === "text" && typeof lastNode.data === "string" && lastNode.data.trim() === "") {
5113
+ keyNodes.pop();
5114
+ } else {
5115
+ break;
5116
+ }
5117
+ }
5118
+ return {
5119
+ entry: {
5120
+ label,
5121
+ key: keyNodes,
5122
+ content: contentNodes
5123
+ },
5124
+ consumed
5125
+ };
5126
+ }
5127
+ var bibliographyRule = {
5128
+ name: "bibliography",
5129
+ startTokens: ["BLOCK_OPEN"],
5130
+ requiresLineStart: false,
5131
+ parse(ctx) {
5132
+ const openToken = currentToken(ctx);
5133
+ if (openToken.type !== "BLOCK_OPEN") {
5134
+ return { success: false };
5135
+ }
5136
+ let pos = ctx.pos + 1;
5137
+ let consumed = 1;
5138
+ const nameResult = parseBlockName(ctx, pos);
5139
+ if (!nameResult || nameResult.name !== "bibliography") {
5140
+ return { success: false };
5141
+ }
5142
+ pos += nameResult.consumed;
5143
+ consumed += nameResult.consumed;
5144
+ const attrResult = parseAttributes(ctx, pos);
5145
+ pos += attrResult.consumed;
5146
+ consumed += attrResult.consumed;
5147
+ if (ctx.tokens[pos]?.type !== "BLOCK_CLOSE") {
5148
+ return { success: false };
5149
+ }
5150
+ pos++;
5151
+ consumed++;
5152
+ if (ctx.tokens[pos]?.type === "NEWLINE") {
5153
+ pos++;
5154
+ consumed++;
5155
+ }
5156
+ const entries = [];
5157
+ let foundClose = false;
5158
+ while (pos < ctx.tokens.length) {
5159
+ const token = ctx.tokens[pos];
5160
+ if (!token || token.type === "EOF") {
5161
+ break;
5162
+ }
5163
+ if (token.type === "BLOCK_END_OPEN") {
5164
+ const closeNameResult = parseBlockName(ctx, pos + 1);
5165
+ if (closeNameResult?.name === "bibliography") {
5166
+ foundClose = true;
5167
+ pos++;
5168
+ consumed++;
5169
+ pos += closeNameResult.consumed;
5170
+ consumed += closeNameResult.consumed;
5171
+ while (ctx.tokens[pos]?.type === "WHITESPACE") {
5172
+ pos++;
5173
+ consumed++;
5174
+ }
5175
+ if (ctx.tokens[pos]?.type === "BLOCK_CLOSE") {
5176
+ pos++;
5177
+ consumed++;
5178
+ }
5179
+ break;
5180
+ }
5181
+ }
5182
+ if (token.type === "WHITESPACE" || token.type === "NEWLINE") {
5183
+ pos++;
5184
+ consumed++;
5185
+ continue;
5186
+ }
5187
+ if (token.type === "COLON" && token.lineStart) {
5188
+ const result = parseBibliographyEntry(ctx, pos);
5189
+ if (result) {
5190
+ entries.push(result.entry);
5191
+ pos += result.consumed;
5192
+ consumed += result.consumed;
5193
+ continue;
5194
+ }
5195
+ }
5196
+ pos++;
5197
+ consumed++;
5198
+ }
5199
+ if (!foundClose) {
5200
+ return { success: false };
5201
+ }
5202
+ const definitionItems = entries.map((entry) => ({
5203
+ key_string: entry.label,
5204
+ key: entry.key,
5205
+ value: entry.content
5206
+ }));
5207
+ const title = attrResult.attrs.title ?? null;
5208
+ const hide = attrResult.attrs.hide === "true" || attrResult.attrs.hide === "";
5209
+ return {
5210
+ success: true,
5211
+ elements: [
5212
+ {
5213
+ element: "bibliography-block",
5214
+ data: {
5215
+ entries: definitionItems,
5216
+ title: typeof title === "string" ? title : null,
5217
+ hide
5218
+ }
5219
+ }
5220
+ ],
5221
+ consumed
5222
+ };
5223
+ }
5224
+ };
5225
+
4368
5226
  // packages/parser/src/parser/rules/block/index.ts
4369
5227
  var blockRules = [
4370
5228
  blockCommentRule,
@@ -4377,6 +5235,7 @@ var blockRules = [
4377
5235
  tableBlockRule,
4378
5236
  listRule,
4379
5237
  blockListRule,
5238
+ orphanLiRule,
4380
5239
  blockquoteRule,
4381
5240
  definitionListRule,
4382
5241
  codeBlockRule,
@@ -4389,8 +5248,10 @@ var blockRules = [
4389
5248
  includeRule,
4390
5249
  mathBlockRule,
4391
5250
  htmlBlockRule,
5251
+ embedBlockRule,
4392
5252
  iframeRule,
4393
5253
  iftagsRule,
5254
+ bibliographyRule,
4394
5255
  divRule
4395
5256
  ];
4396
5257
  // packages/parser/src/parser/rules/inline/bold.ts
@@ -4407,6 +5268,13 @@ var boldRule = {
4407
5268
  };
4408
5269
  }
4409
5270
  const result = parseInlineUntil({ ...ctx, pos: ctx.pos + 1 }, "BOLD_MARKER");
5271
+ if (result.elements.length === 0) {
5272
+ return {
5273
+ success: true,
5274
+ elements: [],
5275
+ consumed: 1 + result.consumed + 1
5276
+ };
5277
+ }
4410
5278
  return {
4411
5279
  success: true,
4412
5280
  elements: [
@@ -4497,6 +5365,13 @@ var underlineRule = {
4497
5365
  consumed++;
4498
5366
  }
4499
5367
  }
5368
+ if (children.length === 0) {
5369
+ return {
5370
+ success: true,
5371
+ elements: [],
5372
+ consumed
5373
+ };
5374
+ }
4500
5375
  return {
4501
5376
  success: true,
4502
5377
  elements: [
@@ -4577,6 +5452,13 @@ var superscriptRule = {
4577
5452
  };
4578
5453
  }
4579
5454
  const result = parseInlineUntil({ ...ctx, pos: ctx.pos + 1 }, "SUPER_MARKER");
5455
+ if (result.elements.length === 0) {
5456
+ return {
5457
+ success: true,
5458
+ elements: [],
5459
+ consumed: 1 + result.consumed + 1
5460
+ };
5461
+ }
4580
5462
  return {
4581
5463
  success: true,
4582
5464
  elements: [
@@ -4608,6 +5490,13 @@ var subscriptRule = {
4608
5490
  };
4609
5491
  }
4610
5492
  const result = parseInlineUntil({ ...ctx, pos: ctx.pos + 1 }, "SUB_MARKER");
5493
+ if (result.elements.length === 0) {
5494
+ return {
5495
+ success: true,
5496
+ elements: [],
5497
+ consumed: 1 + result.consumed + 1
5498
+ };
5499
+ }
4611
5500
  return {
4612
5501
  success: true,
4613
5502
  elements: [
@@ -4654,15 +5543,38 @@ var monospaceRule = {
4654
5543
  consumed: 1 + result.consumed + 1
4655
5544
  };
4656
5545
  }
4657
- };
4658
-
4659
- // packages/parser/src/parser/rules/inline/link-triple.ts
5546
+ };
5547
+
5548
+ // packages/parser/src/parser/rules/inline/link-triple.ts
5549
+ function hasClosingLinkMarker(ctx, startPos) {
5550
+ let pos = startPos;
5551
+ while (pos < ctx.tokens.length) {
5552
+ const token = ctx.tokens[pos];
5553
+ if (!token || token.type === "EOF") {
5554
+ return false;
5555
+ }
5556
+ if (token.type === "LINK_CLOSE") {
5557
+ return true;
5558
+ }
5559
+ if (token.type === "NEWLINE") {
5560
+ const next = ctx.tokens[pos + 1];
5561
+ if (next?.type === "NEWLINE") {
5562
+ return false;
5563
+ }
5564
+ if (next?.type === "LINK_CLOSE") {
5565
+ return false;
5566
+ }
5567
+ }
5568
+ pos++;
5569
+ }
5570
+ return false;
5571
+ }
4660
5572
  var linkTripleRule = {
4661
5573
  name: "linkTriple",
4662
5574
  startTokens: ["LINK_OPEN"],
4663
5575
  parse(ctx) {
4664
5576
  const startToken = currentToken(ctx);
4665
- if (!hasClosingMarkerBeforeNewline({ ...ctx, pos: ctx.pos + 1 }, "LINK_CLOSE")) {
5577
+ if (!hasClosingLinkMarker(ctx, ctx.pos + 1)) {
4666
5578
  return {
4667
5579
  success: true,
4668
5580
  elements: [{ element: "text", data: startToken.value }],
@@ -4676,9 +5588,14 @@ var linkTripleRule = {
4676
5588
  let pos = ctx.pos + 1;
4677
5589
  while (pos < ctx.tokens.length) {
4678
5590
  const token = ctx.tokens[pos];
4679
- if (!token || token.type === "LINK_CLOSE" || token.type === "NEWLINE" || token.type === "EOF") {
5591
+ if (!token || token.type === "LINK_CLOSE" || token.type === "EOF") {
4680
5592
  break;
4681
5593
  }
5594
+ if (token.type === "NEWLINE") {
5595
+ consumed++;
5596
+ pos++;
5597
+ continue;
5598
+ }
4682
5599
  if (token.type === "PIPE" && !foundPipe) {
4683
5600
  foundPipe = true;
4684
5601
  } else if (foundPipe) {
@@ -4693,8 +5610,40 @@ var linkTripleRule = {
4693
5610
  consumed++;
4694
5611
  }
4695
5612
  const trimmedTarget = target.trim();
4696
- const { linkType, link } = determineLinkTypeAndLocation(trimmedTarget);
4697
- const displayText = foundPipe ? labelText.trim() : trimmedTarget;
5613
+ if (trimmedTarget === "" && foundPipe) {
5614
+ return {
5615
+ success: true,
5616
+ elements: [{ element: "text", data: startToken.value }],
5617
+ consumed: 1
5618
+ };
5619
+ }
5620
+ if (/#{2,}/.test(trimmedTarget)) {
5621
+ return {
5622
+ success: true,
5623
+ elements: [{ element: "text", data: startToken.value }],
5624
+ consumed: 1
5625
+ };
5626
+ }
5627
+ let finalTarget = trimmedTarget;
5628
+ if (trimmedTarget === "*" && foundPipe) {
5629
+ finalTarget = "";
5630
+ }
5631
+ if (trimmedTarget.startsWith("*") && !foundPipe) {
5632
+ finalTarget = trimmedTarget.slice(1);
5633
+ }
5634
+ const { linkType, link } = determineLinkTypeAndLocation(finalTarget);
5635
+ const trimmedLabel = labelText.trim();
5636
+ let displayText;
5637
+ if (foundPipe) {
5638
+ displayText = trimmedLabel || finalTarget;
5639
+ } else {
5640
+ const colonIdx = trimmedTarget.indexOf(":");
5641
+ if (colonIdx !== -1 && !trimmedTarget.startsWith("http")) {
5642
+ displayText = trimmedTarget.slice(colonIdx + 1).trim();
5643
+ } else {
5644
+ displayText = trimmedTarget;
5645
+ }
5646
+ }
4698
5647
  const label = { text: displayText };
4699
5648
  return {
4700
5649
  success: true,
@@ -4714,6 +5663,7 @@ var linkTripleRule = {
4714
5663
  };
4715
5664
  }
4716
5665
  };
5666
+ var INTERWIKI_PREFIXES = new Set(["wikipedia", "google", "dictionary", "wikidot"]);
4717
5667
  function determineLinkTypeAndLocation(target) {
4718
5668
  if (target.startsWith("#")) {
4719
5669
  return { linkType: "anchor", link: target };
@@ -4721,8 +5671,12 @@ function determineLinkTypeAndLocation(target) {
4721
5671
  if (target.startsWith("http://") || target.startsWith("https://")) {
4722
5672
  return { linkType: "direct", link: target };
4723
5673
  }
4724
- if (target.includes(":") && !target.includes("/")) {
4725
- return { linkType: "interwiki", link: target };
5674
+ const colonIdx = target.indexOf(":");
5675
+ if (colonIdx > 0 && !target.includes("/")) {
5676
+ const prefix = target.slice(0, colonIdx).toLowerCase();
5677
+ if (INTERWIKI_PREFIXES.has(prefix)) {
5678
+ return { linkType: "interwiki", link: target };
5679
+ }
4726
5680
  }
4727
5681
  return { linkType: "page", link: { site: null, page: target } };
4728
5682
  }
@@ -4980,7 +5934,7 @@ var colorRule = {
4980
5934
  }
4981
5935
  const textChildren = contentResult.elements;
4982
5936
  const trimmedColor = colorSpec.trim();
4983
- if (textChildren.length === 0) {
5937
+ if (trimmedColor === "" || textChildren.length === 0) {
4984
5938
  return { success: false };
4985
5939
  }
4986
5940
  return {
@@ -5030,7 +5984,36 @@ var newlineLineBreakRule = {
5030
5984
  lookAhead++;
5031
5985
  }
5032
5986
  const nextMeaningfulToken = ctx.tokens[ctx.pos + lookAhead];
5033
- if (!nextMeaningfulToken || nextMeaningfulToken.type === "EOF" || nextMeaningfulToken.type === "NEWLINE" || isBlockStartToken(nextMeaningfulToken.type)) {
5987
+ let isValidBlock = isBlockStartToken(nextMeaningfulToken?.type);
5988
+ if (isValidBlock && (nextMeaningfulToken?.type === "LIST_BULLET" || nextMeaningfulToken?.type === "LIST_NUMBER")) {
5989
+ if (!nextMeaningfulToken.lineStart) {
5990
+ isValidBlock = false;
5991
+ }
5992
+ }
5993
+ if (isValidBlock && nextMeaningfulToken?.type === "HEADING_MARKER") {
5994
+ const markerLen = nextMeaningfulToken.value.length;
5995
+ const afterPos = ctx.pos + lookAhead + 1;
5996
+ const afterMarker = ctx.tokens[afterPos];
5997
+ if (markerLen > 6) {
5998
+ isValidBlock = false;
5999
+ } else if (afterMarker?.type === "STAR") {
6000
+ if (ctx.tokens[afterPos + 1]?.type !== "WHITESPACE")
6001
+ isValidBlock = false;
6002
+ } else if (afterMarker?.type !== "WHITESPACE") {
6003
+ isValidBlock = false;
6004
+ }
6005
+ }
6006
+ let hasBackslashBreak = false;
6007
+ {
6008
+ let ahead = 1;
6009
+ while (ctx.tokens[ctx.pos + ahead]?.type === "WHITESPACE") {
6010
+ ahead++;
6011
+ }
6012
+ if (ctx.tokens[ctx.pos + ahead]?.type === "BACKSLASH_BREAK") {
6013
+ hasBackslashBreak = true;
6014
+ }
6015
+ }
6016
+ if (!nextMeaningfulToken || nextMeaningfulToken.type === "EOF" || nextMeaningfulToken.type === "NEWLINE" || isValidBlock || hasBackslashBreak) {
5034
6017
  return {
5035
6018
  success: true,
5036
6019
  elements: [],
@@ -5044,6 +6027,52 @@ var newlineLineBreakRule = {
5044
6027
  };
5045
6028
  }
5046
6029
  };
6030
+ var backslashLineBreakRule = {
6031
+ name: "backslashLineBreak",
6032
+ startTokens: ["WHITESPACE", "BACKSLASH_BREAK"],
6033
+ parse(ctx) {
6034
+ const currentTok = ctx.tokens[ctx.pos];
6035
+ if (!currentTok) {
6036
+ return { success: false };
6037
+ }
6038
+ if (currentTok.type === "WHITESPACE") {
6039
+ const nextTok = ctx.tokens[ctx.pos + 1];
6040
+ if (nextTok?.type === "BACKSLASH_BREAK") {
6041
+ const afterBreak = ctx.tokens[ctx.pos + 2];
6042
+ const afterAfter = ctx.tokens[ctx.pos + 3];
6043
+ const afterAfterAfter = ctx.tokens[ctx.pos + 4];
6044
+ const isFollowedByUnderscoreBreak = afterBreak?.type === "WHITESPACE" && afterAfter?.type === "UNDERSCORE" && (afterAfterAfter?.type === "NEWLINE" || afterAfterAfter?.type === "EOF");
6045
+ if (isFollowedByUnderscoreBreak) {
6046
+ const lb2 = { element: "line-break" };
6047
+ lb2._preservedTrailingBreak = true;
6048
+ return {
6049
+ success: true,
6050
+ elements: [lb2],
6051
+ consumed: 2
6052
+ };
6053
+ }
6054
+ const lb = { element: "line-break" };
6055
+ lb._preservedTrailingBreak = true;
6056
+ return {
6057
+ success: true,
6058
+ elements: [lb, { element: "text", data: " " }],
6059
+ consumed: 2
6060
+ };
6061
+ }
6062
+ return { success: false };
6063
+ }
6064
+ if (currentTok.type === "BACKSLASH_BREAK") {
6065
+ const lb = { element: "line-break" };
6066
+ lb._preservedTrailingBreak = true;
6067
+ return {
6068
+ success: true,
6069
+ elements: [lb],
6070
+ consumed: 1
6071
+ };
6072
+ }
6073
+ return { success: false };
6074
+ }
6075
+ };
5047
6076
  var underscoreLineBreakRule = {
5048
6077
  name: "underscoreLineBreak",
5049
6078
  startTokens: ["WHITESPACE", "UNDERSCORE"],
@@ -5056,9 +6085,11 @@ var underscoreLineBreakRule = {
5056
6085
  const nextTok = ctx.tokens[ctx.pos + 1];
5057
6086
  const afterTok = ctx.tokens[ctx.pos + 2];
5058
6087
  if (nextTok?.type === "UNDERSCORE" && afterTok && (afterTok.type === "NEWLINE" || afterTok.type === "EOF")) {
6088
+ const lb = { element: "line-break" };
6089
+ lb._preservedTrailingBreak = true;
5059
6090
  return {
5060
6091
  success: true,
5061
- elements: [{ element: "line-break" }],
6092
+ elements: [lb],
5062
6093
  consumed: 3
5063
6094
  };
5064
6095
  }
@@ -5066,9 +6097,11 @@ var underscoreLineBreakRule = {
5066
6097
  if (currentTok.type === "UNDERSCORE" && currentTok.lineStart) {
5067
6098
  const nextTok = ctx.tokens[ctx.pos + 1];
5068
6099
  if (nextTok && (nextTok.type === "NEWLINE" || nextTok.type === "EOF")) {
6100
+ const lb = { element: "line-break" };
6101
+ lb._preservedTrailingBreak = true;
5069
6102
  return {
5070
6103
  success: true,
5071
- elements: [{ element: "line-break" }],
6104
+ elements: [lb],
5072
6105
  consumed: 2
5073
6106
  };
5074
6107
  }
@@ -5124,29 +6157,25 @@ function parseDoubleAtRaw(ctx) {
5124
6157
  const startToken = currentToken(ctx);
5125
6158
  let pos = ctx.pos + 1;
5126
6159
  const next1 = ctx.tokens[pos];
5127
- const next2 = ctx.tokens[pos + 1];
5128
- if (next1?.type === "RAW_OPEN" && next2?.type === "RAW_OPEN") {
5129
- return {
5130
- success: true,
5131
- elements: [{ element: "raw", data: "@@" }],
5132
- consumed: 3
5133
- };
5134
- }
5135
- if (next1?.type === "RAW_OPEN" && next2?.type === "AT") {
5136
- return {
5137
- success: true,
5138
- elements: [{ element: "raw", data: "@" }],
5139
- consumed: 3
5140
- };
5141
- }
5142
6160
  if (next1?.type === "RAW_OPEN") {
5143
6161
  return {
5144
6162
  success: true,
5145
- elements: [{ element: "raw", data: "" }],
6163
+ elements: [],
5146
6164
  consumed: 2
5147
6165
  };
5148
6166
  }
5149
6167
  if (!hasClosingMarkerBeforeNewline({ ...ctx, pos }, "RAW_OPEN")) {
6168
+ const nextToken = ctx.tokens[pos];
6169
+ if (nextToken?.type === "NEWLINE") {
6170
+ const afterNewline = ctx.tokens[pos + 1];
6171
+ if (afterNewline?.type === "RAW_OPEN") {
6172
+ return {
6173
+ success: true,
6174
+ elements: [],
6175
+ consumed: 3
6176
+ };
6177
+ }
6178
+ }
5150
6179
  return {
5151
6180
  success: true,
5152
6181
  elements: [{ element: "text", data: startToken.value }],
@@ -5155,17 +6184,46 @@ function parseDoubleAtRaw(ctx) {
5155
6184
  }
5156
6185
  let value = "";
5157
6186
  let consumed = 1;
6187
+ let hasBlockOpen = false;
6188
+ let hasBlockClose = false;
5158
6189
  while (pos < ctx.tokens.length) {
5159
6190
  const token = ctx.tokens[pos];
5160
6191
  if (!token || token.type === "RAW_OPEN" || token.type === "NEWLINE" || token.type === "EOF") {
5161
6192
  break;
5162
6193
  }
6194
+ if (token.type === "RAW_BLOCK_CLOSE") {
6195
+ const nextToken = ctx.tokens[pos + 1];
6196
+ if (nextToken?.type === "RAW_OPEN") {
6197
+ value += ">";
6198
+ consumed += 2;
6199
+ return {
6200
+ success: true,
6201
+ elements: [
6202
+ { element: "raw", data: value },
6203
+ { element: "text", data: "@" }
6204
+ ],
6205
+ consumed
6206
+ };
6207
+ }
6208
+ hasBlockClose = true;
6209
+ }
6210
+ if (token.type === "RAW_BLOCK_OPEN") {
6211
+ hasBlockOpen = true;
6212
+ }
5163
6213
  value += token.value;
5164
6214
  consumed++;
5165
6215
  pos++;
5166
6216
  }
5167
6217
  if (ctx.tokens[pos]?.type === "RAW_OPEN") {
5168
6218
  consumed++;
6219
+ pos++;
6220
+ }
6221
+ if (hasBlockOpen && hasBlockClose) {
6222
+ return {
6223
+ success: true,
6224
+ elements: [],
6225
+ consumed
6226
+ };
5169
6227
  }
5170
6228
  return {
5171
6229
  success: true,
@@ -5177,6 +6235,17 @@ function parseAngleRaw(ctx) {
5177
6235
  const startToken = currentToken(ctx);
5178
6236
  let pos = ctx.pos + 1;
5179
6237
  if (!hasClosingMarkerBeforeNewline({ ...ctx, pos }, "RAW_BLOCK_CLOSE")) {
6238
+ const nextToken = ctx.tokens[pos];
6239
+ if (nextToken?.type === "NEWLINE") {
6240
+ const afterNewline = ctx.tokens[pos + 1];
6241
+ if (afterNewline?.type === "RAW_BLOCK_CLOSE") {
6242
+ return {
6243
+ success: true,
6244
+ elements: [{ element: "text", data: startToken.value }],
6245
+ consumed: 3
6246
+ };
6247
+ }
6248
+ }
5180
6249
  return {
5181
6250
  success: true,
5182
6251
  elements: [{ element: "text", data: startToken.value }],
@@ -5458,6 +6527,12 @@ var closeSpanRule = {
5458
6527
  };
5459
6528
 
5460
6529
  // packages/parser/src/parser/rules/inline/size.ts
6530
+ var VALID_SIZE_UNITS = ["px", "em", "rem", "ex", "%", "cm", "mm", "in", "pc"];
6531
+ function isValidSizeValue(size) {
6532
+ const unitPattern = VALID_SIZE_UNITS.join("|");
6533
+ const match = size.match(new RegExp(`^(\\d+(?:\\.\\d+)?)(${unitPattern})$`, "i"));
6534
+ return match !== null;
6535
+ }
5461
6536
  function parseSizeValue(ctx, startPos) {
5462
6537
  let pos = startPos;
5463
6538
  let consumed = 0;
@@ -5481,7 +6556,11 @@ function parseSizeValue(ctx, startPos) {
5481
6556
  if (parts.length === 0) {
5482
6557
  return null;
5483
6558
  }
5484
- return { size: parts.join(""), consumed };
6559
+ const size = parts.join("");
6560
+ if (!isValidSizeValue(size)) {
6561
+ return null;
6562
+ }
6563
+ return { size, consumed };
5485
6564
  }
5486
6565
  var sizeRule = {
5487
6566
  name: "size",
@@ -5593,7 +6672,8 @@ var footnoteRule = {
5593
6672
  }
5594
6673
  pos++;
5595
6674
  consumed++;
5596
- const children = [];
6675
+ const paragraphs = [[]];
6676
+ let currentParagraph = 0;
5597
6677
  while (pos < ctx.tokens.length) {
5598
6678
  const token = ctx.tokens[pos];
5599
6679
  if (!token || token.type === "EOF") {
@@ -5618,23 +6698,58 @@ var footnoteRule = {
5618
6698
  }
5619
6699
  }
5620
6700
  if (token.type === "NEWLINE") {
5621
- children.push({ element: "line-break" });
5622
6701
  pos++;
5623
6702
  consumed++;
6703
+ if (ctx.tokens[pos]?.type === "NEWLINE") {
6704
+ while (ctx.tokens[pos]?.type === "NEWLINE") {
6705
+ pos++;
6706
+ consumed++;
6707
+ }
6708
+ currentParagraph++;
6709
+ paragraphs[currentParagraph] = [];
6710
+ } else {
6711
+ paragraphs[currentParagraph].push({ element: "line-break" });
6712
+ }
5624
6713
  continue;
5625
6714
  }
5626
6715
  const inlineCtx = { ...ctx, pos };
5627
6716
  const inlineResult = parseInlineUntil(inlineCtx, "BLOCK_END_OPEN");
5628
6717
  if (inlineResult.elements.length > 0) {
5629
- children.push(...inlineResult.elements);
6718
+ paragraphs[currentParagraph].push(...inlineResult.elements);
5630
6719
  pos += inlineResult.consumed;
5631
6720
  consumed += inlineResult.consumed;
5632
6721
  } else {
5633
- children.push({ element: "text", data: token.value });
6722
+ paragraphs[currentParagraph].push({ element: "text", data: token.value });
5634
6723
  pos++;
5635
6724
  consumed++;
5636
6725
  }
5637
6726
  }
6727
+ const children = [];
6728
+ for (let i = 0;i < paragraphs.length; i++) {
6729
+ const para = paragraphs[i];
6730
+ if (para.length === 0)
6731
+ continue;
6732
+ while (para.length > 0 && para[0]?.element === "line-break") {
6733
+ para.shift();
6734
+ }
6735
+ while (para.length > 0 && para[para.length - 1]?.element === "line-break") {
6736
+ para.pop();
6737
+ }
6738
+ if (para.length === 0)
6739
+ continue;
6740
+ if (i === 0) {
6741
+ children.push(...para);
6742
+ } else {
6743
+ children.push({
6744
+ element: "container",
6745
+ data: {
6746
+ type: "paragraph",
6747
+ attributes: {},
6748
+ elements: para
6749
+ }
6750
+ });
6751
+ }
6752
+ }
5638
6753
  ctx.footnotes.push(children);
5639
6754
  return {
5640
6755
  success: true,
@@ -5688,6 +6803,10 @@ function parseImageBlockName(ctx, startPos) {
5688
6803
  prefix = "f>";
5689
6804
  pos += 2;
5690
6805
  consumed += 2;
6806
+ } else if (nextToken?.type === "EQUALS") {
6807
+ prefix = "f=";
6808
+ pos += 2;
6809
+ consumed += 2;
5691
6810
  }
5692
6811
  }
5693
6812
  const nameToken = ctx.tokens[pos];
@@ -5710,6 +6829,15 @@ function parseImageSource(src) {
5710
6829
  const file = rest.substring(lastSlash + 1);
5711
6830
  return { type: "file3", data: { site, page, file } };
5712
6831
  }
6832
+ const slashes = src.split("/").length - 1;
6833
+ if (slashes >= 2) {
6834
+ const firstSlash = src.indexOf("/");
6835
+ const lastSlash = src.lastIndexOf("/");
6836
+ const site = src.substring(0, firstSlash);
6837
+ const page = src.substring(firstSlash + 1, lastSlash);
6838
+ const file = src.substring(lastSlash + 1);
6839
+ return { type: "file3", data: { site, page, file } };
6840
+ }
5713
6841
  if (slashIdx > 0) {
5714
6842
  const page = src.substring(0, slashIdx);
5715
6843
  const file = src.substring(slashIdx + 1);
@@ -5732,6 +6860,9 @@ function parseAlignment(blockName) {
5732
6860
  } else if (blockName === "f>image") {
5733
6861
  align = "right";
5734
6862
  float = true;
6863
+ } else if (blockName === "f=image") {
6864
+ align = "center";
6865
+ float = true;
5735
6866
  } else if (blockName === "image") {
5736
6867
  return null;
5737
6868
  }
@@ -5752,7 +6883,7 @@ var imageRule = {
5752
6883
  return { success: false };
5753
6884
  }
5754
6885
  const blockName = nameResult.name;
5755
- const imageNames = ["image", "=image", "<image", ">image", "f<image", "f>image"];
6886
+ const imageNames = ["image", "=image", "<image", ">image", "f<image", "f>image", "f=image"];
5756
6887
  if (!imageNames.includes(blockName)) {
5757
6888
  return { success: false };
5758
6889
  }
@@ -5841,9 +6972,8 @@ var userRule = {
5841
6972
  }
5842
6973
  let pos = ctx.pos + 1;
5843
6974
  let consumed = 1;
5844
- while (ctx.tokens[pos]?.type === "WHITESPACE") {
5845
- pos++;
5846
- consumed++;
6975
+ if (ctx.tokens[pos]?.type === "WHITESPACE") {
6976
+ return { success: false };
5847
6977
  }
5848
6978
  let showAvatar = false;
5849
6979
  if (ctx.tokens[pos]?.type === "STAR") {
@@ -6067,6 +7197,10 @@ var anchorRule = {
6067
7197
  consumed++;
6068
7198
  }
6069
7199
  foundClose = true;
7200
+ while (paragraphStrip && ctx.tokens[pos]?.type === "NEWLINE") {
7201
+ pos++;
7202
+ consumed++;
7203
+ }
6070
7204
  break;
6071
7205
  }
6072
7206
  }
@@ -6567,6 +7701,79 @@ var ifExprRule = {
6567
7701
  }
6568
7702
  };
6569
7703
 
7704
+ // packages/parser/src/parser/rules/inline/bibcite.ts
7705
/**
 * Inline rule for bibliography citations of the form `((bibcite label))`.
 *
 * Matching sequence, starting at the current token:
 *   TEXT "(" , TEXT "(" , optional WHITESPACE , IDENTIFIER "bibcite"
 *   (case-insensitive) , optional WHITESPACE , label tokens , TEXT ")" , TEXT ")".
 *
 * On success the trimmed label is appended to `ctx.bibcites` and a single
 * `bibliography-cite` element (with `brackets: false`) is returned together
 * with the number of tokens consumed, including both closing parens.
 * The rule fails (without touching `ctx.bibcites`) when the label is empty,
 * when a NEWLINE/EOF token is reached first, or when the token stream simply
 * ends before the closing `))` is found.
 */
var bibciteRule = {
  name: "bibcite",
  startTokens: ["TEXT"],
  parse(ctx) {
    const failure = () => ({ success: false });
    const isText = (tok, value) => tok?.type === "TEXT" && tok.value === value;

    // Opening "((" must be two separate TEXT tokens.
    if (!isText(currentToken(ctx), "(") || !isText(ctx.tokens[ctx.pos + 1], "(")) {
      return failure();
    }

    let pos = ctx.pos + 2;
    const skipWhitespace = () => {
      while (ctx.tokens[pos]?.type === "WHITESPACE") {
        pos++;
      }
    };

    skipWhitespace();
    const nameToken = ctx.tokens[pos];
    if (nameToken?.type !== "IDENTIFIER" || nameToken.value.toLowerCase() !== "bibcite") {
      return failure();
    }
    pos++;
    skipWhitespace();

    // The label has to start with an IDENTIFIER or TEXT token.
    const labelType = ctx.tokens[pos]?.type;
    if (labelType !== "IDENTIFIER" && labelType !== "TEXT") {
      return failure();
    }

    // Accumulate raw token values until the closing "))".
    let label = "";
    let closed = false;
    for (; pos < ctx.tokens.length; pos++) {
      const t = ctx.tokens[pos];
      if (!t || t.type === "NEWLINE" || t.type === "EOF") {
        break;
      }
      if (isText(t, ")") && isText(ctx.tokens[pos + 1], ")")) {
        closed = true;
        break;
      }
      label += t.value;
    }
    // An unterminated citation must not be reported as a match, even when the
    // token array ends without an explicit EOF token.
    if (!closed) {
      return failure();
    }

    label = label.trim();
    if (!label) {
      return failure();
    }

    ctx.bibcites.push(label);
    return {
      success: true,
      elements: [
        {
          element: "bibliography-cite",
          data: {
            label,
            brackets: false
          }
        }
      ],
      // `pos` rests on the first ")" of the closer; +2 accounts for both parens.
      consumed: pos - ctx.pos + 2
    };
  }
};
7776
+
6570
7777
  // packages/parser/src/parser/rules/inline/text.ts
6571
7778
  var textRule = {
6572
7779
  name: "text",
@@ -6595,6 +7802,7 @@ var inlineRules = [
6595
7802
  linkAnchorRule,
6596
7803
  linkStarRule,
6597
7804
  colorRule,
7805
+ backslashLineBreakRule,
6598
7806
  underscoreLineBreakRule,
6599
7807
  newlineLineBreakRule,
6600
7808
  commentRule,
@@ -6612,6 +7820,7 @@ var inlineRules = [
6612
7820
  anchorRule,
6613
7821
  mathInlineRule,
6614
7822
  equationRefRule,
7823
+ bibciteRule,
6615
7824
  guillemetRule,
6616
7825
  textRule
6617
7826
  ];
@@ -6768,6 +7977,59 @@ function splitParagraphAtBlankLineSpans(para) {
6768
7977
  }
6769
7978
  return result.length > 0 ? result : [para];
6770
7979
  }
7980
/**
 * Reports whether `el` is an `expr` element whose `data.expression` is the
 * empty string.
 *
 * Null-safe: a missing element or missing `data` yields false instead of
 * throwing, so the predicate can be passed directly to `Array.prototype.some`
 * over element lists that may contain undefined entries.
 *
 * @param {{element: string, data?: {expression?: string}} | null | undefined} el
 * @returns {boolean}
 */
function isEmptyExpr(el) {
  return el?.element === "expr" && el.data?.expression === "";
}
7986
/**
 * Splits a paragraph container into several paragraphs at every empty `expr`
 * element (as identified by `isEmptyExpr`).
 *
 * - If `para` is not a paragraph container, or contains no empty expr, it is
 *   returned unchanged as the single entry of the result array.
 * - Each empty expr is dropped. One `line-break` directly before it (at the
 *   end of the segment being closed) and one `line-break` directly after it
 *   are dropped as well.
 * - Every non-empty segment becomes a new paragraph container with empty
 *   `attributes`; empty segments produce nothing, so the result may be `[]`.
 * - Falsy entries in the element list are skipped.
 *
 * @param {object} para - Element to split.
 * @returns {object[]} The resulting paragraph elements.
 */
function splitParagraphAtEmptyExpr(para) {
  const data = getContainerData(para);
  if (!data || data.type !== "paragraph" || !data.elements.some(isEmptyExpr)) {
    return [para];
  }

  const result = [];
  let segment = [];
  // Emits the pending segment as a paragraph (if it has content) and starts a new one.
  const flushSegment = () => {
    if (segment.length === 0) {
      return;
    }
    result.push({
      element: "container",
      data: {
        type: "paragraph",
        attributes: {},
        elements: segment
      }
    });
    segment = [];
  };

  const { elements } = data;
  for (let i = 0; i < elements.length; i++) {
    const child = elements[i];
    if (!child) {
      continue;
    }
    if (!isEmptyExpr(child)) {
      segment.push(child);
      continue;
    }
    // Drop the line-break that led into the empty expr...
    if (segment[segment.length - 1]?.element === "line-break") {
      segment.pop();
    }
    flushSegment();
    // ...and the one that immediately follows it.
    if (elements[i + 1]?.element === "line-break") {
      i++;
    }
  }
  flushSegment();
  return result;
}
6771
8033
  function mergeSpanStripParagraphs(children) {
6772
8034
  const expandedChildren = [];
6773
8035
  for (const child of children) {
@@ -6775,6 +8037,8 @@ function mergeSpanStripParagraphs(children) {
6775
8037
  const data = getContainerData(child);
6776
8038
  if (data && data.elements.some(isSplitSpan)) {
6777
8039
  expandedChildren.push(...splitParagraphAtBlankLineSpans(child));
8040
+ } else if (data && data.elements.some(isEmptyExpr)) {
8041
+ expandedChildren.push(...splitParagraphAtEmptyExpr(child));
6778
8042
  } else {
6779
8043
  expandedChildren.push(child);
6780
8044
  }
@@ -6792,7 +8056,8 @@ function mergeSpanStripParagraphs(children) {
6792
8056
  i++;
6793
8057
  continue;
6794
8058
  }
6795
- if (!hasParagraphStripSpan(node)) {
8059
+ const thisHasSpanStrip = hasParagraphStripSpan(node);
8060
+ if (!thisHasSpanStrip) {
6796
8061
  result.push(node);
6797
8062
  i++;
6798
8063
  continue;
@@ -6806,17 +8071,18 @@ function mergeSpanStripParagraphs(children) {
6806
8071
  const mergedChildren = [...paraData.elements];
6807
8072
  i++;
6808
8073
  while (i < expandedChildren.length) {
6809
- const nextNode = expandedChildren[i];
6810
- if (!nextNode || !isContainer(nextNode, "paragraph")) {
8074
+ const nextPara = expandedChildren[i];
8075
+ if (!nextPara || !isContainer(nextPara, "paragraph")) {
6811
8076
  break;
6812
8077
  }
6813
- const nextParaData = getContainerData(nextNode);
8078
+ const nextParaData = getContainerData(nextPara);
6814
8079
  if (!nextParaData) {
6815
8080
  break;
6816
8081
  }
8082
+ const hasSpanStrip = hasParagraphStripSpan(nextPara);
6817
8083
  mergedChildren.push(...nextParaData.elements);
6818
8084
  i++;
6819
- if (!hasParagraphStripSpan(nextNode)) {
8085
+ if (!hasSpanStrip) {
6820
8086
  const peekNext = expandedChildren[i];
6821
8087
  if (!peekNext || !isContainer(peekNext, "paragraph") || !hasParagraphStripSpan(peekNext)) {
6822
8088
  break;
@@ -6825,16 +8091,22 @@ function mergeSpanStripParagraphs(children) {
6825
8091
  }
6826
8092
  const escapedSpans = extractEscapedSpans(mergedChildren);
6827
8093
  removeLineBreaksAroundSpanStrip(mergedChildren);
6828
- if (mergedChildren.length > 0) {
6829
- const mergedPara = {
6830
- element: "container",
6831
- data: {
6832
- type: "paragraph",
6833
- attributes: {},
6834
- elements: mergedChildren
6835
- }
6836
- };
6837
- result.push(mergedPara);
8094
+ if (escapedSpans.length > 0) {
8095
+ if (mergedChildren.length > 0) {
8096
+ const para = {
8097
+ element: "container",
8098
+ data: {
8099
+ type: "paragraph",
8100
+ attributes: {},
8101
+ elements: mergedChildren
8102
+ }
8103
+ };
8104
+ result.push(para);
8105
+ }
8106
+ } else {
8107
+ for (const child of mergedChildren) {
8108
+ result.push(child);
8109
+ }
6838
8110
  }
6839
8111
  for (const span of escapedSpans) {
6840
8112
  result.push(span);
@@ -6875,6 +8147,9 @@ function removeEmptySpansAndAdjacentWhitespace(elements) {
6875
8147
  return result;
6876
8148
  }
6877
8149
  function cleanElement(el) {
8150
+ if (el.element === "line-break") {
8151
+ return { element: "line-break" };
8152
+ }
6878
8153
  if (el.element === "container") {
6879
8154
  const data = el.data;
6880
8155
  const cleanedData = {
@@ -6905,6 +8180,42 @@ function cleanElement(el) {
6905
8180
  }
6906
8181
  };
6907
8182
  }
8183
+ if (el.element === "list") {
8184
+ const data = el.data;
8185
+ return {
8186
+ element: "list",
8187
+ data: {
8188
+ ...data,
8189
+ items: data.items.map((item) => {
8190
+ if (item["item-type"] === "elements") {
8191
+ return {
8192
+ ...item,
8193
+ elements: cleanInternalFlags(item.elements)
8194
+ };
8195
+ } else if (item["item-type"] === "sub-list") {
8196
+ const cleanedList = cleanElement({ element: "list", data: item.data });
8197
+ return {
8198
+ "item-type": "sub-list",
8199
+ element: "list",
8200
+ data: "data" in cleanedList ? cleanedList.data : item.data
8201
+ };
8202
+ }
8203
+ return item;
8204
+ })
8205
+ }
8206
+ };
8207
+ }
8208
+ if (el.element === "definition-list") {
8209
+ const items = el.data;
8210
+ return {
8211
+ element: "definition-list",
8212
+ data: items.map((item) => ({
8213
+ ...item,
8214
+ key: cleanInternalFlags(item.key),
8215
+ value: cleanInternalFlags(item.value)
8216
+ }))
8217
+ };
8218
+ }
6908
8219
  return el;
6909
8220
  }
6910
8221
  // packages/parser/src/parser/toc.ts
@@ -6981,6 +8292,8 @@ class Parser {
6981
8292
  tocEntries: [],
6982
8293
  codeBlocks: [],
6983
8294
  htmlBlocks: [],
8295
+ footnoteBlockParsed: false,
8296
+ bibcites: [],
6984
8297
  blockRules,
6985
8298
  blockFallbackRule: paragraphRule,
6986
8299
  inlineRules