@wdprlib/parser 0.1.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -25,6 +25,7 @@ import { createPoint, createPosition } from "@wdprlib/ast";
25
25
  class Lexer {
26
26
  state;
27
27
  options;
28
+ splitBlockClosePositions = new Set;
28
29
  constructor(source, options = {}) {
29
30
  this.options = {
30
31
  trackPositions: options.trackPositions ?? true
@@ -51,6 +52,37 @@ class Lexer {
51
52
  current() {
52
53
  return this.state.source[this.state.pos] ?? "";
53
54
  }
55
+ findInvalidAnchorNameEnd() {
56
+ const src = this.state.source;
57
+ const pos = this.state.pos;
58
+ if (src[pos] !== "[" || src[pos + 1] !== "[" || src[pos + 2] !== "#") {
59
+ return null;
60
+ }
61
+ if (src[pos + 3] !== " ") {
62
+ return null;
63
+ }
64
+ let i = pos + 4;
65
+ while (i < src.length && src[i] === " ") {
66
+ i++;
67
+ }
68
+ let foundInvalid = false;
69
+ while (i < src.length) {
70
+ const ch = src[i];
71
+ if (ch === `
72
+ `)
73
+ return null;
74
+ if (ch === "]" && src[i + 1] === "]") {
75
+ return foundInvalid ? i : null;
76
+ }
77
+ const code = ch.charCodeAt(0);
78
+ const isValid = code >= 48 && code <= 57 || code >= 65 && code <= 90 || code >= 97 && code <= 122 || code === 45 || code === 95 || code === 46 || code === 37;
79
+ if (!isValid) {
80
+ foundInvalid = true;
81
+ }
82
+ i++;
83
+ }
84
+ return null;
85
+ }
54
86
  match(pattern) {
55
87
  for (let i = 0;i < pattern.length; i++) {
56
88
  if (this.state.source[this.state.pos + i] !== pattern[i]) {
@@ -120,6 +152,13 @@ class Lexer {
120
152
  return;
121
153
  }
122
154
  if (this.match("[[")) {
155
+ const invalidEnd = this.findInvalidAnchorNameEnd();
156
+ if (invalidEnd !== null) {
157
+ this.splitBlockClosePositions.add(invalidEnd);
158
+ this.advance(1);
159
+ this.addToken("TEXT", "[");
160
+ return;
161
+ }
123
162
  this.advance(2);
124
163
  this.addToken("BLOCK_OPEN", "[[");
125
164
  return;
@@ -130,6 +169,14 @@ class Lexer {
130
169
  return;
131
170
  }
132
171
  if (this.match("]]")) {
172
+ if (this.splitBlockClosePositions.has(this.state.pos)) {
173
+ this.splitBlockClosePositions.delete(this.state.pos);
174
+ this.advance(1);
175
+ this.addToken("BRACKET_CLOSE", "]");
176
+ this.advance(1);
177
+ this.addToken("TEXT", "]");
178
+ return;
179
+ }
133
180
  this.advance(2);
134
181
  this.addToken("BLOCK_CLOSE", "]]");
135
182
  return;
@@ -164,7 +211,7 @@ class Lexer {
164
211
  this.addToken("BOLD_MARKER", "**");
165
212
  return;
166
213
  }
167
- if (isLineStart && this.match("---")) {
214
+ if (isLineStart && this.match("----")) {
168
215
  let dashes = "";
169
216
  while (this.current() === "-") {
170
217
  dashes += this.advance();
@@ -379,6 +426,11 @@ class Lexer {
379
426
  this.addToken("BACKSLASH", "\\");
380
427
  return;
381
428
  }
429
+ if (char.charCodeAt(0) === 57344) {
430
+ this.advance();
431
+ this.addToken("BACKSLASH_BREAK", char);
432
+ return;
433
+ }
382
434
  if (this.isAlphanumeric(char)) {
383
435
  let ident = "";
384
436
  while (!this.isAtEnd() && this.isAlphanumeric(this.current())) {
@@ -418,7 +470,7 @@ function substitute(text) {
418
470
  `);
419
471
  result = replaceLeadingSpaces(result);
420
472
  result = result.replace(WHITESPACE_ONLY_LINE, "");
421
- result = result.replace(CONCAT_LINES, "");
473
+ result = result.replace(CONCAT_LINES, String.fromCharCode(57344));
422
474
  result = result.replace(TABS, " ");
423
475
  result = result.replace(NULL_CHARS, " ");
424
476
  result = result.replace(LEADING_NEWLINES, "");
@@ -588,9 +640,42 @@ function parseInlineUntil(ctx, endType) {
588
640
  isInvalidBlockOpen = true;
589
641
  }
590
642
  }
643
+ let skipWhitespace = 0;
644
+ while (ctx.tokens[afterOpen + skipWhitespace]?.type === "WHITESPACE") {
645
+ skipWhitespace++;
646
+ }
647
+ const blockNameToken = ctx.tokens[afterOpen + skipWhitespace];
648
+ if (blockNameToken && (blockNameToken.type === "TEXT" || blockNameToken.type === "IDENTIFIER") && blockNameToken.value.toLowerCase() === "footnoteblock" && ctx.footnoteBlockParsed) {
649
+ isInvalidBlockOpen = true;
650
+ }
651
+ }
652
+ let isInvalidHeading = false;
653
+ if (nextMeaningfulToken?.type === "HEADING_MARKER") {
654
+ const markerLen = nextMeaningfulToken.value.length;
655
+ const afterMarkerPos = pos + lookAhead + 1;
656
+ const afterMarker = ctx.tokens[afterMarkerPos];
657
+ if (markerLen > 6) {
658
+ isInvalidHeading = true;
659
+ } else if (afterMarker?.type === "STAR") {
660
+ const afterStar = ctx.tokens[afterMarkerPos + 1];
661
+ if (afterStar?.type !== "WHITESPACE") {
662
+ isInvalidHeading = true;
663
+ }
664
+ } else if (afterMarker?.type !== "WHITESPACE") {
665
+ isInvalidHeading = true;
666
+ }
591
667
  }
592
- const isBlockStart = nextMeaningfulToken && BLOCK_START_TOKENS.includes(nextMeaningfulToken.type) && nextMeaningfulToken.lineStart && !isOrphanCloseSpan && !isAnchorName && !isInvalidBlockOpen;
668
+ const isBlockStart = nextMeaningfulToken && BLOCK_START_TOKENS.includes(nextMeaningfulToken.type) && nextMeaningfulToken.lineStart && !isOrphanCloseSpan && !isAnchorName && !isInvalidBlockOpen && !isInvalidHeading;
593
669
  if (!nextMeaningfulToken || nextMeaningfulToken.type === "NEWLINE" || nextMeaningfulToken.type === "EOF" || isBlockStart) {
670
+ if (isBlockStart && nodes.length > 0) {
671
+ const nextPos = pos + lookAhead;
672
+ const shouldPreserve = ctx.blockRules.some((rule) => rule.preservesPrecedingLineBreak && rule.isStartPattern?.(ctx, nextPos));
673
+ if (shouldPreserve) {
674
+ const lb = { element: "line-break" };
675
+ lb._preservedTrailingBreak = true;
676
+ nodes.push(lb);
677
+ }
678
+ }
594
679
  consumed++;
595
680
  if (nextMeaningfulToken?.type === "NEWLINE") {
596
681
  consumed++;
@@ -651,7 +736,10 @@ var headingRule = {
651
736
  if (ctx.tokens[pos]?.type !== "WHITESPACE") {
652
737
  return { success: false };
653
738
  }
654
- const depth = Math.min(marker.value.length, 6);
739
+ if (marker.value.length > 6) {
740
+ return { success: false };
741
+ }
742
+ const depth = marker.value.length;
655
743
  while (ctx.tokens[pos]?.type === "WHITESPACE") {
656
744
  pos++;
657
745
  consumed++;
@@ -1024,10 +1112,6 @@ function filterUnsafeAttributes(attrs) {
1024
1112
  function parseBlockName(ctx, startPos) {
1025
1113
  let pos = startPos;
1026
1114
  let consumed = 0;
1027
- while (ctx.tokens[pos]?.type === "WHITESPACE") {
1028
- pos++;
1029
- consumed++;
1030
- }
1031
1115
  const token = ctx.tokens[pos];
1032
1116
  if (!token || token.type !== "TEXT" && token.type !== "IDENTIFIER") {
1033
1117
  return null;
@@ -1104,79 +1188,6 @@ function parseBlocksUntil(ctx, closeCondition) {
1104
1188
  }
1105
1189
  return { elements, consumed };
1106
1190
  }
1107
- function parseInlineContentUntil(ctx, closeCondition) {
1108
- const elements = [];
1109
- let consumed = 0;
1110
- let pos = ctx.pos;
1111
- const { blockRules, inlineRules } = ctx;
1112
- while (pos < ctx.tokens.length) {
1113
- const token = ctx.tokens[pos];
1114
- if (!token || token.type === "EOF") {
1115
- break;
1116
- }
1117
- const checkCtx = { ...ctx, pos };
1118
- if (closeCondition(checkCtx)) {
1119
- break;
1120
- }
1121
- if (token.type === "WHITESPACE" && token.lineStart) {
1122
- pos++;
1123
- consumed++;
1124
- continue;
1125
- }
1126
- if (token.type === "NEWLINE") {
1127
- pos++;
1128
- consumed++;
1129
- while (ctx.tokens[pos]?.type === "NEWLINE") {
1130
- pos++;
1131
- consumed++;
1132
- }
1133
- const nextToken = ctx.tokens[pos];
1134
- if (nextToken?.type === "BLOCK_OPEN" || nextToken?.type === "BLOCK_END_OPEN" || nextToken?.type === "EOF" || !nextToken) {
1135
- continue;
1136
- }
1137
- elements.push({ element: "line-break" });
1138
- continue;
1139
- }
1140
- let matched = false;
1141
- const blockCtx = { ...ctx, pos };
1142
- for (const rule of blockRules) {
1143
- if (canApplyBlockRule(rule, token)) {
1144
- const result = rule.parse(blockCtx);
1145
- if (result.success) {
1146
- elements.push(...result.elements);
1147
- consumed += result.consumed;
1148
- pos += result.consumed;
1149
- matched = true;
1150
- break;
1151
- }
1152
- }
1153
- }
1154
- if (matched)
1155
- continue;
1156
- const inlineCtx = { ...ctx, pos };
1157
- for (const rule of inlineRules) {
1158
- if (canApplyInlineRule(rule, token)) {
1159
- const result = rule.parse(inlineCtx);
1160
- if (result.success) {
1161
- elements.push(...result.elements);
1162
- consumed += result.consumed;
1163
- pos += result.consumed;
1164
- matched = true;
1165
- break;
1166
- }
1167
- }
1168
- }
1169
- if (!matched) {
1170
- elements.push({ element: "text", data: token.value });
1171
- consumed++;
1172
- pos++;
1173
- }
1174
- }
1175
- while (elements.length > 0 && elements[elements.length - 1]?.element === "line-break") {
1176
- elements.pop();
1177
- }
1178
- return { elements, consumed };
1179
- }
1180
1191
  function parseAttributes(ctx, startPos) {
1181
1192
  const attrs = {};
1182
1193
  let pos = startPos;
@@ -1325,7 +1336,7 @@ function isLiOpen(ctx, pos) {
1325
1336
  const nameResult = parseBlockName(ctx, pos + 1);
1326
1337
  if (!nameResult)
1327
1338
  return null;
1328
- if (nameResult.name === "li" || nameResult.name === "li_") {
1339
+ if (nameResult.name === "li") {
1329
1340
  return { name: nameResult.name, consumed: 1 + nameResult.consumed };
1330
1341
  }
1331
1342
  return null;
@@ -1358,7 +1369,6 @@ function parseLiItem(ctx, startPos, listType) {
1358
1369
  const liOpen = isLiOpen(ctx, pos);
1359
1370
  if (!liOpen)
1360
1371
  return null;
1361
- const isParagraphStrip = liOpen.name === "li_";
1362
1372
  pos += liOpen.consumed;
1363
1373
  consumed += liOpen.consumed;
1364
1374
  const attrResult = parseAttributes(ctx, pos);
@@ -1404,13 +1414,23 @@ function parseLiItem(ctx, startPos, listType) {
1404
1414
  if (token.type === "NEWLINE") {
1405
1415
  pos++;
1406
1416
  consumed++;
1417
+ let consecutiveNewlines = 1;
1407
1418
  while (ctx.tokens[pos]?.type === "NEWLINE") {
1408
1419
  pos++;
1409
1420
  consumed++;
1421
+ consecutiveNewlines++;
1410
1422
  }
1411
- if (isParagraphStrip && !isLiClose(ctx, pos) && !isListClose(ctx, pos, listType) && !isNestedListOpen(ctx, pos) && ctx.tokens[pos]?.type !== "EOF") {
1423
+ while (ctx.tokens[pos]?.type === "WHITESPACE" && ctx.tokens[pos]?.lineStart) {
1424
+ pos++;
1425
+ consumed++;
1426
+ }
1427
+ const atCloseTag = isLiClose(ctx, pos) || isListClose(ctx, pos, listType) || ctx.tokens[pos]?.type === "EOF";
1428
+ if (consecutiveNewlines === 1 && contentElements.length > 0) {
1412
1429
  contentElements.push({ element: "line-break" });
1413
1430
  }
1431
+ if (atCloseTag) {
1432
+ continue;
1433
+ }
1414
1434
  continue;
1415
1435
  }
1416
1436
  let matched = false;
@@ -1453,6 +1473,57 @@ function parseLiItem(ctx, startPos, listType) {
1453
1473
  const closeConsumed = consumeCloseTag(ctx, pos);
1454
1474
  consumed += closeConsumed;
1455
1475
  pos += closeConsumed;
1476
+ while (ctx.tokens[pos]?.type === "NEWLINE") {
1477
+ pos++;
1478
+ consumed++;
1479
+ }
1480
+ while (ctx.tokens[pos]?.type === "WHITESPACE") {
1481
+ pos++;
1482
+ consumed++;
1483
+ }
1484
+ while (pos < ctx.tokens.length) {
1485
+ const tok = ctx.tokens[pos];
1486
+ if (!tok || tok.type === "EOF")
1487
+ break;
1488
+ if (tok.type === "NEWLINE") {
1489
+ pos++;
1490
+ consumed++;
1491
+ while (ctx.tokens[pos]?.type === "NEWLINE") {
1492
+ pos++;
1493
+ consumed++;
1494
+ }
1495
+ while (ctx.tokens[pos]?.type === "WHITESPACE" && ctx.tokens[pos]?.lineStart) {
1496
+ pos++;
1497
+ consumed++;
1498
+ }
1499
+ if (isLiOpen(ctx, pos) || isListClose(ctx, pos, listType) || isNestedListOpen(ctx, pos) || ctx.tokens[pos]?.type === "EOF") {
1500
+ break;
1501
+ }
1502
+ continue;
1503
+ }
1504
+ if (isLiOpen(ctx, pos) || isListClose(ctx, pos, listType) || isNestedListOpen(ctx, pos)) {
1505
+ break;
1506
+ }
1507
+ let matched = false;
1508
+ const inlineCtx = { ...ctx, pos };
1509
+ for (const rule of ctx.inlineRules) {
1510
+ if (rule.startTokens.includes(tok.type)) {
1511
+ const result = rule.parse(inlineCtx);
1512
+ if (result.success) {
1513
+ contentElements.push(...result.elements);
1514
+ consumed += result.consumed;
1515
+ pos += result.consumed;
1516
+ matched = true;
1517
+ break;
1518
+ }
1519
+ }
1520
+ }
1521
+ if (!matched) {
1522
+ contentElements.push({ element: "text", data: tok.value });
1523
+ consumed++;
1524
+ pos++;
1525
+ }
1526
+ }
1456
1527
  }
1457
1528
  return {
1458
1529
  item: {
@@ -1533,8 +1604,90 @@ function parseListBlock(ctx, startPos, listType) {
1533
1604
  pos += liResult.consumed;
1534
1605
  continue;
1535
1606
  }
1536
- pos++;
1537
- consumed++;
1607
+ const bareContent = [];
1608
+ let currentParagraph = [];
1609
+ const flushParagraph = () => {
1610
+ if (currentParagraph.length > 0) {
1611
+ while (currentParagraph.length > 0 && currentParagraph[currentParagraph.length - 1]?.element === "line-break") {
1612
+ currentParagraph.pop();
1613
+ }
1614
+ if (currentParagraph.length > 0) {
1615
+ bareContent.push({
1616
+ element: "container",
1617
+ data: {
1618
+ type: "paragraph",
1619
+ attributes: {},
1620
+ elements: currentParagraph
1621
+ }
1622
+ });
1623
+ }
1624
+ currentParagraph = [];
1625
+ }
1626
+ };
1627
+ while (pos < ctx.tokens.length) {
1628
+ const tok = ctx.tokens[pos];
1629
+ if (!tok || tok.type === "EOF")
1630
+ break;
1631
+ if (tok.type === "NEWLINE") {
1632
+ pos++;
1633
+ consumed++;
1634
+ let consecutiveNewlines = 1;
1635
+ while (ctx.tokens[pos]?.type === "NEWLINE") {
1636
+ pos++;
1637
+ consumed++;
1638
+ consecutiveNewlines++;
1639
+ }
1640
+ while (ctx.tokens[pos]?.type === "WHITESPACE" && ctx.tokens[pos]?.lineStart) {
1641
+ pos++;
1642
+ consumed++;
1643
+ }
1644
+ if (isListClose(ctx, pos, listType) || isLiOpen(ctx, pos) || isNestedListOpen(ctx, pos)) {
1645
+ break;
1646
+ }
1647
+ if (consecutiveNewlines >= 2) {
1648
+ flushParagraph();
1649
+ } else if (currentParagraph.length > 0) {
1650
+ currentParagraph.push({ element: "line-break" });
1651
+ }
1652
+ continue;
1653
+ }
1654
+ if (isListClose(ctx, pos, listType) || isLiOpen(ctx, pos) || isNestedListOpen(ctx, pos)) {
1655
+ break;
1656
+ }
1657
+ let matched = false;
1658
+ const inlineCtx = { ...ctx, pos };
1659
+ for (const rule of ctx.inlineRules) {
1660
+ if (rule.startTokens.includes(tok.type)) {
1661
+ const result = rule.parse(inlineCtx);
1662
+ if (result.success) {
1663
+ currentParagraph.push(...result.elements);
1664
+ consumed += result.consumed;
1665
+ pos += result.consumed;
1666
+ matched = true;
1667
+ break;
1668
+ }
1669
+ }
1670
+ }
1671
+ if (!matched) {
1672
+ currentParagraph.push({ element: "text", data: tok.value });
1673
+ consumed++;
1674
+ pos++;
1675
+ }
1676
+ }
1677
+ flushParagraph();
1678
+ if (bareContent.length > 0) {
1679
+ let finalElements;
1680
+ if (bareContent.length === 1 && bareContent[0]?.element === "container" && bareContent[0].data?.type === "paragraph") {
1681
+ finalElements = bareContent[0].data.elements;
1682
+ } else {
1683
+ finalElements = bareContent;
1684
+ }
1685
+ items.push({
1686
+ "item-type": "elements",
1687
+ attributes: { _noMarker: "true" },
1688
+ elements: finalElements
1689
+ });
1690
+ }
1538
1691
  }
1539
1692
  const listData = {
1540
1693
  type: listType === "ol" ? "numbered" : "bullet",
@@ -1590,21 +1743,6 @@ var blockquoteRule = {
1590
1743
  let pos = ctx.pos;
1591
1744
  let consumed = 0;
1592
1745
  while (pos < ctx.tokens.length) {
1593
- while (ctx.tokens[pos]?.type === "NEWLINE" && ctx.tokens[pos]?.lineStart) {
1594
- const nextPos = pos + 1;
1595
- const nextToken = ctx.tokens[nextPos];
1596
- if (nextToken?.type === "BLOCKQUOTE_MARKER" && nextToken.lineStart) {
1597
- depths.push({
1598
- depth: 0,
1599
- ltype: null,
1600
- value: { elements: [], hasLineBreak: false }
1601
- });
1602
- pos++;
1603
- consumed++;
1604
- } else {
1605
- break;
1606
- }
1607
- }
1608
1746
  const markerToken = ctx.tokens[pos];
1609
1747
  if (!markerToken || !markerToken.lineStart || markerToken.type !== "BLOCKQUOTE_MARKER") {
1610
1748
  break;
@@ -1615,6 +1753,17 @@ var blockquoteRule = {
1615
1753
  }
1616
1754
  pos++;
1617
1755
  consumed++;
1756
+ if (ctx.tokens[pos]?.type !== "WHITESPACE") {
1757
+ while (pos < ctx.tokens.length && ctx.tokens[pos]?.type !== "NEWLINE") {
1758
+ pos++;
1759
+ consumed++;
1760
+ }
1761
+ if (ctx.tokens[pos]?.type === "NEWLINE") {
1762
+ pos++;
1763
+ consumed++;
1764
+ }
1765
+ continue;
1766
+ }
1618
1767
  while (ctx.tokens[pos]?.type === "WHITESPACE") {
1619
1768
  pos++;
1620
1769
  consumed++;
@@ -1637,6 +1786,9 @@ var blockquoteRule = {
1637
1786
  });
1638
1787
  }
1639
1788
  if (depths.length === 0) {
1789
+ if (consumed > 0) {
1790
+ return { success: true, elements: [], consumed };
1791
+ }
1640
1792
  return { success: false };
1641
1793
  }
1642
1794
  const depthTrees = processDepths(null, depths);
@@ -1674,6 +1826,10 @@ function buildBlockquoteElement(list) {
1674
1826
  }
1675
1827
  for (const item of list) {
1676
1828
  if (item.kind === "item") {
1829
+ if (item.value.elements.length === 0) {
1830
+ flushParagraph();
1831
+ continue;
1832
+ }
1677
1833
  currentParagraphChildren.push(...item.value.elements);
1678
1834
  if (item.value.hasLineBreak) {
1679
1835
  currentParagraphChildren.push({ element: "line-break" });
@@ -1884,14 +2040,36 @@ var paragraphRule = {
1884
2040
  }
1885
2041
  let elements = processCloseSpanMarkers(result.elements);
1886
2042
  while (elements.length > 0 && elements[elements.length - 1]?.element === "line-break") {
2043
+ const lastEl = elements[elements.length - 1];
2044
+ if (lastEl._preservedTrailingBreak) {
2045
+ delete lastEl._preservedTrailingBreak;
2046
+ break;
2047
+ }
1887
2048
  elements.pop();
1888
2049
  }
2050
+ while (elements.length > 0) {
2051
+ const last = elements[elements.length - 1];
2052
+ if (last?.element === "text" && "data" in last && typeof last.data === "string" && last.data.trim() === "") {
2053
+ elements.pop();
2054
+ } else {
2055
+ break;
2056
+ }
2057
+ }
1889
2058
  while (elements.length > 0 && elements[0]?.element === "line-break") {
1890
2059
  elements.shift();
1891
2060
  }
1892
2061
  if (elements.length === 0) {
1893
2062
  return { success: false };
1894
2063
  }
2064
+ const nextPos = ctx.pos + result.consumed;
2065
+ const nextToken = ctx.tokens[nextPos];
2066
+ if (nextToken?.type === "COLON" && nextToken.lineStart) {
2067
+ return {
2068
+ success: true,
2069
+ elements: [...elements, { element: "line-break" }],
2070
+ consumed: result.consumed
2071
+ };
2072
+ }
1895
2073
  return {
1896
2074
  success: true,
1897
2075
  elements: [
@@ -1944,7 +2122,7 @@ var divRule = {
1944
2122
  pos++;
1945
2123
  consumed++;
1946
2124
  if (ctx.tokens[pos]?.type !== "NEWLINE") {
1947
- return { success: false };
2125
+ return consumeFailedDiv(ctx);
1948
2126
  }
1949
2127
  pos++;
1950
2128
  consumed++;
@@ -1961,10 +2139,10 @@ var divRule = {
1961
2139
  const bodyCtx = { ...ctx, pos };
1962
2140
  let children;
1963
2141
  if (paragraphStrip) {
1964
- const bodyResult = parseInlineContentUntil(bodyCtx, closeCondition);
2142
+ const bodyResult = parseBlocksUntil(bodyCtx, closeCondition);
1965
2143
  consumed += bodyResult.consumed;
1966
2144
  pos += bodyResult.consumed;
1967
- children = bodyResult.elements;
2145
+ children = unwrapEdgeParagraphs(bodyResult.elements);
1968
2146
  } else {
1969
2147
  const bodyResult = parseBlocksUntil(bodyCtx, closeCondition);
1970
2148
  consumed += bodyResult.consumed;
@@ -2004,28 +2182,117 @@ var divRule = {
2004
2182
  };
2005
2183
  }
2006
2184
  };
2007
-
2008
- // packages/parser/src/parser/rules/block/code.ts
2009
- var codeBlockRule = {
2010
- name: "code",
2011
- startTokens: ["BLOCK_OPEN"],
2012
- requiresLineStart: false,
2013
- parse(ctx) {
2014
- const openToken = currentToken(ctx);
2015
- if (openToken.type !== "BLOCK_OPEN") {
2016
- return { success: false };
2017
- }
2018
- let pos = ctx.pos + 1;
2019
- let consumed = 1;
2020
- const nameResult = parseBlockName(ctx, pos);
2021
- if (!nameResult) {
2022
- return { success: false };
2023
- }
2024
- if (nameResult.name !== "code") {
2025
- return { success: false };
2185
/**
 * Fallback for a `[[div` opener that could not be parsed as a div block:
 * turns everything from `ctx.pos` through the LAST `[[/div` closer found
 * before EOF into a single paragraph of plain text.
 *
 * Every token becomes a `text` element carrying `token.value`; a single
 * NEWLINE becomes a `line-break`; a NEWLINE followed (after optional
 * WHITESPACE) by another NEWLINE is skipped together with the surrounding
 * NEWLINE/WHITESPACE run, emitting nothing. One NEWLINE directly after the
 * closer is consumed as well.
 *
 * @param {object} ctx Parse context; reads `ctx.tokens` and `ctx.pos`.
 * @returns {{success: false} | {success: true, elements: object[], consumed: number}}
 *   `{ success: false }` when no `[[/div` closer exists, otherwise the
 *   paragraph container and the number of tokens consumed.
 */
function consumeFailedDiv(ctx) {
  const { tokens } = ctx;
  const start = ctx.pos;
  const isEnd = (tok) => !tok || tok.type === "EOF";

  // Pass 1: remember the last closer whose block name is "div".
  let closeStart = -1;
  let closeLength = 0;
  for (let i = start; i < tokens.length && !isEnd(tokens[i]); i++) {
    if (tokens[i].type !== "BLOCK_END_OPEN") {
      continue;
    }
    const closeName = parseBlockName(ctx, i + 1);
    if (closeName?.name !== "div") {
      continue;
    }
    closeStart = i;
    // BLOCK_END_OPEN + name tokens, plus the BLOCK_CLOSE when it is present.
    const hasBlockClose = tokens[i + 1 + closeName.consumed]?.type === "BLOCK_CLOSE";
    closeLength = 1 + closeName.consumed + (hasBlockClose ? 1 : 0);
  }
  if (closeStart === -1) {
    return { success: false };
  }

  // Pass 2: flatten everything up to and including the closer.
  const stopAt = closeStart + closeLength;
  const flattened = [];
  let cursor = start;
  while (cursor < stopAt && cursor < tokens.length) {
    const tok = tokens[cursor];
    if (isEnd(tok)) {
      break;
    }
    if (tok.type !== "NEWLINE") {
      flattened.push({ element: "text", data: tok.value });
      cursor++;
      continue;
    }
    let probe = cursor + 1;
    while (tokens[probe]?.type === "WHITESPACE") {
      probe++;
    }
    if (tokens[probe]?.type === "NEWLINE") {
      // Blank line: drop the whole NEWLINE/WHITESPACE run.
      while (tokens[cursor]?.type === "NEWLINE" || tokens[cursor]?.type === "WHITESPACE") {
        cursor++;
      }
    } else {
      flattened.push({ element: "line-break" });
      cursor++;
    }
  }
  if (tokens[cursor]?.type === "NEWLINE") {
    cursor++;
  }
  return {
    success: true,
    elements: [
      {
        element: "container",
        data: {
          type: "paragraph",
          attributes: {},
          elements: flattened
        }
      }
    ],
    consumed: cursor - start
  };
}
2256
/**
 * Replaces a paragraph container at the start and/or end of `elements`
 * with that paragraph's own children, leaving everything in between as is.
 *
 * The input array is never mutated: an empty input is returned unchanged,
 * otherwise a shallow copy is edited and returned.
 *
 * @param {object[]} elements Parsed elements.
 * @returns {object[]} The elements with the edge paragraphs unwrapped.
 */
function unwrapEdgeParagraphs(elements) {
  if (elements.length === 0) {
    return elements;
  }
  const result = [...elements];
  if (isParagraphContainer(result[0])) {
    // A paragraph without an `elements` array contributes nothing; spreading
    // `undefined` here would throw a TypeError.
    result.splice(0, 1, ...(result[0].data.elements ?? []));
  }
  // Recomputed after the first splice, which may have changed the length.
  const lastIdx = result.length - 1;
  if (lastIdx >= 0 && isParagraphContainer(result[lastIdx])) {
    result.splice(lastIdx, 1, ...(result[lastIdx].data.elements ?? []));
  }
  return result;
}
2271
/**
 * Tells whether `el` is a container element whose data type is "paragraph".
 *
 * @param {*} el Candidate element; `null` and `undefined` are accepted and
 *   yield `false`.
 * @returns {boolean} `true` only for `{ element: "container", data: { type: "paragraph", ... } }`.
 */
function isParagraphContainer(el) {
  // `!= null` rejects both null and undefined, so the property reads below
  // cannot throw.
  return el != null && el.element === "container" && typeof el.data === "object" && el.data !== null && el.data.type === "paragraph";
}
2274
+
2275
+ // packages/parser/src/parser/rules/block/code.ts
2276
+ var codeBlockRule = {
2277
+ name: "code",
2278
+ startTokens: ["BLOCK_OPEN"],
2279
+ requiresLineStart: false,
2280
+ parse(ctx) {
2281
+ const openToken = currentToken(ctx);
2282
+ if (openToken.type !== "BLOCK_OPEN") {
2283
+ return { success: false };
2284
+ }
2285
+ let pos = ctx.pos + 1;
2286
+ let consumed = 1;
2287
+ const nameResult = parseBlockName(ctx, pos);
2288
+ if (!nameResult) {
2289
+ return { success: false };
2290
+ }
2291
+ if (nameResult.name !== "code") {
2292
+ return { success: false };
2293
+ }
2294
+ pos += nameResult.consumed;
2295
+ consumed += nameResult.consumed;
2029
2296
  const attrResult = parseAttributesRaw(ctx, pos);
2030
2297
  pos += attrResult.consumed;
2031
2298
  consumed += attrResult.consumed;
@@ -2810,9 +3077,10 @@ function parseCell(ctx, startPos) {
2810
3077
  return false;
2811
3078
  };
2812
3079
  const bodyCtx = { ...ctx, pos };
2813
- const bodyResult = parseBlocksUntil(bodyCtx, closeCondition);
3080
+ const bodyResult = parseCellContent(bodyCtx, closeCondition);
2814
3081
  consumed += bodyResult.consumed;
2815
3082
  pos += bodyResult.consumed;
3083
+ const hadParagraphBreaks = bodyResult.hadParagraphBreaks;
2816
3084
  if (ctx.tokens[pos]?.type === "BLOCK_END_OPEN") {
2817
3085
  pos++;
2818
3086
  consumed++;
@@ -2830,7 +3098,7 @@ function parseCell(ctx, startPos) {
2830
3098
  consumed++;
2831
3099
  }
2832
3100
  }
2833
- const processedElements = bodyResult.elements;
3101
+ const processedElements = hadParagraphBreaks ? bodyResult.elements : unwrapSingleInlineParagraph(bodyResult.elements);
2834
3102
  return {
2835
3103
  cell: {
2836
3104
  header: isHeader,
@@ -2842,6 +3110,170 @@ function parseCell(ctx, startPos) {
2842
3110
  consumed
2843
3111
  };
2844
3112
  }
3113
/**
 * When `elements` consists of exactly one paragraph container whose
 * children are all inline (none is reported by `isBlockElement`), returns
 * those children; in every other case returns `elements` unchanged.
 *
 * @param {object[]} elements Parsed elements.
 * @returns {object[]} Either the paragraph's children (an empty array when
 *   the paragraph has no `elements`) or the original array.
 */
function unwrapSingleInlineParagraph(elements) {
  const only = elements[0];
  const isLoneParagraph =
    elements.length === 1 &&
    only?.element === "container" &&
    typeof only.data === "object" &&
    only.data !== null &&
    "type" in only.data &&
    only.data.type === "paragraph";
  if (!isLoneParagraph) {
    return elements;
  }
  const children = only.data.elements ?? [];
  // Keep the wrapper as soon as one child is block-level.
  return children.some((child) => isBlockElement(child)) ? elements : children;
}
3129
/**
 * Classifies an element as block-level.
 *
 * Block-level means its `element` name is one of table, div, blockquote,
 * code, list, iframe or image-block, or it is a `container` whose
 * `data.type` is paragraph, div or blockquote.
 *
 * @param {object} el Element to classify.
 * @returns {boolean} `true` for block-level elements, `false` otherwise.
 */
function isBlockElement(el) {
  switch (el.element) {
    case "table":
    case "div":
    case "blockquote":
    case "code":
    case "list":
    case "iframe":
    case "image-block":
      return true;
    case "container": {
      const payload = el.data;
      if (typeof payload !== "object" || payload === null) {
        return false;
      }
      return ["paragraph", "div", "blockquote"].includes(payload.type);
    }
    default:
      return false;
  }
}
3142
/**
 * Parses the body of a table cell, starting at `ctx.pos` and stopping at
 * EOF, at a missing token, or as soon as `closeCondition` returns truthy
 * for the current position.
 *
 * Inline output is collected into a pending segment. The segment is
 * flushed as a paragraph container at a blank line (two or more NEWLINEs),
 * at a NEWLINE directly followed by BLOCK_OPEN, and before the output of a
 * successful block rule. What is left at the end is wrapped in a paragraph
 * only if one of those events happened; otherwise it is appended bare.
 * On every flush, trailing blank text and line-breaks and leading blank
 * text are dropped from the segment.
 *
 * @param {object} ctx Parse context; reads `tokens`, `pos`, `blockRules`
 *   and `inlineRules`. Rules receive a shallow copy with the current `pos`.
 * @param {(ctx: object) => boolean} closeCondition Called with a shallow
 *   copy of `ctx` positioned at the current token.
 * @returns {{elements: object[], consumed: number, hadParagraphBreaks: boolean}}
 *   `hadParagraphBreaks` is true when at least one blank line was seen.
 */
function parseCellContent(ctx, closeCondition) {
  const elements = [];
  let pending = [];
  let pos = ctx.pos;
  let consumed = 0;
  // Set by any event that makes the cell multi-part (paragraph break,
  // newline before a block opener, or a parsed block element).
  let wrapRemainder = false;
  let hadParagraphBreaks = false;

  const advance = (count) => {
    pos += count;
    consumed += count;
  };
  const typeAt = (index) => ctx.tokens[index]?.type;
  const isBlankText = (node) =>
    node?.element === "text" && typeof node.data === "string" && node.data.trim() === "";

  const flushSegment = (wrapInParagraph) => {
    let end = pending.length;
    while (end > 0 && (isBlankText(pending[end - 1]) || pending[end - 1]?.element === "line-break")) {
      end--;
    }
    let begin = 0;
    while (begin < end && isBlankText(pending[begin])) {
      begin++;
    }
    const kept = pending.slice(begin, end);
    pending = [];
    if (kept.length === 0) {
      return;
    }
    if (wrapInParagraph) {
      elements.push({
        element: "container",
        data: {
          type: "paragraph",
          attributes: {},
          elements: kept
        }
      });
    } else {
      elements.push(...kept);
    }
  };

  // Result of the first applicable rule whose parse succeeds, or null.
  const firstSuccess = (rules, canApply, token, ruleCtx) => {
    for (const rule of rules) {
      if (!canApply(rule, token)) {
        continue;
      }
      const outcome = rule.parse(ruleCtx);
      if (outcome.success) {
        return outcome;
      }
    }
    return null;
  };

  while (pos < ctx.tokens.length) {
    const token = ctx.tokens[pos];
    if (!token || token.type === "EOF") {
      break;
    }
    if (closeCondition({ ...ctx, pos })) {
      break;
    }

    if (token.type === "NEWLINE") {
      advance(1);
      const following = ctx.tokens[pos];
      if (following?.type === "NEWLINE") {
        // Blank line: paragraph break.
        do {
          advance(1);
        } while (typeAt(pos) === "NEWLINE");
        flushSegment(true);
        wrapRemainder = true;
        hadParagraphBreaks = true;
        while (typeAt(pos) === "WHITESPACE") {
          advance(1);
        }
        continue;
      }
      const atCellEnd = !following || following.type === "BLOCK_END_OPEN" || following.type === "EOF";
      if (!atCellEnd) {
        if (following.type === "BLOCK_OPEN") {
          flushSegment(true);
          wrapRemainder = true;
        } else if (pending.length > 0 || elements.length > 0) {
          // A newline before any content produces no line-break.
          pending.push({ element: "line-break" });
        }
      }
      continue;
    }

    if (token.type === "WHITESPACE" && token.lineStart) {
      advance(1);
      continue;
    }

    const blockCtx = { ...ctx, pos };
    const blockHit = firstSuccess(ctx.blockRules, canApplyBlockRule, token, blockCtx);
    if (blockHit) {
      if (pending.length > 0) {
        flushSegment(true);
      }
      elements.push(...blockHit.elements);
      wrapRemainder = true;
      advance(blockHit.consumed);
      continue;
    }

    const inlineCtx = { ...ctx, pos };
    const inlineHit = firstSuccess(ctx.inlineRules, canApplyInlineRule, token, inlineCtx);
    if (inlineHit) {
      pending.push(...inlineHit.elements);
      advance(inlineHit.consumed);
    } else {
      pending.push({ element: "text", data: token.value });
      advance(1);
    }
  }

  flushSegment(wrapRemainder);
  return { elements, consumed, hadParagraphBreaks };
}
2845
3277
 
2846
3278
  // packages/parser/src/parser/rules/block/module/rate/index.ts
2847
3279
  var rateModuleRule = {
@@ -3256,6 +3688,10 @@ var footnoteBlockRule = {
3256
3688
  }
3257
3689
  pos++;
3258
3690
  consumed++;
3691
+ if (ctx.footnoteBlockParsed) {
3692
+ return { success: false };
3693
+ }
3694
+ ctx.footnoteBlockParsed = true;
3259
3695
  const title = attrs.title !== undefined ? attrs.title : null;
3260
3696
  const hide = attrs.hide === "true" || attrs.hide === "yes";
3261
3697
  return {
@@ -3487,6 +3923,12 @@ var alignRule = {
3487
3923
  name: "align",
3488
3924
  startTokens: ["BLOCK_OPEN"],
3489
3925
  requiresLineStart: true,
3926
+ preservesPrecedingLineBreak: true,
3927
+ isStartPattern(ctx, pos) {
3928
+ if (ctx.tokens[pos]?.type !== "BLOCK_OPEN")
3929
+ return false;
3930
+ return parseAlignOpen(ctx, pos + 1) !== null;
3931
+ },
3490
3932
  parse(ctx) {
3491
3933
  const openToken = currentToken(ctx);
3492
3934
  if (openToken.type !== "BLOCK_OPEN") {
@@ -3912,7 +4354,11 @@ var mathBlockRule = {
3912
4354
  break;
3913
4355
  }
3914
4356
  }
3915
- latexSource += token.value;
4357
+ if (token.type === "BACKSLASH_BREAK") {
4358
+ latexSource += "\\\n";
4359
+ } else {
4360
+ latexSource += token.value;
4361
+ }
3916
4362
  pos++;
3917
4363
  consumed++;
3918
4364
  }
@@ -3971,23 +4417,25 @@ var htmlBlockRule = {
3971
4417
  }
3972
4418
  pos += nameResult.consumed;
3973
4419
  consumed += nameResult.consumed;
3974
- while (ctx.tokens[pos]?.type === "WHITESPACE") {
3975
- pos++;
3976
- consumed++;
3977
- }
4420
+ const attrResult = parseAttributesRaw(ctx, pos);
4421
+ pos += attrResult.consumed;
4422
+ consumed += attrResult.consumed;
4423
+ const style = attrResult.attrs.style;
3978
4424
  if (ctx.tokens[pos]?.type !== "BLOCK_CLOSE") {
3979
4425
  return { success: false };
3980
4426
  }
3981
4427
  pos++;
3982
4428
  consumed++;
3983
4429
  let contents = "";
4430
+ let foundClose = false;
3984
4431
  while (pos < ctx.tokens.length) {
3985
4432
  const token = ctx.tokens[pos];
3986
- if (!token)
4433
+ if (!token || token.type === "EOF")
3987
4434
  break;
3988
4435
  if (token.type === "BLOCK_END_OPEN") {
3989
4436
  const closeNameResult = parseBlockName(ctx, pos + 1);
3990
4437
  if (closeNameResult?.name.toLowerCase() === "html") {
4438
+ foundClose = true;
3991
4439
  break;
3992
4440
  }
3993
4441
  }
@@ -3995,6 +4443,9 @@ var htmlBlockRule = {
3995
4443
  pos++;
3996
4444
  consumed++;
3997
4445
  }
4446
+ if (!foundClose) {
4447
+ return { success: false };
4448
+ }
3998
4449
  if (ctx.tokens[pos]?.type === "BLOCK_END_OPEN") {
3999
4450
  pos++;
4000
4451
  consumed++;
@@ -4020,7 +4471,8 @@ var htmlBlockRule = {
4020
4471
  {
4021
4472
  element: "html",
4022
4473
  data: {
4023
- contents
4474
+ contents,
4475
+ ...style && { style }
4024
4476
  }
4025
4477
  }
4026
4478
  ],
@@ -4029,9 +4481,9 @@ var htmlBlockRule = {
4029
4481
  }
4030
4482
  };
4031
4483
 
4032
- // packages/parser/src/parser/rules/block/iframe.ts
4033
- var iframeRule = {
4034
- name: "iframe",
4484
+ // packages/parser/src/parser/rules/block/embed-block.ts
4485
+ var embedBlockRule = {
4486
+ name: "embed-block",
4035
4487
  startTokens: ["BLOCK_OPEN"],
4036
4488
  requiresLineStart: false,
4037
4489
  parse(ctx) {
@@ -4042,7 +4494,11 @@ var iframeRule = {
4042
4494
  let pos = ctx.pos + 1;
4043
4495
  let consumed = 1;
4044
4496
  const nameResult = parseBlockName(ctx, pos);
4045
- if (!nameResult || nameResult.name.toLowerCase() !== "iframe") {
4497
+ if (!nameResult) {
4498
+ return { success: false };
4499
+ }
4500
+ const blockName = nameResult.name.toLowerCase();
4501
+ if (blockName !== "embed" && blockName !== "embedvideo" && blockName !== "embedaudio") {
4046
4502
  return { success: false };
4047
4503
  }
4048
4504
  pos += nameResult.consumed;
@@ -4051,50 +4507,156 @@ var iframeRule = {
4051
4507
  pos++;
4052
4508
  consumed++;
4053
4509
  }
4054
- let url = "";
4510
+ if (ctx.tokens[pos]?.type !== "BLOCK_CLOSE") {
4511
+ return { success: false };
4512
+ }
4513
+ pos++;
4514
+ consumed++;
4515
+ let contents = "";
4516
+ let foundClose = false;
4055
4517
  while (pos < ctx.tokens.length) {
4056
4518
  const token = ctx.tokens[pos];
4057
4519
  if (!token)
4058
4520
  break;
4059
- if (token.type === "BLOCK_CLOSE" || token.type === "WHITESPACE" || token.type === "NEWLINE") {
4060
- break;
4521
+ if (token.type === "BLOCK_END_OPEN") {
4522
+ const closeNameResult = parseBlockName(ctx, pos + 1);
4523
+ if (closeNameResult) {
4524
+ const closeName = closeNameResult.name.toLowerCase();
4525
+ if (closeName === "embed" || closeName === "embedvideo" || closeName === "embedaudio") {
4526
+ foundClose = true;
4527
+ break;
4528
+ }
4529
+ }
4061
4530
  }
4062
- url += token.value;
4531
+ contents += token.value;
4063
4532
  pos++;
4064
4533
  consumed++;
4065
4534
  }
4066
- if (!url) {
4535
+ if (!foundClose) {
4067
4536
  return { success: false };
4068
4537
  }
4069
- const attributes = {};
4070
- while (pos < ctx.tokens.length) {
4071
- const token = ctx.tokens[pos];
4072
- if (!token || token.type === "BLOCK_CLOSE")
4073
- break;
4074
- if (token.type === "NEWLINE") {
4075
- break;
4538
+ if (ctx.tokens[pos]?.type === "BLOCK_END_OPEN") {
4539
+ pos++;
4540
+ consumed++;
4541
+ const closeNameResult = parseBlockName(ctx, pos);
4542
+ if (closeNameResult) {
4543
+ pos += closeNameResult.consumed;
4544
+ consumed += closeNameResult.consumed;
4076
4545
  }
4077
- if (token.type === "WHITESPACE") {
4546
+ if (ctx.tokens[pos]?.type === "BLOCK_CLOSE") {
4078
4547
  pos++;
4079
4548
  consumed++;
4080
- continue;
4081
4549
  }
4082
- if (token.type === "IDENTIFIER" || token.type === "TEXT") {
4083
- const key = token.value;
4550
+ if (ctx.tokens[pos]?.type === "NEWLINE") {
4084
4551
  pos++;
4085
4552
  consumed++;
4086
- while (ctx.tokens[pos]?.type === "WHITESPACE") {
4087
- pos++;
4088
- consumed++;
4089
- }
4090
- if (ctx.tokens[pos]?.type === "EQUALS") {
4091
- pos++;
4092
- consumed++;
4093
- while (ctx.tokens[pos]?.type === "WHITESPACE") {
4094
- pos++;
4095
- consumed++;
4553
+ }
4554
+ }
4555
+ contents = contents.trim();
4556
+ return {
4557
+ success: true,
4558
+ elements: [
4559
+ {
4560
+ element: "container",
4561
+ data: {
4562
+ type: "paragraph",
4563
+ attributes: {},
4564
+ elements: [
4565
+ {
4566
+ element: "embed-block",
4567
+ data: {
4568
+ contents
4569
+ }
4570
+ }
4571
+ ]
4096
4572
  }
4097
- let value = "";
4573
+ }
4574
+ ],
4575
+ consumed
4576
+ };
4577
+ }
4578
+ };
4579
+
4580
+ // packages/parser/src/parser/rules/block/iframe.ts
4581
+ var ALLOWED_IFRAME_ATTRS = new Set(["width", "height", "style", "scrolling", "frameborder"]);
4582
+ function normalizeUrl(url) {
4583
+ return url.replace(/[\s\u0000-\u001f\u007f-\u009f]/g, "").toLowerCase();
4584
+ }
4585
+ function isDangerousUrl(normalizedUrl) {
4586
+ return /^(javascript|data|vbscript):/i.test(normalizedUrl);
4587
+ }
4588
+ var iframeRule = {
4589
+ name: "iframe",
4590
+ startTokens: ["BLOCK_OPEN"],
4591
+ requiresLineStart: false,
4592
+ parse(ctx) {
4593
+ const openToken = currentToken(ctx);
4594
+ if (openToken.type !== "BLOCK_OPEN") {
4595
+ return { success: false };
4596
+ }
4597
+ let pos = ctx.pos + 1;
4598
+ let consumed = 1;
4599
+ const nameResult = parseBlockName(ctx, pos);
4600
+ if (!nameResult || nameResult.name.toLowerCase() !== "iframe") {
4601
+ return { success: false };
4602
+ }
4603
+ pos += nameResult.consumed;
4604
+ consumed += nameResult.consumed;
4605
+ while (ctx.tokens[pos]?.type === "WHITESPACE") {
4606
+ pos++;
4607
+ consumed++;
4608
+ }
4609
+ let url = "";
4610
+ while (pos < ctx.tokens.length) {
4611
+ const token = ctx.tokens[pos];
4612
+ if (!token)
4613
+ break;
4614
+ if (token.type === "BLOCK_CLOSE" || token.type === "WHITESPACE" || token.type === "NEWLINE") {
4615
+ break;
4616
+ }
4617
+ url += token.value;
4618
+ pos++;
4619
+ consumed++;
4620
+ }
4621
+ if (!url) {
4622
+ return { success: false };
4623
+ }
4624
+ const normalizedUrl = normalizeUrl(url);
4625
+ if (isDangerousUrl(normalizedUrl)) {
4626
+ return { success: false };
4627
+ }
4628
+ if (!/^https?:\/\//i.test(normalizedUrl)) {
4629
+ return { success: false };
4630
+ }
4631
+ const attributes = {};
4632
+ while (pos < ctx.tokens.length) {
4633
+ const token = ctx.tokens[pos];
4634
+ if (!token || token.type === "BLOCK_CLOSE")
4635
+ break;
4636
+ if (token.type === "NEWLINE") {
4637
+ break;
4638
+ }
4639
+ if (token.type === "WHITESPACE") {
4640
+ pos++;
4641
+ consumed++;
4642
+ continue;
4643
+ }
4644
+ if (token.type === "IDENTIFIER" || token.type === "TEXT") {
4645
+ const key = token.value;
4646
+ pos++;
4647
+ consumed++;
4648
+ while (ctx.tokens[pos]?.type === "WHITESPACE") {
4649
+ pos++;
4650
+ consumed++;
4651
+ }
4652
+ if (ctx.tokens[pos]?.type === "EQUALS") {
4653
+ pos++;
4654
+ consumed++;
4655
+ while (ctx.tokens[pos]?.type === "WHITESPACE") {
4656
+ pos++;
4657
+ consumed++;
4658
+ }
4659
+ let value = "";
4098
4660
  const valueToken = ctx.tokens[pos];
4099
4661
  if (valueToken?.type === "QUOTED_STRING") {
4100
4662
  value = valueToken.value.slice(1, -1);
@@ -4111,7 +4673,9 @@ var iframeRule = {
4111
4673
  consumed++;
4112
4674
  }
4113
4675
  }
4114
- attributes[key] = value;
4676
+ if (ALLOWED_IFRAME_ATTRS.has(key.toLowerCase())) {
4677
+ attributes[key.toLowerCase()] = value;
4678
+ }
4115
4679
  }
4116
4680
  } else {
4117
4681
  pos++;
@@ -4313,6 +4877,300 @@ var tocRule = {
4313
4877
  }
4314
4878
  };
4315
4879
 
4880
+ // packages/parser/src/parser/rules/block/orphan-li.ts
4881
+ function isLiOpen2(ctx, pos) {
4882
+ if (ctx.tokens[pos]?.type !== "BLOCK_OPEN")
4883
+ return null;
4884
+ const nameResult = parseBlockName(ctx, pos + 1);
4885
+ if (!nameResult)
4886
+ return null;
4887
+ if (nameResult.name === "li") {
4888
+ return { consumed: 1 + nameResult.consumed };
4889
+ }
4890
+ return null;
4891
+ }
4892
+ function isLiClose2(ctx, pos) {
4893
+ if (ctx.tokens[pos]?.type !== "BLOCK_END_OPEN")
4894
+ return null;
4895
+ const nameResult = parseBlockName(ctx, pos + 1);
4896
+ if (!nameResult || nameResult.name !== "li")
4897
+ return null;
4898
+ let consumed = 1 + nameResult.consumed;
4899
+ if (ctx.tokens[pos + consumed]?.type === "BLOCK_CLOSE")
4900
+ consumed++;
4901
+ return { consumed };
4902
+ }
4903
+ var orphanLiRule = {
4904
+ name: "orphan-li",
4905
+ startTokens: ["BLOCK_OPEN"],
4906
+ requiresLineStart: false,
4907
+ parse(ctx) {
4908
+ const openToken = currentToken(ctx);
4909
+ if (openToken.type !== "BLOCK_OPEN") {
4910
+ return { success: false };
4911
+ }
4912
+ const liOpen = isLiOpen2(ctx, ctx.pos);
4913
+ if (!liOpen) {
4914
+ return { success: false };
4915
+ }
4916
+ let pos = ctx.pos + liOpen.consumed;
4917
+ let consumed = liOpen.consumed;
4918
+ if (ctx.tokens[pos]?.type !== "BLOCK_CLOSE") {
4919
+ return { success: false };
4920
+ }
4921
+ pos++;
4922
+ consumed++;
4923
+ const elements = [];
4924
+ let foundClose = false;
4925
+ elements.push({ element: "text", data: "[[" });
4926
+ elements.push({ element: "text", data: "li" });
4927
+ elements.push({ element: "text", data: "]]" });
4928
+ while (pos < ctx.tokens.length) {
4929
+ const token = ctx.tokens[pos];
4930
+ if (!token || token.type === "EOF")
4931
+ break;
4932
+ const liClose = isLiClose2(ctx, pos);
4933
+ if (liClose) {
4934
+ foundClose = true;
4935
+ elements.push({ element: "text", data: "[[/" });
4936
+ elements.push({ element: "text", data: "li" });
4937
+ elements.push({ element: "text", data: "]]" });
4938
+ consumed += liClose.consumed;
4939
+ pos += liClose.consumed;
4940
+ if (ctx.tokens[pos]?.type === "NEWLINE") {
4941
+ pos++;
4942
+ consumed++;
4943
+ }
4944
+ break;
4945
+ }
4946
+ if (token.type === "NEWLINE") {
4947
+ elements.push({ element: "line-break" });
4948
+ pos++;
4949
+ consumed++;
4950
+ continue;
4951
+ }
4952
+ if (token.type === "WHITESPACE" && token.lineStart) {
4953
+ pos++;
4954
+ consumed++;
4955
+ continue;
4956
+ }
4957
+ elements.push({ element: "text", data: token.value });
4958
+ pos++;
4959
+ consumed++;
4960
+ }
4961
+ if (!foundClose) {
4962
+ return { success: false };
4963
+ }
4964
+ return {
4965
+ success: true,
4966
+ elements,
4967
+ consumed
4968
+ };
4969
+ }
4970
+ };
4971
+
4972
+ // packages/parser/src/parser/rules/block/bibliography.ts
4973
+ function parseBibliographyEntry(ctx, startPos) {
4974
+ let pos = startPos;
4975
+ let consumed = 0;
4976
+ const colonToken = ctx.tokens[pos];
4977
+ if (!colonToken || colonToken.type !== "COLON" || !colonToken.lineStart) {
4978
+ return null;
4979
+ }
4980
+ pos++;
4981
+ consumed++;
4982
+ const whitespaceAfterColon = ctx.tokens[pos];
4983
+ if (!whitespaceAfterColon || whitespaceAfterColon.type !== "WHITESPACE") {
4984
+ return null;
4985
+ }
4986
+ while (ctx.tokens[pos]?.type === "WHITESPACE") {
4987
+ pos++;
4988
+ consumed++;
4989
+ }
4990
+ let label = "";
4991
+ let foundSecondColon = false;
4992
+ const keyNodes = [];
4993
+ while (pos < ctx.tokens.length) {
4994
+ const token = ctx.tokens[pos];
4995
+ if (!token || token.type === "NEWLINE" || token.type === "EOF") {
4996
+ break;
4997
+ }
4998
+ if (token.type === "COLON") {
4999
+ foundSecondColon = true;
5000
+ pos++;
5001
+ consumed++;
5002
+ break;
5003
+ }
5004
+ label += token.value;
5005
+ keyNodes.push({ element: "text", data: token.value });
5006
+ pos++;
5007
+ consumed++;
5008
+ }
5009
+ if (!foundSecondColon) {
5010
+ return null;
5011
+ }
5012
+ label = label.trim();
5013
+ while (ctx.tokens[pos]?.type === "WHITESPACE") {
5014
+ pos++;
5015
+ consumed++;
5016
+ }
5017
+ const contentNodes = [];
5018
+ while (pos < ctx.tokens.length) {
5019
+ const token = ctx.tokens[pos];
5020
+ if (!token || token.type === "EOF") {
5021
+ break;
5022
+ }
5023
+ if (token.type === "BLOCK_END_OPEN") {
5024
+ const closeNameResult = parseBlockName(ctx, pos + 1);
5025
+ if (closeNameResult?.name === "bibliography") {
5026
+ break;
5027
+ }
5028
+ }
5029
+ if (token.type === "NEWLINE") {
5030
+ const nextToken = ctx.tokens[pos + 1];
5031
+ if (nextToken?.type === "COLON" && nextToken.lineStart) {
5032
+ pos++;
5033
+ consumed++;
5034
+ break;
5035
+ }
5036
+ if (nextToken?.type === "BLOCK_END_OPEN") {
5037
+ pos++;
5038
+ consumed++;
5039
+ break;
5040
+ }
5041
+ if (nextToken?.type === "NEWLINE" || !nextToken || nextToken.type === "EOF") {
5042
+ pos++;
5043
+ consumed++;
5044
+ break;
5045
+ }
5046
+ }
5047
+ const inlineCtx = { ...ctx, pos };
5048
+ const result = parseInlineUntil(inlineCtx, "NEWLINE");
5049
+ if (result.elements.length > 0) {
5050
+ contentNodes.push(...result.elements);
5051
+ pos += result.consumed;
5052
+ consumed += result.consumed;
5053
+ } else {
5054
+ pos++;
5055
+ consumed++;
5056
+ }
5057
+ }
5058
+ while (keyNodes.length > 0) {
5059
+ const lastNode = keyNodes[keyNodes.length - 1];
5060
+ if (lastNode && lastNode.element === "text" && typeof lastNode.data === "string" && lastNode.data.trim() === "") {
5061
+ keyNodes.pop();
5062
+ } else {
5063
+ break;
5064
+ }
5065
+ }
5066
+ return {
5067
+ entry: {
5068
+ label,
5069
+ key: keyNodes,
5070
+ content: contentNodes
5071
+ },
5072
+ consumed
5073
+ };
5074
+ }
5075
+ var bibliographyRule = {
5076
+ name: "bibliography",
5077
+ startTokens: ["BLOCK_OPEN"],
5078
+ requiresLineStart: false,
5079
+ parse(ctx) {
5080
+ const openToken = currentToken(ctx);
5081
+ if (openToken.type !== "BLOCK_OPEN") {
5082
+ return { success: false };
5083
+ }
5084
+ let pos = ctx.pos + 1;
5085
+ let consumed = 1;
5086
+ const nameResult = parseBlockName(ctx, pos);
5087
+ if (!nameResult || nameResult.name !== "bibliography") {
5088
+ return { success: false };
5089
+ }
5090
+ pos += nameResult.consumed;
5091
+ consumed += nameResult.consumed;
5092
+ const attrResult = parseAttributes(ctx, pos);
5093
+ pos += attrResult.consumed;
5094
+ consumed += attrResult.consumed;
5095
+ if (ctx.tokens[pos]?.type !== "BLOCK_CLOSE") {
5096
+ return { success: false };
5097
+ }
5098
+ pos++;
5099
+ consumed++;
5100
+ if (ctx.tokens[pos]?.type === "NEWLINE") {
5101
+ pos++;
5102
+ consumed++;
5103
+ }
5104
+ const entries = [];
5105
+ let foundClose = false;
5106
+ while (pos < ctx.tokens.length) {
5107
+ const token = ctx.tokens[pos];
5108
+ if (!token || token.type === "EOF") {
5109
+ break;
5110
+ }
5111
+ if (token.type === "BLOCK_END_OPEN") {
5112
+ const closeNameResult = parseBlockName(ctx, pos + 1);
5113
+ if (closeNameResult?.name === "bibliography") {
5114
+ foundClose = true;
5115
+ pos++;
5116
+ consumed++;
5117
+ pos += closeNameResult.consumed;
5118
+ consumed += closeNameResult.consumed;
5119
+ while (ctx.tokens[pos]?.type === "WHITESPACE") {
5120
+ pos++;
5121
+ consumed++;
5122
+ }
5123
+ if (ctx.tokens[pos]?.type === "BLOCK_CLOSE") {
5124
+ pos++;
5125
+ consumed++;
5126
+ }
5127
+ break;
5128
+ }
5129
+ }
5130
+ if (token.type === "WHITESPACE" || token.type === "NEWLINE") {
5131
+ pos++;
5132
+ consumed++;
5133
+ continue;
5134
+ }
5135
+ if (token.type === "COLON" && token.lineStart) {
5136
+ const result = parseBibliographyEntry(ctx, pos);
5137
+ if (result) {
5138
+ entries.push(result.entry);
5139
+ pos += result.consumed;
5140
+ consumed += result.consumed;
5141
+ continue;
5142
+ }
5143
+ }
5144
+ pos++;
5145
+ consumed++;
5146
+ }
5147
+ if (!foundClose) {
5148
+ return { success: false };
5149
+ }
5150
+ const definitionItems = entries.map((entry) => ({
5151
+ key_string: entry.label,
5152
+ key: entry.key,
5153
+ value: entry.content
5154
+ }));
5155
+ const title = attrResult.attrs.title ?? null;
5156
+ const hide = attrResult.attrs.hide === "true" || attrResult.attrs.hide === "";
5157
+ return {
5158
+ success: true,
5159
+ elements: [
5160
+ {
5161
+ element: "bibliography-block",
5162
+ data: {
5163
+ entries: definitionItems,
5164
+ title: typeof title === "string" ? title : null,
5165
+ hide
5166
+ }
5167
+ }
5168
+ ],
5169
+ consumed
5170
+ };
5171
+ }
5172
+ };
5173
+
4316
5174
  // packages/parser/src/parser/rules/block/index.ts
4317
5175
  var blockRules = [
4318
5176
  blockCommentRule,
@@ -4325,6 +5183,7 @@ var blockRules = [
4325
5183
  tableBlockRule,
4326
5184
  listRule,
4327
5185
  blockListRule,
5186
+ orphanLiRule,
4328
5187
  blockquoteRule,
4329
5188
  definitionListRule,
4330
5189
  codeBlockRule,
@@ -4337,8 +5196,10 @@ var blockRules = [
4337
5196
  includeRule,
4338
5197
  mathBlockRule,
4339
5198
  htmlBlockRule,
5199
+ embedBlockRule,
4340
5200
  iframeRule,
4341
5201
  iftagsRule,
5202
+ bibliographyRule,
4342
5203
  divRule
4343
5204
  ];
4344
5205
  // packages/parser/src/parser/rules/inline/bold.ts
@@ -4355,6 +5216,13 @@ var boldRule = {
4355
5216
  };
4356
5217
  }
4357
5218
  const result = parseInlineUntil({ ...ctx, pos: ctx.pos + 1 }, "BOLD_MARKER");
5219
+ if (result.elements.length === 0) {
5220
+ return {
5221
+ success: true,
5222
+ elements: [],
5223
+ consumed: 1 + result.consumed + 1
5224
+ };
5225
+ }
4358
5226
  return {
4359
5227
  success: true,
4360
5228
  elements: [
@@ -4445,6 +5313,13 @@ var underlineRule = {
4445
5313
  consumed++;
4446
5314
  }
4447
5315
  }
5316
+ if (children.length === 0) {
5317
+ return {
5318
+ success: true,
5319
+ elements: [],
5320
+ consumed
5321
+ };
5322
+ }
4448
5323
  return {
4449
5324
  success: true,
4450
5325
  elements: [
@@ -4525,6 +5400,13 @@ var superscriptRule = {
4525
5400
  };
4526
5401
  }
4527
5402
  const result = parseInlineUntil({ ...ctx, pos: ctx.pos + 1 }, "SUPER_MARKER");
5403
+ if (result.elements.length === 0) {
5404
+ return {
5405
+ success: true,
5406
+ elements: [],
5407
+ consumed: 1 + result.consumed + 1
5408
+ };
5409
+ }
4528
5410
  return {
4529
5411
  success: true,
4530
5412
  elements: [
@@ -4556,6 +5438,13 @@ var subscriptRule = {
4556
5438
  };
4557
5439
  }
4558
5440
  const result = parseInlineUntil({ ...ctx, pos: ctx.pos + 1 }, "SUB_MARKER");
5441
+ if (result.elements.length === 0) {
5442
+ return {
5443
+ success: true,
5444
+ elements: [],
5445
+ consumed: 1 + result.consumed + 1
5446
+ };
5447
+ }
4559
5448
  return {
4560
5449
  success: true,
4561
5450
  elements: [
@@ -4602,15 +5491,38 @@ var monospaceRule = {
4602
5491
  consumed: 1 + result.consumed + 1
4603
5492
  };
4604
5493
  }
4605
- };
4606
-
4607
- // packages/parser/src/parser/rules/inline/link-triple.ts
5494
+ };
5495
+
5496
+ // packages/parser/src/parser/rules/inline/link-triple.ts
5497
+ function hasClosingLinkMarker(ctx, startPos) {
5498
+ let pos = startPos;
5499
+ while (pos < ctx.tokens.length) {
5500
+ const token = ctx.tokens[pos];
5501
+ if (!token || token.type === "EOF") {
5502
+ return false;
5503
+ }
5504
+ if (token.type === "LINK_CLOSE") {
5505
+ return true;
5506
+ }
5507
+ if (token.type === "NEWLINE") {
5508
+ const next = ctx.tokens[pos + 1];
5509
+ if (next?.type === "NEWLINE") {
5510
+ return false;
5511
+ }
5512
+ if (next?.type === "LINK_CLOSE") {
5513
+ return false;
5514
+ }
5515
+ }
5516
+ pos++;
5517
+ }
5518
+ return false;
5519
+ }
4608
5520
  var linkTripleRule = {
4609
5521
  name: "linkTriple",
4610
5522
  startTokens: ["LINK_OPEN"],
4611
5523
  parse(ctx) {
4612
5524
  const startToken = currentToken(ctx);
4613
- if (!hasClosingMarkerBeforeNewline({ ...ctx, pos: ctx.pos + 1 }, "LINK_CLOSE")) {
5525
+ if (!hasClosingLinkMarker(ctx, ctx.pos + 1)) {
4614
5526
  return {
4615
5527
  success: true,
4616
5528
  elements: [{ element: "text", data: startToken.value }],
@@ -4624,9 +5536,14 @@ var linkTripleRule = {
4624
5536
  let pos = ctx.pos + 1;
4625
5537
  while (pos < ctx.tokens.length) {
4626
5538
  const token = ctx.tokens[pos];
4627
- if (!token || token.type === "LINK_CLOSE" || token.type === "NEWLINE" || token.type === "EOF") {
5539
+ if (!token || token.type === "LINK_CLOSE" || token.type === "EOF") {
4628
5540
  break;
4629
5541
  }
5542
+ if (token.type === "NEWLINE") {
5543
+ consumed++;
5544
+ pos++;
5545
+ continue;
5546
+ }
4630
5547
  if (token.type === "PIPE" && !foundPipe) {
4631
5548
  foundPipe = true;
4632
5549
  } else if (foundPipe) {
@@ -4641,8 +5558,40 @@ var linkTripleRule = {
4641
5558
  consumed++;
4642
5559
  }
4643
5560
  const trimmedTarget = target.trim();
4644
- const { linkType, link } = determineLinkTypeAndLocation(trimmedTarget);
4645
- const displayText = foundPipe ? labelText.trim() : trimmedTarget;
5561
+ if (trimmedTarget === "" && foundPipe) {
5562
+ return {
5563
+ success: true,
5564
+ elements: [{ element: "text", data: startToken.value }],
5565
+ consumed: 1
5566
+ };
5567
+ }
5568
+ if (/#{2,}/.test(trimmedTarget)) {
5569
+ return {
5570
+ success: true,
5571
+ elements: [{ element: "text", data: startToken.value }],
5572
+ consumed: 1
5573
+ };
5574
+ }
5575
+ let finalTarget = trimmedTarget;
5576
+ if (trimmedTarget === "*" && foundPipe) {
5577
+ finalTarget = "";
5578
+ }
5579
+ if (trimmedTarget.startsWith("*") && !foundPipe) {
5580
+ finalTarget = trimmedTarget.slice(1);
5581
+ }
5582
+ const { linkType, link } = determineLinkTypeAndLocation(finalTarget);
5583
+ const trimmedLabel = labelText.trim();
5584
+ let displayText;
5585
+ if (foundPipe) {
5586
+ displayText = trimmedLabel || finalTarget;
5587
+ } else {
5588
+ const colonIdx = trimmedTarget.indexOf(":");
5589
+ if (colonIdx !== -1 && !trimmedTarget.startsWith("http")) {
5590
+ displayText = trimmedTarget.slice(colonIdx + 1).trim();
5591
+ } else {
5592
+ displayText = trimmedTarget;
5593
+ }
5594
+ }
4646
5595
  const label = { text: displayText };
4647
5596
  return {
4648
5597
  success: true,
@@ -4662,6 +5611,7 @@ var linkTripleRule = {
4662
5611
  };
4663
5612
  }
4664
5613
  };
5614
+ var INTERWIKI_PREFIXES = new Set(["wikipedia", "google", "dictionary", "wikidot"]);
4665
5615
  function determineLinkTypeAndLocation(target) {
4666
5616
  if (target.startsWith("#")) {
4667
5617
  return { linkType: "anchor", link: target };
@@ -4669,8 +5619,12 @@ function determineLinkTypeAndLocation(target) {
4669
5619
  if (target.startsWith("http://") || target.startsWith("https://")) {
4670
5620
  return { linkType: "direct", link: target };
4671
5621
  }
4672
- if (target.includes(":") && !target.includes("/")) {
4673
- return { linkType: "interwiki", link: target };
5622
+ const colonIdx = target.indexOf(":");
5623
+ if (colonIdx > 0 && !target.includes("/")) {
5624
+ const prefix = target.slice(0, colonIdx).toLowerCase();
5625
+ if (INTERWIKI_PREFIXES.has(prefix)) {
5626
+ return { linkType: "interwiki", link: target };
5627
+ }
4674
5628
  }
4675
5629
  return { linkType: "page", link: { site: null, page: target } };
4676
5630
  }
@@ -4928,7 +5882,7 @@ var colorRule = {
4928
5882
  }
4929
5883
  const textChildren = contentResult.elements;
4930
5884
  const trimmedColor = colorSpec.trim();
4931
- if (textChildren.length === 0) {
5885
+ if (trimmedColor === "" || textChildren.length === 0) {
4932
5886
  return { success: false };
4933
5887
  }
4934
5888
  return {
@@ -4978,7 +5932,36 @@ var newlineLineBreakRule = {
4978
5932
  lookAhead++;
4979
5933
  }
4980
5934
  const nextMeaningfulToken = ctx.tokens[ctx.pos + lookAhead];
4981
- if (!nextMeaningfulToken || nextMeaningfulToken.type === "EOF" || nextMeaningfulToken.type === "NEWLINE" || isBlockStartToken(nextMeaningfulToken.type)) {
5935
+ let isValidBlock = isBlockStartToken(nextMeaningfulToken?.type);
5936
+ if (isValidBlock && (nextMeaningfulToken?.type === "LIST_BULLET" || nextMeaningfulToken?.type === "LIST_NUMBER")) {
5937
+ if (!nextMeaningfulToken.lineStart) {
5938
+ isValidBlock = false;
5939
+ }
5940
+ }
5941
+ if (isValidBlock && nextMeaningfulToken?.type === "HEADING_MARKER") {
5942
+ const markerLen = nextMeaningfulToken.value.length;
5943
+ const afterPos = ctx.pos + lookAhead + 1;
5944
+ const afterMarker = ctx.tokens[afterPos];
5945
+ if (markerLen > 6) {
5946
+ isValidBlock = false;
5947
+ } else if (afterMarker?.type === "STAR") {
5948
+ if (ctx.tokens[afterPos + 1]?.type !== "WHITESPACE")
5949
+ isValidBlock = false;
5950
+ } else if (afterMarker?.type !== "WHITESPACE") {
5951
+ isValidBlock = false;
5952
+ }
5953
+ }
5954
+ let hasBackslashBreak = false;
5955
+ {
5956
+ let ahead = 1;
5957
+ while (ctx.tokens[ctx.pos + ahead]?.type === "WHITESPACE") {
5958
+ ahead++;
5959
+ }
5960
+ if (ctx.tokens[ctx.pos + ahead]?.type === "BACKSLASH_BREAK") {
5961
+ hasBackslashBreak = true;
5962
+ }
5963
+ }
5964
+ if (!nextMeaningfulToken || nextMeaningfulToken.type === "EOF" || nextMeaningfulToken.type === "NEWLINE" || isValidBlock || hasBackslashBreak) {
4982
5965
  return {
4983
5966
  success: true,
4984
5967
  elements: [],
@@ -4992,6 +5975,52 @@ var newlineLineBreakRule = {
4992
5975
  };
4993
5976
  }
4994
5977
  };
5978
+ var backslashLineBreakRule = {
5979
+ name: "backslashLineBreak",
5980
+ startTokens: ["WHITESPACE", "BACKSLASH_BREAK"],
5981
+ parse(ctx) {
5982
+ const currentTok = ctx.tokens[ctx.pos];
5983
+ if (!currentTok) {
5984
+ return { success: false };
5985
+ }
5986
+ if (currentTok.type === "WHITESPACE") {
5987
+ const nextTok = ctx.tokens[ctx.pos + 1];
5988
+ if (nextTok?.type === "BACKSLASH_BREAK") {
5989
+ const afterBreak = ctx.tokens[ctx.pos + 2];
5990
+ const afterAfter = ctx.tokens[ctx.pos + 3];
5991
+ const afterAfterAfter = ctx.tokens[ctx.pos + 4];
5992
+ const isFollowedByUnderscoreBreak = afterBreak?.type === "WHITESPACE" && afterAfter?.type === "UNDERSCORE" && (afterAfterAfter?.type === "NEWLINE" || afterAfterAfter?.type === "EOF");
5993
+ if (isFollowedByUnderscoreBreak) {
5994
+ const lb2 = { element: "line-break" };
5995
+ lb2._preservedTrailingBreak = true;
5996
+ return {
5997
+ success: true,
5998
+ elements: [lb2],
5999
+ consumed: 2
6000
+ };
6001
+ }
6002
+ const lb = { element: "line-break" };
6003
+ lb._preservedTrailingBreak = true;
6004
+ return {
6005
+ success: true,
6006
+ elements: [lb, { element: "text", data: " " }],
6007
+ consumed: 2
6008
+ };
6009
+ }
6010
+ return { success: false };
6011
+ }
6012
+ if (currentTok.type === "BACKSLASH_BREAK") {
6013
+ const lb = { element: "line-break" };
6014
+ lb._preservedTrailingBreak = true;
6015
+ return {
6016
+ success: true,
6017
+ elements: [lb],
6018
+ consumed: 1
6019
+ };
6020
+ }
6021
+ return { success: false };
6022
+ }
6023
+ };
4995
6024
  var underscoreLineBreakRule = {
4996
6025
  name: "underscoreLineBreak",
4997
6026
  startTokens: ["WHITESPACE", "UNDERSCORE"],
@@ -5004,9 +6033,11 @@ var underscoreLineBreakRule = {
5004
6033
  const nextTok = ctx.tokens[ctx.pos + 1];
5005
6034
  const afterTok = ctx.tokens[ctx.pos + 2];
5006
6035
  if (nextTok?.type === "UNDERSCORE" && afterTok && (afterTok.type === "NEWLINE" || afterTok.type === "EOF")) {
6036
+ const lb = { element: "line-break" };
6037
+ lb._preservedTrailingBreak = true;
5007
6038
  return {
5008
6039
  success: true,
5009
- elements: [{ element: "line-break" }],
6040
+ elements: [lb],
5010
6041
  consumed: 3
5011
6042
  };
5012
6043
  }
@@ -5014,9 +6045,11 @@ var underscoreLineBreakRule = {
5014
6045
  if (currentTok.type === "UNDERSCORE" && currentTok.lineStart) {
5015
6046
  const nextTok = ctx.tokens[ctx.pos + 1];
5016
6047
  if (nextTok && (nextTok.type === "NEWLINE" || nextTok.type === "EOF")) {
6048
+ const lb = { element: "line-break" };
6049
+ lb._preservedTrailingBreak = true;
5017
6050
  return {
5018
6051
  success: true,
5019
- elements: [{ element: "line-break" }],
6052
+ elements: [lb],
5020
6053
  consumed: 2
5021
6054
  };
5022
6055
  }
@@ -5072,29 +6105,25 @@ function parseDoubleAtRaw(ctx) {
5072
6105
  const startToken = currentToken(ctx);
5073
6106
  let pos = ctx.pos + 1;
5074
6107
  const next1 = ctx.tokens[pos];
5075
- const next2 = ctx.tokens[pos + 1];
5076
- if (next1?.type === "RAW_OPEN" && next2?.type === "RAW_OPEN") {
5077
- return {
5078
- success: true,
5079
- elements: [{ element: "raw", data: "@@" }],
5080
- consumed: 3
5081
- };
5082
- }
5083
- if (next1?.type === "RAW_OPEN" && next2?.type === "AT") {
5084
- return {
5085
- success: true,
5086
- elements: [{ element: "raw", data: "@" }],
5087
- consumed: 3
5088
- };
5089
- }
5090
6108
  if (next1?.type === "RAW_OPEN") {
5091
6109
  return {
5092
6110
  success: true,
5093
- elements: [{ element: "raw", data: "" }],
6111
+ elements: [],
5094
6112
  consumed: 2
5095
6113
  };
5096
6114
  }
5097
6115
  if (!hasClosingMarkerBeforeNewline({ ...ctx, pos }, "RAW_OPEN")) {
6116
+ const nextToken = ctx.tokens[pos];
6117
+ if (nextToken?.type === "NEWLINE") {
6118
+ const afterNewline = ctx.tokens[pos + 1];
6119
+ if (afterNewline?.type === "RAW_OPEN") {
6120
+ return {
6121
+ success: true,
6122
+ elements: [],
6123
+ consumed: 3
6124
+ };
6125
+ }
6126
+ }
5098
6127
  return {
5099
6128
  success: true,
5100
6129
  elements: [{ element: "text", data: startToken.value }],
@@ -5103,17 +6132,46 @@ function parseDoubleAtRaw(ctx) {
5103
6132
  }
5104
6133
  let value = "";
5105
6134
  let consumed = 1;
6135
+ let hasBlockOpen = false;
6136
+ let hasBlockClose = false;
5106
6137
  while (pos < ctx.tokens.length) {
5107
6138
  const token = ctx.tokens[pos];
5108
6139
  if (!token || token.type === "RAW_OPEN" || token.type === "NEWLINE" || token.type === "EOF") {
5109
6140
  break;
5110
6141
  }
6142
+ if (token.type === "RAW_BLOCK_CLOSE") {
6143
+ const nextToken = ctx.tokens[pos + 1];
6144
+ if (nextToken?.type === "RAW_OPEN") {
6145
+ value += ">";
6146
+ consumed += 2;
6147
+ return {
6148
+ success: true,
6149
+ elements: [
6150
+ { element: "raw", data: value },
6151
+ { element: "text", data: "@" }
6152
+ ],
6153
+ consumed
6154
+ };
6155
+ }
6156
+ hasBlockClose = true;
6157
+ }
6158
+ if (token.type === "RAW_BLOCK_OPEN") {
6159
+ hasBlockOpen = true;
6160
+ }
5111
6161
  value += token.value;
5112
6162
  consumed++;
5113
6163
  pos++;
5114
6164
  }
5115
6165
  if (ctx.tokens[pos]?.type === "RAW_OPEN") {
5116
6166
  consumed++;
6167
+ pos++;
6168
+ }
6169
+ if (hasBlockOpen && hasBlockClose) {
6170
+ return {
6171
+ success: true,
6172
+ elements: [],
6173
+ consumed
6174
+ };
5117
6175
  }
5118
6176
  return {
5119
6177
  success: true,
@@ -5125,6 +6183,17 @@ function parseAngleRaw(ctx) {
5125
6183
  const startToken = currentToken(ctx);
5126
6184
  let pos = ctx.pos + 1;
5127
6185
  if (!hasClosingMarkerBeforeNewline({ ...ctx, pos }, "RAW_BLOCK_CLOSE")) {
6186
+ const nextToken = ctx.tokens[pos];
6187
+ if (nextToken?.type === "NEWLINE") {
6188
+ const afterNewline = ctx.tokens[pos + 1];
6189
+ if (afterNewline?.type === "RAW_BLOCK_CLOSE") {
6190
+ return {
6191
+ success: true,
6192
+ elements: [{ element: "text", data: startToken.value }],
6193
+ consumed: 3
6194
+ };
6195
+ }
6196
+ }
5128
6197
  return {
5129
6198
  success: true,
5130
6199
  elements: [{ element: "text", data: startToken.value }],
@@ -5406,6 +6475,12 @@ var closeSpanRule = {
5406
6475
  };
5407
6476
 
5408
6477
  // packages/parser/src/parser/rules/inline/size.ts
6478
+ var VALID_SIZE_UNITS = ["px", "em", "rem", "ex", "%", "cm", "mm", "in", "pc"];
6479
+ function isValidSizeValue(size) {
6480
+ const unitPattern = VALID_SIZE_UNITS.join("|");
6481
+ const match = size.match(new RegExp(`^(\\d+(?:\\.\\d+)?)(${unitPattern})$`, "i"));
6482
+ return match !== null;
6483
+ }
5409
6484
  function parseSizeValue(ctx, startPos) {
5410
6485
  let pos = startPos;
5411
6486
  let consumed = 0;
@@ -5429,7 +6504,11 @@ function parseSizeValue(ctx, startPos) {
5429
6504
  if (parts.length === 0) {
5430
6505
  return null;
5431
6506
  }
5432
- return { size: parts.join(""), consumed };
6507
+ const size = parts.join("");
6508
+ if (!isValidSizeValue(size)) {
6509
+ return null;
6510
+ }
6511
+ return { size, consumed };
5433
6512
  }
5434
6513
  var sizeRule = {
5435
6514
  name: "size",
@@ -5541,7 +6620,8 @@ var footnoteRule = {
5541
6620
  }
5542
6621
  pos++;
5543
6622
  consumed++;
5544
- const children = [];
6623
+ const paragraphs = [[]];
6624
+ let currentParagraph = 0;
5545
6625
  while (pos < ctx.tokens.length) {
5546
6626
  const token = ctx.tokens[pos];
5547
6627
  if (!token || token.type === "EOF") {
@@ -5566,23 +6646,58 @@ var footnoteRule = {
5566
6646
  }
5567
6647
  }
5568
6648
  if (token.type === "NEWLINE") {
5569
- children.push({ element: "line-break" });
5570
6649
  pos++;
5571
6650
  consumed++;
6651
+ if (ctx.tokens[pos]?.type === "NEWLINE") {
6652
+ while (ctx.tokens[pos]?.type === "NEWLINE") {
6653
+ pos++;
6654
+ consumed++;
6655
+ }
6656
+ currentParagraph++;
6657
+ paragraphs[currentParagraph] = [];
6658
+ } else {
6659
+ paragraphs[currentParagraph].push({ element: "line-break" });
6660
+ }
5572
6661
  continue;
5573
6662
  }
5574
6663
  const inlineCtx = { ...ctx, pos };
5575
6664
  const inlineResult = parseInlineUntil(inlineCtx, "BLOCK_END_OPEN");
5576
6665
  if (inlineResult.elements.length > 0) {
5577
- children.push(...inlineResult.elements);
6666
+ paragraphs[currentParagraph].push(...inlineResult.elements);
5578
6667
  pos += inlineResult.consumed;
5579
6668
  consumed += inlineResult.consumed;
5580
6669
  } else {
5581
- children.push({ element: "text", data: token.value });
6670
+ paragraphs[currentParagraph].push({ element: "text", data: token.value });
5582
6671
  pos++;
5583
6672
  consumed++;
5584
6673
  }
5585
6674
  }
6675
+ const children = [];
6676
+ for (let i = 0;i < paragraphs.length; i++) {
6677
+ const para = paragraphs[i];
6678
+ if (para.length === 0)
6679
+ continue;
6680
+ while (para.length > 0 && para[0]?.element === "line-break") {
6681
+ para.shift();
6682
+ }
6683
+ while (para.length > 0 && para[para.length - 1]?.element === "line-break") {
6684
+ para.pop();
6685
+ }
6686
+ if (para.length === 0)
6687
+ continue;
6688
+ if (i === 0) {
6689
+ children.push(...para);
6690
+ } else {
6691
+ children.push({
6692
+ element: "container",
6693
+ data: {
6694
+ type: "paragraph",
6695
+ attributes: {},
6696
+ elements: para
6697
+ }
6698
+ });
6699
+ }
6700
+ }
5586
6701
  ctx.footnotes.push(children);
5587
6702
  return {
5588
6703
  success: true,
@@ -5636,6 +6751,10 @@ function parseImageBlockName(ctx, startPos) {
5636
6751
  prefix = "f>";
5637
6752
  pos += 2;
5638
6753
  consumed += 2;
6754
+ } else if (nextToken?.type === "EQUALS") {
6755
+ prefix = "f=";
6756
+ pos += 2;
6757
+ consumed += 2;
5639
6758
  }
5640
6759
  }
5641
6760
  const nameToken = ctx.tokens[pos];
@@ -5658,6 +6777,15 @@ function parseImageSource(src) {
5658
6777
  const file = rest.substring(lastSlash + 1);
5659
6778
  return { type: "file3", data: { site, page, file } };
5660
6779
  }
6780
+ const slashes = src.split("/").length - 1;
6781
+ if (slashes >= 2) {
6782
+ const firstSlash = src.indexOf("/");
6783
+ const lastSlash = src.lastIndexOf("/");
6784
+ const site = src.substring(0, firstSlash);
6785
+ const page = src.substring(firstSlash + 1, lastSlash);
6786
+ const file = src.substring(lastSlash + 1);
6787
+ return { type: "file3", data: { site, page, file } };
6788
+ }
5661
6789
  if (slashIdx > 0) {
5662
6790
  const page = src.substring(0, slashIdx);
5663
6791
  const file = src.substring(slashIdx + 1);
@@ -5680,6 +6808,9 @@ function parseAlignment(blockName) {
5680
6808
  } else if (blockName === "f>image") {
5681
6809
  align = "right";
5682
6810
  float = true;
6811
+ } else if (blockName === "f=image") {
6812
+ align = "center";
6813
+ float = true;
5683
6814
  } else if (blockName === "image") {
5684
6815
  return null;
5685
6816
  }
@@ -5700,7 +6831,7 @@ var imageRule = {
5700
6831
  return { success: false };
5701
6832
  }
5702
6833
  const blockName = nameResult.name;
5703
- const imageNames = ["image", "=image", "<image", ">image", "f<image", "f>image"];
6834
+ const imageNames = ["image", "=image", "<image", ">image", "f<image", "f>image", "f=image"];
5704
6835
  if (!imageNames.includes(blockName)) {
5705
6836
  return { success: false };
5706
6837
  }
@@ -5789,9 +6920,8 @@ var userRule = {
5789
6920
  }
5790
6921
  let pos = ctx.pos + 1;
5791
6922
  let consumed = 1;
5792
- while (ctx.tokens[pos]?.type === "WHITESPACE") {
5793
- pos++;
5794
- consumed++;
6923
+ if (ctx.tokens[pos]?.type === "WHITESPACE") {
6924
+ return { success: false };
5795
6925
  }
5796
6926
  let showAvatar = false;
5797
6927
  if (ctx.tokens[pos]?.type === "STAR") {
@@ -6015,6 +7145,10 @@ var anchorRule = {
6015
7145
  consumed++;
6016
7146
  }
6017
7147
  foundClose = true;
7148
+ while (paragraphStrip && ctx.tokens[pos]?.type === "NEWLINE") {
7149
+ pos++;
7150
+ consumed++;
7151
+ }
6018
7152
  break;
6019
7153
  }
6020
7154
  }
@@ -6515,6 +7649,79 @@ var ifExprRule = {
6515
7649
  }
6516
7650
  };
6517
7651
 
7652
+ // packages/parser/src/parser/rules/inline/bibcite.ts
7653
var bibciteRule = {
  name: "bibcite",
  startTokens: ["TEXT"],
  /**
   * Parses an inline citation written as `((bibcite label))`.
   *
   * The rule matches two "(" TEXT tokens, optional whitespace, the
   * identifier `bibcite` (case-insensitive), optional whitespace, and then
   * collects token values as the label until two consecutive ")" TEXT tokens.
   * On success the trimmed label is appended to `ctx.bibcites` and a single
   * `bibliography-cite` element is returned.
   *
   * @param ctx - Parse context; reads `ctx.tokens` / `ctx.pos`, appends to `ctx.bibcites`.
   * @returns `{ success: false }` when the construct does not match, otherwise
   *   `{ success: true, elements, consumed }` where `consumed` counts every
   *   token from the opening "((" through the closing "))".
   */
  parse(ctx) {
    const isParen = (tok, ch) => tok?.type === "TEXT" && tok.value === ch;
    if (!isParen(currentToken(ctx), "(") || !isParen(ctx.tokens[ctx.pos + 1], "(")) {
      return { success: false };
    }
    let pos = ctx.pos + 2;
    const skipWhitespace = () => {
      while (ctx.tokens[pos]?.type === "WHITESPACE") {
        pos++;
      }
    };
    skipWhitespace();
    const nameToken = ctx.tokens[pos];
    if (nameToken?.type !== "IDENTIFIER" || nameToken.value.toLowerCase() !== "bibcite") {
      return { success: false };
    }
    pos++;
    skipWhitespace();
    const labelToken = ctx.tokens[pos];
    if (!labelToken || (labelToken.type !== "IDENTIFIER" && labelToken.type !== "TEXT")) {
      return { success: false };
    }
    let label = "";
    let closed = false;
    while (pos < ctx.tokens.length) {
      const t = ctx.tokens[pos];
      if (!t) {
        break;
      }
      if (isParen(t, ")") && isParen(ctx.tokens[pos + 1], ")")) {
        closed = true;
        break;
      }
      // A citation must be closed on the same line.
      if (t.type === "NEWLINE" || t.type === "EOF") {
        return { success: false };
      }
      label += t.value;
      pos++;
    }
    // Running out of tokens without seeing "))" is an unclosed citation;
    // previously this fell through and was reported as a successful match.
    if (!closed) {
      return { success: false };
    }
    label = label.trim();
    if (!label) {
      return { success: false };
    }
    ctx.bibcites.push(label);
    return {
      success: true,
      elements: [
        {
          element: "bibliography-cite",
          data: {
            label,
            brackets: false
          }
        }
      ],
      // Tokens scanned so far plus the two closing ")" tokens.
      consumed: pos - ctx.pos + 2
    };
  }
};
7724
+
6518
7725
  // packages/parser/src/parser/rules/inline/text.ts
6519
7726
  var textRule = {
6520
7727
  name: "text",
@@ -6543,6 +7750,7 @@ var inlineRules = [
6543
7750
  linkAnchorRule,
6544
7751
  linkStarRule,
6545
7752
  colorRule,
7753
+ backslashLineBreakRule,
6546
7754
  underscoreLineBreakRule,
6547
7755
  newlineLineBreakRule,
6548
7756
  commentRule,
@@ -6560,6 +7768,7 @@ var inlineRules = [
6560
7768
  anchorRule,
6561
7769
  mathInlineRule,
6562
7770
  equationRefRule,
7771
+ bibciteRule,
6563
7772
  guillemetRule,
6564
7773
  textRule
6565
7774
  ];
@@ -6716,6 +7925,59 @@ function splitParagraphAtBlankLineSpans(para) {
6716
7925
  }
6717
7926
  return result.length > 0 ? result : [para];
6718
7927
  }
7928
/**
 * Reports whether `el` is an "expr" element whose `data.expression` is the
 * empty string.
 *
 * Null-safe: a missing element or an element without `data` yields `false`
 * instead of throwing, so the function can be used directly as an array
 * predicate over child lists that may contain empty slots.
 *
 * @param el - Element to inspect (may be null/undefined).
 * @returns {boolean} True only for an expr element with an empty expression.
 */
function isEmptyExpr(el) {
  return el?.element === "expr" && el.data?.expression === "";
}
7934
/**
 * Splits a paragraph container into several paragraphs, using every
 * empty-expression element (see `isEmptyExpr`) as a separator.
 *
 * For each separator: one line-break directly before it and one line-break
 * directly after it are dropped, and the separator itself is discarded.
 * Segments that end up empty produce no paragraph. New paragraphs are created
 * with empty `attributes`.
 *
 * @param para - Node to split.
 * @returns `[para]` unchanged when it is not a paragraph container or holds
 *   no empty expr; otherwise the list of resulting paragraphs (possibly empty).
 */
function splitParagraphAtEmptyExpr(para) {
  const data = getContainerData(para);
  if (!data || data.type !== "paragraph") {
    return [para];
  }
  if (!data.elements.some(isEmptyExpr)) {
    return [para];
  }
  const result = [];
  let pending = [];
  // Emits the accumulated segment as a paragraph (if non-empty) and resets it.
  const flush = () => {
    if (pending.length === 0) {
      return;
    }
    result.push({
      element: "container",
      data: {
        type: "paragraph",
        attributes: {},
        elements: pending
      }
    });
    pending = [];
  };
  for (let i = 0; i < data.elements.length; i++) {
    const child = data.elements[i];
    if (!child) {
      continue;
    }
    if (!isEmptyExpr(child)) {
      pending.push(child);
      continue;
    }
    // Drop a single line-break sitting right before the separator.
    if (pending[pending.length - 1]?.element === "line-break") {
      pending.pop();
    }
    flush();
    // Skip a single line-break sitting right after the separator.
    if (data.elements[i + 1]?.element === "line-break") {
      i++;
    }
  }
  flush();
  return result;
}
6719
7981
  function mergeSpanStripParagraphs(children) {
6720
7982
  const expandedChildren = [];
6721
7983
  for (const child of children) {
@@ -6723,6 +7985,8 @@ function mergeSpanStripParagraphs(children) {
6723
7985
  const data = getContainerData(child);
6724
7986
  if (data && data.elements.some(isSplitSpan)) {
6725
7987
  expandedChildren.push(...splitParagraphAtBlankLineSpans(child));
7988
+ } else if (data && data.elements.some(isEmptyExpr)) {
7989
+ expandedChildren.push(...splitParagraphAtEmptyExpr(child));
6726
7990
  } else {
6727
7991
  expandedChildren.push(child);
6728
7992
  }
@@ -6740,7 +8004,8 @@ function mergeSpanStripParagraphs(children) {
6740
8004
  i++;
6741
8005
  continue;
6742
8006
  }
6743
- if (!hasParagraphStripSpan(node)) {
8007
+ const thisHasSpanStrip = hasParagraphStripSpan(node);
8008
+ if (!thisHasSpanStrip) {
6744
8009
  result.push(node);
6745
8010
  i++;
6746
8011
  continue;
@@ -6754,17 +8019,18 @@ function mergeSpanStripParagraphs(children) {
6754
8019
  const mergedChildren = [...paraData.elements];
6755
8020
  i++;
6756
8021
  while (i < expandedChildren.length) {
6757
- const nextNode = expandedChildren[i];
6758
- if (!nextNode || !isContainer(nextNode, "paragraph")) {
8022
+ const nextPara = expandedChildren[i];
8023
+ if (!nextPara || !isContainer(nextPara, "paragraph")) {
6759
8024
  break;
6760
8025
  }
6761
- const nextParaData = getContainerData(nextNode);
8026
+ const nextParaData = getContainerData(nextPara);
6762
8027
  if (!nextParaData) {
6763
8028
  break;
6764
8029
  }
8030
+ const hasSpanStrip = hasParagraphStripSpan(nextPara);
6765
8031
  mergedChildren.push(...nextParaData.elements);
6766
8032
  i++;
6767
- if (!hasParagraphStripSpan(nextNode)) {
8033
+ if (!hasSpanStrip) {
6768
8034
  const peekNext = expandedChildren[i];
6769
8035
  if (!peekNext || !isContainer(peekNext, "paragraph") || !hasParagraphStripSpan(peekNext)) {
6770
8036
  break;
@@ -6773,16 +8039,22 @@ function mergeSpanStripParagraphs(children) {
6773
8039
  }
6774
8040
  const escapedSpans = extractEscapedSpans(mergedChildren);
6775
8041
  removeLineBreaksAroundSpanStrip(mergedChildren);
6776
- if (mergedChildren.length > 0) {
6777
- const mergedPara = {
6778
- element: "container",
6779
- data: {
6780
- type: "paragraph",
6781
- attributes: {},
6782
- elements: mergedChildren
6783
- }
6784
- };
6785
- result.push(mergedPara);
8042
+ if (escapedSpans.length > 0) {
8043
+ if (mergedChildren.length > 0) {
8044
+ const para = {
8045
+ element: "container",
8046
+ data: {
8047
+ type: "paragraph",
8048
+ attributes: {},
8049
+ elements: mergedChildren
8050
+ }
8051
+ };
8052
+ result.push(para);
8053
+ }
8054
+ } else {
8055
+ for (const child of mergedChildren) {
8056
+ result.push(child);
8057
+ }
6786
8058
  }
6787
8059
  for (const span of escapedSpans) {
6788
8060
  result.push(span);
@@ -6823,6 +8095,9 @@ function removeEmptySpansAndAdjacentWhitespace(elements) {
6823
8095
  return result;
6824
8096
  }
6825
8097
  function cleanElement(el) {
8098
+ if (el.element === "line-break") {
8099
+ return { element: "line-break" };
8100
+ }
6826
8101
  if (el.element === "container") {
6827
8102
  const data = el.data;
6828
8103
  const cleanedData = {
@@ -6853,6 +8128,42 @@ function cleanElement(el) {
6853
8128
  }
6854
8129
  };
6855
8130
  }
8131
+ if (el.element === "list") {
8132
+ const data = el.data;
8133
+ return {
8134
+ element: "list",
8135
+ data: {
8136
+ ...data,
8137
+ items: data.items.map((item) => {
8138
+ if (item["item-type"] === "elements") {
8139
+ return {
8140
+ ...item,
8141
+ elements: cleanInternalFlags(item.elements)
8142
+ };
8143
+ } else if (item["item-type"] === "sub-list") {
8144
+ const cleanedList = cleanElement({ element: "list", data: item.data });
8145
+ return {
8146
+ "item-type": "sub-list",
8147
+ element: "list",
8148
+ data: "data" in cleanedList ? cleanedList.data : item.data
8149
+ };
8150
+ }
8151
+ return item;
8152
+ })
8153
+ }
8154
+ };
8155
+ }
8156
+ if (el.element === "definition-list") {
8157
+ const items = el.data;
8158
+ return {
8159
+ element: "definition-list",
8160
+ data: items.map((item) => ({
8161
+ ...item,
8162
+ key: cleanInternalFlags(item.key),
8163
+ value: cleanInternalFlags(item.value)
8164
+ }))
8165
+ };
8166
+ }
6856
8167
  return el;
6857
8168
  }
6858
8169
  // packages/parser/src/parser/toc.ts
@@ -6929,6 +8240,8 @@ class Parser {
6929
8240
  tocEntries: [],
6930
8241
  codeBlocks: [],
6931
8242
  htmlBlocks: [],
8243
+ footnoteBlockParsed: false,
8244
+ bibcites: [],
6932
8245
  blockRules,
6933
8246
  blockFallbackRule: paragraphRule,
6934
8247
  inlineRules