@wdprlib/parser 3.1.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/dist/index.cjs +295 -118
  2. package/dist/index.js +272 -95
  3. package/package.json +5 -3
  4. package/src/index.ts +163 -0
  5. package/src/lexer/index.ts +20 -0
  6. package/src/lexer/lexer.ts +687 -0
  7. package/src/lexer/tokens.ts +141 -0
  8. package/src/parser/constants.ts +173 -0
  9. package/src/parser/depth.ts +251 -0
  10. package/src/parser/index.ts +18 -0
  11. package/src/parser/parse.ts +315 -0
  12. package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
  13. package/src/parser/postprocess/index.ts +15 -0
  14. package/src/parser/postprocess/spanStrip.ts +697 -0
  15. package/src/parser/preprocess/expr.ts +265 -0
  16. package/src/parser/preprocess/index.ts +38 -0
  17. package/src/parser/preprocess/typography.ts +67 -0
  18. package/src/parser/preprocess/utils.ts +250 -0
  19. package/src/parser/preprocess/whitespace.ts +111 -0
  20. package/src/parser/rules/block/align.ts +282 -0
  21. package/src/parser/rules/block/bibliography.ts +359 -0
  22. package/src/parser/rules/block/block-list.ts +689 -0
  23. package/src/parser/rules/block/blockquote.ts +238 -0
  24. package/src/parser/rules/block/center.ts +87 -0
  25. package/src/parser/rules/block/clear-float.ts +75 -0
  26. package/src/parser/rules/block/code.ts +187 -0
  27. package/src/parser/rules/block/collapsible.ts +337 -0
  28. package/src/parser/rules/block/comment.ts +73 -0
  29. package/src/parser/rules/block/content-separator.ts +79 -0
  30. package/src/parser/rules/block/definition-list.ts +270 -0
  31. package/src/parser/rules/block/div.ts +400 -0
  32. package/src/parser/rules/block/embed-block.ts +153 -0
  33. package/src/parser/rules/block/footnoteblock.ts +200 -0
  34. package/src/parser/rules/block/heading.ts +142 -0
  35. package/src/parser/rules/block/horizontal-rule.ts +61 -0
  36. package/src/parser/rules/block/html.ts +222 -0
  37. package/src/parser/rules/block/iframe.ts +239 -0
  38. package/src/parser/rules/block/iftags.ts +150 -0
  39. package/src/parser/rules/block/include.ts +179 -0
  40. package/src/parser/rules/block/index.ts +127 -0
  41. package/src/parser/rules/block/list.ts +244 -0
  42. package/src/parser/rules/block/math.ts +183 -0
  43. package/src/parser/rules/block/module/backlinks/index.ts +31 -0
  44. package/src/parser/rules/block/module/backlinks/types.ts +21 -0
  45. package/src/parser/rules/block/module/categories/index.ts +34 -0
  46. package/src/parser/rules/block/module/categories/types.ts +21 -0
  47. package/src/parser/rules/block/module/css/index.ts +37 -0
  48. package/src/parser/rules/block/module/iftags/condition.ts +109 -0
  49. package/src/parser/rules/block/module/iftags/index.ts +26 -0
  50. package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
  51. package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
  52. package/src/parser/rules/block/module/iftags/types.ts +63 -0
  53. package/src/parser/rules/block/module/include/index.ts +20 -0
  54. package/src/parser/rules/block/module/include/resolve.ts +556 -0
  55. package/src/parser/rules/block/module/index.ts +122 -0
  56. package/src/parser/rules/block/module/join/index.ts +34 -0
  57. package/src/parser/rules/block/module/join/types.ts +23 -0
  58. package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
  59. package/src/parser/rules/block/module/listpages/extract.ts +410 -0
  60. package/src/parser/rules/block/module/listpages/index.ts +83 -0
  61. package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
  62. package/src/parser/rules/block/module/listpages/parser.ts +106 -0
  63. package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
  64. package/src/parser/rules/block/module/listpages/types.ts +513 -0
  65. package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
  66. package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
  67. package/src/parser/rules/block/module/listusers/extract.ts +45 -0
  68. package/src/parser/rules/block/module/listusers/index.ts +36 -0
  69. package/src/parser/rules/block/module/listusers/parser.ts +54 -0
  70. package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
  71. package/src/parser/rules/block/module/listusers/types.ts +93 -0
  72. package/src/parser/rules/block/module/mapping.ts +61 -0
  73. package/src/parser/rules/block/module/page-tree/index.ts +38 -0
  74. package/src/parser/rules/block/module/page-tree/types.ts +29 -0
  75. package/src/parser/rules/block/module/rate/index.ts +28 -0
  76. package/src/parser/rules/block/module/rate/types.ts +19 -0
  77. package/src/parser/rules/block/module/resolve.ts +411 -0
  78. package/src/parser/rules/block/module/types-common.ts +59 -0
  79. package/src/parser/rules/block/module/types.ts +61 -0
  80. package/src/parser/rules/block/module/utils.ts +43 -0
  81. package/src/parser/rules/block/module/walk.ts +380 -0
  82. package/src/parser/rules/block/module.ts +164 -0
  83. package/src/parser/rules/block/orphan-li.ts +177 -0
  84. package/src/parser/rules/block/paragraph.ts +157 -0
  85. package/src/parser/rules/block/table-block.ts +726 -0
  86. package/src/parser/rules/block/table.ts +441 -0
  87. package/src/parser/rules/block/tabview.ts +331 -0
  88. package/src/parser/rules/block/toc.ts +129 -0
  89. package/src/parser/rules/block/utils.ts +615 -0
  90. package/src/parser/rules/index.ts +49 -0
  91. package/src/parser/rules/inline/anchor-name.ts +154 -0
  92. package/src/parser/rules/inline/anchor.ts +327 -0
  93. package/src/parser/rules/inline/bibcite.ts +153 -0
  94. package/src/parser/rules/inline/bold.ts +86 -0
  95. package/src/parser/rules/inline/color.ts +140 -0
  96. package/src/parser/rules/inline/comment.ts +90 -0
  97. package/src/parser/rules/inline/equation-ref.ts +115 -0
  98. package/src/parser/rules/inline/expr.ts +526 -0
  99. package/src/parser/rules/inline/footnote.ts +223 -0
  100. package/src/parser/rules/inline/guillemet.ts +64 -0
  101. package/src/parser/rules/inline/html.ts +132 -0
  102. package/src/parser/rules/inline/image.ts +328 -0
  103. package/src/parser/rules/inline/index.ts +150 -0
  104. package/src/parser/rules/inline/italic.ts +74 -0
  105. package/src/parser/rules/inline/line-break.ts +326 -0
  106. package/src/parser/rules/inline/link-anchor.ts +147 -0
  107. package/src/parser/rules/inline/link-single.ts +164 -0
  108. package/src/parser/rules/inline/link-star.ts +134 -0
  109. package/src/parser/rules/inline/link-triple.ts +267 -0
  110. package/src/parser/rules/inline/math-inline.ts +126 -0
  111. package/src/parser/rules/inline/monospace.ts +78 -0
  112. package/src/parser/rules/inline/raw.ts +262 -0
  113. package/src/parser/rules/inline/size.ts +244 -0
  114. package/src/parser/rules/inline/span.ts +424 -0
  115. package/src/parser/rules/inline/strikethrough.ts +115 -0
  116. package/src/parser/rules/inline/subscript.ts +84 -0
  117. package/src/parser/rules/inline/superscript.ts +84 -0
  118. package/src/parser/rules/inline/text.ts +84 -0
  119. package/src/parser/rules/inline/underline.ts +127 -0
  120. package/src/parser/rules/inline/user.ts +147 -0
  121. package/src/parser/rules/inline/utils.ts +344 -0
  122. package/src/parser/rules/types.ts +252 -0
  123. package/src/parser/rules/utils.ts +155 -0
  124. package/src/parser/toc.ts +130 -0
@@ -0,0 +1,726 @@
1
+ /**
2
+ *
3
+ * Block rule for the explicit block-syntax table:
4
+ * `[[table]][[row]][[cell]]...[[/cell]][[/row]][[/table]]`.
5
+ *
6
+ * This is the structured alternative to the pipe-syntax table (`||`).
7
+ * Each element carries optional HTML attributes:
8
+ *
9
+ * ```
10
+ * [[table class="wiki-table"]]
11
+ * [[row]]
12
+ * [[hcell style="width: 50%"]]Header[[/hcell]]
13
+ * [[cell colspan="2"]]Data[[/cell]]
14
+ * [[/row]]
15
+ * [[/table]]
16
+ * ```
17
+ *
18
+ * Key details:
19
+ * - `[[hcell]]` produces header cells (`<th>`), `[[cell]]` produces data
20
+ * cells (`<td>`).
21
+ * - `colspan` is extracted from cell attributes and mapped to `column-span`.
22
+ * - Alignment can be derived from the `style` attribute's `text-align` value.
23
+ * - Cell content supports both block and inline elements, including nested
24
+ * tables. The custom `parseCellContent()` handles paragraph wrapping
25
+ * and block detection within cells.
26
+ * - Empty tables or tables with only empty rows fail the rule, falling
27
+ * back to text rendering.
28
+ * - The table element carries `_source: "block"` in attributes to
29
+ * distinguish it from pipe-syntax tables.
30
+ *
31
+ * @module
32
+ */
33
+ import type { Element, TableData, TableRow, TableCell, Alignment } from "@wdprlib/ast";
34
+ import type { BlockRule, ParseContext, RuleResult } from "../types";
35
+ import { currentToken } from "../types";
36
+ import { parseBlockName, parseAttributes, canApplyBlockRule } from "./utils";
37
+ import { canApplyInlineRule } from "../inline/utils";
38
+
39
/**
 * Block rule matching `[[table ...]] ... [[/table]]`.
 *
 * The rule reads the opening tag and its attributes, then walks the token
 * stream collecting `[[row]]` children (delegated to `parseRow`) until it
 * meets `[[/table]]` or runs out of tokens. Anything between rows that is
 * neither a row nor the closing tag is skipped one token at a time.
 *
 * A missing `[[/table]]` is reported as an `unclosed-block` warning. A table
 * without at least one non-empty row makes the rule fail so that the caller
 * can fall back to another interpretation of the tokens.
 */
export const tableBlockRule: BlockRule = {
  name: "table-block",
  startTokens: ["BLOCK_OPEN"],
  requiresLineStart: false,

  parse(ctx: ParseContext): RuleResult<Element> {
    const opener = currentToken(ctx);
    if (opener.type !== "BLOCK_OPEN") {
      return { success: false };
    }

    const { tokens } = ctx;
    const kindAt = (index: number) => tokens[index]?.type;

    // `cursor` is the only position we track; the consumed count is derived
    // from it at the end (cursor - ctx.pos).
    let cursor = ctx.pos + 1;

    // Block name must be exactly "table".
    const opening = parseBlockName(ctx, cursor);
    if (!opening || opening.name !== "table") {
      return { success: false };
    }
    cursor += opening.consumed;

    // Attributes of the table element itself.
    const tableAttrs = parseAttributes(ctx, cursor);
    cursor += tableAttrs.consumed;

    // The opening tag has to be terminated by `]]`.
    if (kindAt(cursor) !== "BLOCK_CLOSE") {
      return { success: false };
    }
    cursor++;

    // A single newline right after `[[table]]` is swallowed.
    if (kindAt(cursor) === "NEWLINE") {
      cursor++;
    }

    const rows: TableRow[] = [];
    let sawClosingTag = false;

    while (cursor < tokens.length) {
      // Whitespace and newlines between rows carry no meaning.
      while (kindAt(cursor) === "WHITESPACE" || kindAt(cursor) === "NEWLINE") {
        cursor++;
      }

      const token = tokens[cursor];
      if (!token || token.type === "EOF") {
        break;
      }

      if (token.type === "BLOCK_END_OPEN") {
        // Possible `[[/table]]`.
        const closing = parseBlockName(ctx, cursor + 1);
        if (closing?.name === "table") {
          sawClosingTag = true;
          cursor += 1 + closing.consumed; // `[[/` plus the name tokens
          if (kindAt(cursor) === "BLOCK_CLOSE") {
            cursor++;
          }
          if (kindAt(cursor) === "NEWLINE") {
            cursor++;
          }
          break;
        }
      } else if (token.type === "BLOCK_OPEN") {
        // Possible `[[row]]`.
        const child = parseBlockName(ctx, cursor + 1);
        if (child?.name === "row") {
          const parsedRow = parseRow(ctx, cursor);
          if (parsedRow) {
            rows.push(parsedRow.row);
            cursor += parsedRow.consumed;
            continue;
          }
        }
      }

      // Not something we understand here: step over it so the loop always
      // makes progress.
      cursor++;
    }

    if (!sawClosingTag) {
      ctx.diagnostics.push({
        severity: "warning",
        code: "unclosed-block",
        message: "Missing closing tag [[/table]] for [[table]]",
        position: opener.position,
      });
    }

    // Wikidot behavior: a table with no rows, or only rows without cells, is
    // not a table at all and should end up as plain text.
    if (rows.every((row) => row.cells.length === 0)) {
      return { success: false };
    }

    const tableData: TableData = {
      attributes: { ...tableAttrs.attrs, _source: "block" },
      rows,
    };

    return {
      success: true,
      elements: [{ element: "table", data: tableData }],
      consumed: cursor - ctx.pos,
    };
  },
};
174
+
175
/**
 * Reads one `[[row ...]] ... [[/row]]` block and the cells inside it.
 *
 * The row's own attributes are copied to the resulting `TableRow` untouched.
 * Between cells, whitespace and newlines are ignored and any other token that
 * is not a cell opener or the row's closing tag is skipped. Scanning ends at
 * `[[/row]]`, at an EOF token, or at the end of the token array; in the last
 * two cases an `unclosed-block` warning is added to `ctx.diagnostics`.
 *
 * @param ctx - Parse context.
 * @param startPos - Token index of the `BLOCK_OPEN` that starts `[[row]]`.
 * @returns The row plus the number of tokens consumed, or `null` when the
 *   tokens at `startPos` do not form a complete `[[row ...]]` opening tag.
 */
function parseRow(ctx: ParseContext, startPos: number): { row: TableRow; consumed: number } | null {
  const { tokens } = ctx;
  const kindAt = (index: number) => tokens[index]?.type;

  // Opening tag: `[[` + "row" + attributes + `]]`.
  if (kindAt(startPos) !== "BLOCK_OPEN") {
    return null;
  }
  let cursor = startPos + 1;

  const opening = parseBlockName(ctx, cursor);
  if (!opening || opening.name !== "row") {
    return null;
  }
  cursor += opening.consumed;

  const rowAttrs = parseAttributes(ctx, cursor);
  cursor += rowAttrs.consumed;

  if (kindAt(cursor) !== "BLOCK_CLOSE") {
    return null;
  }
  cursor++;

  // One newline directly after the opening tag is optional.
  if (kindAt(cursor) === "NEWLINE") {
    cursor++;
  }

  const cells: TableCell[] = [];
  let sawClosingTag = false;

  while (cursor < tokens.length) {
    // Ignore layout-only tokens between cells.
    while (kindAt(cursor) === "WHITESPACE" || kindAt(cursor) === "NEWLINE") {
      cursor++;
    }

    const token = tokens[cursor];
    if (!token || token.type === "EOF") {
      break;
    }

    if (token.type === "BLOCK_END_OPEN") {
      // Possible `[[/row]]`.
      const closing = parseBlockName(ctx, cursor + 1);
      if (closing?.name === "row") {
        sawClosingTag = true;
        cursor += 1 + closing.consumed; // `[[/` plus the name tokens
        if (kindAt(cursor) === "BLOCK_CLOSE") {
          cursor++;
        }
        if (kindAt(cursor) === "NEWLINE") {
          cursor++;
        }
        break;
      }
    } else if (token.type === "BLOCK_OPEN") {
      // Possible `[[cell]]` / `[[hcell]]`.
      const child = parseBlockName(ctx, cursor + 1);
      if (child?.name === "cell" || child?.name === "hcell") {
        const parsedCell = parseCell(ctx, cursor);
        if (parsedCell) {
          cells.push(parsedCell.cell);
          cursor += parsedCell.consumed;
          continue;
        }
      }
    }

    // Anything else is stepped over so the loop cannot stall.
    cursor++;
  }

  if (!sawClosingTag) {
    ctx.diagnostics.push({
      severity: "warning",
      code: "unclosed-block",
      message: "Missing closing tag [[/row]] for [[row]]",
      position: tokens[startPos]?.position ?? {
        start: { line: 0, column: 0, offset: 0 },
        end: { line: 0, column: 0, offset: 0 },
      },
    });
  }

  return {
    row: { attributes: rowAttrs.attrs, cells },
    consumed: cursor - startPos,
  };
}
298
+
299
/**
 * Parses a `[[cell ...]]...[[/cell]]` or `[[hcell ...]]...[[/hcell]]` block.
 *
 * Cell body content is parsed via `parseCellContent()`. When that pass saw no
 * blank-line paragraph break, a body consisting of a single paragraph of
 * inline elements is unwrapped (see `unwrapSingleInlineParagraph()`), so a
 * simple cell is not wrapped in a paragraph.
 *
 * Attribute handling:
 * - `colspan` is removed from the attribute map and stored as `column-span`.
 *   A missing, non-numeric, zero or negative value yields a span of 1, so the
 *   AST never carries `NaN` or a non-positive span.
 * - `text-align: left|center|right` inside `style` sets `align`; the `style`
 *   attribute itself is kept.
 * - Every other attribute (e.g. `rowspan`) stays in `attributes`.
 *
 * A missing closing tag is reported as an `unclosed-block` warning; the cell
 * is still returned with whatever content was collected.
 *
 * @param ctx - Parse context.
 * @param startPos - Token index at the `[[cell]]`/`[[hcell]]` BLOCK_OPEN.
 * @returns The parsed cell and consumed count, or `null` when the tokens at
 *   `startPos` do not form a complete opening tag.
 */
function parseCell(
  ctx: ParseContext,
  startPos: number,
): { cell: TableCell; consumed: number } | null {
  let pos = startPos;
  let consumed = 0;

  // Expect [[cell]] or [[hcell]]
  if (ctx.tokens[pos]?.type !== "BLOCK_OPEN") {
    return null;
  }
  pos++;
  consumed++;

  const nameResult = parseBlockName(ctx, pos);
  if (!nameResult || (nameResult.name !== "cell" && nameResult.name !== "hcell")) {
    return null;
  }

  const isHeader = nameResult.name === "hcell";
  pos += nameResult.consumed;
  consumed += nameResult.consumed;

  // Parse cell attributes
  const attrResult = parseAttributes(ctx, pos);
  pos += attrResult.consumed;
  consumed += attrResult.consumed;

  // Extract colspan from attributes (rowspan stays in attributes for renderer).
  // parseInt yields NaN for values such as "abc"; NaN, 0 and negatives are
  // not usable spans, so they all fall back to 1 (`NaN >= 1` is false).
  const rawColspan = attrResult.attrs.colspan;
  const parsedColspan = rawColspan ? parseInt(rawColspan, 10) : 1;
  const colspan = parsedColspan >= 1 ? parsedColspan : 1;

  // Extract alignment from style attribute
  let align: Alignment | null = null;
  const style = attrResult.attrs.style;
  if (style) {
    const alignMatch = style.match(/text-align:\s*(left|center|right)/i);
    if (alignMatch) {
      align = alignMatch[1]?.toLowerCase() as Alignment;
    }
  }

  // Remove colspan from attributes (it's handled separately via column-span)
  const cellAttrs = { ...attrResult.attrs };
  delete cellAttrs.colspan;

  if (ctx.tokens[pos]?.type !== "BLOCK_CLOSE") {
    return null;
  }
  pos++;
  consumed++;

  // Skip optional newline after [[cell]]
  if (ctx.tokens[pos]?.type === "NEWLINE") {
    pos++;
    consumed++;
  }

  const closeName = isHeader ? "hcell" : "cell";

  // Close condition: the body ends at `[[/cell]]` for a cell and at
  // `[[/hcell]]` for a header cell (the other closing tag does not end it).
  const closeCondition = (checkCtx: ParseContext): boolean => {
    const token = checkCtx.tokens[checkCtx.pos];
    if (token?.type === "BLOCK_END_OPEN") {
      const closeNameResult = parseBlockName(checkCtx, checkCtx.pos + 1);
      if (closeNameResult?.name === closeName) {
        return true;
      }
    }
    return false;
  };

  // Parse cell content using parseCellContent (supports blocks such as nested tables)
  const bodyCtx: ParseContext = { ...ctx, pos };
  const bodyResult = parseCellContent(bodyCtx, closeCondition);
  consumed += bodyResult.consumed;
  pos += bodyResult.consumed;
  const hadParagraphBreaks = bodyResult.hadParagraphBreaks;

  // parseCellContent stops either at the closing tag or at the end of input;
  // anything other than BLOCK_END_OPEN here means the tag is missing.
  if (ctx.tokens[pos]?.type !== "BLOCK_END_OPEN") {
    ctx.diagnostics.push({
      severity: "warning",
      code: "unclosed-block",
      message: `Missing closing tag [[/${closeName}]] for [[${closeName}]]`,
      position: ctx.tokens[startPos]?.position ?? {
        start: { line: 0, column: 0, offset: 0 },
        end: { line: 0, column: 0, offset: 0 },
      },
    });
  }

  // Consume [[/cell]] or [[/hcell]]
  if (ctx.tokens[pos]?.type === "BLOCK_END_OPEN") {
    pos++;
    consumed++;
    const closeNameResult = parseBlockName(ctx, pos);
    if (closeNameResult) {
      pos += closeNameResult.consumed;
      consumed += closeNameResult.consumed;
    }
    if (ctx.tokens[pos]?.type === "BLOCK_CLOSE") {
      pos++;
      consumed++;
    }
    if (ctx.tokens[pos]?.type === "NEWLINE") {
      pos++;
      consumed++;
    }
  }

  // Process cell elements: unwrap single paragraph if it contains only inline elements
  // Wikidot behavior:
  // - Simple inline content (no blank lines) → direct elements (no paragraph wrapper)
  // - Content with blank lines or blocks → keep paragraph wrappers
  const processedElements = hadParagraphBreaks
    ? bodyResult.elements
    : unwrapSingleInlineParagraph(bodyResult.elements);

  return {
    cell: {
      header: isHeader,
      "column-span": colspan,
      align,
      attributes: cellAttrs,
      elements: processedElements,
    },
    consumed,
  };
}
444
+
445
/**
 * Strips the paragraph wrapper from a cell body that is nothing more than
 * one paragraph of inline content.
 *
 * The input is returned unchanged (same array) unless it holds exactly one
 * element, that element is a `container` whose data has `type: "paragraph"`,
 * and none of the paragraph's children is block-level according to
 * `isBlockElement`. In that one case the paragraph's children are returned
 * instead, so the cell holds `text` rather than a paragraph around `text`.
 *
 * @param elements - The parsed cell body elements.
 * @returns Either `elements` itself or the children of its sole paragraph.
 */
function unwrapSingleInlineParagraph(elements: Element[]): Element[] {
  if (elements.length !== 1) {
    return elements;
  }

  const sole = elements[0];
  if (
    !(
      sole?.element === "container" &&
      typeof sole.data === "object" &&
      sole.data !== null &&
      "type" in sole.data &&
      sole.data.type === "paragraph"
    )
  ) {
    // Not a paragraph container: nothing to unwrap.
    return elements;
  }

  // A paragraph without an `elements` list is treated as empty.
  const children = (sole.data as { elements?: Element[] }).elements ?? [];

  // Block-level children need the wrapper to stay in place.
  return children.some((child) => isBlockElement(child)) ? elements : children;
}
489
+
490
/**
 * Tells whether an element counts as block-level for the purposes of
 * {@link unwrapSingleInlineParagraph}.
 *
 * Two kinds of elements qualify:
 * - elements whose `element` tag is one of `table`, `div`, `blockquote`,
 *   `code`, `list`, `iframe` or `image-block`;
 * - `container` elements whose data object has a `type` of `paragraph`,
 *   `div` or `blockquote`.
 *
 * @param el - The element to test.
 * @returns `true` if the element is block-level, `false` otherwise.
 */
function isBlockElement(el: Element): boolean {
  // Element tags that are block-level on their own.
  const blockTags = new Set<string>([
    "table",
    "div",
    "blockquote",
    "code",
    "list",
    "iframe",
    "image-block",
  ]);
  if (blockTags.has(el.element)) {
    return true;
  }

  // Beyond that, only containers carrying a data object can qualify.
  if (el.element !== "container" || typeof el.data !== "object" || el.data === null) {
    return false;
  }

  const { type } = el.data as { type?: string };
  return type === "paragraph" || type === "div" || type === "blockquote";
}
517
+
518
/**
 * Parses the body of a table cell into a mix of inline and block elements.
 *
 * For every token that is not a newline or line-start whitespace, the block
 * rules in `ctx.blockRules` are tried first (gated by `canApplyBlockRule`),
 * then the inline rules in `ctx.inlineRules` (gated by `canApplyInlineRule`);
 * if nothing matches, the token's raw value becomes a text element.
 *
 * Inline output is buffered in a "pending" segment and flushed as follows:
 * - A blank line (two or more consecutive newlines) flushes the segment as a
 *   paragraph and marks the result as having paragraph breaks.
 * - A single newline directly followed by a `BLOCK_OPEN` token, or a block
 *   rule succeeding, also flushes the segment as a paragraph.
 * - Any other single newline becomes a `line-break` element, unless nothing
 *   has been collected yet or the next token is `BLOCK_END_OPEN`/EOF, in
 *   which case it is dropped.
 * - At the end, the remaining segment is wrapped in a paragraph only if one
 *   of the flushes above (or a block element) occurred; otherwise its
 *   elements are emitted bare.
 *
 * Whenever a segment is flushed, blank text elements are trimmed from both
 * ends and `line-break` elements from the tail.
 *
 * @param ctx - Parse context; scanning starts at `ctx.pos`.
 * @param closeCondition - Predicate that returns `true` at the cell's
 *   closing tag (`[[/cell]]` or `[[/hcell]]`); the closing tag itself is not
 *   consumed.
 * @returns Parsed elements, number of tokens consumed, and whether a
 *   blank-line paragraph break was seen.
 */
function parseCellContent(
  ctx: ParseContext,
  closeCondition: (ctx: ParseContext) => boolean,
): { elements: Element[]; consumed: number; hadParagraphBreaks: boolean } {
  const { tokens } = ctx;
  const output: Element[] = [];

  // Inline elements collected since the last flush.
  let pending: Element[] = [];
  // Scan position; the consumed count is derived from it on return.
  let cursor = ctx.pos;
  // Set once the content has been split by a blank line, a newline before a
  // block, or a block element: the trailing segment then needs a paragraph.
  let wrapTrailingSegment = false;
  // Set only by blank lines.
  let hadParagraphBreaks = false;

  const isBlankText = (el: Element | undefined): boolean =>
    el?.element === "text" && typeof el.data === "string" && el.data.trim() === "";

  // Moves the pending segment to the output, trimmed, optionally wrapped.
  const flush = (asParagraph: boolean): void => {
    // Drop trailing blank text and line-breaks...
    let end = pending.length;
    while (end > 0 && (isBlankText(pending[end - 1]) || pending[end - 1]?.element === "line-break")) {
      end--;
    }
    // ...and leading blank text.
    let start = 0;
    while (start < end && isBlankText(pending[start])) {
      start++;
    }

    const kept = pending.slice(start, end);
    pending = [];
    if (kept.length === 0) {
      return;
    }

    if (asParagraph) {
      output.push({
        element: "container",
        data: {
          type: "paragraph",
          attributes: {},
          elements: kept,
        },
      });
    } else {
      output.push(...kept);
    }
  };

  while (cursor < tokens.length) {
    const token = tokens[cursor];
    if (!token || token.type === "EOF") {
      break;
    }

    // Stop in front of the cell's closing tag.
    if (closeCondition({ ...ctx, pos: cursor })) {
      break;
    }

    if (token.type === "NEWLINE") {
      cursor++;

      if (tokens[cursor]?.type === "NEWLINE") {
        // Blank line: swallow the whole run of newlines, close the current
        // paragraph, and skip indentation on the following line.
        do {
          cursor++;
        } while (tokens[cursor]?.type === "NEWLINE");

        flush(true);
        wrapTrailingSegment = true;
        hadParagraphBreaks = true;

        while (tokens[cursor]?.type === "WHITESPACE") {
          cursor++;
        }
        continue;
      }

      const upcoming = tokens[cursor];
      if (!upcoming || upcoming.type === "BLOCK_END_OPEN" || upcoming.type === "EOF") {
        // Newline right before a closing tag or the end: drop it.
        continue;
      }

      if (upcoming.type === "BLOCK_OPEN") {
        // Newline separating text from a following `[[...]]`: end the paragraph.
        flush(true);
        wrapTrailingSegment = true;
        continue;
      }

      if (pending.length > 0 || output.length > 0) {
        // Newline inside running content is a line break; a newline before
        // any content is ignored.
        pending.push({ element: "line-break" });
      }
      continue;
    }

    // Indentation at the start of a line is not content.
    if (token.type === "WHITESPACE" && token.lineStart) {
      cursor++;
      continue;
    }

    let handled = false;

    // Block rules first (nested tables, divs, ...).
    const blockCtx: ParseContext = { ...ctx, pos: cursor };
    for (const rule of ctx.blockRules) {
      if (!canApplyBlockRule(rule, token)) {
        continue;
      }
      const outcome = rule.parse(blockCtx);
      if (!outcome.success) {
        continue;
      }

      // Text collected so far becomes its own paragraph ahead of the block.
      flush(true);
      output.push(...outcome.elements);
      wrapTrailingSegment = true;
      cursor += outcome.consumed;
      handled = true;
      break;
    }
    if (handled) {
      continue;
    }

    // Then inline rules.
    const inlineCtx: ParseContext = { ...ctx, pos: cursor };
    for (const rule of ctx.inlineRules) {
      if (!canApplyInlineRule(rule, token)) {
        continue;
      }
      const outcome = rule.parse(inlineCtx);
      if (!outcome.success) {
        continue;
      }

      pending.push(...outcome.elements);
      cursor += outcome.consumed;
      handled = true;
      break;
    }
    if (handled) {
      continue;
    }

    // Nothing matched: keep the token's raw value as text.
    pending.push({ element: "text", data: token.value });
    cursor++;
  }

  // Whatever is left is wrapped only if the content was split earlier.
  flush(wrapTrailingSegment);

  return { elements: output, consumed: cursor - ctx.pos, hadParagraphBreaks };
}