@wdprlib/parser 3.1.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/dist/index.cjs +295 -118
  2. package/dist/index.js +272 -95
  3. package/package.json +5 -3
  4. package/src/index.ts +163 -0
  5. package/src/lexer/index.ts +20 -0
  6. package/src/lexer/lexer.ts +687 -0
  7. package/src/lexer/tokens.ts +141 -0
  8. package/src/parser/constants.ts +173 -0
  9. package/src/parser/depth.ts +251 -0
  10. package/src/parser/index.ts +18 -0
  11. package/src/parser/parse.ts +315 -0
  12. package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
  13. package/src/parser/postprocess/index.ts +15 -0
  14. package/src/parser/postprocess/spanStrip.ts +697 -0
  15. package/src/parser/preprocess/expr.ts +265 -0
  16. package/src/parser/preprocess/index.ts +38 -0
  17. package/src/parser/preprocess/typography.ts +67 -0
  18. package/src/parser/preprocess/utils.ts +250 -0
  19. package/src/parser/preprocess/whitespace.ts +111 -0
  20. package/src/parser/rules/block/align.ts +282 -0
  21. package/src/parser/rules/block/bibliography.ts +359 -0
  22. package/src/parser/rules/block/block-list.ts +689 -0
  23. package/src/parser/rules/block/blockquote.ts +238 -0
  24. package/src/parser/rules/block/center.ts +87 -0
  25. package/src/parser/rules/block/clear-float.ts +75 -0
  26. package/src/parser/rules/block/code.ts +187 -0
  27. package/src/parser/rules/block/collapsible.ts +337 -0
  28. package/src/parser/rules/block/comment.ts +73 -0
  29. package/src/parser/rules/block/content-separator.ts +79 -0
  30. package/src/parser/rules/block/definition-list.ts +270 -0
  31. package/src/parser/rules/block/div.ts +400 -0
  32. package/src/parser/rules/block/embed-block.ts +153 -0
  33. package/src/parser/rules/block/footnoteblock.ts +200 -0
  34. package/src/parser/rules/block/heading.ts +142 -0
  35. package/src/parser/rules/block/horizontal-rule.ts +61 -0
  36. package/src/parser/rules/block/html.ts +222 -0
  37. package/src/parser/rules/block/iframe.ts +239 -0
  38. package/src/parser/rules/block/iftags.ts +150 -0
  39. package/src/parser/rules/block/include.ts +179 -0
  40. package/src/parser/rules/block/index.ts +127 -0
  41. package/src/parser/rules/block/list.ts +244 -0
  42. package/src/parser/rules/block/math.ts +183 -0
  43. package/src/parser/rules/block/module/backlinks/index.ts +31 -0
  44. package/src/parser/rules/block/module/backlinks/types.ts +21 -0
  45. package/src/parser/rules/block/module/categories/index.ts +34 -0
  46. package/src/parser/rules/block/module/categories/types.ts +21 -0
  47. package/src/parser/rules/block/module/css/index.ts +37 -0
  48. package/src/parser/rules/block/module/iftags/condition.ts +109 -0
  49. package/src/parser/rules/block/module/iftags/index.ts +26 -0
  50. package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
  51. package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
  52. package/src/parser/rules/block/module/iftags/types.ts +63 -0
  53. package/src/parser/rules/block/module/include/index.ts +20 -0
  54. package/src/parser/rules/block/module/include/resolve.ts +556 -0
  55. package/src/parser/rules/block/module/index.ts +122 -0
  56. package/src/parser/rules/block/module/join/index.ts +34 -0
  57. package/src/parser/rules/block/module/join/types.ts +23 -0
  58. package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
  59. package/src/parser/rules/block/module/listpages/extract.ts +410 -0
  60. package/src/parser/rules/block/module/listpages/index.ts +83 -0
  61. package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
  62. package/src/parser/rules/block/module/listpages/parser.ts +106 -0
  63. package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
  64. package/src/parser/rules/block/module/listpages/types.ts +513 -0
  65. package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
  66. package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
  67. package/src/parser/rules/block/module/listusers/extract.ts +45 -0
  68. package/src/parser/rules/block/module/listusers/index.ts +36 -0
  69. package/src/parser/rules/block/module/listusers/parser.ts +54 -0
  70. package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
  71. package/src/parser/rules/block/module/listusers/types.ts +93 -0
  72. package/src/parser/rules/block/module/mapping.ts +61 -0
  73. package/src/parser/rules/block/module/page-tree/index.ts +38 -0
  74. package/src/parser/rules/block/module/page-tree/types.ts +29 -0
  75. package/src/parser/rules/block/module/rate/index.ts +28 -0
  76. package/src/parser/rules/block/module/rate/types.ts +19 -0
  77. package/src/parser/rules/block/module/resolve.ts +411 -0
  78. package/src/parser/rules/block/module/types-common.ts +59 -0
  79. package/src/parser/rules/block/module/types.ts +61 -0
  80. package/src/parser/rules/block/module/utils.ts +43 -0
  81. package/src/parser/rules/block/module/walk.ts +380 -0
  82. package/src/parser/rules/block/module.ts +164 -0
  83. package/src/parser/rules/block/orphan-li.ts +177 -0
  84. package/src/parser/rules/block/paragraph.ts +157 -0
  85. package/src/parser/rules/block/table-block.ts +726 -0
  86. package/src/parser/rules/block/table.ts +441 -0
  87. package/src/parser/rules/block/tabview.ts +331 -0
  88. package/src/parser/rules/block/toc.ts +129 -0
  89. package/src/parser/rules/block/utils.ts +615 -0
  90. package/src/parser/rules/index.ts +49 -0
  91. package/src/parser/rules/inline/anchor-name.ts +154 -0
  92. package/src/parser/rules/inline/anchor.ts +327 -0
  93. package/src/parser/rules/inline/bibcite.ts +153 -0
  94. package/src/parser/rules/inline/bold.ts +86 -0
  95. package/src/parser/rules/inline/color.ts +140 -0
  96. package/src/parser/rules/inline/comment.ts +90 -0
  97. package/src/parser/rules/inline/equation-ref.ts +115 -0
  98. package/src/parser/rules/inline/expr.ts +526 -0
  99. package/src/parser/rules/inline/footnote.ts +223 -0
  100. package/src/parser/rules/inline/guillemet.ts +64 -0
  101. package/src/parser/rules/inline/html.ts +132 -0
  102. package/src/parser/rules/inline/image.ts +328 -0
  103. package/src/parser/rules/inline/index.ts +150 -0
  104. package/src/parser/rules/inline/italic.ts +74 -0
  105. package/src/parser/rules/inline/line-break.ts +326 -0
  106. package/src/parser/rules/inline/link-anchor.ts +147 -0
  107. package/src/parser/rules/inline/link-single.ts +164 -0
  108. package/src/parser/rules/inline/link-star.ts +134 -0
  109. package/src/parser/rules/inline/link-triple.ts +267 -0
  110. package/src/parser/rules/inline/math-inline.ts +126 -0
  111. package/src/parser/rules/inline/monospace.ts +78 -0
  112. package/src/parser/rules/inline/raw.ts +262 -0
  113. package/src/parser/rules/inline/size.ts +244 -0
  114. package/src/parser/rules/inline/span.ts +424 -0
  115. package/src/parser/rules/inline/strikethrough.ts +115 -0
  116. package/src/parser/rules/inline/subscript.ts +84 -0
  117. package/src/parser/rules/inline/superscript.ts +84 -0
  118. package/src/parser/rules/inline/text.ts +84 -0
  119. package/src/parser/rules/inline/underline.ts +127 -0
  120. package/src/parser/rules/inline/user.ts +147 -0
  121. package/src/parser/rules/inline/utils.ts +344 -0
  122. package/src/parser/rules/types.ts +252 -0
  123. package/src/parser/rules/utils.ts +155 -0
  124. package/src/parser/toc.ts +130 -0
@@ -0,0 +1,689 @@
1
+ /**
2
+ *
3
+ * Block rule for the explicit list syntax: `[[ul]]`/`[[ol]]` with `[[li]]` items.
4
+ *
5
+ * Wikidot supports two kinds of lists: the lightweight "marker" syntax
6
+ * (`* item`, `# item`) handled by `list.ts`, and the block-level syntax
7
+ * handled here:
8
+ *
9
+ * ```
10
+ * [[ul]]
11
+ * [[li]]Item 1[[/li]]
12
+ * [[li]]Item 2[[/li]]
13
+ * [[/ul]]
14
+ * ```
15
+ *
16
+ * Block lists can carry HTML attributes on both the list wrapper and
17
+ * individual items, and support arbitrary nesting. Content inside `[[li]]`
18
+ * may include inline markup, block elements (divs, nested tables, etc.),
19
+ * and even nested `[[ul]]`/`[[ol]]` sub-lists.
20
+ *
21
+ * Key Wikidot-specific behaviors reproduced here:
22
+ * - `[[li_]]` is NOT a valid tag and is treated as plain text.
23
+ * - Bare content inside `[[ul]]`/`[[ol]]` (without `[[li]]`) is wrapped in
24
+ * a `<li style="list-style: none">` equivalent (the `_noMarker` flag).
25
+ * - A `<br />` is appended after the entire block list.
26
+ * - Newline handling inside `[[li]]` follows Wikidot rules: single newlines
27
+ * become `<br />`, including trailing newlines before `[[/li]]`.
28
+ * - Content after `[[/li]]` but before the next `[[li]]` or close tag is
29
+ * included in the same list item.
30
+ *
31
+ * @module
32
+ */
33
+ import type { Element, ListData, ListItem } from "@wdprlib/ast";
34
+ import type { BlockRule, ParseContext, RuleResult } from "../types";
35
+ import { currentToken } from "../types";
36
+ import { parseBlockName, parseAttributes, canApplyBlockRule } from "./utils";
37
+
38
+ /** String-literal union naming the two block-list tags: `"ul"` (`[[ul]]`, unordered) and `"ol"` (`[[ol]]`, ordered). */
39
+ type ListBlockType = "ul" | "ol";
40
+
41
/**
 * Tests whether a list closing tag (`[[/ul]]` or `[[/ol]]`) begins at `pos`.
 *
 * Only the `BLOCK_END_OPEN` token and the block name that follows it are
 * inspected; the terminating `]]` is not verified here.
 *
 * @param ctx - Parse context.
 * @param pos - Token index to inspect.
 * @param expectedType - When given, only a close tag of that list type matches.
 * @returns `true` if a matching close tag starts at `pos`.
 */
function isListClose(ctx: ParseContext, pos: number, expectedType?: ListBlockType): boolean {
  const opener = ctx.tokens[pos];
  if (opener?.type !== "BLOCK_END_OPEN") {
    return false;
  }

  const parsed = parseBlockName(ctx, pos + 1);
  if (!parsed) {
    return false;
  }

  // Without an expected type, either list tag counts as a close.
  const accepted: readonly string[] = expectedType ? [expectedType] : ["ul", "ol"];
  return accepted.includes(parsed.name);
}
59
+
60
/**
 * Tests whether a `[[/li]]` closing tag begins at `pos`.
 *
 * Only the `BLOCK_END_OPEN` token and the block name that follows it are
 * inspected.
 *
 * @param ctx - Parse context.
 * @param pos - Token index to inspect.
 * @returns `true` if the tokens at `pos` start a `[[/li]]` tag.
 */
function isLiClose(ctx: ParseContext, pos: number): boolean {
  if (ctx.tokens[pos]?.type === "BLOCK_END_OPEN") {
    const parsed = parseBlockName(ctx, pos + 1);
    return parsed?.name === "li";
  }
  return false;
}
72
+
73
/**
 * Tests whether a `[[li` opening tag begins at `pos`.
 *
 * The block name must be exactly `"li"`; the paragraph-strip spelling
 * `[[li_]]` is deliberately rejected because Wikidot renders it as plain text.
 *
 * @param ctx - Parse context.
 * @param pos - Token index to inspect.
 * @returns The block name plus the number of tokens covering `[[` and the
 *   name, or `null` when no `[[li` starts at `pos`.
 */
function isLiOpen(ctx: ParseContext, pos: number): { name: string; consumed: number } | null {
  const startsBlock = ctx.tokens[pos]?.type === "BLOCK_OPEN";
  const parsed = startsBlock ? parseBlockName(ctx, pos + 1) : null;
  if (!parsed || parsed.name !== "li") {
    return null;
  }
  // +1 accounts for the BLOCK_OPEN token preceding the name.
  return { name: parsed.name, consumed: parsed.consumed + 1 };
}
93
+
94
/**
 * Tests whether a `[[ul` or `[[ol` opening tag begins at `pos`, i.e. whether
 * a nested list starts inside the current list or list item.
 *
 * @param ctx - Parse context.
 * @param pos - Token index to inspect.
 * @returns The list type plus the number of tokens covering `[[` and the
 *   name, or `null` when no list opener starts at `pos`.
 */
function isNestedListOpen(
  ctx: ParseContext,
  pos: number,
): { type: ListBlockType; consumed: number } | null {
  if (ctx.tokens[pos]?.type !== "BLOCK_OPEN") {
    return null;
  }

  const parsed = parseBlockName(ctx, pos + 1);
  if (!parsed) {
    return null;
  }

  const { name, consumed } = parsed;
  const opensList = name === "ul" || name === "ol";
  // +1 accounts for the BLOCK_OPEN token preceding the name.
  return opensList ? { type: name as ListBlockType, consumed: consumed + 1 } : null;
}
114
+
115
/**
 * Measures a closing tag `[[/name]]` starting at `pos`, together with one
 * optional NEWLINE directly after it.
 *
 * The name and the `]]` are each counted only if present, so a malformed
 * close tag still yields a length of at least one token.
 *
 * @param ctx - Parse context.
 * @param pos - Token index of the BLOCK_END_OPEN token.
 * @returns Number of tokens the close tag (and trailing newline) occupies.
 */
function consumeCloseTag(ctx: ParseContext, pos: number): number {
  const parsed = parseBlockName(ctx, pos + 1);

  // One token for BLOCK_END_OPEN, plus whatever the block name spans.
  let length = 1 + (parsed ? parsed.consumed : 0);

  // `]]` and then a newline are each optional and checked in that order.
  for (const optionalType of ["BLOCK_CLOSE", "NEWLINE"]) {
    if (ctx.tokens[pos + length]?.type === optionalType) {
      length += 1;
    }
  }

  return length;
}
131
+
132
/**
 * Parses a single `[[li ...]]...[[/li]]` list item, including its attributes
 * and body content.
 *
 * Body handling, in priority order for each token:
 * 1. `[[/li]]` or the parent's `[[/ul]]`/`[[/ol]]` ends the body.
 * 2. A nested `[[ul]]`/`[[ol]]` is parsed recursively and followed by a
 *    `line-break` element.
 * 3. Whitespace at the start of a line is dropped.
 * 4. Newlines: a single newline after some content yields a `line-break`
 *    (even right before `[[/li]]`); runs of newlines yield nothing.
 * 5. Block rules (minus `block-list`, to prevent infinite recursion), then
 *    inline rules, then a plain-text fallback.
 *
 * If `[[/li]]` is missing, an `unclosed-block` warning is pushed to
 * `ctx.diagnostics`. If it is present, any content that follows it up to the
 * next `[[li]]`, list close tag, nested list opener, or EOF is appended to
 * this same item, matching Wikidot's behaviour.
 *
 * @param ctx - Parse context.
 * @param startPos - Token index at the expected `[[li]]` open.
 * @param listType - The parent list type, used to detect the list-level
 *   closing tag (`[[/ul]]` or `[[/ol]]`).
 * @returns The parsed list item and the number of tokens consumed, or `null`
 *   when `startPos` does not begin a well-formed `[[li ...]]` tag.
 */
function parseLiItem(
  ctx: ParseContext,
  startPos: number,
  listType: ListBlockType,
): { item: ListItem | Element; consumed: number } | null {
  const liOpen = isLiOpen(ctx, startPos);
  if (!liOpen) return null;

  // `pos` is the single cursor; the consumed count is derived as `pos - startPos`.
  let pos = startPos + liOpen.consumed;

  const attrResult = parseAttributes(ctx, pos);
  pos += attrResult.consumed;

  // The opening tag must be terminated by `]]`.
  if (ctx.tokens[pos]?.type !== "BLOCK_CLOSE") {
    return null;
  }
  pos++;

  // A newline directly after `[[li ...]]` is swallowed without a line break.
  if (ctx.tokens[pos]?.type === "NEWLINE") {
    pos++;
  }

  const contentElements: Element[] = [];

  // Loop-invariant: every block rule except this one (avoids infinite recursion;
  // nested lists are handled explicitly via parseListBlock below).
  const liBlockRules = ctx.blockRules.filter((rule) => rule.name !== "block-list");

  while (pos < ctx.tokens.length) {
    const token = ctx.tokens[pos];
    if (!token || token.type === "EOF") break;

    // `[[/li]]` ends the body; `[[/ul]]`/`[[/ol]]` means the li was never closed.
    if (isLiClose(ctx, pos) || isListClose(ctx, pos, listType)) {
      break;
    }

    // Nested [[ul]] / [[ol]]
    const nestedListOpen = isNestedListOpen(ctx, pos);
    if (nestedListOpen) {
      const nestedResult = parseListBlock(ctx, pos, nestedListOpen.type);
      if (nestedResult) {
        // Keep the nested list in document order; Wikidot adds <br /> after it.
        contentElements.push(nestedResult.element, { element: "line-break" });
        pos += nestedResult.consumed;
        continue;
      }
      // A malformed opener falls through and is handled as ordinary content.
    }

    // Skip whitespace at the beginning of lines
    if (token.type === "WHITESPACE" && token.lineStart) {
      pos++;
      continue;
    }

    if (token.type === "NEWLINE") {
      pos++;
      let consecutiveNewlines = 1;
      while (ctx.tokens[pos]?.type === "NEWLINE") {
        pos++;
        consecutiveNewlines++;
      }
      // Skip indentation of the following line
      while (ctx.tokens[pos]?.type === "WHITESPACE" && ctx.tokens[pos]?.lineStart) {
        pos++;
      }
      // Wikidot behavior:
      // - single newline with content before it -> <br /> (even before [[/li]])
      // - multiple newlines (paragraph break) -> no <br />
      if (consecutiveNewlines === 1 && contentElements.length > 0) {
        contentElements.push({ element: "line-break" });
      }
      continue;
    }

    // Try block rules first (for div, etc. inside li)
    let matched = false;
    const blockCtx: ParseContext = { ...ctx, pos };
    for (const rule of liBlockRules) {
      if (!canApplyBlockRule(rule, token)) continue;
      const result = rule.parse(blockCtx);
      if (result.success) {
        contentElements.push(...result.elements);
        pos += result.consumed;
        matched = true;
        break;
      }
    }
    if (matched) continue;

    // Then inline rules
    const inlineCtx: ParseContext = { ...ctx, pos };
    for (const rule of ctx.inlineRules) {
      if (!rule.startTokens.includes(token.type)) continue;
      const result = rule.parse(inlineCtx);
      if (result.success) {
        contentElements.push(...result.elements);
        pos += result.consumed;
        matched = true;
        break;
      }
    }

    if (!matched) {
      // Fallback: emit the raw token as text so the loop always advances.
      contentElements.push({ element: "text", data: token.value });
      pos++;
    }
  }

  if (isLiClose(ctx, pos)) {
    pos += consumeCloseTag(ctx, pos);

    // Wikidot behavior: content after [[/li]] but before the next [[li]] or
    // [[/ul]]/[[/ol]] is included in the same <li> element.
    while (ctx.tokens[pos]?.type === "NEWLINE") {
      pos++;
    }
    while (ctx.tokens[pos]?.type === "WHITESPACE") {
      pos++;
    }

    while (pos < ctx.tokens.length) {
      const tok = ctx.tokens[pos];
      if (!tok || tok.type === "EOF") break;

      if (tok.type === "NEWLINE") {
        // Newlines in trailing content never produce line breaks: skip the
        // whole run plus the next line's indentation. The boundary and EOF
        // checks at the top of the loop then decide whether to stop.
        pos++;
        while (ctx.tokens[pos]?.type === "NEWLINE") {
          pos++;
        }
        while (ctx.tokens[pos]?.type === "WHITESPACE" && ctx.tokens[pos]?.lineStart) {
          pos++;
        }
        continue;
      }

      // Stop at the next item, the end of this list, or a nested list opener.
      if (isLiOpen(ctx, pos) || isListClose(ctx, pos, listType) || isNestedListOpen(ctx, pos)) {
        break;
      }

      // Trailing content is parsed with inline rules only.
      let matched = false;
      const inlineCtx: ParseContext = { ...ctx, pos };
      for (const rule of ctx.inlineRules) {
        if (!rule.startTokens.includes(tok.type)) continue;
        const result = rule.parse(inlineCtx);
        if (result.success) {
          contentElements.push(...result.elements);
          pos += result.consumed;
          matched = true;
          break;
        }
      }
      if (!matched) {
        contentElements.push({ element: "text", data: tok.value });
        pos++;
      }
    }
  } else {
    // Body ended at EOF or at the parent's close tag without a [[/li]].
    ctx.diagnostics.push({
      severity: "warning",
      code: "unclosed-block",
      message: "Missing closing tag [[/li]] for [[li]]",
      position: ctx.tokens[startPos]?.position ?? {
        start: { line: 0, column: 0, offset: 0 },
        end: { line: 0, column: 0, offset: 0 },
      },
    });
  }

  // Regular list item with content (may include nested lists as elements)
  return {
    item: {
      "item-type": "elements",
      attributes: attrResult.attrs,
      elements: contentElements,
    } as ListItem,
    consumed: pos - startPos,
  };
}
399
+
400
/**
 * Parses a complete `[[ul]]...[[/ul]]` or `[[ol]]...[[/ol]]` block,
 * including its attributes, child `[[li]]` items, bare content, and
 * nested sub-lists.
 *
 * Inside the list, whitespace and newlines between items are skipped. A
 * nested `[[ul]]`/`[[ol]]` that is not wrapped in `[[li]]` becomes a
 * `sub-list` item. Bare content (text without an enclosing `[[li]]`) is
 * collected with inline rules and wrapped in an item carrying the
 * `_noMarker` attribute, which the renderer translates to
 * `<li style="list-style: none">`. Blank lines split bare content into
 * paragraphs; when there is only a single paragraph its wrapper is removed
 * to match Wikidot's output.
 *
 * A token that looks like an item or list opener but fails to parse (for
 * example `[[li` without a closing `]]`) is treated as bare content, so the
 * parser always advances.
 *
 * If the closing tag is missing, an `unclosed-block` warning is pushed to
 * `ctx.diagnostics` and the list is still returned.
 *
 * @param ctx - Parse context.
 * @param startPos - Token index at the BLOCK_OPEN for `[[ul]]`/`[[ol]]`.
 * @param listType - Whether this is an unordered or ordered list.
 * @returns The list element and consumed token count, or `null` when the
 *   opening tag is malformed.
 */
function parseListBlock(
  ctx: ParseContext,
  startPos: number,
  listType: ListBlockType,
): { element: Element; consumed: number } | null {
  // `pos` is the single cursor; the consumed count is derived as `pos - startPos`.
  let pos = startPos;

  // [[
  if (ctx.tokens[pos]?.type !== "BLOCK_OPEN") return null;
  pos++;

  // ul / ol
  const nameResult = parseBlockName(ctx, pos);
  if (!nameResult || (nameResult.name !== "ul" && nameResult.name !== "ol")) {
    return null;
  }
  pos += nameResult.consumed;

  const attrResult = parseAttributes(ctx, pos);
  pos += attrResult.consumed;

  // ]]
  if (ctx.tokens[pos]?.type !== "BLOCK_CLOSE") {
    return null;
  }
  pos++;

  // A newline after the opening tag is consumed when present; the inline form
  // [[ul]][[li]]...[[/li]][[/ul]] on one line is accepted as well.
  if (ctx.tokens[pos]?.type === "NEWLINE") {
    pos++;
  }

  const items: ListItem[] = [];
  let foundListClose = false;

  /** True when `p` starts something that ends a run of bare content. */
  const atBareContentBoundary = (p: number): boolean =>
    isListClose(ctx, p, listType) || isLiOpen(ctx, p) !== null || isNestedListOpen(ctx, p) !== null;

  while (pos < ctx.tokens.length) {
    const token = ctx.tokens[pos];
    if (!token || token.type === "EOF") break;

    // [[/ul]] or [[/ol]]
    if (isListClose(ctx, pos, listType)) {
      foundListClose = true;
      pos += consumeCloseTag(ctx, pos);
      break;
    }

    // Whitespace and newlines between items carry no meaning.
    if (token.type === "WHITESPACE" || token.type === "NEWLINE") {
      pos++;
      continue;
    }

    // Nested [[ul]] or [[ol]] without a [[li]] wrapper
    const nestedListOpen = isNestedListOpen(ctx, pos);
    if (nestedListOpen) {
      const nestedResult = parseListBlock(ctx, pos, nestedListOpen.type);
      if (nestedResult && nestedResult.element.element === "list") {
        const listData = nestedResult.element.data as ListData;
        items.push({
          "item-type": "sub-list",
          element: "list",
          data: listData,
        });
        pos += nestedResult.consumed;
        continue;
      }
    }

    // [[li]] item
    const liResult = parseLiItem(ctx, pos, listType);
    if (liResult) {
      if ("item-type" in liResult.item) {
        items.push(liResult.item as ListItem);
      }
      pos += liResult.consumed;
      continue;
    }

    // Wikidot behavior: bare content inside [[ul]]/[[ol]] (without [[li]])
    // is wrapped in <li style="list-style: none">. Empty lines create
    // paragraph breaks within the bare content.
    const bareStart = pos;
    const paragraphs: Element[][] = [];
    let currentParagraph: Element[] = [];

    const flushParagraph = (): void => {
      // Trailing line-breaks are not part of the paragraph.
      while (currentParagraph[currentParagraph.length - 1]?.element === "line-break") {
        currentParagraph.pop();
      }
      if (currentParagraph.length > 0) {
        paragraphs.push(currentParagraph);
      }
      currentParagraph = [];
    };

    while (pos < ctx.tokens.length) {
      const tok = ctx.tokens[pos];
      if (!tok || tok.type === "EOF") break;

      if (tok.type === "NEWLINE") {
        pos++;
        let consecutiveNewlines = 1;
        while (ctx.tokens[pos]?.type === "NEWLINE") {
          pos++;
          consecutiveNewlines++;
        }
        // Skip indentation of the following line
        while (ctx.tokens[pos]?.type === "WHITESPACE" && ctx.tokens[pos]?.lineStart) {
          pos++;
        }
        // The newline run is dropped when the bare content ends here.
        if (atBareContentBoundary(pos)) {
          break;
        }
        if (consecutiveNewlines >= 2) {
          // Multiple newlines = paragraph break
          flushParagraph();
        } else if (currentParagraph.length > 0) {
          // Single newline = line break
          currentParagraph.push({ element: "line-break" });
        }
        continue;
      }

      // At `bareStart` the outer loop has already established that this token
      // is not a list close and could not be parsed as an item or nested list
      // (e.g. `[[li` without `]]`). Stopping here would consume nothing and
      // make the outer loop spin forever, so the token is taken as content.
      if (pos > bareStart && atBareContentBoundary(pos)) {
        break;
      }

      // Parse inline content
      let matched = false;
      const inlineCtx: ParseContext = { ...ctx, pos };
      for (const rule of ctx.inlineRules) {
        if (!rule.startTokens.includes(tok.type)) continue;
        const result = rule.parse(inlineCtx);
        if (result.success) {
          currentParagraph.push(...result.elements);
          pos += result.consumed;
          matched = true;
          break;
        }
      }
      if (!matched) {
        currentParagraph.push({ element: "text", data: tok.value });
        pos++;
      }
    }
    flushParagraph();

    if (paragraphs.length > 0) {
      // Wikidot behavior: a lone paragraph is unwrapped; <p> containers are
      // only used when there are several paragraphs.
      const [onlyParagraph] = paragraphs;
      const finalElements: Element[] =
        paragraphs.length === 1 && onlyParagraph
          ? onlyParagraph
          : paragraphs.map(
              (elements): Element => ({
                element: "container",
                data: {
                  type: "paragraph",
                  attributes: {},
                  elements,
                },
              }),
            );
      items.push({
        "item-type": "elements",
        attributes: { _noMarker: "true" }, // Flag for list-style: none
        elements: finalElements,
      });
    }
  }

  if (!foundListClose) {
    ctx.diagnostics.push({
      severity: "warning",
      code: "unclosed-block",
      message: `Missing closing tag [[/${listType}]] for [[${listType}]]`,
      position: ctx.tokens[startPos]?.position ?? {
        start: { line: 0, column: 0, offset: 0 },
        end: { line: 0, column: 0, offset: 0 },
      },
    });
  }

  const listData: ListData = {
    type: listType === "ol" ? "numbered" : "bullet",
    attributes: attrResult.attrs,
    items,
  };

  return {
    element: {
      element: "list",
      data: listData,
    },
    consumed: pos - startPos,
  };
}
650
+
651
/**
 * Block rule for Wikidot's explicit list syntax (`[[ul]]` / `[[ol]]`).
 *
 * `parse` succeeds only when the current token is a BLOCK_OPEN whose block
 * name is `"ul"` or `"ol"` and `parseListBlock()` accepts the opening tag.
 * The resulting list element is followed by a `line-break` element, matching
 * the `<br />` Wikidot renders after a block list.
 */
export const blockListRule: BlockRule = {
  name: "block-list",
  startTokens: ["BLOCK_OPEN"],
  requiresLineStart: false,

  parse(ctx: ParseContext): RuleResult<Element> {
    if (currentToken(ctx).type !== "BLOCK_OPEN") {
      return { success: false };
    }

    // Only [[ul ...]] and [[ol ...]] belong to this rule.
    const tagName = parseBlockName(ctx, ctx.pos + 1)?.name;
    if (tagName !== "ul" && tagName !== "ol") {
      return { success: false };
    }

    const parsed = parseListBlock(ctx, ctx.pos, tagName);
    if (!parsed) {
      return { success: false };
    }

    // Wikidot adds <br /> after block lists
    const trailingBreak: Element = { element: "line-break" };
    return {
      success: true,
      elements: [parsed.element, trailingBreak],
      consumed: parsed.consumed,
    };
  },
};