@wdprlib/parser 3.1.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/dist/index.cjs +295 -118
  2. package/dist/index.js +272 -95
  3. package/package.json +5 -3
  4. package/src/index.ts +163 -0
  5. package/src/lexer/index.ts +20 -0
  6. package/src/lexer/lexer.ts +687 -0
  7. package/src/lexer/tokens.ts +141 -0
  8. package/src/parser/constants.ts +173 -0
  9. package/src/parser/depth.ts +251 -0
  10. package/src/parser/index.ts +18 -0
  11. package/src/parser/parse.ts +315 -0
  12. package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
  13. package/src/parser/postprocess/index.ts +15 -0
  14. package/src/parser/postprocess/spanStrip.ts +697 -0
  15. package/src/parser/preprocess/expr.ts +265 -0
  16. package/src/parser/preprocess/index.ts +38 -0
  17. package/src/parser/preprocess/typography.ts +67 -0
  18. package/src/parser/preprocess/utils.ts +250 -0
  19. package/src/parser/preprocess/whitespace.ts +111 -0
  20. package/src/parser/rules/block/align.ts +282 -0
  21. package/src/parser/rules/block/bibliography.ts +359 -0
  22. package/src/parser/rules/block/block-list.ts +689 -0
  23. package/src/parser/rules/block/blockquote.ts +238 -0
  24. package/src/parser/rules/block/center.ts +87 -0
  25. package/src/parser/rules/block/clear-float.ts +75 -0
  26. package/src/parser/rules/block/code.ts +187 -0
  27. package/src/parser/rules/block/collapsible.ts +337 -0
  28. package/src/parser/rules/block/comment.ts +73 -0
  29. package/src/parser/rules/block/content-separator.ts +79 -0
  30. package/src/parser/rules/block/definition-list.ts +270 -0
  31. package/src/parser/rules/block/div.ts +400 -0
  32. package/src/parser/rules/block/embed-block.ts +153 -0
  33. package/src/parser/rules/block/footnoteblock.ts +200 -0
  34. package/src/parser/rules/block/heading.ts +142 -0
  35. package/src/parser/rules/block/horizontal-rule.ts +61 -0
  36. package/src/parser/rules/block/html.ts +222 -0
  37. package/src/parser/rules/block/iframe.ts +239 -0
  38. package/src/parser/rules/block/iftags.ts +150 -0
  39. package/src/parser/rules/block/include.ts +179 -0
  40. package/src/parser/rules/block/index.ts +127 -0
  41. package/src/parser/rules/block/list.ts +244 -0
  42. package/src/parser/rules/block/math.ts +183 -0
  43. package/src/parser/rules/block/module/backlinks/index.ts +31 -0
  44. package/src/parser/rules/block/module/backlinks/types.ts +21 -0
  45. package/src/parser/rules/block/module/categories/index.ts +34 -0
  46. package/src/parser/rules/block/module/categories/types.ts +21 -0
  47. package/src/parser/rules/block/module/css/index.ts +37 -0
  48. package/src/parser/rules/block/module/iftags/condition.ts +109 -0
  49. package/src/parser/rules/block/module/iftags/index.ts +26 -0
  50. package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
  51. package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
  52. package/src/parser/rules/block/module/iftags/types.ts +63 -0
  53. package/src/parser/rules/block/module/include/index.ts +20 -0
  54. package/src/parser/rules/block/module/include/resolve.ts +556 -0
  55. package/src/parser/rules/block/module/index.ts +122 -0
  56. package/src/parser/rules/block/module/join/index.ts +34 -0
  57. package/src/parser/rules/block/module/join/types.ts +23 -0
  58. package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
  59. package/src/parser/rules/block/module/listpages/extract.ts +410 -0
  60. package/src/parser/rules/block/module/listpages/index.ts +83 -0
  61. package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
  62. package/src/parser/rules/block/module/listpages/parser.ts +106 -0
  63. package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
  64. package/src/parser/rules/block/module/listpages/types.ts +513 -0
  65. package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
  66. package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
  67. package/src/parser/rules/block/module/listusers/extract.ts +45 -0
  68. package/src/parser/rules/block/module/listusers/index.ts +36 -0
  69. package/src/parser/rules/block/module/listusers/parser.ts +54 -0
  70. package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
  71. package/src/parser/rules/block/module/listusers/types.ts +93 -0
  72. package/src/parser/rules/block/module/mapping.ts +61 -0
  73. package/src/parser/rules/block/module/page-tree/index.ts +38 -0
  74. package/src/parser/rules/block/module/page-tree/types.ts +29 -0
  75. package/src/parser/rules/block/module/rate/index.ts +28 -0
  76. package/src/parser/rules/block/module/rate/types.ts +19 -0
  77. package/src/parser/rules/block/module/resolve.ts +411 -0
  78. package/src/parser/rules/block/module/types-common.ts +59 -0
  79. package/src/parser/rules/block/module/types.ts +61 -0
  80. package/src/parser/rules/block/module/utils.ts +43 -0
  81. package/src/parser/rules/block/module/walk.ts +380 -0
  82. package/src/parser/rules/block/module.ts +164 -0
  83. package/src/parser/rules/block/orphan-li.ts +177 -0
  84. package/src/parser/rules/block/paragraph.ts +157 -0
  85. package/src/parser/rules/block/table-block.ts +726 -0
  86. package/src/parser/rules/block/table.ts +441 -0
  87. package/src/parser/rules/block/tabview.ts +331 -0
  88. package/src/parser/rules/block/toc.ts +129 -0
  89. package/src/parser/rules/block/utils.ts +615 -0
  90. package/src/parser/rules/index.ts +49 -0
  91. package/src/parser/rules/inline/anchor-name.ts +154 -0
  92. package/src/parser/rules/inline/anchor.ts +327 -0
  93. package/src/parser/rules/inline/bibcite.ts +153 -0
  94. package/src/parser/rules/inline/bold.ts +86 -0
  95. package/src/parser/rules/inline/color.ts +140 -0
  96. package/src/parser/rules/inline/comment.ts +90 -0
  97. package/src/parser/rules/inline/equation-ref.ts +115 -0
  98. package/src/parser/rules/inline/expr.ts +526 -0
  99. package/src/parser/rules/inline/footnote.ts +223 -0
  100. package/src/parser/rules/inline/guillemet.ts +64 -0
  101. package/src/parser/rules/inline/html.ts +132 -0
  102. package/src/parser/rules/inline/image.ts +328 -0
  103. package/src/parser/rules/inline/index.ts +150 -0
  104. package/src/parser/rules/inline/italic.ts +74 -0
  105. package/src/parser/rules/inline/line-break.ts +326 -0
  106. package/src/parser/rules/inline/link-anchor.ts +147 -0
  107. package/src/parser/rules/inline/link-single.ts +164 -0
  108. package/src/parser/rules/inline/link-star.ts +134 -0
  109. package/src/parser/rules/inline/link-triple.ts +267 -0
  110. package/src/parser/rules/inline/math-inline.ts +126 -0
  111. package/src/parser/rules/inline/monospace.ts +78 -0
  112. package/src/parser/rules/inline/raw.ts +262 -0
  113. package/src/parser/rules/inline/size.ts +244 -0
  114. package/src/parser/rules/inline/span.ts +424 -0
  115. package/src/parser/rules/inline/strikethrough.ts +115 -0
  116. package/src/parser/rules/inline/subscript.ts +84 -0
  117. package/src/parser/rules/inline/superscript.ts +84 -0
  118. package/src/parser/rules/inline/text.ts +84 -0
  119. package/src/parser/rules/inline/underline.ts +127 -0
  120. package/src/parser/rules/inline/user.ts +147 -0
  121. package/src/parser/rules/inline/utils.ts +344 -0
  122. package/src/parser/rules/types.ts +252 -0
  123. package/src/parser/rules/utils.ts +155 -0
  124. package/src/parser/toc.ts +130 -0
@@ -0,0 +1,270 @@
1
+ /**
2
+ *
3
+ * Block rule for Wikidot definition lists written with the `: key : value` syntax.
4
+ *
5
+ * Each item starts at the beginning of a line with a COLON, followed by
6
+ * mandatory whitespace, the key (term), a second COLON, and then the value
7
+ * (definition). Multiple consecutive items form a single `<dl>` block.
8
+ *
9
+ * ```
10
+ * : Apple : A fruit that grows on trees.
11
+ * : Banana : A yellow curved fruit.
12
+ * ```
13
+ *
14
+ * Key parsing details:
15
+ * - Whitespace after the first colon is required (`": key"` not `":key"`).
16
+ * - The key portion supports inline markup (bold, links, etc.).
17
+ * - The value continues until a double newline, a new definition entry, or
18
+ * the end of the document.
19
+ * - A single newline within the value does NOT break the entry -- parsing
20
+ * continues on the next line.
21
+ *
22
+ * @module
23
+ */
24
+ import type { Element, DefinitionListItem } from "@wdprlib/ast";
25
+ import type { BlockRule, ParseContext, RuleResult } from "../types";
26
+ import { parseInlineUntil } from "../inline/utils";
27
+
28
+ /**
29
+ * Internal representation of one definition list item before conversion
30
+ * to the AST's {@link DefinitionListItem} format.
31
+ */
32
+ interface ParsedDefinitionItem {
33
+ /** Raw string of the key, used for `key_string` in the AST. */
34
+ keyString: string;
35
+ /** Parsed inline elements representing the key / term. */
36
+ key: Element[];
37
+ /** Parsed inline elements representing the value / definition. */
38
+ value: Element[];
39
+ }
40
+
41
+ /**
42
+ * Parses a single definition list entry of the form `: key : value`.
43
+ *
44
+ * The function expects `startPos` to point at a line-start COLON token.
45
+ * It consumes the first colon, mandatory whitespace, key tokens up to
46
+ * the second colon, then value tokens until one of the following:
47
+ * - A double newline (paragraph break).
48
+ * - A new entry (COLON at line start).
49
+ * - End of input.
50
+ *
51
+ * The key is parsed for inline content using {@link parseInlineUntil}
52
+ * (stopping at COLON), and the value uses the same utility (stopping at
53
+ * NEWLINE).
54
+ *
55
+ * @param ctx - Parse context.
56
+ * @param startPos - Token index of the expected line-start COLON.
57
+ * @returns The parsed item and token count, or `null` on failure.
58
+ */
59
+ function parseDefinitionItem(
60
+ ctx: ParseContext,
61
+ startPos: number,
62
+ ): { item: ParsedDefinitionItem; consumed: number } | null {
63
+ let pos = startPos;
64
+ let consumed = 0;
65
+
66
+ // Expect COLON at line start
67
+ const colonToken = ctx.tokens[pos];
68
+ if (!colonToken || colonToken.type !== "COLON" || !colonToken.lineStart) {
69
+ return null;
70
+ }
71
+ pos++;
72
+ consumed++;
73
+
74
+ // Wikidot requires whitespace after first colon: ": key : value"
75
+ const whitespaceAfterColon = ctx.tokens[pos];
76
+ if (!whitespaceAfterColon || whitespaceAfterColon.type !== "WHITESPACE") {
77
+ return null;
78
+ }
79
+
80
+ // Skip whitespace after first colon
81
+ while (ctx.tokens[pos]?.type === "WHITESPACE") {
82
+ pos++;
83
+ consumed++;
84
+ }
85
+
86
+ // Collect key tokens until second COLON
87
+ const keyTokens: string[] = [];
88
+ const keyNodes: Element[] = [];
89
+ let foundSecondColon = false;
90
+
91
+ while (pos < ctx.tokens.length) {
92
+ const token = ctx.tokens[pos];
93
+ if (!token || token.type === "NEWLINE" || token.type === "EOF") {
94
+ break;
95
+ }
96
+ if (token.type === "COLON") {
97
+ foundSecondColon = true;
98
+ pos++;
99
+ consumed++;
100
+ break;
101
+ }
102
+
103
+ // Parse inline content for key
104
+ const inlineCtx: ParseContext = { ...ctx, pos };
105
+ const result = parseInlineUntil(inlineCtx, "COLON");
106
+ if (result.elements.length > 0) {
107
+ keyNodes.push(...result.elements);
108
+ // Collect raw key string
109
+ for (let i = 0; i < result.consumed; i++) {
110
+ const t = ctx.tokens[pos + i];
111
+ if (t) keyTokens.push(t.value);
112
+ }
113
+ pos += result.consumed;
114
+ consumed += result.consumed;
115
+ } else {
116
+ keyTokens.push(token.value);
117
+ pos++;
118
+ consumed++;
119
+ }
120
+ }
121
+
122
+ if (!foundSecondColon) {
123
+ return null;
124
+ }
125
+
126
+ // Skip whitespace after second colon
127
+ while (ctx.tokens[pos]?.type === "WHITESPACE") {
128
+ pos++;
129
+ consumed++;
130
+ }
131
+
132
+ // Parse value (rest of line, can continue with line breaks)
133
+ const valueNodes: Element[] = [];
134
+ while (pos < ctx.tokens.length) {
135
+ const token = ctx.tokens[pos];
136
+ if (!token || token.type === "EOF") {
137
+ break;
138
+ }
139
+
140
+ // Check for end of definition (double newline or new definition)
141
+ if (token.type === "NEWLINE") {
142
+ const nextToken = ctx.tokens[pos + 1];
143
+ // Look ahead for continuation with underscore line break
144
+ if (nextToken?.type === "COLON" && nextToken.lineStart) {
145
+ // New definition item starts
146
+ pos++;
147
+ consumed++;
148
+ break;
149
+ }
150
+ if (nextToken?.type === "NEWLINE" || !nextToken || nextToken.type === "EOF") {
151
+ // Double newline or end - stop
152
+ pos++;
153
+ consumed++;
154
+ break;
155
+ }
156
+ // Single newline - continue parsing (becomes line break)
157
+ }
158
+
159
+ // Parse inline content
160
+ const inlineCtx: ParseContext = { ...ctx, pos };
161
+ const result = parseInlineUntil(inlineCtx, "NEWLINE");
162
+ if (result.elements.length > 0) {
163
+ valueNodes.push(...result.elements);
164
+ pos += result.consumed;
165
+ consumed += result.consumed;
166
+ } else {
167
+ pos++;
168
+ consumed++;
169
+ }
170
+ }
171
+
172
+ // Remove trailing whitespace from key
173
+ const keyString = keyTokens.join("").trim();
174
+
175
+ // Remove trailing whitespace nodes from key
176
+ while (keyNodes.length > 0) {
177
+ const lastNode = keyNodes[keyNodes.length - 1];
178
+ if (
179
+ lastNode &&
180
+ lastNode.element === "text" &&
181
+ typeof lastNode.data === "string" &&
182
+ lastNode.data.trim() === ""
183
+ ) {
184
+ keyNodes.pop();
185
+ } else {
186
+ break;
187
+ }
188
+ }
189
+
190
+ return {
191
+ item: {
192
+ keyString,
193
+ key: keyNodes,
194
+ value: valueNodes,
195
+ },
196
+ consumed,
197
+ };
198
+ }
199
+
200
+ /**
201
+ * Block rule for Wikidot definition lists (`: key : value`).
202
+ *
203
+ * Parsing strategy:
204
+ * 1. Verify the first token is a line-start COLON.
205
+ * 2. Repeatedly call `parseDefinitionItem()` to collect entries.
206
+ * 3. Stop when the current token is no longer a line-start COLON (i.e.
207
+ * the definition list block has ended).
208
+ * 4. Convert internal items into the AST {@link DefinitionListItem} format.
209
+ * 5. Emit a single `definition-list` element.
210
+ */
211
+ export const definitionListRule: BlockRule = {
212
+ name: "definitionList",
213
+ startTokens: ["COLON"],
214
+ requiresLineStart: true,
215
+
216
+ parse(ctx: ParseContext): RuleResult<Element> {
217
+ const items: ParsedDefinitionItem[] = [];
218
+ let pos = ctx.pos;
219
+ let totalConsumed = 0;
220
+
221
+ while (pos < ctx.tokens.length) {
222
+ const token = ctx.tokens[pos];
223
+ if (!token || token.type === "EOF") {
224
+ break;
225
+ }
226
+
227
+ // Only parse lines starting with :
228
+ if (token.type !== "COLON" || !token.lineStart) {
229
+ break;
230
+ }
231
+
232
+ const result = parseDefinitionItem(ctx, pos);
233
+ if (!result) {
234
+ break;
235
+ }
236
+
237
+ items.push(result.item);
238
+ pos += result.consumed;
239
+ totalConsumed += result.consumed;
240
+
241
+ // Skip any whitespace between items
242
+ while (ctx.tokens[pos]?.type === "WHITESPACE") {
243
+ pos++;
244
+ totalConsumed++;
245
+ }
246
+ }
247
+
248
+ if (items.length === 0) {
249
+ return { success: false };
250
+ }
251
+
252
+ // Convert items to DefinitionListItem format
253
+ const definitionItems: DefinitionListItem[] = items.map((item) => ({
254
+ key_string: item.keyString,
255
+ key: item.key,
256
+ value: item.value,
257
+ }));
258
+
259
+ return {
260
+ success: true,
261
+ elements: [
262
+ {
263
+ element: "definition-list",
264
+ data: definitionItems,
265
+ },
266
+ ],
267
+ consumed: totalConsumed,
268
+ };
269
+ },
270
+ };
@@ -0,0 +1,400 @@
1
+ /**
2
+ *
3
+ * Block rule for Wikidot `[[div]]` and `[[div_]]` container blocks.
4
+ *
5
+ * `[[div]]` wraps its body content in a `<div>` element, with full
6
+ * paragraph processing for the body. `[[div_]]` (paragraph strip mode)
7
+ * unwraps the first and last paragraphs so their content appears directly
8
+ * inside the `<div>`, while middle paragraphs keep their `<p>` wrappers.
9
+ *
10
+ * Both variants accept HTML attributes (class, style, id, etc.) on the
11
+ * opening tag.
12
+ *
13
+ * Wikidot-specific edge cases:
14
+ * - The opening `]]` MUST be followed by a NEWLINE for the block to be
15
+ * recognised. `[[div]]inline[[/div]]` is NOT a valid div -- it becomes
16
+ * a failed div (see `consumeFailedDiv()`).
17
+ * - When a div fails, everything from the opening `[[div]]` through the
18
+ * last `[[/div]]` is collected as a single paragraph of text/line-break
19
+ * elements. Blank lines within that span are silently removed.
20
+ * - `[[div_]]` uses `unwrapEdgeParagraphs()` to strip paragraph
21
+ * wrappers from the first and last elements.
22
+ *
23
+ * @module
24
+ */
25
+ import type { Element } from "@wdprlib/ast";
26
+ import type { BlockRule, ParseContext, RuleResult } from "../types";
27
+ import { currentToken } from "../types";
28
+ import { parseBlockName, parseAttributes, parseBlocksUntil } from "./utils";
29
+
/**
 * Block rule for `[[div]]` and `[[div_]]` containers.
 *
 * `requiresLineStart` is `false` because a nested `[[div_]]` inside another
 * `[[div_]]` may appear after inline content.
 *
 * Outcomes of `parse`:
 * - Not `[[div ...]]` / `[[div_ ...]]`, or no `]]` after the attributes:
 *   failure.
 * - `]]` not followed by a NEWLINE: delegated to `consumeFailedDiv()`.
 * - `ctx.scope.divClosesBudget` is `0`: failure, so the excess opening tag
 *   is left for other rules.
 * - Otherwise the body is parsed up to `[[/div]]` and wrapped in a
 *   `container` element of type `div`. A missing close tag only produces an
 *   `unclosed-block` warning; the container is still emitted.
 */
export const divRule: BlockRule = {
  name: "div",
  startTokens: ["BLOCK_OPEN"],
  requiresLineStart: false, // Allow nested [[div_]] inside [[div_]]

  parse(ctx: ParseContext): RuleResult<Element> {
    const opener = currentToken(ctx);
    if (opener.type !== "BLOCK_OPEN") {
      return { success: false };
    }

    const { tokens } = ctx;
    // `cursor` tracks the next unread token; the consumed count reported at
    // the end is its distance from `ctx.pos`.
    let cursor = ctx.pos + 1;

    // Block name: only `div` and `div_` belong to this rule.
    const tagName = parseBlockName(ctx, cursor);
    if (!tagName) {
      return { success: false };
    }
    const blockName = tagName.name;
    // `div_` selects paragraph-strip mode.
    const stripEdges = blockName === "div_";
    if (blockName !== "div" && !stripEdges) {
      return { success: false };
    }
    cursor += tagName.consumed;

    const attrResult = parseAttributes(ctx, cursor);
    cursor += attrResult.consumed;

    // The opening tag has to end with `]]`.
    if (tokens[cursor]?.type !== "BLOCK_CLOSE") {
      return { success: false };
    }
    cursor++;

    // Wikidot only treats [[div]] as a block when `]]` is followed by a
    // newline. `[[div]]inline[[/div]]` is not a div: everything up to the
    // last [[/div]] is consumed as text in a single paragraph.
    if (tokens[cursor]?.type !== "NEWLINE") {
      return consumeFailedDiv(ctx);
    }

    // Wikidot pairs [[div]]/[[/div]] from the outside in, so when there are
    // more opens than closes the innermost excess opens become text. The
    // "closes budget" is the number of further nested divs allowed to open;
    // at 0 this div may not open.
    const inheritedBudget = ctx.scope.divClosesBudget;
    if (inheritedBudget === 0) {
      return { success: false };
    }
    cursor++;

    // Budget for the body: one less than the inherited budget, or - at the
    // outermost div - the number of [[/div]] tags between the body start
    // and the end of the token stream minus the one closing this div.
    const bodyBudget =
      inheritedBudget !== undefined
        ? inheritedBudget - 1
        : Math.max(countDivCloses(ctx, cursor) - 1, 0);

    // The body ends where a `[[/div` close tag starts.
    const atDivClose = (probe: ParseContext): boolean =>
      probe.tokens[probe.pos]?.type === "BLOCK_END_OPEN" &&
      parseBlockName(probe, probe.pos + 1)?.name === "div";

    const body = parseBlocksUntil(
      {
        ...ctx,
        pos: cursor,
        scope: { ...ctx.scope, divClosesBudget: bodyBudget },
      },
      atDivClose,
    );
    cursor += body.consumed;

    // Both variants parse the body as blocks; `div_` additionally unwraps
    // the first and last paragraph so only middle blocks keep their <p>.
    const children = stripEdges ? unwrapEdgeParagraphs(body.elements) : body.elements;

    if (tokens[cursor]?.type === "BLOCK_END_OPEN") {
      // Consume `[[/`, the name, `]]` and one trailing newline, each only
      // if actually present.
      cursor++;
      const closeName = parseBlockName(ctx, cursor);
      if (closeName) {
        cursor += closeName.consumed;
      }
      if (tokens[cursor]?.type === "BLOCK_CLOSE") {
        cursor++;
      }
      if (tokens[cursor]?.type === "NEWLINE") {
        cursor++;
      }
    } else {
      // No close tag where the body stopped: warn at the opening tag.
      ctx.diagnostics.push({
        severity: "warning",
        code: "unclosed-block",
        message: `Missing closing tag [[/div]] for [[${blockName}]]`,
        position: opener.position,
      });
    }

    return {
      success: true,
      elements: [
        {
          element: "container",
          data: {
            type: "div",
            attributes: attrResult.attrs,
            elements: children,
          },
        },
      ],
      consumed: cursor - ctx.pos,
    };
  },
};
191
+
/**
 * Counts the `[[/div]]` close tags found between `startPos` and the end of
 * the token stream (the first EOF token or the end of the array).
 *
 * A close tag is a BLOCK_END_OPEN token whose following block name, as read
 * by `parseBlockName`, is `div`. The result feeds the nesting budget for div
 * blocks.
 *
 * @param ctx - Parse context supplying the tokens.
 * @param startPos - Index of the first token to inspect.
 * @returns Number of `[[/div]]` tags seen.
 */
function countDivCloses(ctx: ParseContext, startPos: number): number {
  let closes = 0;
  let index = startPos;
  while (index < ctx.tokens.length) {
    const token = ctx.tokens[index];
    if (!token || token.type === "EOF") {
      break;
    }
    if (token.type === "BLOCK_END_OPEN" && parseBlockName(ctx, index + 1)?.name === "div") {
      closes += 1;
    }
    index += 1;
  }
  return closes;
}
210
+
/**
 * Fallback for a `[[div]]` whose closing `]]` is not followed by a NEWLINE
 * and which therefore is not a block element.
 *
 * Works in three phases:
 *
 * 1. Scan forward from `ctx.pos` and remember the LAST `[[/div]]`. The scan
 *    stops at EOF, or early at a later line-start `[[div]]`/`[[div_]]` that
 *    is well-formed (`]]` followed by NEWLINE or EOF), so that such a div is
 *    parsed as its own block instead of being absorbed as text.
 * 2. Push an `inline-block-element` error for every `[[div]]`/`[[div_]]`
 *    opening tag located before that close tag (the one at `ctx.pos`
 *    included), provided the token carries a position.
 * 3. Flatten every token from `ctx.pos` through the close tag into one
 *    paragraph: blank lines are dropped, a single newline becomes a
 *    `line-break`, and any other token becomes a `text` element holding its
 *    raw value. One NEWLINE directly after the close tag is consumed too.
 *
 * @param ctx - Parse context, positioned at the opening `[[div...]]` tag.
 * @returns A paragraph container of text/line-break elements, or failure
 *   when no `[[/div]]` was found.
 */
function consumeFailedDiv(ctx: ParseContext): RuleResult<Element> {
  const { tokens } = ctx;
  const origin = ctx.pos;

  // Phase 1: locate the last [[/div]] and measure how many tokens it spans.
  let closeAt = -1;
  let closeLength = 0;

  for (let i = origin; i < tokens.length; i++) {
    const tok = tokens[i];
    if (!tok || tok.type === "EOF") break;

    // A later, well-formed div opening ends the scan (the failed div at
    // `origin` itself is skipped).
    if (tok.type === "BLOCK_OPEN" && tok.lineStart && i > origin) {
      const opened = parseBlockName(ctx, i + 1);
      if (opened?.name === "div" || opened?.name === "div_") {
        const afterName = i + 1 + opened.consumed;
        const afterAttrs = afterName + parseAttributes(ctx, afterName).consumed;
        if (tokens[afterAttrs]?.type === "BLOCK_CLOSE") {
          const follower = tokens[afterAttrs + 1]?.type;
          if (follower === "NEWLINE" || follower === "EOF") {
            break;
          }
        }
      }
    }

    if (tok.type === "BLOCK_END_OPEN") {
      const closed = parseBlockName(ctx, i + 1);
      if (closed?.name === "div") {
        closeAt = i;
        // `[[/` plus the name tokens, plus `]]` when it is present.
        const hasBlockClose = tokens[i + 1 + closed.consumed]?.type === "BLOCK_CLOSE";
        closeLength = 1 + closed.consumed + (hasBlockClose ? 1 : 0);
      }
    }
  }

  if (closeAt === -1) {
    // No [[/div]] at all: fall back to a plain rule failure.
    return { success: false };
  }

  // Phase 2: report every div opening tag that is about to become text.
  for (let i = origin; i < closeAt; i++) {
    const tok = tokens[i];
    if (!tok || tok.type !== "BLOCK_OPEN") continue;

    const opened = parseBlockName(ctx, i + 1);
    if (opened?.name === "div" || opened?.name === "div_") {
      if (tok.position) {
        ctx.diagnostics.push({
          severity: "error",
          code: "inline-block-element",
          message: `[[${opened.name}]] must be followed by a newline to be a block element`,
          position: tok.position,
        });
      }
    }
  }

  // Phase 3: flatten the range into text and line-break elements.
  const pieces: Element[] = [];
  const stopAt = closeAt + closeLength;
  let cursor = origin;

  const isNewlineOrWhitespace = (at: number): boolean => {
    const kind = tokens[at]?.type;
    return kind === "NEWLINE" || kind === "WHITESPACE";
  };

  while (cursor < stopAt && cursor < tokens.length) {
    const tok = tokens[cursor];
    if (!tok || tok.type === "EOF") break;

    if (tok.type !== "NEWLINE") {
      pieces.push({ element: "text", data: tok.value });
      cursor++;
      continue;
    }

    // A newline followed (ignoring whitespace) by another newline is a
    // blank line: drop the whole run of newlines and whitespace.
    let peek = cursor + 1;
    while (tokens[peek]?.type === "WHITESPACE") peek++;
    if (tokens[peek]?.type === "NEWLINE") {
      while (isNewlineOrWhitespace(cursor)) cursor++;
    } else {
      // Single newline -> line-break.
      pieces.push({ element: "line-break" });
      cursor++;
    }
  }

  // Swallow one newline directly after [[/div]], if there is one.
  if (tokens[cursor]?.type === "NEWLINE") {
    cursor++;
  }

  return {
    success: true,
    elements: [
      {
        element: "container",
        data: {
          type: "paragraph",
          attributes: {},
          elements: pieces,
        },
      },
    ],
    consumed: cursor - origin,
  };
}
351
+
/**
 * An element that `isParagraphContainer()` has verified to be a paragraph
 * container with an array of child elements.
 */
type ParagraphContainer = Element & {
  element: "container";
  data: { type: "paragraph"; elements: Element[] };
};

/**
 * Implements the `[[div_]]` paragraph-strip behaviour.
 *
 * If the first element is a paragraph container it is replaced by its
 * children. The same is then done for whatever element is last AFTER that
 * replacement. Everything in between is left untouched, so middle
 * paragraphs keep their wrappers. The input array is never mutated: an
 * empty input is returned as-is, any other input yields a new array.
 *
 * @param elements - Block elements produced by body parsing.
 * @returns The elements with edge paragraphs unwrapped.
 */
function unwrapEdgeParagraphs(elements: Element[]): Element[] {
  if (elements.length === 0) return elements;

  const result = [...elements];

  const first = result[0];
  if (isParagraphContainer(first)) {
    result.splice(0, 1, ...first.data.elements);
  }

  // Recompute the last index: the splice above may have changed the length
  // (down to zero, in which case the lookup yields `undefined`).
  const lastIdx = result.length - 1;
  const last = result[lastIdx];
  if (isParagraphContainer(last)) {
    result.splice(lastIdx, 1, ...last.data.elements);
  }

  return result;
}

/**
 * Checks whether an element is a paragraph container, i.e.
 * `{ element: "container", data: { type: "paragraph", elements: [...] } }`.
 *
 * `data.elements` must be an array for the check to pass, so callers can
 * spread the children without a cast and without risking a `TypeError` on
 * a malformed node.
 *
 * @param el - Element to test, or `undefined`.
 * @returns `true` if the element is a paragraph container with a child array.
 */
function isParagraphContainer(el: Element | undefined): el is ParagraphContainer {
  if (el === undefined || el.element !== "container") {
    return false;
  }
  // Inspect the payload as `unknown` so every property is checked at runtime.
  const data: unknown = el.data;
  return (
    typeof data === "object" &&
    data !== null &&
    "type" in data &&
    data.type === "paragraph" &&
    "elements" in data &&
    Array.isArray(data.elements)
  );
}