@wdprlib/parser 3.1.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/dist/index.cjs +295 -118
  2. package/dist/index.js +272 -95
  3. package/package.json +5 -3
  4. package/src/index.ts +163 -0
  5. package/src/lexer/index.ts +20 -0
  6. package/src/lexer/lexer.ts +687 -0
  7. package/src/lexer/tokens.ts +141 -0
  8. package/src/parser/constants.ts +173 -0
  9. package/src/parser/depth.ts +251 -0
  10. package/src/parser/index.ts +18 -0
  11. package/src/parser/parse.ts +315 -0
  12. package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
  13. package/src/parser/postprocess/index.ts +15 -0
  14. package/src/parser/postprocess/spanStrip.ts +697 -0
  15. package/src/parser/preprocess/expr.ts +265 -0
  16. package/src/parser/preprocess/index.ts +38 -0
  17. package/src/parser/preprocess/typography.ts +67 -0
  18. package/src/parser/preprocess/utils.ts +250 -0
  19. package/src/parser/preprocess/whitespace.ts +111 -0
  20. package/src/parser/rules/block/align.ts +282 -0
  21. package/src/parser/rules/block/bibliography.ts +359 -0
  22. package/src/parser/rules/block/block-list.ts +689 -0
  23. package/src/parser/rules/block/blockquote.ts +238 -0
  24. package/src/parser/rules/block/center.ts +87 -0
  25. package/src/parser/rules/block/clear-float.ts +75 -0
  26. package/src/parser/rules/block/code.ts +187 -0
  27. package/src/parser/rules/block/collapsible.ts +337 -0
  28. package/src/parser/rules/block/comment.ts +73 -0
  29. package/src/parser/rules/block/content-separator.ts +79 -0
  30. package/src/parser/rules/block/definition-list.ts +270 -0
  31. package/src/parser/rules/block/div.ts +400 -0
  32. package/src/parser/rules/block/embed-block.ts +153 -0
  33. package/src/parser/rules/block/footnoteblock.ts +200 -0
  34. package/src/parser/rules/block/heading.ts +142 -0
  35. package/src/parser/rules/block/horizontal-rule.ts +61 -0
  36. package/src/parser/rules/block/html.ts +222 -0
  37. package/src/parser/rules/block/iframe.ts +239 -0
  38. package/src/parser/rules/block/iftags.ts +150 -0
  39. package/src/parser/rules/block/include.ts +179 -0
  40. package/src/parser/rules/block/index.ts +127 -0
  41. package/src/parser/rules/block/list.ts +244 -0
  42. package/src/parser/rules/block/math.ts +183 -0
  43. package/src/parser/rules/block/module/backlinks/index.ts +31 -0
  44. package/src/parser/rules/block/module/backlinks/types.ts +21 -0
  45. package/src/parser/rules/block/module/categories/index.ts +34 -0
  46. package/src/parser/rules/block/module/categories/types.ts +21 -0
  47. package/src/parser/rules/block/module/css/index.ts +37 -0
  48. package/src/parser/rules/block/module/iftags/condition.ts +109 -0
  49. package/src/parser/rules/block/module/iftags/index.ts +26 -0
  50. package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
  51. package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
  52. package/src/parser/rules/block/module/iftags/types.ts +63 -0
  53. package/src/parser/rules/block/module/include/index.ts +20 -0
  54. package/src/parser/rules/block/module/include/resolve.ts +556 -0
  55. package/src/parser/rules/block/module/index.ts +122 -0
  56. package/src/parser/rules/block/module/join/index.ts +34 -0
  57. package/src/parser/rules/block/module/join/types.ts +23 -0
  58. package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
  59. package/src/parser/rules/block/module/listpages/extract.ts +410 -0
  60. package/src/parser/rules/block/module/listpages/index.ts +83 -0
  61. package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
  62. package/src/parser/rules/block/module/listpages/parser.ts +106 -0
  63. package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
  64. package/src/parser/rules/block/module/listpages/types.ts +513 -0
  65. package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
  66. package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
  67. package/src/parser/rules/block/module/listusers/extract.ts +45 -0
  68. package/src/parser/rules/block/module/listusers/index.ts +36 -0
  69. package/src/parser/rules/block/module/listusers/parser.ts +54 -0
  70. package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
  71. package/src/parser/rules/block/module/listusers/types.ts +93 -0
  72. package/src/parser/rules/block/module/mapping.ts +61 -0
  73. package/src/parser/rules/block/module/page-tree/index.ts +38 -0
  74. package/src/parser/rules/block/module/page-tree/types.ts +29 -0
  75. package/src/parser/rules/block/module/rate/index.ts +28 -0
  76. package/src/parser/rules/block/module/rate/types.ts +19 -0
  77. package/src/parser/rules/block/module/resolve.ts +411 -0
  78. package/src/parser/rules/block/module/types-common.ts +59 -0
  79. package/src/parser/rules/block/module/types.ts +61 -0
  80. package/src/parser/rules/block/module/utils.ts +43 -0
  81. package/src/parser/rules/block/module/walk.ts +380 -0
  82. package/src/parser/rules/block/module.ts +164 -0
  83. package/src/parser/rules/block/orphan-li.ts +177 -0
  84. package/src/parser/rules/block/paragraph.ts +157 -0
  85. package/src/parser/rules/block/table-block.ts +726 -0
  86. package/src/parser/rules/block/table.ts +441 -0
  87. package/src/parser/rules/block/tabview.ts +331 -0
  88. package/src/parser/rules/block/toc.ts +129 -0
  89. package/src/parser/rules/block/utils.ts +615 -0
  90. package/src/parser/rules/index.ts +49 -0
  91. package/src/parser/rules/inline/anchor-name.ts +154 -0
  92. package/src/parser/rules/inline/anchor.ts +327 -0
  93. package/src/parser/rules/inline/bibcite.ts +153 -0
  94. package/src/parser/rules/inline/bold.ts +86 -0
  95. package/src/parser/rules/inline/color.ts +140 -0
  96. package/src/parser/rules/inline/comment.ts +90 -0
  97. package/src/parser/rules/inline/equation-ref.ts +115 -0
  98. package/src/parser/rules/inline/expr.ts +526 -0
  99. package/src/parser/rules/inline/footnote.ts +223 -0
  100. package/src/parser/rules/inline/guillemet.ts +64 -0
  101. package/src/parser/rules/inline/html.ts +132 -0
  102. package/src/parser/rules/inline/image.ts +328 -0
  103. package/src/parser/rules/inline/index.ts +150 -0
  104. package/src/parser/rules/inline/italic.ts +74 -0
  105. package/src/parser/rules/inline/line-break.ts +326 -0
  106. package/src/parser/rules/inline/link-anchor.ts +147 -0
  107. package/src/parser/rules/inline/link-single.ts +164 -0
  108. package/src/parser/rules/inline/link-star.ts +134 -0
  109. package/src/parser/rules/inline/link-triple.ts +267 -0
  110. package/src/parser/rules/inline/math-inline.ts +126 -0
  111. package/src/parser/rules/inline/monospace.ts +78 -0
  112. package/src/parser/rules/inline/raw.ts +262 -0
  113. package/src/parser/rules/inline/size.ts +244 -0
  114. package/src/parser/rules/inline/span.ts +424 -0
  115. package/src/parser/rules/inline/strikethrough.ts +115 -0
  116. package/src/parser/rules/inline/subscript.ts +84 -0
  117. package/src/parser/rules/inline/superscript.ts +84 -0
  118. package/src/parser/rules/inline/text.ts +84 -0
  119. package/src/parser/rules/inline/underline.ts +127 -0
  120. package/src/parser/rules/inline/user.ts +147 -0
  121. package/src/parser/rules/inline/utils.ts +344 -0
  122. package/src/parser/rules/types.ts +252 -0
  123. package/src/parser/rules/utils.ts +155 -0
  124. package/src/parser/toc.ts +130 -0
@@ -0,0 +1,238 @@
1
+ /**
2
+ *
3
+ * Block rule for Wikidot-style blockquotes using `>` markers.
4
+ *
5
+ * Wikidot blockquotes are written with one or more `>` characters at the
6
+ * start of a line, followed by a mandatory space and then the content:
7
+ *
8
+ * ```
9
+ * > First level
10
+ * >> Second level
11
+ * > Back to first
12
+ * ```
13
+ *
14
+ * Key behaviours:
15
+ * - The depth is determined by the number of consecutive `>` characters.
16
+ * - A space after the `>` markers is required; lines like `>No space` are
17
+ * consumed but silently discarded from output.
18
+ * - An empty line (just `> `) within the same depth acts as a paragraph
19
+ * separator inside the blockquote.
20
+ * - Nesting is handled by the generic {@link processDepths} utility, which
21
+ * converts flat depth-annotated rows into a recursive tree structure.
22
+ * - Maximum depth is capped at `MAX_BLOCKQUOTE_DEPTH` (30) to guard
23
+ * against pathological input.
24
+ *
25
+ * @module
26
+ */
27
+ import type { Element } from "@wdprlib/ast";
28
+ import type { BlockRule, ParseContext, RuleResult } from "../types";
29
+ import { currentToken } from "../types";
30
+ import { parseInlineUntil } from "../inline/utils";
31
+ import { processDepths, type DepthList } from "../../depth";
32
+
33
+ /**
34
+ * Safety limit for blockquote nesting depth.
35
+ * A marker deeper than this stops blockquote collection at that line,
36
+ * guarding against stack issues on deeply nested or malicious input.
37
+ */
38
+ const MAX_BLOCKQUOTE_DEPTH = 30;
39
+
/**
 * Block rule for `>`-prefixed blockquotes.
 *
 * How a match is processed:
 * 1. Walk consecutive lines whose first token is a line-start
 *    BLOCKQUOTE_MARKER.
 * 2. The marker's length gives the nesting level. A marker longer than
 *    `MAX_BLOCKQUOTE_DEPTH` stops the walk and leaves that line untouched.
 * 3. A marker that is not followed by WHITESPACE discards the whole line:
 *    its tokens (and the NEWLINE) are consumed and nothing is recorded.
 * 4. Otherwise the rest of the line is parsed as inline content and recorded
 *    with a zero-based depth.
 * 5. The flat rows go through `processDepths()`, and every resulting tree is
 *    converted to a blockquote container by `buildBlockquoteElement()`.
 *
 * When only discarded lines were seen, the rule succeeds with an empty
 * element list and the number of tokens it consumed.
 */
export const blockquoteRule: BlockRule = {
  name: "blockquote",
  startTokens: ["BLOCKQUOTE_MARKER"],
  requiresLineStart: true,

  parse(ctx: ParseContext): RuleResult<Element> {
    type QuoteLine = { elements: Element[]; hasLineBreak: boolean };

    if (!currentToken(ctx).lineStart) {
      return { success: false };
    }

    const { tokens } = ctx;
    const rows: Array<{ depth: number; ltype: null; value: QuoteLine }> = [];

    // Every token this rule consumes moves the cursor forward by one, so the
    // consumed count is always the distance from the starting position.
    let cursor = ctx.pos;

    while (cursor < tokens.length) {
      const marker = tokens[cursor];
      if (!marker || !marker.lineStart || marker.type !== "BLOCKQUOTE_MARKER") {
        break;
      }

      // The marker text is ">", ">>", ">>>", ... so its length is the level.
      const level = marker.value.length;
      if (level > MAX_BLOCKQUOTE_DEPTH) {
        break;
      }

      // Step past the marker itself.
      cursor += 1;

      // A space is mandatory after the marker; without it the line is
      // swallowed up to and including its NEWLINE and produces no row.
      if (tokens[cursor]?.type !== "WHITESPACE") {
        while (cursor < tokens.length && tokens[cursor]?.type !== "NEWLINE") {
          cursor += 1;
        }
        if (tokens[cursor]?.type === "NEWLINE") {
          cursor += 1;
        }
        continue;
      }

      // Drop all whitespace separating the marker from the content.
      while (tokens[cursor]?.type === "WHITESPACE") {
        cursor += 1;
      }

      // Inline content runs up to the NEWLINE token.
      const lineCtx: ParseContext = { ...ctx, pos: cursor };
      const inline = parseInlineUntil(lineCtx, "NEWLINE");
      const lineElements: Element[] = inline.elements;
      cursor += inline.consumed;

      // Remember whether the line was terminated by a NEWLINE and consume it.
      const endedWithNewline = tokens[cursor]?.type === "NEWLINE";
      if (endedWithNewline) {
        cursor += 1;
      }

      rows.push({
        depth: level - 1, // rows carry a zero-based depth
        ltype: null,
        value: { elements: lineElements, hasLineBreak: endedWithNewline },
      });
    }

    const consumed = cursor - ctx.pos;

    if (rows.length === 0) {
      // Only discarded lines (no space after the marker) were seen.
      if (consumed > 0) {
        return { success: true, elements: [], consumed };
      }
      return { success: false };
    }

    // Turn the flat rows into trees, then each tree into a blockquote.
    const trees = processDepths<null, QuoteLine>(null, rows);
    const blockquotes = trees.map(({ list }) => buildBlockquoteElement(list));

    if (blockquotes.length === 0) {
      return { success: false };
    }

    // All resulting blockquotes are returned (usually there is just one).
    return { success: true, elements: blockquotes, consumed };
  },
};
166
+
/**
 * Recursively converts a depth list (produced by `processDepths()`) into a
 * blockquote container element.
 *
 * Entries whose `kind` is `"item"` contribute their inline elements to the
 * paragraph currently being assembled, followed by a `line-break` element
 * when the source line ended with a newline. An item with no elements acts
 * as a paragraph separator. Any other entry is a nested list: the pending
 * paragraph is flushed and the nested list becomes a child blockquote.
 *
 * Trailing `line-break` elements are stripped from every paragraph, and a
 * paragraph that ends up empty is not emitted.
 *
 * @param list - The depth list to convert.
 * @returns A container element with `type: "blockquote"`.
 */
function buildBlockquoteElement(
  list: DepthList<null, { elements: Element[]; hasLineBreak: boolean }>,
): Element {
  const children: Element[] = [];
  let paragraph: Element[] = [];

  // Emits the pending paragraph (if it has content) and starts a new one.
  const flushParagraph = (): void => {
    while (
      paragraph.length > 0 &&
      paragraph[paragraph.length - 1]?.element === "line-break"
    ) {
      paragraph.pop();
    }
    if (paragraph.length > 0) {
      children.push({
        element: "container",
        data: {
          type: "paragraph",
          attributes: {},
          elements: paragraph,
        },
      });
    }
    paragraph = [];
  };

  for (const item of list) {
    if (item.kind === "item") {
      const { elements, hasLineBreak } = item.value;

      // A content-less line (e.g. "> ") separates paragraphs.
      if (elements.length === 0) {
        flushParagraph();
        continue;
      }

      // Append one element at a time. Spreading the array into push() passes
      // every element as a call argument and throws a RangeError once a line
      // produces more elements than the engine's argument limit.
      for (const child of elements) {
        paragraph.push(child);
      }

      if (hasLineBreak) {
        paragraph.push({ element: "line-break" });
      }
    } else {
      // Nested level: close the pending paragraph, then recurse.
      flushParagraph();
      children.push(buildBlockquoteElement(item.children));
    }
  }

  // Whatever is still pending becomes the final paragraph.
  flushParagraph();

  return {
    element: "container",
    data: {
      type: "blockquote",
      attributes: {},
      elements: children,
    },
  };
}
@@ -0,0 +1,87 @@
1
+ /**
2
+ *
3
+ * Block rule for Wikidot single-line center alignment: `= text`.
4
+ *
5
+ * When a line begins with a single `=` followed by whitespace, the rest
6
+ * of the line is rendered as a centered paragraph (`<p style="text-align: center;">`).
7
+ *
8
+ * This is distinct from the `[[=]]...[[/=]]` alignment container (handled
9
+ * by `align.ts`), which wraps multiple block-level elements. The center
10
+ * rule here only affects a single line.
11
+ *
12
+ * Conditions for the rule to match:
13
+ * - Token must be EQUALS at the start of a line.
14
+ * - Must be exactly one `=` (4+ consecutive equals are a content separator,
15
+ * handled by `content-separator.ts`).
16
+ * - Must be followed by a WHITESPACE token.
17
+ *
18
+ * The inline content is parsed until the end of line.
19
+ *
20
+ * @module
21
+ */
22
+ import type { Element } from "@wdprlib/ast";
23
+ import type { BlockRule, ParseContext, RuleResult } from "../types";
24
+ import { currentToken } from "../types";
25
+ import { parseInlineUntil } from "../inline/utils";
26
+
/**
 * Block rule for a single centered line (`= text`).
 *
 * Matches a line-start EQUALS token that is directly followed by WHITESPACE,
 * parses the remainder of the line as inline content, and emits a paragraph
 * container carrying `style: "text-align: center;"`. The NEWLINE that ends
 * the line, when present, is counted as consumed.
 */
export const centerRule: BlockRule = {
  name: "center",
  startTokens: ["EQUALS"],
  requiresLineStart: true,

  parse(ctx: ParseContext): RuleResult<Element> {
    if (!currentToken(ctx).lineStart) {
      return { success: false };
    }

    const { tokens } = ctx;

    // The cursor starts just past the `=`; the consumed count is derived
    // from how far it travels.
    let cursor = ctx.pos + 1;

    // Centering only applies when whitespace follows the `=`.
    if (tokens[cursor]?.type !== "WHITESPACE") {
      return { success: false };
    }
    do {
      cursor += 1;
    } while (tokens[cursor]?.type === "WHITESPACE");

    // Everything up to the NEWLINE is the centered inline content.
    const lineCtx: ParseContext = { ...ctx, pos: cursor };
    const inline = parseInlineUntil(lineCtx, "NEWLINE");
    const content: Element[] = inline.elements;
    cursor += inline.consumed;

    // Take the line terminator along with the line.
    if (tokens[cursor]?.type === "NEWLINE") {
      cursor += 1;
    }

    const centered: Element = {
      element: "container",
      data: {
        type: "paragraph",
        attributes: {
          style: "text-align: center;",
        },
        elements: content,
      },
    };

    return { success: true, elements: [centered], consumed: cursor - ctx.pos };
  },
};
@@ -0,0 +1,75 @@
1
+ /**
2
+ *
3
+ * Block rule for Wikidot's float-clearing syntax: `~~~~`.
4
+ *
5
+ * Four or more tilde characters (`~`) at the start of a line produce a
6
+ * `<div style="clear: both;">` (or left/right) element. This is commonly
7
+ * used after floated images or divs to prevent subsequent content from
8
+ * wrapping alongside them.
9
+ *
10
+ * Variants:
11
+ * - `~~~~` (or more tildes) -- `clear: both`
12
+ * - `~~~~<` -- `clear: left`
13
+ * - `~~~~>` -- `clear: right`
14
+ *
15
+ * Three tildes (`~~~`) do NOT trigger this rule in Wikidot -- the minimum
16
+ * is four. The tilde count is validated at parse time even though the
17
+ * lexer already tokenises valid sequences, as a defensive check.
18
+ *
19
+ * @module
20
+ */
21
+ import type { Element } from "@wdprlib/ast";
22
+ import type { BlockRule, ParseContext, RuleResult } from "../types";
23
+ import { currentToken } from "../types";
24
+
/**
 * Block rule for the clear-float directive (`~~~~`, `~~~~<`, `~~~~>`).
 *
 * Emits a `clear-float` element whose data is `"both"`, `"left"` or
 * `"right"`, chosen from the token type. The token must sit at line start
 * and contain at least four tildes (a trailing `<` or `>` is not counted).
 * A NEWLINE directly after the token is consumed with it.
 */
export const clearFloatRule: BlockRule = {
  name: "clear-float",
  startTokens: ["CLEAR_FLOAT", "CLEAR_FLOAT_LEFT", "CLEAR_FLOAT_RIGHT"],
  requiresLineStart: true,

  parse(ctx: ParseContext): RuleResult<Element> {
    const token = currentToken(ctx);
    if (!token.lineStart) {
      return { success: false };
    }

    // Defensive re-check of the tilde count: fewer than four never clears.
    const directionSuffix = /[<>]$/.test(token.value) ? 1 : 0;
    if (token.value.length - directionSuffix < 4) {
      return { success: false };
    }

    const direction: "both" | "left" | "right" =
      token.type === "CLEAR_FLOAT_LEFT"
        ? "left"
        : token.type === "CLEAR_FLOAT_RIGHT"
          ? "right"
          : "both";

    // One token for the directive, plus one more if a NEWLINE follows it.
    const consumed = ctx.tokens[ctx.pos + 1]?.type === "NEWLINE" ? 2 : 1;

    return {
      success: true,
      elements: [{ element: "clear-float", data: direction }],
      consumed,
    };
  },
};
@@ -0,0 +1,187 @@
1
+ /**
2
+ *
3
+ * Block rule for the Wikidot code block: `[[code]]...[[/code]]`.
4
+ *
5
+ * A code block captures its body as raw text (no inline parsing) and
6
+ * supports two optional attributes:
7
+ * - `type` -- the programming language for syntax highlighting (e.g.
8
+ * `type="python"`).
9
+ * - `name` -- a label or filename displayed alongside the code.
10
+ *
11
+ * The content between the tags is collected verbatim, with a single
12
+ * trailing newline stripped. The parsed block is also pushed into
13
+ * `ctx.codeBlocks` so higher-level consumers can enumerate all code
14
+ * blocks in the document.
15
+ *
16
+ * Edge case: when a quoted attribute value swallows the `]]` and even
17
+ * the `[[/code]]` (e.g. `[[code type="css]][[/code]]`), the parser
18
+ * detects this by inspecting the QUOTED_STRING token and truncates the
19
+ * value at the first `]]`. If `[[/code]]` is also inside the quoted
20
+ * string, the body is treated as empty.
21
+ *
22
+ * @module
23
+ */
24
+ import type { Element, CodeBlockData } from "@wdprlib/ast";
25
+ import type { BlockRule, ParseContext, RuleResult } from "../types";
26
+ import { currentToken } from "../types";
27
+ import { parseBlockName } from "../utils";
28
+ import { parseAttributesRaw } from "./utils";
29
+
/**
 * Block rule for `[[code type="..." name="..."]]...[[/code]]`.
 *
 * The body is captured verbatim by concatenating token values (no inline
 * parsing) and loses one trailing newline. Each parsed block is also pushed
 * onto `ctx.codeBlocks`.
 *
 * Malformed opening tags: when `]]` is missing because the preceding
 * QUOTED_STRING token contains it, the affected attribute value is cut at the
 * first `]]`. If that quoted string also contains `[[/code]]`, the block is
 * treated as closed with an empty body. A block that never finds its closing
 * tag still succeeds, but an `unclosed-block` warning is recorded.
 */
export const codeBlockRule: BlockRule = {
  name: "code",
  startTokens: ["BLOCK_OPEN"],
  requiresLineStart: false,

  parse(ctx: ParseContext): RuleResult<Element> {
    const opener = currentToken(ctx);
    if (opener.type !== "BLOCK_OPEN") {
      return { success: false };
    }

    const { tokens } = ctx;

    // Every consumed token advances the cursor by one, so the consumed count
    // is the cursor's distance from the starting position.
    let cursor = ctx.pos + 1;

    // Block name must be exactly "code".
    const blockName = parseBlockName(ctx, cursor);
    if (!blockName || blockName.name !== "code") {
      return { success: false };
    }
    cursor += blockName.consumed;

    // Optional attributes (type / name).
    const attributes = parseAttributesRaw(ctx, cursor);
    cursor += attributes.consumed;

    // Closing `]]` of the opening tag, or recovery when a quoted attribute
    // value swallowed it.
    let bodySwallowed = false;
    if (tokens[cursor]?.type === "BLOCK_CLOSE") {
      cursor += 1;
    } else {
      const previous = tokens[cursor - 1];
      const quoted = previous?.type === "QUOTED_STRING" ? previous.value : undefined;
      if (quoted === undefined || !quoted.includes("]]")) {
        return { success: false };
      }

      const cut = quoted.indexOf("]]");
      const repaired = quoted.startsWith('"') ? quoted.slice(1, cut) : quoted.slice(0, cut);

      // The stored attribute may be the raw token text, the text without both
      // quotes, or the text without only the opening quote.
      const rawForms = [quoted, quoted.slice(1, -1), quoted.slice(1)];
      const damagedKey = Object.keys(attributes.attrs).find((key) =>
        rawForms.includes(attributes.attrs[key]!),
      );
      if (damagedKey !== undefined) {
        attributes.attrs[damagedKey] = repaired;
      }

      // The closing tag was inside the quoted string too: nothing to collect.
      bodySwallowed = quoted.includes("[[/code]]");
    }

    // A newline right after the opening tag is not part of the body.
    if (tokens[cursor]?.type === "NEWLINE") {
      cursor += 1;
    }

    // Gather the raw body until `[[/code]]`.
    let body = "";
    let closed = bodySwallowed;

    if (!bodySwallowed) {
      while (cursor < tokens.length) {
        const token = tokens[cursor];
        if (!token || token.type === "EOF") {
          break;
        }

        if (token.type === "BLOCK_END_OPEN") {
          const closer = parseBlockName(ctx, cursor + 1);
          if (closer && closer.name === "code") {
            closed = true;
            // `[[/` plus the block name.
            cursor += 1 + closer.consumed;
            // The `]]` of the closing tag, when present.
            if (tokens[cursor]?.type === "BLOCK_CLOSE") {
              cursor += 1;
            }
            // A newline directly after the closing tag.
            if (tokens[cursor]?.type === "NEWLINE") {
              cursor += 1;
            }
            break;
          }
        }

        // Any other token is body text, kept exactly as written.
        body += token.value;
        cursor += 1;
      }
    }

    if (!closed) {
      ctx.diagnostics.push({
        severity: "warning",
        code: "unclosed-block",
        message: "Missing closing tag [[/code]] for [[code]]",
        position: opener.position,
      });
    }

    // Drop a single trailing newline from the body.
    const contents = body.replace(/\n$/, "");
    const language = attributes.attrs.type ?? null;
    const label = attributes.attrs.name ?? null;

    // Register the block on the context; the element gets its own object.
    const registered: CodeBlockData = { contents, language, name: label };
    ctx.codeBlocks.push(registered);

    return {
      success: true,
      elements: [
        {
          element: "code",
          data: { contents, language, name: label },
        },
      ],
      consumed: cursor - ctx.pos,
    };
  },
};