@wdprlib/parser 3.1.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. package/dist/index.cjs +312 -121
  2. package/dist/index.js +289 -98
  3. package/package.json +5 -3
  4. package/src/index.ts +163 -0
  5. package/src/lexer/index.ts +20 -0
  6. package/src/lexer/lexer.ts +687 -0
  7. package/src/lexer/tokens.ts +141 -0
  8. package/src/parser/constants.ts +173 -0
  9. package/src/parser/depth.ts +251 -0
  10. package/src/parser/index.ts +18 -0
  11. package/src/parser/parse.ts +315 -0
  12. package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
  13. package/src/parser/postprocess/index.ts +15 -0
  14. package/src/parser/postprocess/spanStrip.ts +697 -0
  15. package/src/parser/preprocess/expr.ts +265 -0
  16. package/src/parser/preprocess/index.ts +38 -0
  17. package/src/parser/preprocess/typography.ts +67 -0
  18. package/src/parser/preprocess/utils.ts +250 -0
  19. package/src/parser/preprocess/whitespace.ts +111 -0
  20. package/src/parser/rules/block/align.ts +282 -0
  21. package/src/parser/rules/block/bibliography.ts +359 -0
  22. package/src/parser/rules/block/block-list.ts +689 -0
  23. package/src/parser/rules/block/blockquote.ts +238 -0
  24. package/src/parser/rules/block/center.ts +87 -0
  25. package/src/parser/rules/block/clear-float.ts +75 -0
  26. package/src/parser/rules/block/code.ts +187 -0
  27. package/src/parser/rules/block/collapsible.ts +337 -0
  28. package/src/parser/rules/block/comment.ts +73 -0
  29. package/src/parser/rules/block/content-separator.ts +79 -0
  30. package/src/parser/rules/block/definition-list.ts +270 -0
  31. package/src/parser/rules/block/div.ts +400 -0
  32. package/src/parser/rules/block/embed-block.ts +153 -0
  33. package/src/parser/rules/block/footnoteblock.ts +200 -0
  34. package/src/parser/rules/block/heading.ts +142 -0
  35. package/src/parser/rules/block/horizontal-rule.ts +61 -0
  36. package/src/parser/rules/block/html.ts +222 -0
  37. package/src/parser/rules/block/iframe.ts +239 -0
  38. package/src/parser/rules/block/iftags.ts +150 -0
  39. package/src/parser/rules/block/include.ts +179 -0
  40. package/src/parser/rules/block/index.ts +127 -0
  41. package/src/parser/rules/block/list.ts +244 -0
  42. package/src/parser/rules/block/math.ts +183 -0
  43. package/src/parser/rules/block/module/backlinks/index.ts +31 -0
  44. package/src/parser/rules/block/module/backlinks/types.ts +21 -0
  45. package/src/parser/rules/block/module/categories/index.ts +34 -0
  46. package/src/parser/rules/block/module/categories/types.ts +21 -0
  47. package/src/parser/rules/block/module/css/index.ts +37 -0
  48. package/src/parser/rules/block/module/iftags/condition.ts +109 -0
  49. package/src/parser/rules/block/module/iftags/index.ts +26 -0
  50. package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
  51. package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
  52. package/src/parser/rules/block/module/iftags/types.ts +63 -0
  53. package/src/parser/rules/block/module/include/index.ts +20 -0
  54. package/src/parser/rules/block/module/include/resolve.ts +556 -0
  55. package/src/parser/rules/block/module/index.ts +122 -0
  56. package/src/parser/rules/block/module/join/index.ts +34 -0
  57. package/src/parser/rules/block/module/join/types.ts +23 -0
  58. package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
  59. package/src/parser/rules/block/module/listpages/extract.ts +410 -0
  60. package/src/parser/rules/block/module/listpages/index.ts +83 -0
  61. package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
  62. package/src/parser/rules/block/module/listpages/parser.ts +106 -0
  63. package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
  64. package/src/parser/rules/block/module/listpages/types.ts +513 -0
  65. package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
  66. package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
  67. package/src/parser/rules/block/module/listusers/extract.ts +45 -0
  68. package/src/parser/rules/block/module/listusers/index.ts +36 -0
  69. package/src/parser/rules/block/module/listusers/parser.ts +54 -0
  70. package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
  71. package/src/parser/rules/block/module/listusers/types.ts +93 -0
  72. package/src/parser/rules/block/module/mapping.ts +61 -0
  73. package/src/parser/rules/block/module/page-tree/index.ts +38 -0
  74. package/src/parser/rules/block/module/page-tree/types.ts +29 -0
  75. package/src/parser/rules/block/module/rate/index.ts +28 -0
  76. package/src/parser/rules/block/module/rate/types.ts +19 -0
  77. package/src/parser/rules/block/module/resolve.ts +411 -0
  78. package/src/parser/rules/block/module/types-common.ts +59 -0
  79. package/src/parser/rules/block/module/types.ts +61 -0
  80. package/src/parser/rules/block/module/utils.ts +43 -0
  81. package/src/parser/rules/block/module/walk.ts +380 -0
  82. package/src/parser/rules/block/module.ts +164 -0
  83. package/src/parser/rules/block/orphan-li.ts +177 -0
  84. package/src/parser/rules/block/paragraph.ts +157 -0
  85. package/src/parser/rules/block/table-block.ts +726 -0
  86. package/src/parser/rules/block/table.ts +441 -0
  87. package/src/parser/rules/block/tabview.ts +331 -0
  88. package/src/parser/rules/block/toc.ts +129 -0
  89. package/src/parser/rules/block/utils.ts +615 -0
  90. package/src/parser/rules/index.ts +49 -0
  91. package/src/parser/rules/inline/anchor-name.ts +154 -0
  92. package/src/parser/rules/inline/anchor.ts +327 -0
  93. package/src/parser/rules/inline/bibcite.ts +153 -0
  94. package/src/parser/rules/inline/bold.ts +86 -0
  95. package/src/parser/rules/inline/color.ts +140 -0
  96. package/src/parser/rules/inline/comment.ts +90 -0
  97. package/src/parser/rules/inline/equation-ref.ts +115 -0
  98. package/src/parser/rules/inline/expr.ts +526 -0
  99. package/src/parser/rules/inline/footnote.ts +223 -0
  100. package/src/parser/rules/inline/guillemet.ts +64 -0
  101. package/src/parser/rules/inline/html.ts +132 -0
  102. package/src/parser/rules/inline/image.ts +328 -0
  103. package/src/parser/rules/inline/index.ts +150 -0
  104. package/src/parser/rules/inline/italic.ts +74 -0
  105. package/src/parser/rules/inline/line-break.ts +326 -0
  106. package/src/parser/rules/inline/link-anchor.ts +147 -0
  107. package/src/parser/rules/inline/link-single.ts +164 -0
  108. package/src/parser/rules/inline/link-star.ts +134 -0
  109. package/src/parser/rules/inline/link-triple.ts +267 -0
  110. package/src/parser/rules/inline/math-inline.ts +126 -0
  111. package/src/parser/rules/inline/monospace.ts +78 -0
  112. package/src/parser/rules/inline/raw.ts +262 -0
  113. package/src/parser/rules/inline/size.ts +244 -0
  114. package/src/parser/rules/inline/span.ts +424 -0
  115. package/src/parser/rules/inline/strikethrough.ts +115 -0
  116. package/src/parser/rules/inline/subscript.ts +84 -0
  117. package/src/parser/rules/inline/superscript.ts +84 -0
  118. package/src/parser/rules/inline/text.ts +84 -0
  119. package/src/parser/rules/inline/underline.ts +127 -0
  120. package/src/parser/rules/inline/user.ts +147 -0
  121. package/src/parser/rules/inline/utils.ts +344 -0
  122. package/src/parser/rules/types.ts +252 -0
  123. package/src/parser/rules/utils.ts +155 -0
  124. package/src/parser/toc.ts +130 -0
@@ -0,0 +1,424 @@
1
+ /**
2
+ *
3
+ * Parses the Wikidot span block syntax: `[[span attributes]]content[[/span]]`
4
+ * and its paragraph-strip variant `[[span_]]`.
5
+ *
6
+ * A span wraps inline content in an HTML `<span>` element with arbitrary
7
+ * attributes (class, style, id, etc.). It supports multiline content
8
+ * where single newlines become `<br />` elements.
9
+ *
10
+ * Blank lines (double newlines) within spans trigger special behavior:
11
+ *
12
+ * Regular span (`[[span]]`):
13
+ * - Blank lines split the content into separate spans, each placed in
14
+ * its own paragraph. Segments after the first are marked with
15
+ * `_splitByBlankLine: true` for postprocessing.
16
+ *
17
+ * Paragraph-strip span (`[[span_]]`):
18
+ * - Content before a blank line gets `_paragraphStrip: true`, indicating
19
+ * it should merge with the surrounding paragraph.
20
+ * - Content after a blank line gets `_escapedFromParagraph: true`,
21
+ * indicating it should appear outside any paragraph wrapper.
22
+ * - An empty `[[span_]][[/span]]` produces a marker with
23
+ * `_emptyParagraphStrip: true` that absorbs adjacent line breaks.
24
+ *
25
+ * Spans support nesting -- a `[[span]]` inside another `[[span]]` will
26
+ * correctly find its own `[[/span]]` closing tag.
27
+ *
28
+ * The `closeSpanRule` handles orphaned `[[/span]]` tags that result
29
+ * from paragraph-break splitting. These wrap preceding inline content
30
+ * into a span, matching Wikidot's behavior.
31
+ *
32
+ * @module
33
+ */
34
+ import type { Element } from "@wdprlib/ast";
35
+ import type { InlineRule, ParseContext, RuleResult } from "../types";
36
+ import { currentToken } from "../types";
37
+ import { inlineRules } from "../index";
38
+ import { parseBlockName } from "../utils";
39
+ import { parseAttributes } from "../block/utils";
40
+ import { canApplyInlineRule } from "./utils";
41
+
42
/**
 * Inline rule for `[[span attributes]]content[[/span]]` and its
 * paragraph-strip variant `[[span_ attributes]]content[[/span]]`.
 *
 * Starts on a `BLOCK_OPEN` token. After the block name and its attributes
 * a `BLOCK_CLOSE` is required; inline content (nested spans included, via
 * the shared `inlineRules` list) is then collected up to the `[[/span`
 * closing tag.
 *
 * The rule fails when:
 * - the block name is neither `span` nor `span_`
 * - the attributes are not followed by `]]`
 * - no `[[/span]]` is reached before the end of the token stream (an
 *   `unclosed-block` warning is pushed onto `ctx.diagnostics` in that case)
 */
export const spanRule: InlineRule = {
  name: "span",
  startTokens: ["BLOCK_OPEN"],

  /**
   * Tries to parse a span starting at `ctx.pos`.
   *
   * @param ctx - Parse context holding the token stream and current position
   * @returns On success, one or more `"container"` elements of type `"span"`
   *          together with the number of tokens consumed; otherwise
   *          `{ success: false }`
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const openToken = currentToken(ctx);
    if (openToken.type !== "BLOCK_OPEN") {
      return { success: false };
    }

    // Block name directly after `[[`; only `span` / `span_` are accepted.
    const nameResult = parseBlockName(ctx, ctx.pos + 1);
    if (!nameResult) {
      return { success: false };
    }
    const blockName = nameResult.name;
    const isStripVariant = blockName === "span_";
    if (!isStripVariant && blockName !== "span") {
      return { success: false };
    }

    // `cursor` is the absolute token index; the consumed count is derived
    // from it at the end instead of being tracked separately.
    let cursor = ctx.pos + 1 + nameResult.consumed;

    const attrResult = parseAttributes(ctx, cursor);
    cursor += attrResult.consumed;

    // The opening tag must be terminated by `]]`.
    if (ctx.tokens[cursor]?.type !== "BLOCK_CLOSE") {
      return { success: false };
    }
    cursor++;

    const body: Element[] = []; // content of the current (or only) segment
    const escaped: Element[] = []; // span_: content that follows a blank line
    const segments: Element[][] = []; // span: segments already closed by a blank line
    let closed = false;
    let pastBlankLine = false; // span_: set once a blank line has been seen

    const typeAt = (index: number) => ctx.tokens[index]?.type;
    const skipBlankRun = (): void => {
      while (typeAt(cursor) === "WHITESPACE" || typeAt(cursor) === "NEWLINE") {
        cursor++;
      }
    };

    while (cursor < ctx.tokens.length) {
      const token = ctx.tokens[cursor];
      if (!token || token.type === "EOF") {
        break;
      }

      // Closing tag: `[[/` + `span` + optional `]]`.
      if (token.type === "BLOCK_END_OPEN") {
        const closeName = parseBlockName(ctx, cursor + 1);
        if (closeName && closeName.name === "span") {
          cursor += 1 + closeName.consumed;
          if (typeAt(cursor) === "BLOCK_CLOSE") {
            cursor++;
          }
          closed = true;
          break;
        }
      }

      if (token.type === "NEWLINE") {
        // A blank line is a NEWLINE followed (ignoring whitespace) by
        // another NEWLINE.
        let probe = cursor + 1;
        while (typeAt(probe) === "WHITESPACE") {
          probe++;
        }

        if (typeAt(probe) === "NEWLINE") {
          if (isStripVariant) {
            // span_: everything from here on is collected separately.
            pastBlankLine = true;
          } else if (body.length > 0) {
            // span: close the current segment and start a fresh one.
            segments.push(body.splice(0));
          }
          cursor++; // the first newline
          skipBlankRun(); // remaining whitespace / newlines
          continue;
        }

        // A lone newline becomes a line-break element.
        (pastBlankLine ? escaped : body).push({ element: "line-break" });
        cursor++;
        // Drop indentation at the start of the following line.
        while (ctx.tokens[cursor]?.type === "WHITESPACE" && ctx.tokens[cursor]?.lineStart) {
          cursor++;
        }
        continue;
      }

      // Line-leading whitespace is dropped; whitespace between words is kept.
      if (token.type === "WHITESPACE" && token.lineStart) {
        cursor++;
        continue;
      }

      const sink = pastBlankLine ? escaped : body;
      const innerCtx: ParseContext = { ...ctx, pos: cursor };

      // The first applicable rule that succeeds wins. Nested spans go through
      // this same list and consume their own opening and closing tags.
      let matched = false;
      for (const rule of inlineRules) {
        if (!canApplyInlineRule(rule, token)) {
          continue;
        }
        const result = rule.parse(innerCtx);
        if (!result.success) {
          continue;
        }
        sink.push(...result.elements);
        cursor += result.consumed;
        matched = true;
        break;
      }

      if (!matched) {
        // No rule handled the token: keep its raw text.
        sink.push({ element: "text", data: token.value });
        cursor++;
      }
    }

    // Without a closing tag this is not a span.
    if (!closed) {
      ctx.diagnostics.push({
        severity: "warning",
        code: "unclosed-block",
        message: `Missing closing tag [[/span]] for [[${blockName}]]`,
        position: openToken.position,
      });
      return { success: false };
    }

    const consumed = cursor - ctx.pos;

    if (isStripVariant) {
      // span_ output:
      // - content before the blank line -> span flagged `_paragraphStrip`
      // - content after the blank line  -> span flagged `_escapedFromParagraph`
      // - nothing left after trimming   -> marker flagged `_emptyParagraphStrip`
      const trimLineBreaks = (list: Element[]): void => {
        while (list.length > 0 && list[0]?.element === "line-break") {
          list.shift();
        }
        while (list.length > 0 && list[list.length - 1]?.element === "line-break") {
          list.pop();
        }
      };
      trimLineBreaks(body);
      trimLineBreaks(escaped);

      const elements: Element[] = [];

      if (body.length > 0) {
        elements.push({
          element: "container",
          data: {
            type: "span",
            attributes: attrResult.attrs,
            elements: body,
            _paragraphStrip: true,
          },
        });
      }

      if (escaped.length > 0) {
        elements.push({
          element: "container",
          data: {
            type: "span",
            attributes: {},
            elements: escaped,
            _escapedFromParagraph: true,
          },
        });
      }

      if (elements.length === 0) {
        // Empty span_: emit a marker so adjacent line-breaks can be absorbed.
        elements.push({
          element: "container",
          data: {
            type: "span",
            attributes: {},
            elements: [],
            _emptyParagraphStrip: true,
          },
        });
      }

      return { success: true, elements, consumed };
    }

    if (segments.length > 0) {
      // Regular span that contained blank lines: one span per segment. Only
      // the first keeps the attributes; later ones carry `_splitByBlankLine`
      // so postprocessing can move them into their own paragraphs.
      if (body.length > 0) {
        segments.push(body);
      }

      const elements = segments.map(
        (segment, index): Element => ({
          element: "container",
          data: {
            type: "span",
            attributes: index === 0 ? attrResult.attrs : {},
            elements: segment,
            _splitByBlankLine: index > 0,
          },
        }),
      );

      return { success: true, elements, consumed };
    }

    // Plain span without blank lines.
    return {
      success: true,
      elements: [
        {
          element: "container",
          data: {
            type: "span",
            attributes: attrResult.attrs,
            elements: body,
          },
        },
      ],
      consumed,
    };
  },
};
358
+
359
/**
 * Inline rule for a `[[/span]]` closing tag that has no opening tag in the
 * current inline context (for example after a span was split across
 * paragraphs by a blank line).
 *
 * Starts on a `BLOCK_END_OPEN` (`[[/`) token and matches only when the
 * block name that follows is `span`. It emits an empty span container
 * flagged with `_closeSpan: true`; the paragraph parser is expected to use
 * that marker to wrap the preceding inline content in a span.
 */
export const closeSpanRule: InlineRule = {
  name: "closeSpan",
  startTokens: ["BLOCK_END_OPEN"],

  /**
   * Tries to parse an orphaned `[[/span]]` at `ctx.pos`.
   *
   * @param ctx - Parse context holding the token stream and current position
   * @returns A successful result carrying the `_closeSpan` marker element, or
   *          `{ success: false }` when the tokens are not a `[[/span` tag
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    if (currentToken(ctx).type !== "BLOCK_END_OPEN") {
      return { success: false };
    }

    // Only a closing tag named `span` is handled here.
    const nameResult = parseBlockName(ctx, ctx.pos + 1);
    if (!nameResult || nameResult.name !== "span") {
      return { success: false };
    }

    // `[[/` plus the name tokens, plus the trailing `]]` when it is present.
    const afterName = ctx.pos + 1 + nameResult.consumed;
    const hasClosingBrackets = ctx.tokens[afterName]?.type === "BLOCK_CLOSE";
    const consumed = 1 + nameResult.consumed + (hasClosingBrackets ? 1 : 0);

    // Marker meaning "wrap the preceding content in a span".
    const marker: Element = {
      element: "container",
      data: {
        type: "span",
        attributes: {},
        elements: [],
        _closeSpan: true,
      },
    };

    return { success: true, elements: [marker], consumed };
  },
};
@@ -0,0 +1,115 @@
1
+ /**
2
+ *
3
+ * Parses the Wikidot strikethrough formatting syntax: `--text--`.
4
+ *
5
+ * Strikethrough text is delimited by double hyphens. However, the `--`
6
+ * token has dual meaning in Wikidot: it can be either a strikethrough
7
+ * marker or an em-dash. The disambiguation rule is:
8
+ *
9
+ * - If a matching closing `--` is found on the same line AND the closing
10
+ * marker is NOT preceded by whitespace, it is treated as strikethrough.
11
+ * - Otherwise, the `--` is converted to an em-dash character (U+2014).
12
+ *
13
+ * This means `--word--` produces strikethrough, but `-- word --` produces
14
+ * two em-dashes with "word" between them.
15
+ *
16
+ * Produces a `"container"` AST element with `type: "strikethrough"`,
17
+ * or a `"text"` element containing the em-dash character.
18
+ *
19
+ * @module
20
+ */
21
+ import type { Element } from "@wdprlib/ast";
22
+ import type { InlineRule, ParseContext, RuleResult } from "../types";
23
+ import { parseInlineUntil } from "./utils";
24
+
25
/**
 * Decides whether the `--` at `ctx.pos` opens a real strikethrough.
 *
 * Walks the tokens that follow the opening marker:
 * - a `NEWLINE`, an `EOF`, a missing token, or the end of the stream before
 *   any closing marker means there is no pair;
 * - the first `STRIKE_MARKER` encountered is the closing marker, and the
 *   pair is valid only if the token immediately before it is not
 *   `WHITESPACE` (this is what separates `--text--` from `-- text --`).
 *
 * @param ctx - Parse context positioned at the opening `--` marker
 * @returns `true` when a valid opening/closing pair exists on the same line
 */
function isValidStrikethrough(ctx: ParseContext): boolean {
  let afterWhitespace = false;

  // Start one past the opening marker.
  for (let index = ctx.pos + 1; index < ctx.tokens.length; index++) {
    const token = ctx.tokens[index];
    if (!token) {
      return false;
    }

    switch (token.type) {
      case "NEWLINE":
      case "EOF":
        // The line ended without a closing marker.
        return false;
      case "STRIKE_MARKER":
        // Closing marker found; reject it when whitespace precedes it.
        return !afterWhitespace;
      default:
        afterWhitespace = token.type === "WHITESPACE";
    }
  }

  return false;
}
64
+
65
/**
 * Inline rule for `--strikethrough--`, with an em-dash fallback.
 *
 * Starts on a `STRIKE_MARKER` (`--`) token. When `isValidStrikethrough`
 * accepts the position, the content up to the next `STRIKE_MARKER` is
 * parsed and wrapped in a strikethrough container. Otherwise the single
 * `--` token is replaced by an em-dash (U+2014) text element.
 */
export const strikethroughRule: InlineRule = {
  name: "strikethrough",
  startTokens: ["STRIKE_MARKER"],

  /**
   * Parses strikethrough formatting at `ctx.pos`, or emits an em-dash.
   *
   * @param ctx - Parse context holding the token stream and current position
   * @returns Always a successful result: either a `"container"` element of
   *          type `"strikethrough"`, or a `"text"` element holding the
   *          em-dash character
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    if (isValidStrikethrough(ctx)) {
      // Inline content between the opening and closing markers.
      const inner = parseInlineUntil({ ...ctx, pos: ctx.pos + 1 }, "STRIKE_MARKER");

      const container: Element = {
        element: "container",
        data: {
          type: "strikethrough",
          attributes: {},
          elements: inner.elements,
        },
      };

      return {
        success: true,
        elements: [container],
        // opening marker + content + closing marker
        consumed: 1 + inner.consumed + 1,
      };
    }

    // No usable closing marker: the `--` is an em-dash.
    return {
      success: true,
      elements: [{ element: "text", data: "\u2014" }],
      consumed: 1,
    };
  },
};
@@ -0,0 +1,84 @@
1
+ /**
2
+ *
3
+ * Parses the Wikidot subscript formatting syntax: `,,text,,`.
4
+ *
5
+ * Subscript text is delimited by double commas. The opening and closing
6
+ * markers must appear on the same line. If no closing `,,` is found
7
+ * before a newline, the opening marker is emitted as literal text.
8
+ *
9
+ * Empty subscript (`,,,,`) is silently discarded by Wikidot (produces
10
+ * no output), matching the behavior of bold and superscript.
11
+ *
12
+ * Renders as a `<sub>` element in HTML.
13
+ *
14
+ * Produces a `"container"` AST element with `type: "subscript"`.
15
+ *
16
+ * @module
17
+ */
18
+ import type { Element } from "@wdprlib/ast";
19
+ import type { InlineRule, ParseContext, RuleResult } from "../types";
20
+ import { currentToken, hasClosingMarkerBeforeNewline } from "../types";
21
+ import { parseInlineUntil } from "./utils";
22
+
23
/**
 * Inline rule for `,,subscript,,` formatting.
 *
 * Starts on a `SUB_MARKER` (`,,`) token. If `hasClosingMarkerBeforeNewline`
 * reports no closing marker after the opening one, the opening token is
 * emitted as literal text. Otherwise the content between the markers is
 * parsed as inline content and wrapped in a subscript container.
 */
export const subscriptRule: InlineRule = {
  name: "subscript",
  startTokens: ["SUB_MARKER"],

  /**
   * Parses subscript formatting at `ctx.pos`.
   *
   * @param ctx - Parse context holding the token stream and current position
   * @returns Always a successful result containing one of: a `"container"`
   *          element with `type: "subscript"`, no elements at all (for the
   *          empty form `,,,,`), or a text element with the opening marker
   *          when it is unmatched
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const startToken = currentToken(ctx);
    const contentCtx: ParseContext = { ...ctx, pos: ctx.pos + 1 };

    // Unmatched opening marker: keep it as literal text.
    if (!hasClosingMarkerBeforeNewline(contentCtx, "SUB_MARKER")) {
      return {
        success: true,
        elements: [{ element: "text", data: startToken.value }],
        consumed: 1,
      };
    }

    // Inline content between the two markers.
    const inner = parseInlineUntil({ ...ctx, pos: ctx.pos + 1 }, "SUB_MARKER");

    // An empty subscript yields no element, but both markers are still consumed.
    const elements: Element[] =
      inner.elements.length === 0
        ? []
        : [
            {
              element: "container",
              data: {
                type: "subscript",
                attributes: {},
                elements: inner.elements,
              },
            },
          ];

    return {
      success: true,
      elements,
      // opening marker + content + closing marker
      consumed: 1 + inner.consumed + 1,
    };
  },
};
@@ -0,0 +1,84 @@
1
+ /**
2
+ *
3
+ * Parses the Wikidot superscript formatting syntax: `^^text^^`.
4
+ *
5
+ * Superscript text is delimited by double carets. The opening and
6
+ * closing markers must appear on the same line. If no closing `^^`
7
+ * is found before a newline, the opening marker is emitted as literal text.
8
+ *
9
+ * Empty superscript (`^^^^`) is silently discarded by Wikidot (produces
10
+ * no output), matching the behavior of bold and subscript.
11
+ *
12
+ * Renders as a `<sup>` element in HTML.
13
+ *
14
+ * Produces a `"container"` AST element with `type: "superscript"`.
15
+ *
16
+ * @module
17
+ */
18
+ import type { Element } from "@wdprlib/ast";
19
+ import type { InlineRule, ParseContext, RuleResult } from "../types";
20
+ import { currentToken, hasClosingMarkerBeforeNewline } from "../types";
21
+ import { parseInlineUntil } from "./utils";
22
+
23
/**
 * Inline rule for `^^superscript^^` formatting.
 *
 * Starts on a `SUPER_MARKER` (`^^`) token. If `hasClosingMarkerBeforeNewline`
 * reports no closing marker after the opening one, the opening token is
 * emitted as literal text. Otherwise the content between the markers is
 * parsed as inline content and wrapped in a superscript container.
 */
export const superscriptRule: InlineRule = {
  name: "superscript",
  startTokens: ["SUPER_MARKER"],

  /**
   * Parses superscript formatting at `ctx.pos`.
   *
   * @param ctx - Parse context holding the token stream and current position
   * @returns Always a successful result containing one of: a `"container"`
   *          element with `type: "superscript"`, no elements at all (for the
   *          empty form `^^^^`), or a text element with the opening marker
   *          when it is unmatched
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const startToken = currentToken(ctx);
    const contentCtx: ParseContext = { ...ctx, pos: ctx.pos + 1 };

    // Unmatched opening marker: keep it as literal text.
    if (!hasClosingMarkerBeforeNewline(contentCtx, "SUPER_MARKER")) {
      return {
        success: true,
        elements: [{ element: "text", data: startToken.value }],
        consumed: 1,
      };
    }

    // Inline content between the two markers.
    const inner = parseInlineUntil({ ...ctx, pos: ctx.pos + 1 }, "SUPER_MARKER");

    // An empty superscript yields no element, but both markers are still consumed.
    const elements: Element[] =
      inner.elements.length === 0
        ? []
        : [
            {
              element: "container",
              data: {
                type: "superscript",
                attributes: {},
                elements: inner.elements,
              },
            },
          ];

    return {
      success: true,
      elements,
      // opening marker + content + closing marker
      consumed: 1 + inner.consumed + 1,
    };
  },
};