@wdprlib/parser 3.1.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/dist/index.cjs +312 -121
  2. package/dist/index.js +289 -98
  3. package/package.json +5 -3
  4. package/src/index.ts +163 -0
  5. package/src/lexer/index.ts +20 -0
  6. package/src/lexer/lexer.ts +687 -0
  7. package/src/lexer/tokens.ts +141 -0
  8. package/src/parser/constants.ts +173 -0
  9. package/src/parser/depth.ts +251 -0
  10. package/src/parser/index.ts +18 -0
  11. package/src/parser/parse.ts +315 -0
  12. package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
  13. package/src/parser/postprocess/index.ts +15 -0
  14. package/src/parser/postprocess/spanStrip.ts +697 -0
  15. package/src/parser/preprocess/expr.ts +265 -0
  16. package/src/parser/preprocess/index.ts +38 -0
  17. package/src/parser/preprocess/typography.ts +67 -0
  18. package/src/parser/preprocess/utils.ts +250 -0
  19. package/src/parser/preprocess/whitespace.ts +111 -0
  20. package/src/parser/rules/block/align.ts +282 -0
  21. package/src/parser/rules/block/bibliography.ts +359 -0
  22. package/src/parser/rules/block/block-list.ts +689 -0
  23. package/src/parser/rules/block/blockquote.ts +238 -0
  24. package/src/parser/rules/block/center.ts +87 -0
  25. package/src/parser/rules/block/clear-float.ts +75 -0
  26. package/src/parser/rules/block/code.ts +187 -0
  27. package/src/parser/rules/block/collapsible.ts +337 -0
  28. package/src/parser/rules/block/comment.ts +73 -0
  29. package/src/parser/rules/block/content-separator.ts +79 -0
  30. package/src/parser/rules/block/definition-list.ts +270 -0
  31. package/src/parser/rules/block/div.ts +400 -0
  32. package/src/parser/rules/block/embed-block.ts +153 -0
  33. package/src/parser/rules/block/footnoteblock.ts +200 -0
  34. package/src/parser/rules/block/heading.ts +142 -0
  35. package/src/parser/rules/block/horizontal-rule.ts +61 -0
  36. package/src/parser/rules/block/html.ts +222 -0
  37. package/src/parser/rules/block/iframe.ts +239 -0
  38. package/src/parser/rules/block/iftags.ts +150 -0
  39. package/src/parser/rules/block/include.ts +179 -0
  40. package/src/parser/rules/block/index.ts +127 -0
  41. package/src/parser/rules/block/list.ts +244 -0
  42. package/src/parser/rules/block/math.ts +183 -0
  43. package/src/parser/rules/block/module/backlinks/index.ts +31 -0
  44. package/src/parser/rules/block/module/backlinks/types.ts +21 -0
  45. package/src/parser/rules/block/module/categories/index.ts +34 -0
  46. package/src/parser/rules/block/module/categories/types.ts +21 -0
  47. package/src/parser/rules/block/module/css/index.ts +37 -0
  48. package/src/parser/rules/block/module/iftags/condition.ts +109 -0
  49. package/src/parser/rules/block/module/iftags/index.ts +26 -0
  50. package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
  51. package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
  52. package/src/parser/rules/block/module/iftags/types.ts +63 -0
  53. package/src/parser/rules/block/module/include/index.ts +20 -0
  54. package/src/parser/rules/block/module/include/resolve.ts +556 -0
  55. package/src/parser/rules/block/module/index.ts +122 -0
  56. package/src/parser/rules/block/module/join/index.ts +34 -0
  57. package/src/parser/rules/block/module/join/types.ts +23 -0
  58. package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
  59. package/src/parser/rules/block/module/listpages/extract.ts +410 -0
  60. package/src/parser/rules/block/module/listpages/index.ts +83 -0
  61. package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
  62. package/src/parser/rules/block/module/listpages/parser.ts +106 -0
  63. package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
  64. package/src/parser/rules/block/module/listpages/types.ts +513 -0
  65. package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
  66. package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
  67. package/src/parser/rules/block/module/listusers/extract.ts +45 -0
  68. package/src/parser/rules/block/module/listusers/index.ts +36 -0
  69. package/src/parser/rules/block/module/listusers/parser.ts +54 -0
  70. package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
  71. package/src/parser/rules/block/module/listusers/types.ts +93 -0
  72. package/src/parser/rules/block/module/mapping.ts +61 -0
  73. package/src/parser/rules/block/module/page-tree/index.ts +38 -0
  74. package/src/parser/rules/block/module/page-tree/types.ts +29 -0
  75. package/src/parser/rules/block/module/rate/index.ts +28 -0
  76. package/src/parser/rules/block/module/rate/types.ts +19 -0
  77. package/src/parser/rules/block/module/resolve.ts +411 -0
  78. package/src/parser/rules/block/module/types-common.ts +59 -0
  79. package/src/parser/rules/block/module/types.ts +61 -0
  80. package/src/parser/rules/block/module/utils.ts +43 -0
  81. package/src/parser/rules/block/module/walk.ts +380 -0
  82. package/src/parser/rules/block/module.ts +164 -0
  83. package/src/parser/rules/block/orphan-li.ts +177 -0
  84. package/src/parser/rules/block/paragraph.ts +157 -0
  85. package/src/parser/rules/block/table-block.ts +726 -0
  86. package/src/parser/rules/block/table.ts +441 -0
  87. package/src/parser/rules/block/tabview.ts +331 -0
  88. package/src/parser/rules/block/toc.ts +129 -0
  89. package/src/parser/rules/block/utils.ts +615 -0
  90. package/src/parser/rules/index.ts +49 -0
  91. package/src/parser/rules/inline/anchor-name.ts +154 -0
  92. package/src/parser/rules/inline/anchor.ts +327 -0
  93. package/src/parser/rules/inline/bibcite.ts +153 -0
  94. package/src/parser/rules/inline/bold.ts +86 -0
  95. package/src/parser/rules/inline/color.ts +140 -0
  96. package/src/parser/rules/inline/comment.ts +90 -0
  97. package/src/parser/rules/inline/equation-ref.ts +115 -0
  98. package/src/parser/rules/inline/expr.ts +526 -0
  99. package/src/parser/rules/inline/footnote.ts +223 -0
  100. package/src/parser/rules/inline/guillemet.ts +64 -0
  101. package/src/parser/rules/inline/html.ts +132 -0
  102. package/src/parser/rules/inline/image.ts +328 -0
  103. package/src/parser/rules/inline/index.ts +150 -0
  104. package/src/parser/rules/inline/italic.ts +74 -0
  105. package/src/parser/rules/inline/line-break.ts +326 -0
  106. package/src/parser/rules/inline/link-anchor.ts +147 -0
  107. package/src/parser/rules/inline/link-single.ts +164 -0
  108. package/src/parser/rules/inline/link-star.ts +134 -0
  109. package/src/parser/rules/inline/link-triple.ts +267 -0
  110. package/src/parser/rules/inline/math-inline.ts +126 -0
  111. package/src/parser/rules/inline/monospace.ts +78 -0
  112. package/src/parser/rules/inline/raw.ts +262 -0
  113. package/src/parser/rules/inline/size.ts +244 -0
  114. package/src/parser/rules/inline/span.ts +424 -0
  115. package/src/parser/rules/inline/strikethrough.ts +115 -0
  116. package/src/parser/rules/inline/subscript.ts +84 -0
  117. package/src/parser/rules/inline/superscript.ts +84 -0
  118. package/src/parser/rules/inline/text.ts +84 -0
  119. package/src/parser/rules/inline/underline.ts +127 -0
  120. package/src/parser/rules/inline/user.ts +147 -0
  121. package/src/parser/rules/inline/utils.ts +344 -0
  122. package/src/parser/rules/types.ts +252 -0
  123. package/src/parser/rules/utils.ts +155 -0
  124. package/src/parser/toc.ts +130 -0
@@ -0,0 +1,111 @@
1
+ /**
2
+ *
3
+ * Whitespace normalization preprocessing for Wikidot markup.
4
+ *
5
+ * This module ensures the lexer and parser receive input with consistent
6
+ * whitespace conventions. It handles platform differences (DOS/Mac newlines),
7
+ * normalizes exotic whitespace characters that users may paste from external
8
+ * sources, and applies Wikidot-specific behaviors like backslash line continuation.
9
+ *
10
+ * Substitutions are applied in a deliberate order:
11
+ * 1. Newline normalization (DOS `\r\n` and legacy Mac `\r` to Unix `\n`)
12
+ * 2. Non-standard leading whitespace replacement (nbsp, figure space to regular space)
13
+ * 3. Whitespace-only line stripping (collapse to empty lines)
14
+ * 4. Backslash line continuation (`\\\n` to line-break marker U+E000)
15
+ * 5. Tab expansion (tab to four spaces)
16
+ * 6. Null character replacement (NUL to space)
17
+ * 7. Leading/trailing newline removal
18
+ *
19
+ * @module
20
+ */
21
+
22
/**
 * Matches runs of non-standard whitespace (non-breaking space U+00A0,
 * figure space U+2007) at the start of any line. `replaceLeadingSpaces`
 * swaps each matched character for one ASCII space.
 */
const LEADING_NONSTANDARD_WHITESPACE = /^[\u00a0\u2007]+/gm;

/**
 * Matches lines that contain only whitespace so they can be emptied.
 *
 * NOTE: `\s` also matches `\n`, so a single match can extend across several
 * consecutive blank/whitespace-only lines; replacing it with `""` therefore
 * also removes the newlines inside the match (e.g. `"a\n\n\n\nb"` becomes
 * `"a\n\nb"`).
 */
const WHITESPACE_ONLY_LINE = /^\s+$/gm;

/** Matches one or more newlines at the very start of the text. */
const LEADING_NEWLINES = /^\n+/;

/** Matches one or more newlines at the very end of the text. */
const TRAILING_NEWLINES = /\n+$/;

/** Matches DOS (`\r\n`) and legacy Mac (`\r`) line endings. */
const DOS_MAC_NEWLINES = /\r\n?/g;

/**
 * Matches a backslash immediately followed by a newline.
 * `substitute` replaces the pair with the marker character U+E000.
 */
const CONCAT_LINES = /\\\n/g;

/** Matches tab characters. */
const TABS = /\t/g;

/** Matches null (NUL) characters (replaced with spaces). */
const NULL_CHARS = /\0/g;

/** Number of ASCII spaces a single tab character expands to. */
const TAB_WIDTH = 4;

/**
 * Replacement text for one tab. Built with `repeat` rather than a literal so
 * the width stays explicit and cannot be lost to whitespace collapsing.
 */
const TAB_REPLACEMENT = " ".repeat(TAB_WIDTH);

/**
 * Replace non-standard whitespace characters at the start of each line
 * with the same number of regular ASCII spaces.
 *
 * @param text - Input text with potentially non-standard leading whitespace
 * @returns Text with leading non-standard whitespace replaced by ASCII spaces
 */
function replaceLeadingSpaces(text: string): string {
  return text.replace(LEADING_NONSTANDARD_WHITESPACE, (match) => " ".repeat(match.length));
}

/**
 * Apply all whitespace normalization substitutions to the given text.
 *
 * Steps run in this fixed order:
 * 1. `\r\n` and lone `\r` become `\n` (must precede the backslash-newline
 *    step, which only looks for `\n`).
 * 2. Leading U+00A0 / U+2007 on each line become ASCII spaces.
 * 3. Whitespace-only lines are emptied (see `WHITESPACE_ONLY_LINE`).
 * 4. A backslash followed by a newline becomes the single character U+E000.
 * 5. Each tab becomes four spaces.
 * 6. Each NUL character becomes one space.
 * 7. Newlines at the very start and very end of the text are removed.
 *
 * @param text - Raw input text
 * @returns Text with normalized whitespace
 */
export function substitute(text: string): string {
  let result = text;

  // Replace DOS and Mac newlines
  result = result.replace(DOS_MAC_NEWLINES, "\n");

  // Replace leading non-standard spaces with regular spaces
  result = replaceLeadingSpaces(result);

  // Strip lines with only whitespace
  result = result.replace(WHITESPACE_ONLY_LINE, "");

  // Backslash at end of line -> line break marker (U+E000)
  result = result.replace(CONCAT_LINES, String.fromCharCode(0xe000));

  // Tabs to four spaces (previously a single space, contradicting the docs)
  result = result.replace(TABS, TAB_REPLACEMENT);

  // Null characters to spaces
  result = result.replace(NULL_CHARS, " ");

  // Remove leading and trailing newlines
  result = result.replace(LEADING_NEWLINES, "");
  result = result.replace(TRAILING_NEWLINES, "");

  return result;
}
@@ -0,0 +1,282 @@
1
+ /**
2
+ *
3
+ * Block rule for Wikidot alignment containers.
4
+ *
5
+ * Wikidot provides a shorthand bracket syntax for wrapping content in a
6
+ * directional alignment container:
7
+ *
8
+ * ```
9
+ * [[>]] ... [[/>]] right-aligned
10
+ * [[<]] ... [[/<]] left-aligned
11
+ * [[=]] ... [[/=]] center-aligned
12
+ * [[==]] ... [[/==]] justify-aligned
13
+ * ```
14
+ *
15
+ * Each pair acts as a block-level wrapper. The opening tag must appear at
16
+ * the start of a line and be followed by a newline. Body content is parsed
17
+ * recursively as block-level markup, and the matching closing tag terminates
18
+ * the container.
19
+ *
20
+ * The resulting AST node is a generic container element whose `type` field
21
+ * carries the alignment direction (e.g. `{ align: "right" }`).
22
+ *
23
+ * @module
24
+ */
25
+ import type { Element } from "@wdprlib/ast";
26
+ import type { BlockRule, ParseContext, RuleResult } from "../types";
27
+ import { currentToken } from "../types";
28
+ import { parseBlocksUntil } from "./utils";
29
+
30
/**
 * Text-alignment direction carried by an align container. One value exists
 * for each of the four bracket shorthands: `[[<]]`, `[[>]]`, `[[=]]`, `[[==]]`.
 */
type AlignDirection =
  | "left"
  | "right"
  | "center"
  | "justify";
32
+
33
/**
 * Recognizes the interior of an align opening tag, i.e. the tokens that
 * follow the BLOCK_OPEN (`[[`) token.
 *
 * Accepted token sequences starting at `pos`:
 *
 * | Tokens                                      | Direction | Consumed |
 * |---------------------------------------------|-----------|----------|
 * | `>` (BLOCKQUOTE_MARKER or TEXT), BLOCK_CLOSE | right     | 2        |
 * | `<` (TEXT), BLOCK_CLOSE                      | left      | 2        |
 * | EQUALS, BLOCK_CLOSE                          | center    | 2        |
 * | EQUALS, EQUALS, BLOCK_CLOSE                  | justify   | 3        |
 *
 * `>` is accepted under either token type; which one the lexer emits
 * depends on lexer context that is not visible in this module.
 *
 * @param ctx - Current parse context (only `ctx.tokens` is read).
 * @param pos - Token index right after the BLOCK_OPEN token.
 * @returns The detected direction and the number of tokens it spans
 *   (BLOCK_OPEN itself excluded), or `null` when nothing matches.
 */
function parseAlignOpen(
  ctx: ParseContext,
  pos: number,
): { direction: AlignDirection; consumed: number } | null {
  const head = ctx.tokens[pos];
  if (!head) return null;

  const next = ctx.tokens[pos + 1];

  // Single-symbol forms: one symbol token immediately followed by `]]`.
  if (next?.type === "BLOCK_CLOSE") {
    const isAngleCarrier = head.type === "BLOCKQUOTE_MARKER" || head.type === "TEXT";
    if (isAngleCarrier && head.value === ">") {
      return { direction: "right", consumed: 2 };
    }
    if (head.type === "TEXT" && head.value === "<") {
      return { direction: "left", consumed: 2 };
    }
    if (head.type === "EQUALS") {
      return { direction: "center", consumed: 2 };
    }
    return null;
  }

  // Two-symbol form: `==` arrives as two EQUALS tokens before `]]`.
  const isJustify =
    head.type === "EQUALS" &&
    next?.type === "EQUALS" &&
    ctx.tokens[pos + 2]?.type === "BLOCK_CLOSE";

  return isJustify ? { direction: "justify", consumed: 3 } : null;
}
108
+
109
/**
 * Checks whether the tokens at `ctx.pos` spell the closing align tag for
 * `direction`: `[[/>]]`, `[[/<]]`, `[[/=]]`, or `[[/==]]`.
 *
 * A closing tag is a BLOCK_END_OPEN token, then the direction's symbol
 * token(s), then BLOCK_CLOSE. Only the pattern belonging to `direction` is
 * tested, so a closing tag of a different direction does not match.
 *
 * @param ctx - Current parse context; reads `ctx.tokens` from `ctx.pos`.
 * @param direction - Direction of the currently open align block.
 * @returns `match` tells whether the closing tag is present; `consumed` is
 *   its token count (3, or 4 for justify), and `0` when there is no match.
 */
function isAlignClose(
  ctx: ParseContext,
  direction: AlignDirection,
): { match: boolean; consumed: number } {
  const { tokens } = ctx;
  const start = ctx.pos;

  if (tokens[start]?.type !== "BLOCK_END_OPEN") {
    return { match: false, consumed: 0 };
  }

  const symbol = tokens[start + 1];
  const afterSymbol = tokens[start + 2];
  let width = 0;

  switch (direction) {
    case "right":
      if (
        (symbol?.type === "BLOCKQUOTE_MARKER" || symbol?.type === "TEXT") &&
        symbol?.value === ">" &&
        afterSymbol?.type === "BLOCK_CLOSE"
      ) {
        width = 3;
      }
      break;
    case "left":
      if (symbol?.type === "TEXT" && symbol?.value === "<" && afterSymbol?.type === "BLOCK_CLOSE") {
        width = 3;
      }
      break;
    case "center":
      if (symbol?.type === "EQUALS" && afterSymbol?.type === "BLOCK_CLOSE") {
        width = 3;
      }
      break;
    case "justify":
      if (
        symbol?.type === "EQUALS" &&
        afterSymbol?.type === "EQUALS" &&
        tokens[start + 3]?.type === "BLOCK_CLOSE"
      ) {
        width = 4;
      }
      break;
  }

  return width > 0 ? { match: true, consumed: width } : { match: false, consumed: 0 };
}
177
+
178
/**
 * Block rule for Wikidot directional alignment containers
 * (`[[>]]`, `[[<]]`, `[[=]]`, `[[==]]` and their `[[/…]]` closers).
 *
 * `parse` proceeds as follows:
 * 1. The current token must be BLOCK_OPEN.
 * 2. `parseAlignOpen()` identifies the direction and the width of the
 *    opening tag interior.
 * 3. A NEWLINE must directly follow the opening tag; otherwise the rule fails.
 * 4. Body blocks are parsed with `parseBlocksUntil()`, which is given
 *    `isAlignClose()` for the same direction as its stop condition.
 * 5. If the matching closing tag is present it is consumed together with one
 *    optional trailing NEWLINE. If it is absent, an `unclosed-block` warning
 *    is pushed to `ctx.diagnostics` and the rule still succeeds.
 * 6. A `container` element with `type: { align: direction }` is returned.
 *
 * `preservesPrecedingLineBreak` is set to `true`; per the original author's
 * note, an alignment block does not suppress a preceding newline from
 * becoming a `<br />` in Wikidot's output.
 */
export const alignRule: BlockRule = {
  name: "align",
  startTokens: ["BLOCK_OPEN"],
  requiresLineStart: true,
  preservesPrecedingLineBreak: true,

  isStartPattern(ctx: ParseContext, pos: number): boolean {
    return ctx.tokens[pos]?.type === "BLOCK_OPEN" && parseAlignOpen(ctx, pos + 1) !== null;
  },

  parse(ctx: ParseContext): RuleResult<Element> {
    const opener = currentToken(ctx);
    if (opener.type !== "BLOCK_OPEN") {
      return { success: false };
    }

    // One cursor tracks the absolute token index; the consumed count is
    // derived from it at the end (cursor - start).
    const start = ctx.pos;
    let cursor = start + 1;

    const opening = parseAlignOpen(ctx, cursor);
    if (!opening) {
      return { success: false };
    }
    const { direction } = opening;
    cursor += opening.consumed;

    // The opening tag must be the last thing on its line.
    if (ctx.tokens[cursor]?.type !== "NEWLINE") {
      return { success: false };
    }
    cursor += 1;

    // Body: parse nested blocks until the matching closer is reached.
    const stopAtCloser = (checkCtx: ParseContext): boolean =>
      isAlignClose(checkCtx, direction).match;
    const body = parseBlocksUntil({ ...ctx, pos: cursor }, stopAtCloser);
    cursor += body.consumed;

    const closer = isAlignClose({ ...ctx, pos: cursor }, direction);
    if (closer.match) {
      cursor += closer.consumed;

      // Swallow a single newline that follows the closing tag.
      if (ctx.tokens[cursor]?.type === "NEWLINE") {
        cursor += 1;
      }
    } else {
      // No closer: report it, but keep the parsed body.
      const directionSymbol = { left: "<", right: ">", center: "=", justify: "==" }[direction];
      ctx.diagnostics.push({
        severity: "warning",
        code: "unclosed-block",
        message: `Missing closing tag [[/${directionSymbol}]] for [[${directionSymbol}]]`,
        position: opener.position,
      });
    }

    return {
      success: true,
      elements: [
        {
          element: "container",
          data: {
            type: { align: direction },
            attributes: {},
            elements: body.elements,
          },
        },
      ],
      consumed: cursor - start,
    };
  },
};