@wdprlib/parser 3.1.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/dist/index.cjs +295 -118
  2. package/dist/index.js +272 -95
  3. package/package.json +5 -3
  4. package/src/index.ts +163 -0
  5. package/src/lexer/index.ts +20 -0
  6. package/src/lexer/lexer.ts +687 -0
  7. package/src/lexer/tokens.ts +141 -0
  8. package/src/parser/constants.ts +173 -0
  9. package/src/parser/depth.ts +251 -0
  10. package/src/parser/index.ts +18 -0
  11. package/src/parser/parse.ts +315 -0
  12. package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
  13. package/src/parser/postprocess/index.ts +15 -0
  14. package/src/parser/postprocess/spanStrip.ts +697 -0
  15. package/src/parser/preprocess/expr.ts +265 -0
  16. package/src/parser/preprocess/index.ts +38 -0
  17. package/src/parser/preprocess/typography.ts +67 -0
  18. package/src/parser/preprocess/utils.ts +250 -0
  19. package/src/parser/preprocess/whitespace.ts +111 -0
  20. package/src/parser/rules/block/align.ts +282 -0
  21. package/src/parser/rules/block/bibliography.ts +359 -0
  22. package/src/parser/rules/block/block-list.ts +689 -0
  23. package/src/parser/rules/block/blockquote.ts +238 -0
  24. package/src/parser/rules/block/center.ts +87 -0
  25. package/src/parser/rules/block/clear-float.ts +75 -0
  26. package/src/parser/rules/block/code.ts +187 -0
  27. package/src/parser/rules/block/collapsible.ts +337 -0
  28. package/src/parser/rules/block/comment.ts +73 -0
  29. package/src/parser/rules/block/content-separator.ts +79 -0
  30. package/src/parser/rules/block/definition-list.ts +270 -0
  31. package/src/parser/rules/block/div.ts +400 -0
  32. package/src/parser/rules/block/embed-block.ts +153 -0
  33. package/src/parser/rules/block/footnoteblock.ts +200 -0
  34. package/src/parser/rules/block/heading.ts +142 -0
  35. package/src/parser/rules/block/horizontal-rule.ts +61 -0
  36. package/src/parser/rules/block/html.ts +222 -0
  37. package/src/parser/rules/block/iframe.ts +239 -0
  38. package/src/parser/rules/block/iftags.ts +150 -0
  39. package/src/parser/rules/block/include.ts +179 -0
  40. package/src/parser/rules/block/index.ts +127 -0
  41. package/src/parser/rules/block/list.ts +244 -0
  42. package/src/parser/rules/block/math.ts +183 -0
  43. package/src/parser/rules/block/module/backlinks/index.ts +31 -0
  44. package/src/parser/rules/block/module/backlinks/types.ts +21 -0
  45. package/src/parser/rules/block/module/categories/index.ts +34 -0
  46. package/src/parser/rules/block/module/categories/types.ts +21 -0
  47. package/src/parser/rules/block/module/css/index.ts +37 -0
  48. package/src/parser/rules/block/module/iftags/condition.ts +109 -0
  49. package/src/parser/rules/block/module/iftags/index.ts +26 -0
  50. package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
  51. package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
  52. package/src/parser/rules/block/module/iftags/types.ts +63 -0
  53. package/src/parser/rules/block/module/include/index.ts +20 -0
  54. package/src/parser/rules/block/module/include/resolve.ts +556 -0
  55. package/src/parser/rules/block/module/index.ts +122 -0
  56. package/src/parser/rules/block/module/join/index.ts +34 -0
  57. package/src/parser/rules/block/module/join/types.ts +23 -0
  58. package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
  59. package/src/parser/rules/block/module/listpages/extract.ts +410 -0
  60. package/src/parser/rules/block/module/listpages/index.ts +83 -0
  61. package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
  62. package/src/parser/rules/block/module/listpages/parser.ts +106 -0
  63. package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
  64. package/src/parser/rules/block/module/listpages/types.ts +513 -0
  65. package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
  66. package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
  67. package/src/parser/rules/block/module/listusers/extract.ts +45 -0
  68. package/src/parser/rules/block/module/listusers/index.ts +36 -0
  69. package/src/parser/rules/block/module/listusers/parser.ts +54 -0
  70. package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
  71. package/src/parser/rules/block/module/listusers/types.ts +93 -0
  72. package/src/parser/rules/block/module/mapping.ts +61 -0
  73. package/src/parser/rules/block/module/page-tree/index.ts +38 -0
  74. package/src/parser/rules/block/module/page-tree/types.ts +29 -0
  75. package/src/parser/rules/block/module/rate/index.ts +28 -0
  76. package/src/parser/rules/block/module/rate/types.ts +19 -0
  77. package/src/parser/rules/block/module/resolve.ts +411 -0
  78. package/src/parser/rules/block/module/types-common.ts +59 -0
  79. package/src/parser/rules/block/module/types.ts +61 -0
  80. package/src/parser/rules/block/module/utils.ts +43 -0
  81. package/src/parser/rules/block/module/walk.ts +380 -0
  82. package/src/parser/rules/block/module.ts +164 -0
  83. package/src/parser/rules/block/orphan-li.ts +177 -0
  84. package/src/parser/rules/block/paragraph.ts +157 -0
  85. package/src/parser/rules/block/table-block.ts +726 -0
  86. package/src/parser/rules/block/table.ts +441 -0
  87. package/src/parser/rules/block/tabview.ts +331 -0
  88. package/src/parser/rules/block/toc.ts +129 -0
  89. package/src/parser/rules/block/utils.ts +615 -0
  90. package/src/parser/rules/index.ts +49 -0
  91. package/src/parser/rules/inline/anchor-name.ts +154 -0
  92. package/src/parser/rules/inline/anchor.ts +327 -0
  93. package/src/parser/rules/inline/bibcite.ts +153 -0
  94. package/src/parser/rules/inline/bold.ts +86 -0
  95. package/src/parser/rules/inline/color.ts +140 -0
  96. package/src/parser/rules/inline/comment.ts +90 -0
  97. package/src/parser/rules/inline/equation-ref.ts +115 -0
  98. package/src/parser/rules/inline/expr.ts +526 -0
  99. package/src/parser/rules/inline/footnote.ts +223 -0
  100. package/src/parser/rules/inline/guillemet.ts +64 -0
  101. package/src/parser/rules/inline/html.ts +132 -0
  102. package/src/parser/rules/inline/image.ts +328 -0
  103. package/src/parser/rules/inline/index.ts +150 -0
  104. package/src/parser/rules/inline/italic.ts +74 -0
  105. package/src/parser/rules/inline/line-break.ts +326 -0
  106. package/src/parser/rules/inline/link-anchor.ts +147 -0
  107. package/src/parser/rules/inline/link-single.ts +164 -0
  108. package/src/parser/rules/inline/link-star.ts +134 -0
  109. package/src/parser/rules/inline/link-triple.ts +267 -0
  110. package/src/parser/rules/inline/math-inline.ts +126 -0
  111. package/src/parser/rules/inline/monospace.ts +78 -0
  112. package/src/parser/rules/inline/raw.ts +262 -0
  113. package/src/parser/rules/inline/size.ts +244 -0
  114. package/src/parser/rules/inline/span.ts +424 -0
  115. package/src/parser/rules/inline/strikethrough.ts +115 -0
  116. package/src/parser/rules/inline/subscript.ts +84 -0
  117. package/src/parser/rules/inline/superscript.ts +84 -0
  118. package/src/parser/rules/inline/text.ts +84 -0
  119. package/src/parser/rules/inline/underline.ts +127 -0
  120. package/src/parser/rules/inline/user.ts +147 -0
  121. package/src/parser/rules/inline/utils.ts +344 -0
  122. package/src/parser/rules/types.ts +252 -0
  123. package/src/parser/rules/utils.ts +155 -0
  124. package/src/parser/toc.ts +130 -0
@@ -0,0 +1,359 @@
1
+ /**
2
+ *
3
+ * Block rule for the Wikidot bibliography block: `[[bibliography]] ... [[/bibliography]]`.
4
+ *
5
+ * A bibliography block holds labelled citation entries in a definition-list
6
+ * format. Each entry follows the pattern:
7
+ *
8
+ * ```
9
+ * : label : Citation description text
10
+ * ```
11
+ *
12
+ * At render time the entries are cross-referenced with inline `((bibcite label))`
13
+ * markers that appear elsewhere in the document. The parser stores the entries
14
+ * in the AST as a `bibliography-block` element whose `entries` field is an
15
+ * array of {@link DefinitionListItem} objects.
16
+ *
17
+ * Optional attributes on the opening tag:
18
+ * - `title` -- custom heading for the bibliography section.
19
+ * - `hide` -- when `"true"` or empty string, hides the block from output.
20
+ *
21
+ * If no closing `[[/bibliography]]` tag is found, the rule fails to avoid
22
+ * accidentally consuming the rest of the document.
23
+ *
24
+ * @module
25
+ */
26
+ import type { Element, DefinitionListItem } from "@wdprlib/ast";
27
+ import type { BlockRule, ParseContext, RuleResult } from "../types";
28
+ import { currentToken } from "../types";
29
+ import { parseBlockName, parseAttributes } from "./utils";
30
+ import { parseInlineUntil } from "../inline/utils";
31
+
/**
 * One citation parsed from a `: label : content` entry inside a
 * bibliography block. Used only within this module before the entries are
 * converted to definition-list items for the AST.
 */
interface BibliographyEntry {
  /** Trimmed label text; the identifier that `((bibcite label))` refers to. */
  label: string;
  /** The label tokens as `text` elements (trailing blank nodes removed). */
  key: Element[];
  /** Inline elements that make up the citation text. */
  content: Element[];
}
44
+
/**
 * Parses a single bibliography entry of the form
 *
 * ```
 * : label : Citation text possibly spanning lines
 * ```
 *
 * starting at `startPos`.
 *
 * Grammar, as implemented:
 * 1. A COLON token flagged `lineStart`.
 * 2. At least one WHITESPACE token (further whitespace is skipped).
 * 3. Label tokens up to the next COLON on the same line. A NEWLINE or EOF
 *    before that COLON makes the entry invalid.
 * 4. Optional whitespace, then the citation content.
 *
 * Content ends at EOF, at a `[[/bibliography` closing tag, or at a NEWLINE
 * that is followed by a line-start COLON, a BLOCK_END_OPEN token, another
 * NEWLINE, or the end of input. In the NEWLINE cases that newline is consumed
 * as part of the entry. Any other single NEWLINE becomes a `line-break`
 * element and parsing continues on the next line.
 *
 * @param ctx - Current parse context; only `ctx.tokens` is read directly.
 * @param startPos - Token index where the entry begins (expected COLON).
 * @returns The parsed entry together with the number of tokens consumed
 *   (counted from `startPos`), or `null` if the tokens do not form an entry.
 */
function parseBibliographyEntry(
  ctx: ParseContext,
  startPos: number,
): { entry: BibliographyEntry; consumed: number } | null {
  const tokens = ctx.tokens;
  let pos = startPos;

  // The entry must open with a COLON sitting at the start of a line.
  const colonToken = tokens[pos];
  if (!colonToken || colonToken.type !== "COLON" || !colonToken.lineStart) {
    return null;
  }
  pos++;

  // Whitespace after the first colon is mandatory; extra whitespace is skipped.
  if (tokens[pos]?.type !== "WHITESPACE") {
    return null;
  }
  while (tokens[pos]?.type === "WHITESPACE") {
    pos++;
  }

  // Collect the label: every token value up to the second COLON on this line.
  let label = "";
  let foundSecondColon = false;
  const keyNodes: Element[] = [];

  while (pos < tokens.length) {
    const token = tokens[pos];
    if (!token || token.type === "NEWLINE" || token.type === "EOF") {
      break;
    }
    pos++;
    if (token.type === "COLON") {
      foundSecondColon = true;
      break;
    }
    // The key is kept as plain text nodes; it is not parsed as inline markup.
    label += token.value;
    keyNodes.push({ element: "text", data: token.value });
  }

  if (!foundSecondColon) {
    return null;
  }

  label = label.trim();

  // Skip whitespace between the second colon and the citation text.
  while (tokens[pos]?.type === "WHITESPACE") {
    pos++;
  }

  const contentNodes: Element[] = [];
  while (pos < tokens.length) {
    const token = tokens[pos];
    if (!token || token.type === "EOF") {
      break;
    }

    // Stop in front of `[[/bibliography`; the block rule consumes the tag.
    if (token.type === "BLOCK_END_OPEN") {
      const closeNameResult = parseBlockName(ctx, pos + 1);
      if (closeNameResult?.name === "bibliography") {
        break;
      }
    }

    if (token.type === "NEWLINE") {
      const nextToken = tokens[pos + 1];
      // NOTE(review): any BLOCK_END_OPEN after the newline ends the entry, not
      // only `[[/bibliography`; confirm that this is intended.
      const endsEntry =
        !nextToken ||
        nextToken.type === "EOF" ||
        nextToken.type === "NEWLINE" ||
        nextToken.type === "BLOCK_END_OPEN" ||
        (nextToken.type === "COLON" && nextToken.lineStart);

      // The newline itself is consumed in both cases.
      pos++;
      if (endsEntry) {
        break;
      }
      // A lone newline inside the citation is kept as a line break.
      contentNodes.push({ element: "line-break" });
      continue;
    }

    // Hand the rest of the line to the inline parser.
    const result = parseInlineUntil({ ...ctx, pos }, "NEWLINE");
    contentNodes.push(...result.elements);
    // Always honour the inline parser's consumed count, even when it produced
    // no elements, so tokens it already swallowed are not parsed a second
    // time. Advance by at least one token so the loop is guaranteed to
    // terminate when nothing was consumed.
    pos += Math.max(result.consumed, 1);
  }

  // Drop trailing whitespace-only text nodes from the key.
  while (keyNodes.length > 0) {
    const lastNode = keyNodes[keyNodes.length - 1];
    const isBlankText =
      lastNode !== undefined &&
      lastNode.element === "text" &&
      typeof lastNode.data === "string" &&
      lastNode.data.trim() === "";
    if (!isBlankText) {
      break;
    }
    keyNodes.pop();
  }

  return {
    entry: {
      label,
      key: keyNodes,
      content: contentNodes,
    },
    consumed: pos - startPos,
  };
}
208
+
/**
 * Block rule that recognises `[[bibliography]] ... [[/bibliography]]`.
 *
 * Steps performed by `parse`:
 * 1. Require BLOCK_OPEN followed by the block name "bibliography".
 * 2. Read the opening tag's attributes, then require the closing `]]` and
 *    skip one following newline if present.
 * 3. Walk the body: each line-start COLON is handed to
 *    `parseBibliographyEntry()`; every other token (whitespace, newlines,
 *    anything unrecognised, or a COLON that fails to parse) is skipped.
 * 4. Stop at `[[/bibliography`, consuming the tag, optional whitespace and
 *    the closing `]]` when it is there.
 * 5. Without a closing tag, record an `unclosed-block` warning and fail so
 *    the rest of the document is not swallowed.
 * 6. Otherwise emit one `bibliography-block` element whose entries are
 *    stored as {@link DefinitionListItem} objects.
 */
export const bibliographyRule: BlockRule = {
  name: "bibliography",
  startTokens: ["BLOCK_OPEN"],
  requiresLineStart: false,

  parse(ctx: ParseContext): RuleResult<Element> {
    const openToken = currentToken(ctx);
    if (openToken.type !== "BLOCK_OPEN") {
      return { success: false };
    }

    const tokens = ctx.tokens;
    // Single cursor; the consumed count is derived from it at the end.
    let cursor = ctx.pos + 1;

    // `[[bibliography`
    const openName = parseBlockName(ctx, cursor);
    if (!openName || openName.name !== "bibliography") {
      return { success: false };
    }
    cursor += openName.consumed;

    // Optional attributes (title, hide)
    const attrResult = parseAttributes(ctx, cursor);
    cursor += attrResult.consumed;

    // `]]` closing the opening tag
    if (tokens[cursor]?.type !== "BLOCK_CLOSE") {
      return { success: false };
    }
    cursor++;

    // One newline directly after the opening tag is not part of the body.
    if (tokens[cursor]?.type === "NEWLINE") {
      cursor++;
    }

    const entries: BibliographyEntry[] = [];
    let closed = false;

    for (
      let token = tokens[cursor];
      token !== undefined && token.type !== "EOF";
      token = tokens[cursor]
    ) {
      if (token.type === "BLOCK_END_OPEN") {
        const closeName = parseBlockName(ctx, cursor + 1);
        if (closeName?.name === "bibliography") {
          closed = true;
          // Consume `[[/` plus the name, then optional whitespace and `]]`.
          cursor += 1 + closeName.consumed;
          while (tokens[cursor]?.type === "WHITESPACE") {
            cursor++;
          }
          if (tokens[cursor]?.type === "BLOCK_CLOSE") {
            cursor++;
          }
          break;
        }
      }

      if (token.type === "COLON" && token.lineStart) {
        const parsed = parseBibliographyEntry(ctx, cursor);
        if (parsed) {
          entries.push(parsed.entry);
          cursor += parsed.consumed;
          continue;
        }
      }

      // Whitespace, newlines and anything unrecognised: skip one token.
      cursor++;
    }

    // A missing closing tag fails the rule instead of eating the document.
    if (!closed) {
      ctx.diagnostics.push({
        severity: "warning",
        code: "unclosed-block",
        message: "Missing closing tag [[/bibliography]] for [[bibliography]]",
        position: openToken.position,
      });
      return { success: false };
    }

    // Entries are stored in the AST in definition-list item shape.
    const definitionItems: DefinitionListItem[] = [];
    for (const { label, key, content } of entries) {
      definitionItems.push({ key_string: label, key, value: content });
    }

    const rawTitle = attrResult.attrs.title;
    const rawHide = attrResult.attrs.hide;

    return {
      success: true,
      elements: [
        {
          element: "bibliography-block",
          data: {
            entries: definitionItems,
            title: typeof rawTitle === "string" ? rawTitle : null,
            hide: rawHide === "true" || rawHide === "",
          },
        },
      ],
      consumed: cursor - ctx.pos,
    };
  },
};