@wdprlib/parser 3.1.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/dist/index.cjs +295 -118
  2. package/dist/index.js +272 -95
  3. package/package.json +5 -3
  4. package/src/index.ts +163 -0
  5. package/src/lexer/index.ts +20 -0
  6. package/src/lexer/lexer.ts +687 -0
  7. package/src/lexer/tokens.ts +141 -0
  8. package/src/parser/constants.ts +173 -0
  9. package/src/parser/depth.ts +251 -0
  10. package/src/parser/index.ts +18 -0
  11. package/src/parser/parse.ts +315 -0
  12. package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
  13. package/src/parser/postprocess/index.ts +15 -0
  14. package/src/parser/postprocess/spanStrip.ts +697 -0
  15. package/src/parser/preprocess/expr.ts +265 -0
  16. package/src/parser/preprocess/index.ts +38 -0
  17. package/src/parser/preprocess/typography.ts +67 -0
  18. package/src/parser/preprocess/utils.ts +250 -0
  19. package/src/parser/preprocess/whitespace.ts +111 -0
  20. package/src/parser/rules/block/align.ts +282 -0
  21. package/src/parser/rules/block/bibliography.ts +359 -0
  22. package/src/parser/rules/block/block-list.ts +689 -0
  23. package/src/parser/rules/block/blockquote.ts +238 -0
  24. package/src/parser/rules/block/center.ts +87 -0
  25. package/src/parser/rules/block/clear-float.ts +75 -0
  26. package/src/parser/rules/block/code.ts +187 -0
  27. package/src/parser/rules/block/collapsible.ts +337 -0
  28. package/src/parser/rules/block/comment.ts +73 -0
  29. package/src/parser/rules/block/content-separator.ts +79 -0
  30. package/src/parser/rules/block/definition-list.ts +270 -0
  31. package/src/parser/rules/block/div.ts +400 -0
  32. package/src/parser/rules/block/embed-block.ts +153 -0
  33. package/src/parser/rules/block/footnoteblock.ts +200 -0
  34. package/src/parser/rules/block/heading.ts +142 -0
  35. package/src/parser/rules/block/horizontal-rule.ts +61 -0
  36. package/src/parser/rules/block/html.ts +222 -0
  37. package/src/parser/rules/block/iframe.ts +239 -0
  38. package/src/parser/rules/block/iftags.ts +150 -0
  39. package/src/parser/rules/block/include.ts +179 -0
  40. package/src/parser/rules/block/index.ts +127 -0
  41. package/src/parser/rules/block/list.ts +244 -0
  42. package/src/parser/rules/block/math.ts +183 -0
  43. package/src/parser/rules/block/module/backlinks/index.ts +31 -0
  44. package/src/parser/rules/block/module/backlinks/types.ts +21 -0
  45. package/src/parser/rules/block/module/categories/index.ts +34 -0
  46. package/src/parser/rules/block/module/categories/types.ts +21 -0
  47. package/src/parser/rules/block/module/css/index.ts +37 -0
  48. package/src/parser/rules/block/module/iftags/condition.ts +109 -0
  49. package/src/parser/rules/block/module/iftags/index.ts +26 -0
  50. package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
  51. package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
  52. package/src/parser/rules/block/module/iftags/types.ts +63 -0
  53. package/src/parser/rules/block/module/include/index.ts +20 -0
  54. package/src/parser/rules/block/module/include/resolve.ts +556 -0
  55. package/src/parser/rules/block/module/index.ts +122 -0
  56. package/src/parser/rules/block/module/join/index.ts +34 -0
  57. package/src/parser/rules/block/module/join/types.ts +23 -0
  58. package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
  59. package/src/parser/rules/block/module/listpages/extract.ts +410 -0
  60. package/src/parser/rules/block/module/listpages/index.ts +83 -0
  61. package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
  62. package/src/parser/rules/block/module/listpages/parser.ts +106 -0
  63. package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
  64. package/src/parser/rules/block/module/listpages/types.ts +513 -0
  65. package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
  66. package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
  67. package/src/parser/rules/block/module/listusers/extract.ts +45 -0
  68. package/src/parser/rules/block/module/listusers/index.ts +36 -0
  69. package/src/parser/rules/block/module/listusers/parser.ts +54 -0
  70. package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
  71. package/src/parser/rules/block/module/listusers/types.ts +93 -0
  72. package/src/parser/rules/block/module/mapping.ts +61 -0
  73. package/src/parser/rules/block/module/page-tree/index.ts +38 -0
  74. package/src/parser/rules/block/module/page-tree/types.ts +29 -0
  75. package/src/parser/rules/block/module/rate/index.ts +28 -0
  76. package/src/parser/rules/block/module/rate/types.ts +19 -0
  77. package/src/parser/rules/block/module/resolve.ts +411 -0
  78. package/src/parser/rules/block/module/types-common.ts +59 -0
  79. package/src/parser/rules/block/module/types.ts +61 -0
  80. package/src/parser/rules/block/module/utils.ts +43 -0
  81. package/src/parser/rules/block/module/walk.ts +380 -0
  82. package/src/parser/rules/block/module.ts +164 -0
  83. package/src/parser/rules/block/orphan-li.ts +177 -0
  84. package/src/parser/rules/block/paragraph.ts +157 -0
  85. package/src/parser/rules/block/table-block.ts +726 -0
  86. package/src/parser/rules/block/table.ts +441 -0
  87. package/src/parser/rules/block/tabview.ts +331 -0
  88. package/src/parser/rules/block/toc.ts +129 -0
  89. package/src/parser/rules/block/utils.ts +615 -0
  90. package/src/parser/rules/index.ts +49 -0
  91. package/src/parser/rules/inline/anchor-name.ts +154 -0
  92. package/src/parser/rules/inline/anchor.ts +327 -0
  93. package/src/parser/rules/inline/bibcite.ts +153 -0
  94. package/src/parser/rules/inline/bold.ts +86 -0
  95. package/src/parser/rules/inline/color.ts +140 -0
  96. package/src/parser/rules/inline/comment.ts +90 -0
  97. package/src/parser/rules/inline/equation-ref.ts +115 -0
  98. package/src/parser/rules/inline/expr.ts +526 -0
  99. package/src/parser/rules/inline/footnote.ts +223 -0
  100. package/src/parser/rules/inline/guillemet.ts +64 -0
  101. package/src/parser/rules/inline/html.ts +132 -0
  102. package/src/parser/rules/inline/image.ts +328 -0
  103. package/src/parser/rules/inline/index.ts +150 -0
  104. package/src/parser/rules/inline/italic.ts +74 -0
  105. package/src/parser/rules/inline/line-break.ts +326 -0
  106. package/src/parser/rules/inline/link-anchor.ts +147 -0
  107. package/src/parser/rules/inline/link-single.ts +164 -0
  108. package/src/parser/rules/inline/link-star.ts +134 -0
  109. package/src/parser/rules/inline/link-triple.ts +267 -0
  110. package/src/parser/rules/inline/math-inline.ts +126 -0
  111. package/src/parser/rules/inline/monospace.ts +78 -0
  112. package/src/parser/rules/inline/raw.ts +262 -0
  113. package/src/parser/rules/inline/size.ts +244 -0
  114. package/src/parser/rules/inline/span.ts +424 -0
  115. package/src/parser/rules/inline/strikethrough.ts +115 -0
  116. package/src/parser/rules/inline/subscript.ts +84 -0
  117. package/src/parser/rules/inline/superscript.ts +84 -0
  118. package/src/parser/rules/inline/text.ts +84 -0
  119. package/src/parser/rules/inline/underline.ts +127 -0
  120. package/src/parser/rules/inline/user.ts +147 -0
  121. package/src/parser/rules/inline/utils.ts +344 -0
  122. package/src/parser/rules/types.ts +252 -0
  123. package/src/parser/rules/utils.ts +155 -0
  124. package/src/parser/toc.ts +130 -0
@@ -0,0 +1,154 @@
1
+ /**
2
+ *
3
+ * Parses the Wikidot named anchor syntax: `[[# name]]`.
4
+ *
5
+ * A named anchor creates an invisible anchor target (`<a id="name">`)
6
+ * that can be referenced by page-internal links such as `[#name Label]`
7
+ * or triple-bracket anchor links like `[[[#name]]]`.
8
+ *
9
+ * The anchor name must consist exclusively of the characters
10
+ * `[-_A-Za-z0-9.%]` (matching the original Wikidot regex
11
+ * `/(\[\[# )([-_A-Za-z0-9.%]+?)(\]\])/i`).
12
+ *
13
+ * A whitespace gap is required between the `#` and the name
14
+ * (`[[# myAnchor]]` is valid; `[[#myAnchor]]` is not).
15
+ *
16
+ * Produces an `"anchor-name"` AST element whose `data` field contains
17
+ * the raw anchor name string.
18
+ *
19
+ * @module
20
+ */
21
+ import type { Element } from "@wdprlib/ast";
22
+ import type { InlineRule, ParseContext, RuleResult } from "../types";
23
+ import { currentToken } from "../types";
24
+
25
/**
 * Reports whether `char` is exactly one character permitted in an anchor name.
 *
 * The accepted set is ASCII letters, ASCII digits, `-`, `_`, `.` and `%`.
 * The pattern is anchored at both ends, so an empty string or a string of
 * more than one character is rejected as well.
 *
 * @param char - The candidate character
 * @returns `true` when `char` is a single allowed character, otherwise `false`
 */
function isValidAnchorChar(char: string): boolean {
  const allowedChar = /^[-_A-Za-z0-9.%]$/;
  return allowedChar.test(char);
}
37
+
38
/**
 * Inline rule that recognises a named anchor target written as `[[# name]]`.
 *
 * The rule starts on a `BLOCK_OPEN` token and walks the token stream with a
 * single cursor:
 *   1. `[[`, followed by any number of whitespace tokens
 *   2. a hash marker, either a `HASH` token or a `TEXT` token whose value is `"#"`
 *   3. one or more whitespace tokens
 *   4. a run of tokens whose values consist solely of valid anchor characters
 *   5. the closing `]]` (`BLOCK_CLOSE`) immediately after the name
 *
 * The rule fails when any step is not satisfied, including an empty name.
 */
export const anchorNameRule: InlineRule = {
  name: "anchorName",
  startTokens: ["BLOCK_OPEN"],

  /**
   * Tries to read a `[[# name]]` anchor beginning at `ctx.pos`.
   *
   * @param ctx - Parse context holding the token stream and the current position
   * @returns On success, one `"anchor-name"` element whose `data` is the collected
   *          name plus the number of tokens consumed; otherwise `{ success: false }`
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    if (currentToken(ctx).type !== "BLOCK_OPEN") {
      return { success: false };
    }

    // `cursor` always points at the next unread token; the consumed count is
    // derived from it at the end instead of being tracked separately.
    let cursor = ctx.pos + 1;

    // Whitespace between `[[` and the hash marker is tolerated.
    while (ctx.tokens[cursor]?.type === "WHITESPACE") {
      cursor++;
    }

    // The hash marker may arrive as a dedicated HASH token or as plain TEXT "#".
    const marker = ctx.tokens[cursor];
    const isHashMarker =
      !!marker && (marker.type === "HASH" || (marker.type === "TEXT" && marker.value === "#"));
    if (!isHashMarker) {
      return { success: false };
    }
    cursor++;

    // At least one whitespace token must separate `#` from the name.
    if (ctx.tokens[cursor]?.type !== "WHITESPACE") {
      return { success: false };
    }
    while (ctx.tokens[cursor]?.type === "WHITESPACE") {
      cursor++;
    }

    // Gather the name: stop at a terminator token or at the first token that
    // contains a character outside the allowed set.
    let name = "";
    for (; cursor < ctx.tokens.length; cursor++) {
      const part = ctx.tokens[cursor];
      if (!part) {
        break;
      }
      const isTerminator =
        part.type === "BLOCK_CLOSE" || part.type === "NEWLINE" || part.type === "EOF";
      if (isTerminator) {
        break;
      }
      if (![...part.value].every((ch) => isValidAnchorChar(ch))) {
        break;
      }
      name += part.value;
    }

    // An anchor without a name is not an anchor.
    if (!name) {
      return { success: false };
    }

    // The name must be followed directly by `]]`.
    if (ctx.tokens[cursor]?.type !== "BLOCK_CLOSE") {
      return { success: false };
    }
    cursor++;

    return {
      success: true,
      elements: [{ element: "anchor-name", data: name }],
      consumed: cursor - ctx.pos,
    };
  },
};
@@ -0,0 +1,327 @@
1
+ /**
2
+ *
3
+ * Parses the Wikidot anchor inline block syntax: `[[a]]...[[/a]]`.
4
+ *
5
+ * An anchor wraps inline content in an HTML `<a>` element, allowing
6
+ * href, target, and other HTML attributes to be specified.
7
+ *
8
+ * Wikidot syntax variants:
9
+ * - `[[a href="url"]]text[[/a]]` -- basic anchor with href
10
+ * - `[[a_ href="url"]]text[[/a]]` -- paragraph strip mode (trailing underscore)
11
+ *
12
+ * Paragraph strip mode (`[[a_]]`) suppresses newlines within the anchor
13
+ * body and strips at most one trailing newline after the closing tag
14
+ * (preserving double newlines as paragraph breaks). This prevents
15
+ * unwanted `<br>` elements when consecutive anchor blocks are placed on
16
+ * separate lines.
17
+ *
18
+ * The `target` attribute is extracted and mapped to a semantic enum value
19
+ * (`"new-tab"`, `"parent"`, `"top"`, `"same"`), while the remaining
20
+ * attributes (including `href`) are passed through after URL sanitization.
21
+ *
22
+ * @module
23
+ */
24
+ import type { Element } from "@wdprlib/ast";
25
+ import type { InlineRule, ParseContext, RuleResult } from "../types";
26
+ import { currentToken } from "../types";
27
+ import { inlineRules } from "../index";
28
+ import { sanitizeUrl as braintreeSanitizeUrl } from "@braintree/sanitize-url";
29
+ import { parseAttributes } from "../block/utils";
30
+ import { canApplyInlineRule } from "./utils";
31
+
32
/**
 * Rejects URLs that use a dangerous URI scheme and passes safe ones through
 * unchanged.
 *
 * Two checks run in order:
 *   1. A lowercased copy of the URL with all whitespace and C0 control
 *      characters removed is tested for a `javascript:`, `data:` or
 *      `vbscript:` prefix. Stripping first defeats evasions such as
 *      `"java script:"`.
 *   2. The untouched URL is handed to `@braintree/sanitize-url`; a result of
 *      `"about:blank"` is treated as a rejection.
 *
 * The value returned for an accepted URL is the caller's original string, not
 * the library's output, so no normalisation leaks into the result.
 *
 * @param url - Raw URL text taken from an attribute
 * @returns `url` itself when accepted, or `"#invalid-url"` when rejected
 */
function sanitizeUrl(url: string): string {
  const rejected = "#invalid-url";

  // Compare against a compacted, case-folded copy so embedded whitespace or
  // control characters cannot hide the scheme.
  const compact = url.replace(/[\s\u0000-\u001f]/g, "").toLowerCase();
  const usesDangerousScheme = ["javascript:", "data:", "vbscript:"].some((scheme) =>
    compact.startsWith(scheme),
  );
  if (usesDangerousScheme) {
    return rejected;
  }

  // Second opinion from the library; only its verdict is used, never its output.
  return braintreeSanitizeUrl(url) === "about:blank" ? rejected : url;
}
68
+
69
/**
 * Reads a block name, with an optional trailing underscore, from the token
 * stream.
 *
 * Leading whitespace tokens are skipped, then a single `TEXT` or `IDENTIFIER`
 * token is taken as the name and lowercased. If the very next token is an
 * `UNDERSCORE`, it is consumed too, `_` is appended to the name and `score`
 * is set. The underscore marks paragraph strip mode for the caller.
 *
 * This helper does not check that the name is `a` or `anchor`; the caller
 * compares the returned name itself.
 *
 * @param ctx - Parse context providing the token stream
 * @param startPos - Index of the first token to inspect
 * @returns The lowercased name (with `_` suffix when present), the `score`
 *          flag, and the number of tokens consumed including skipped
 *          whitespace; `null` when no `TEXT`/`IDENTIFIER` token follows the
 *          whitespace
 */
function parseAnchorBlockName(
  ctx: ParseContext,
  startPos: number,
): { name: string; score: boolean; consumed: number } | null {
  let cursor = startPos;

  // Whitespace in front of the name counts towards `consumed`.
  while (ctx.tokens[cursor]?.type === "WHITESPACE") {
    cursor++;
  }

  const nameToken = ctx.tokens[cursor];
  if (!nameToken) {
    return null;
  }
  if (nameToken.type !== "TEXT" && nameToken.type !== "IDENTIFIER") {
    return null;
  }
  cursor++;

  // A directly following underscore switches on paragraph strip mode.
  const score = ctx.tokens[cursor]?.type === "UNDERSCORE";
  if (score) {
    cursor++;
  }

  const lowered = nameToken.value.toLowerCase();
  return {
    name: score ? `${lowered}_` : lowered,
    score,
    consumed: cursor - startPos,
  };
}
116
+
117
/**
 * Inline rule for `[[a ...]]content[[/a]]` blocks (alias `anchor`, each with
 * an optional `_` suffix).
 *
 * Starting on a `BLOCK_OPEN` token, the rule reads the block name, the
 * attribute list and the closing `]]`, then parses inline content until a
 * closing `[[/a]]` / `[[/anchor]]` tag. The result is a single `"anchor"`
 * element carrying the child elements, a semantic `target` value and the
 * remaining attributes.
 *
 * Behaviour worth knowing:
 * - Without a closing tag the rule pushes an `unclosed-block` warning onto
 *   `ctx.diagnostics` and fails, so other rules may claim the tokens.
 * - With the `_` suffix (paragraph strip mode) newlines inside the body are
 *   dropped instead of becoming `line-break` elements, and a single newline
 *   after the closing tag is swallowed unless it is followed by another
 *   newline.
 * - The `target` attribute is removed from the attribute map and translated
 *   to `"new-tab"`, `"parent"`, `"top"` or `"same"`; any other value yields
 *   `null`.
 * - A non-empty `href` attribute is passed through `sanitizeUrl`.
 */
export const anchorRule: InlineRule = {
  name: "anchor",
  startTokens: ["BLOCK_OPEN"],

  /**
   * Tries to read an anchor block beginning at `ctx.pos`.
   *
   * @param ctx - Parse context holding the token stream and the current position
   * @returns On success, one `"anchor"` element plus the number of tokens
   *          consumed; otherwise `{ success: false }`
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const openToken = currentToken(ctx);
    if (openToken.type !== "BLOCK_OPEN") {
      return { success: false };
    }

    // True when the name, ignoring one trailing underscore, is `a` or `anchor`.
    const isAnchorBlockName = (blockName: string): boolean => {
      const base = blockName.replace(/_$/, "");
      return base === "a" || base === "anchor";
    };

    // `cursor` is the index of the next unread token; the consumed count is
    // derived from it when the rule succeeds.
    let cursor = ctx.pos + 1;

    // Opening tag: name (with optional `_`), attributes, `]]`.
    const opening = parseAnchorBlockName(ctx, cursor);
    if (!opening || !isAnchorBlockName(opening.name)) {
      return { success: false };
    }
    const stripParagraphs = opening.score;
    cursor += opening.consumed;

    const attrResult = parseAttributes(ctx, cursor);
    cursor += attrResult.consumed;

    if (ctx.tokens[cursor]?.type !== "BLOCK_CLOSE") {
      return { success: false };
    }
    cursor++;

    // Body: collect inline children until the closing tag.
    const children: Element[] = [];
    let closed = false;

    while (cursor < ctx.tokens.length) {
      const token = ctx.tokens[cursor];
      if (!token || token.type === "EOF") {
        break;
      }

      if (token.type === "BLOCK_END_OPEN") {
        const closing = parseAnchorBlockName(ctx, cursor + 1);
        if (closing && isAnchorBlockName(closing.name)) {
          // Step over `[[/` and the name.
          cursor += 1 + closing.consumed;
          // NOTE(review): the closing tag is accepted even when `]]` does not
          // follow the name -- confirm this leniency is intended.
          if (ctx.tokens[cursor]?.type === "BLOCK_CLOSE") {
            cursor++;
          }
          closed = true;

          // Paragraph strip mode swallows one newline after the closing tag so
          // consecutive blocks do not get a line-break between them; a double
          // newline is left alone so it still acts as a paragraph break.
          const swallowTrailingNewline =
            stripParagraphs &&
            ctx.tokens[cursor]?.type === "NEWLINE" &&
            ctx.tokens[cursor + 1]?.type !== "NEWLINE";
          if (swallowTrailingNewline) {
            cursor++;
          }
          break;
        }
      }

      if (token.type === "NEWLINE") {
        cursor++;
        if (!stripParagraphs) {
          // Outside strip mode a newline becomes a line-break, and indentation
          // at the start of the following line is discarded.
          children.push({ element: "line-break" });
          while (ctx.tokens[cursor]?.type === "WHITESPACE" && ctx.tokens[cursor]?.lineStart) {
            cursor++;
          }
        }
        continue;
      }

      // Whitespace that begins a line is never emitted.
      if (token.type === "WHITESPACE" && token.lineStart) {
        cursor++;
        continue;
      }

      // Offer the token to every applicable inline rule; first success wins.
      const nestedCtx: ParseContext = { ...ctx, pos: cursor };
      let handled = false;
      for (const rule of inlineRules) {
        if (!canApplyInlineRule(rule, token)) {
          continue;
        }
        const outcome = rule.parse(nestedCtx);
        if (outcome.success) {
          children.push(...outcome.elements);
          cursor += outcome.consumed;
          handled = true;
          break;
        }
      }

      // Nothing matched: keep the token as literal text.
      if (!handled) {
        children.push({ element: "text", data: token.value });
        cursor++;
      }
    }

    if (!closed) {
      // The warning is recorded even though the rule then reports failure.
      ctx.diagnostics.push({
        severity: "warning",
        code: "unclosed-block",
        message: `Missing closing tag [[/a]] for [[${opening.name}]]`,
        position: openToken.position,
      });
      return { success: false };
    }

    // In strip mode, line-breaks contributed by nested rules at either edge of
    // the body are trimmed away.
    if (stripParagraphs) {
      while (children[0]?.element === "line-break") {
        children.shift();
      }
      while (children[children.length - 1]?.element === "line-break") {
        children.pop();
      }
    }

    // Translate the raw `target` attribute into its semantic value.
    let target: "new-tab" | "parent" | "top" | "same" | null;
    switch (attrResult.attrs.target) {
      case "_blank":
        target = "new-tab";
        break;
      case "_parent":
        target = "parent";
        break;
      case "_top":
        target = "top";
        break;
      case "_self":
        target = "same";
        break;
      default:
        target = null;
    }

    // `target` travels separately; every other attribute (href included) stays.
    const { target: _omittedTarget, ...cleanAttrs } = attrResult.attrs;

    // Guard against script-bearing URLs in href.
    if (cleanAttrs.href) {
      cleanAttrs.href = sanitizeUrl(cleanAttrs.href);
    }

    return {
      success: true,
      elements: [
        {
          element: "anchor",
          data: { target, attributes: cleanAttrs, elements: children },
        },
      ],
      consumed: cursor - ctx.pos,
    };
  },
};
@@ -0,0 +1,153 @@
1
+ /**
2
+ *
3
+ * Parses the Wikidot bibliography citation syntax: `((bibcite label))`.
4
+ *
5
+ * A bibcite creates a numbered inline reference (similar to footnotes)
6
+ * that links to a corresponding entry in a `[[bibliography]]` block
7
+ * elsewhere on the page. The `label` string is used to match the
8
+ * citation with its bibliography entry.
9
+ *
10
+ * Unlike most inline blocks that start with `[[`, bibcite uses double
11
+ * parentheses `((...))` as delimiters. The keyword `bibcite` must
12
+ * appear (case-insensitive) between the opening `((` and the label.
13
+ *
14
+ * Produces a `"bibliography-cite"` AST element. The label is also
15
+ * pushed into `ctx.bibcites` so the renderer can later resolve
16
+ * citation numbers.
17
+ *
18
+ * Wikidot syntax examples:
19
+ * - `((bibcite author2024))` -- cite with label "author2024"
20
+ * - `((bibcite my-source))` -- cite with label "my-source"
21
+ *
22
+ * @module
23
+ */
24
+ import type { Element } from "@wdprlib/ast";
25
+ import type { InlineRule, ParseContext, RuleResult } from "../types";
26
+ import { currentToken } from "../types";
27
+
28
/**
 * Inline rule for bibliography citations written as `((bibcite label))`.
 *
 * The rule starts on a `TEXT` token and requires, in order: two `TEXT` tokens
 * with value `(`, optional whitespace, an `IDENTIFIER` token equal to
 * `bibcite` (compared case-insensitively), optional whitespace, a label that
 * begins with an `IDENTIFIER` or `TEXT` token, and finally two consecutive
 * `TEXT` tokens with value `)`.
 *
 * The label is the concatenation of all token values up to the closing `))`,
 * trimmed of surrounding whitespace. The rule fails when the label is empty
 * or when a `NEWLINE` or `EOF` token appears before the closing `))`.
 *
 * Side effect: on success the label is appended to `ctx.bibcites`.
 */
export const bibciteRule: InlineRule = {
  name: "bibcite",
  startTokens: ["TEXT"],

  /**
   * Tries to read a `((bibcite label))` citation beginning at `ctx.pos`.
   *
   * @param ctx - Parse context holding the token stream and the current position
   * @returns On success, one `"bibliography-cite"` element plus the number of
   *          tokens consumed; otherwise `{ success: false }`
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    // True when the token at `index` exists and is TEXT with exactly `value`.
    const isTextAt = (index: number, value: string): boolean => {
      const candidate = ctx.tokens[index];
      return candidate?.type === "TEXT" && candidate.value === value;
    };

    // Opening delimiter: `(` at the current position, then a second `(`.
    const first = currentToken(ctx);
    if (first.type !== "TEXT" || first.value !== "(") {
      return { success: false };
    }
    if (!isTextAt(ctx.pos + 1, "(")) {
      return { success: false };
    }

    // `cursor` is the index of the next unread token.
    let cursor = ctx.pos + 2;
    const skipWhitespace = (): void => {
      while (ctx.tokens[cursor]?.type === "WHITESPACE") {
        cursor++;
      }
    };

    // Keyword: `bibcite`, any letter case.
    skipWhitespace();
    const keyword = ctx.tokens[cursor];
    const isBibcite =
      !!keyword && keyword.type === "IDENTIFIER" && keyword.value.toLowerCase() === "bibcite";
    if (!isBibcite) {
      return { success: false };
    }
    cursor++;

    // The label has to open with an identifier or text token.
    skipWhitespace();
    const labelStart = ctx.tokens[cursor];
    if (!labelStart || (labelStart.type !== "IDENTIFIER" && labelStart.type !== "TEXT")) {
      return { success: false };
    }

    // Accumulate token values until `))`; a line end before that aborts.
    let label = "";
    let closed = false;
    for (; cursor < ctx.tokens.length; cursor++) {
      const piece = ctx.tokens[cursor];
      if (!piece) {
        break;
      }
      if (isTextAt(cursor, ")") && isTextAt(cursor + 1, ")")) {
        closed = true;
        break;
      }
      if (piece.type === "NEWLINE" || piece.type === "EOF") {
        return { success: false };
      }
      label += piece.value;
    }

    if (!closed) {
      return { success: false };
    }

    label = label.trim();
    if (!label) {
      return { success: false };
    }

    // Remember the citation so it can be resolved later.
    ctx.bibcites.push(label);

    return {
      success: true,
      elements: [
        {
          element: "bibliography-cite",
          data: {
            label,
            brackets: false, // Wikidot adds brackets in output but they're not in the AST
          },
        },
      ],
      // The cursor stops on the first `)`; the two closing tokens are added here.
      consumed: cursor - ctx.pos + 2,
    };
  },
};