@wdprlib/parser 3.1.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/dist/index.cjs +295 -118
  2. package/dist/index.js +272 -95
  3. package/package.json +5 -3
  4. package/src/index.ts +163 -0
  5. package/src/lexer/index.ts +20 -0
  6. package/src/lexer/lexer.ts +687 -0
  7. package/src/lexer/tokens.ts +141 -0
  8. package/src/parser/constants.ts +173 -0
  9. package/src/parser/depth.ts +251 -0
  10. package/src/parser/index.ts +18 -0
  11. package/src/parser/parse.ts +315 -0
  12. package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
  13. package/src/parser/postprocess/index.ts +15 -0
  14. package/src/parser/postprocess/spanStrip.ts +697 -0
  15. package/src/parser/preprocess/expr.ts +265 -0
  16. package/src/parser/preprocess/index.ts +38 -0
  17. package/src/parser/preprocess/typography.ts +67 -0
  18. package/src/parser/preprocess/utils.ts +250 -0
  19. package/src/parser/preprocess/whitespace.ts +111 -0
  20. package/src/parser/rules/block/align.ts +282 -0
  21. package/src/parser/rules/block/bibliography.ts +359 -0
  22. package/src/parser/rules/block/block-list.ts +689 -0
  23. package/src/parser/rules/block/blockquote.ts +238 -0
  24. package/src/parser/rules/block/center.ts +87 -0
  25. package/src/parser/rules/block/clear-float.ts +75 -0
  26. package/src/parser/rules/block/code.ts +187 -0
  27. package/src/parser/rules/block/collapsible.ts +337 -0
  28. package/src/parser/rules/block/comment.ts +73 -0
  29. package/src/parser/rules/block/content-separator.ts +79 -0
  30. package/src/parser/rules/block/definition-list.ts +270 -0
  31. package/src/parser/rules/block/div.ts +400 -0
  32. package/src/parser/rules/block/embed-block.ts +153 -0
  33. package/src/parser/rules/block/footnoteblock.ts +200 -0
  34. package/src/parser/rules/block/heading.ts +142 -0
  35. package/src/parser/rules/block/horizontal-rule.ts +61 -0
  36. package/src/parser/rules/block/html.ts +222 -0
  37. package/src/parser/rules/block/iframe.ts +239 -0
  38. package/src/parser/rules/block/iftags.ts +150 -0
  39. package/src/parser/rules/block/include.ts +179 -0
  40. package/src/parser/rules/block/index.ts +127 -0
  41. package/src/parser/rules/block/list.ts +244 -0
  42. package/src/parser/rules/block/math.ts +183 -0
  43. package/src/parser/rules/block/module/backlinks/index.ts +31 -0
  44. package/src/parser/rules/block/module/backlinks/types.ts +21 -0
  45. package/src/parser/rules/block/module/categories/index.ts +34 -0
  46. package/src/parser/rules/block/module/categories/types.ts +21 -0
  47. package/src/parser/rules/block/module/css/index.ts +37 -0
  48. package/src/parser/rules/block/module/iftags/condition.ts +109 -0
  49. package/src/parser/rules/block/module/iftags/index.ts +26 -0
  50. package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
  51. package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
  52. package/src/parser/rules/block/module/iftags/types.ts +63 -0
  53. package/src/parser/rules/block/module/include/index.ts +20 -0
  54. package/src/parser/rules/block/module/include/resolve.ts +556 -0
  55. package/src/parser/rules/block/module/index.ts +122 -0
  56. package/src/parser/rules/block/module/join/index.ts +34 -0
  57. package/src/parser/rules/block/module/join/types.ts +23 -0
  58. package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
  59. package/src/parser/rules/block/module/listpages/extract.ts +410 -0
  60. package/src/parser/rules/block/module/listpages/index.ts +83 -0
  61. package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
  62. package/src/parser/rules/block/module/listpages/parser.ts +106 -0
  63. package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
  64. package/src/parser/rules/block/module/listpages/types.ts +513 -0
  65. package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
  66. package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
  67. package/src/parser/rules/block/module/listusers/extract.ts +45 -0
  68. package/src/parser/rules/block/module/listusers/index.ts +36 -0
  69. package/src/parser/rules/block/module/listusers/parser.ts +54 -0
  70. package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
  71. package/src/parser/rules/block/module/listusers/types.ts +93 -0
  72. package/src/parser/rules/block/module/mapping.ts +61 -0
  73. package/src/parser/rules/block/module/page-tree/index.ts +38 -0
  74. package/src/parser/rules/block/module/page-tree/types.ts +29 -0
  75. package/src/parser/rules/block/module/rate/index.ts +28 -0
  76. package/src/parser/rules/block/module/rate/types.ts +19 -0
  77. package/src/parser/rules/block/module/resolve.ts +411 -0
  78. package/src/parser/rules/block/module/types-common.ts +59 -0
  79. package/src/parser/rules/block/module/types.ts +61 -0
  80. package/src/parser/rules/block/module/utils.ts +43 -0
  81. package/src/parser/rules/block/module/walk.ts +380 -0
  82. package/src/parser/rules/block/module.ts +164 -0
  83. package/src/parser/rules/block/orphan-li.ts +177 -0
  84. package/src/parser/rules/block/paragraph.ts +157 -0
  85. package/src/parser/rules/block/table-block.ts +726 -0
  86. package/src/parser/rules/block/table.ts +441 -0
  87. package/src/parser/rules/block/tabview.ts +331 -0
  88. package/src/parser/rules/block/toc.ts +129 -0
  89. package/src/parser/rules/block/utils.ts +615 -0
  90. package/src/parser/rules/index.ts +49 -0
  91. package/src/parser/rules/inline/anchor-name.ts +154 -0
  92. package/src/parser/rules/inline/anchor.ts +327 -0
  93. package/src/parser/rules/inline/bibcite.ts +153 -0
  94. package/src/parser/rules/inline/bold.ts +86 -0
  95. package/src/parser/rules/inline/color.ts +140 -0
  96. package/src/parser/rules/inline/comment.ts +90 -0
  97. package/src/parser/rules/inline/equation-ref.ts +115 -0
  98. package/src/parser/rules/inline/expr.ts +526 -0
  99. package/src/parser/rules/inline/footnote.ts +223 -0
  100. package/src/parser/rules/inline/guillemet.ts +64 -0
  101. package/src/parser/rules/inline/html.ts +132 -0
  102. package/src/parser/rules/inline/image.ts +328 -0
  103. package/src/parser/rules/inline/index.ts +150 -0
  104. package/src/parser/rules/inline/italic.ts +74 -0
  105. package/src/parser/rules/inline/line-break.ts +326 -0
  106. package/src/parser/rules/inline/link-anchor.ts +147 -0
  107. package/src/parser/rules/inline/link-single.ts +164 -0
  108. package/src/parser/rules/inline/link-star.ts +134 -0
  109. package/src/parser/rules/inline/link-triple.ts +267 -0
  110. package/src/parser/rules/inline/math-inline.ts +126 -0
  111. package/src/parser/rules/inline/monospace.ts +78 -0
  112. package/src/parser/rules/inline/raw.ts +262 -0
  113. package/src/parser/rules/inline/size.ts +244 -0
  114. package/src/parser/rules/inline/span.ts +424 -0
  115. package/src/parser/rules/inline/strikethrough.ts +115 -0
  116. package/src/parser/rules/inline/subscript.ts +84 -0
  117. package/src/parser/rules/inline/superscript.ts +84 -0
  118. package/src/parser/rules/inline/text.ts +84 -0
  119. package/src/parser/rules/inline/underline.ts +127 -0
  120. package/src/parser/rules/inline/user.ts +147 -0
  121. package/src/parser/rules/inline/utils.ts +344 -0
  122. package/src/parser/rules/types.ts +252 -0
  123. package/src/parser/rules/utils.ts +155 -0
  124. package/src/parser/toc.ts +130 -0
@@ -0,0 +1,326 @@
1
+ /**
2
+ *
3
+ * Parses the various Wikidot line-break syntaxes.
4
+ *
5
+ * Wikidot supports three distinct mechanisms for producing `<br />` elements:
6
+ *
7
+ * 1. Implicit newline: a single `NEWLINE` token within a paragraph
8
+ * becomes a `<br />`, unless it precedes a block-level element
9
+ * (heading, list, blockquote, etc.) or another newline (paragraph break).
10
+ *
11
+ * 2. Backslash at end of line: `\` followed by newline. The preprocessor
12
+ * converts `\\\n` to a `BACKSLASH_BREAK` token (U+E000), which this
13
+ * rule then handles. Wikidot preserves a space after the line break
14
+ * in this case.
15
+ *
16
+ * 3. Underscore at end of line: ` _` followed by newline, or `_` at the
17
+ * start of a line followed by newline. This is a more explicit
18
+ * line-break syntax.
19
+ *
20
+ * The backslash and underscore rules mark their line-break elements with
21
+ * `_preservedTrailingBreak`, because those breaks were explicitly requested,
22
+ * so the paragraph postprocessor knows not to strip them as trailing breaks.
23
+ *
24
+ * The newline rule suppresses line-breaks in several situations to avoid
25
+ * spurious `<br />` elements before block-level constructs.
26
+ *
27
+ * @module
28
+ */
29
+ import type { Element } from "@wdprlib/ast";
30
+ import type { InlineRule, ParseContext, RuleResult } from "../types";
31
+ import type { TokenType } from "../../../lexer";
32
+
33
/**
 * Token types that can open a block-level element.
 *
 * The newline line-break rule looks a token up in this list (through
 * `isBlockStartToken`): when the first non-whitespace token after a
 * `NEWLINE` has one of these types, the `<br />` is suppressed so that no
 * stray break is emitted in front of the block.
 *
 * Declared `readonly` because this is a shared, module-level lookup table
 * that is only ever read.
 */
const BLOCK_START_TOKENS: readonly TokenType[] = [
  "BLOCKQUOTE_MARKER", // >
  "LIST_BULLET", // *
  "LIST_NUMBER", // #
  "HEADING_MARKER", // + ++ +++
  "HR_MARKER", // ----
  "TABLE_MARKER", // ||
];
48
+
49
/**
 * Tells whether a token type is one of the types that open a block-level
 * element.
 *
 * @param type - Token type to look up
 * @returns `true` when `type` appears in {@link BLOCK_START_TOKENS},
 *   otherwise `false`
 */
function isBlockStartToken(type: TokenType): boolean {
  const matchesType = (candidate: TokenType): boolean => candidate === type;
  return BLOCK_START_TOKENS.some(matchesType);
}
58
+
59
/**
 * Inline rule for implicit newline-to-line-break conversion.
 *
 * A `NEWLINE` token inside inline content normally becomes a
 * `"line-break"` element (`<br />`). The `NEWLINE` is still consumed, but
 * no element is emitted, when the first non-`WHITESPACE` token after it is:
 *
 * - missing or `EOF` (end of input)
 * - another `NEWLINE` (a paragraph break, not a line break)
 * - `BACKSLASH_BREAK` (the backslash rule emits the break instead)
 * - a block-start token (see `BLOCK_START_TOKENS`) that is flagged
 *   `lineStart`
 *
 * Only `HEADING_MARKER` receives extra validation: it counts as a block
 * start only when the marker is at most 6 characters long and is followed
 * by `WHITESPACE`, optionally with a single `STAR` in between. The other
 * block-start token types are accepted as soon as they are at line start.
 *
 * Unlike the backslash and underscore rules, the element produced here is
 * not marked with `_preservedTrailingBreak`.
 */
export const newlineLineBreakRule: InlineRule = {
  name: "newlineLineBreak",
  startTokens: ["NEWLINE"],

  /**
   * Attempts to convert the `NEWLINE` token at `ctx.pos` into a line break.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns `{ success: false }` when the current token is not `NEWLINE`;
   *   otherwise a successful result consuming exactly one token, carrying
   *   either a single `"line-break"` element or an empty element list
   *   (when the break is suppressed)
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    if (ctx.tokens[ctx.pos]?.type !== "NEWLINE") {
      return { success: false };
    }

    // Locate the first token after the newline that is not WHITESPACE.
    // Every suppression check below is based on this single lookahead.
    let nextPos = ctx.pos + 1;
    while (ctx.tokens[nextPos]?.type === "WHITESPACE") {
      nextPos++;
    }
    const next = ctx.tokens[nextPos];

    // End of input, a paragraph break, or a BACKSLASH_BREAK (whose own rule
    // creates the line break): swallow the newline without emitting a break.
    if (
      !next ||
      next.type === "EOF" ||
      next.type === "NEWLINE" ||
      next.type === "BACKSLASH_BREAK"
    ) {
      return { success: true, elements: [], consumed: 1 };
    }

    // Block-start tokens only count when they sit at an actual line start.
    let startsBlock = Boolean(next.lineStart) && isBlockStartToken(next.type);

    // A heading marker only forms a heading when it is short enough and is
    // followed by whitespace, optionally with one STAR in between.
    if (startsBlock && next.type === "HEADING_MARKER") {
      const afterMarker = ctx.tokens[nextPos + 1];
      const expectedSpace =
        afterMarker?.type === "STAR" ? ctx.tokens[nextPos + 2] : afterMarker;
      startsBlock = next.value.length <= 6 && expectedSpace?.type === "WHITESPACE";
    }

    if (startsBlock) {
      // The block element follows directly; a <br /> before it would be spurious.
      return { success: true, elements: [], consumed: 1 };
    }

    return {
      success: true,
      elements: [{ element: "line-break" }],
      consumed: 1,
    };
  },
};
162
+
163
/**
 * Inline rule for backslash-at-end-of-line line breaks.
 *
 * In Wikidot, a backslash at the end of a line (`\` followed by newline)
 * creates a line break. The preprocessor converts this `\\\n` sequence
 * into a special `BACKSLASH_BREAK` token (U+E000).
 *
 * Two token patterns are recognised:
 * - `WHITESPACE + BACKSLASH_BREAK`: a line-break followed by a text element
 *   holding a single space (Wikidot preserves the space after the break).
 *   Both tokens are consumed.
 * - Standalone `BACKSLASH_BREAK`: only a line-break; one token is consumed.
 *
 * Special case: when `WHITESPACE + BACKSLASH_BREAK` is immediately followed
 * by `WHITESPACE + UNDERSCORE + NEWLINE/EOF` (an underscore line break), the
 * trailing space element is omitted to avoid doubled spacing. Those
 * following tokens are not consumed here; they are left for the underscore
 * rule.
 *
 * Every line-break element produced by this rule carries
 * `_preservedTrailingBreak = true` so the paragraph postprocessor does not
 * strip it.
 */
export const backslashLineBreakRule: InlineRule = {
  name: "backslashLineBreak",
  startTokens: ["WHITESPACE", "BACKSLASH_BREAK"],

  /**
   * Attempts to parse a backslash line break at the current position.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns A successful result with the line-break element (and possibly
   *   a trailing space text element), or `{ success: false }` if neither
   *   pattern matches at `ctx.pos`
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const { tokens, pos } = ctx;
    const current = tokens[pos];
    if (!current) {
      return { success: false };
    }

    let consumed: number;
    let keepTrailingSpace: boolean;

    if (current.type === "BACKSLASH_BREAK") {
      // Standalone BACKSLASH_BREAK: just the break, no space.
      consumed = 1;
      keepTrailingSpace = false;
    } else if (current.type === "WHITESPACE" && tokens[pos + 1]?.type === "BACKSLASH_BREAK") {
      // WHITESPACE + BACKSLASH_BREAK: break plus a space, unless an
      // underscore line break (" _" + NEWLINE/EOF) follows directly.
      const terminator = tokens[pos + 4]?.type;
      const underscoreBreakFollows =
        tokens[pos + 2]?.type === "WHITESPACE" &&
        tokens[pos + 3]?.type === "UNDERSCORE" &&
        (terminator === "NEWLINE" || terminator === "EOF");
      consumed = 2;
      keepTrailingSpace = !underscoreBreakFollows;
    } else {
      return { success: false };
    }

    // Explicitly requested break: flag it so it survives at paragraph end.
    // The flag is not part of `Element`, hence the widened local type.
    const lineBreak: Element & { _preservedTrailingBreak: boolean } = {
      element: "line-break",
      _preservedTrailingBreak: true,
    };

    return {
      success: true,
      elements: keepTrailingSpace ? [lineBreak, { element: "text", data: " " }] : [lineBreak],
      consumed,
    };
  },
};
254
+
255
/**
 * Inline rule for underscore-at-end-of-line line breaks.
 *
 * Wikidot syntax: ` _` followed by newline (space + underscore + newline),
 * or `_` at the start of a line followed by newline.
 *
 * Two token patterns are recognised:
 * - Pattern 1: `WHITESPACE + UNDERSCORE + NEWLINE/EOF` (3 tokens consumed)
 * - Pattern 2: `UNDERSCORE` flagged `lineStart` + `NEWLINE/EOF`
 *   (2 tokens consumed)
 *
 * In both patterns the terminating token is consumed as part of the line
 * break, so the newline rule cannot produce a duplicate break.
 *
 * NOTE(review): when the terminator is `EOF` rather than `NEWLINE`, the
 * `EOF` token is counted in `consumed` as well — confirm that callers
 * tolerate the position moving past `EOF`.
 *
 * The line-break element is marked with `_preservedTrailingBreak = true`
 * so the paragraph postprocessor does not strip it.
 */
export const underscoreLineBreakRule: InlineRule = {
  name: "underscoreLineBreak",
  startTokens: ["WHITESPACE", "UNDERSCORE"],

  /**
   * Attempts to parse an underscore line break at the current position.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns A successful result with one `"line-break"` element,
   *   or `{ success: false }` if neither pattern matches at `ctx.pos`
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const { tokens, pos } = ctx;
    const current = tokens[pos];
    if (!current) {
      return { success: false };
    }

    // True when the token `offset` places ahead terminates the line.
    const endsLineAt = (offset: number): boolean => {
      const type = tokens[pos + offset]?.type;
      return type === "NEWLINE" || type === "EOF";
    };

    let consumed: number;
    if (
      current.type === "WHITESPACE" &&
      tokens[pos + 1]?.type === "UNDERSCORE" &&
      endsLineAt(2)
    ) {
      consumed = 3; // WHITESPACE + UNDERSCORE + NEWLINE/EOF
    } else if (current.type === "UNDERSCORE" && current.lineStart && endsLineAt(1)) {
      consumed = 2; // UNDERSCORE + NEWLINE/EOF
    } else {
      return { success: false };
    }

    // Explicitly requested break: flag it so it survives at paragraph end.
    // The flag is not part of `Element`, hence the widened local type.
    const lineBreak: Element & { _preservedTrailingBreak: boolean } = {
      element: "line-break",
      _preservedTrailingBreak: true,
    };

    return {
      success: true,
      elements: [lineBreak],
      consumed,
    };
  },
};
@@ -0,0 +1,147 @@
1
+ /**
2
+ *
3
+ * Parses the Wikidot anchor link syntax: `[#anchor-name Label text]`
4
+ * and the "fake link" variant `[# Label text]`.
5
+ *
6
+ * An anchor link creates a hyperlink that targets a named anchor on
7
+ * the same page. The link's `href` is set to `#normalized-anchor-name`.
8
+ *
9
+ * The "fake link" variant (`[# Label]`) has no anchor name and generates
10
+ * a link with `href="javascript:;"`. This is used in Wikidot for
11
+ * interactive elements like collapsible blocks where the link serves
12
+ * as a click target rather than navigation.
13
+ *
14
+ * Anchor names are normalized to lowercase with spaces replaced by hyphens.
15
+ *
16
+ * The opening delimiter is tokenized as `BRACKET_ANCHOR` (`[#`) by the
17
+ * lexer, distinguishing it from regular bracket links.
18
+ *
19
+ * Produces a `"link"` AST element with `type: "anchor"`.
20
+ *
21
+ * @module
22
+ */
23
+ import type { Element, LinkLabel } from "@wdprlib/ast";
24
+ import type { InlineRule, ParseContext, RuleResult } from "../types";
25
+ import { hasClosingMarkerBeforeNewline } from "../types";
26
+
27
/**
 * Inline rule for `[#anchor Label]` anchor links.
 *
 * Starts on a `BRACKET_ANCHOR` (`[#`) token, reads the optional anchor
 * name, then the mandatory label, and ends at the closing `]`.
 *
 * The rule does not match when:
 * - there is no closing `]` before the end of the line
 * - the label is empty after trimming
 */
export const linkAnchorRule: InlineRule = {
  name: "linkAnchor",
  startTokens: ["BRACKET_ANCHOR"],

  /**
   * Tries to read an anchor link starting at `ctx.pos`.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns On success, a single `"link"` element of type `"anchor"` plus
   *   the number of tokens consumed (opening `[#` through closing `]`);
   *   otherwise `{ success: false }`
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const afterOpener = ctx.pos + 1;

    // Bail out early unless a `]` appears before the line ends.
    if (!hasClosingMarkerBeforeNewline({ ...ctx, pos: afterOpener }, "BRACKET_CLOSE")) {
      return { success: false };
    }

    const { tokens } = ctx;
    let cursor = afterOpener;

    // Anchor name: everything up to the first whitespace (or terminator).
    let anchorName = "";
    for (; cursor < tokens.length; cursor++) {
      const tok = tokens[cursor];
      if (!tok) break;
      const { type } = tok;
      if (
        type === "WHITESPACE" ||
        type === "BRACKET_CLOSE" ||
        type === "NEWLINE" ||
        type === "EOF"
      ) {
        break;
      }
      anchorName += tok.value;
    }

    // Whitespace separating the anchor name from the label.
    while (tokens[cursor]?.type === "WHITESPACE") {
      cursor++;
    }

    // Label: everything up to the closing bracket (or terminator).
    let rawLabel = "";
    for (; cursor < tokens.length; cursor++) {
      const tok = tokens[cursor];
      if (!tok) break;
      const { type } = tok;
      if (type === "BRACKET_CLOSE" || type === "NEWLINE" || type === "EOF") {
        break;
      }
      rawLabel += tok.value;
    }

    // The closing bracket is mandatory and belongs to the link.
    if (tokens[cursor]?.type !== "BRACKET_CLOSE") {
      return { success: false };
    }
    cursor++;

    const labelText = rawLabel.trim();
    if (labelText.length === 0) {
      return { success: false };
    }

    // No anchor name means a "fake link" pointing at javascript:;
    // otherwise the normalized name becomes the URL fragment.
    const cleanAnchor = anchorName.trim();
    let href: string;
    if (cleanAnchor) {
      href = `#${normalizeAnchor(cleanAnchor)}`;
    } else {
      href = "javascript:;";
    }

    const linkLabel: LinkLabel = { text: labelText };

    return {
      success: true,
      elements: [
        {
          element: "link",
          data: {
            type: "anchor",
            link: href,
            extra: null,
            label: linkLabel,
            target: null,
          },
        },
      ],
      // One token per cursor step, counted from the opening `[#`.
      consumed: cursor - ctx.pos,
    };
  },
};
135
+
136
/**
 * Turns a raw anchor name into the form used in a URL fragment.
 *
 * The name is lowercased and every run of whitespace is collapsed into a
 * single hyphen.
 *
 * @param anchor - Anchor name as written in the markup
 * @returns The lowercased, hyphen-separated anchor name
 */
function normalizeAnchor(anchor: string): string {
  const lowered = anchor.toLowerCase();
  const pieces = lowered.split(/\s+/);
  return pieces.join("-");
}
@@ -0,0 +1,164 @@
1
+ /**
2
+ *
3
+ * Parses the Wikidot single-bracket link syntax: `[url label]`.
4
+ *
5
+ * Single-bracket links create hyperlinks to external URLs or
6
+ * site-relative paths. The URL and label are separated by whitespace.
7
+ *
8
+ * Supported URL formats:
9
+ * - Absolute URLs: `[https://example.com/ Label]`
10
+ * - Relative paths: `[/some-page Label]`
11
+ *
12
+ * An optional `*` prefix on the URL opens the link in a new tab:
13
+ * `[*https://example.com/ Opens in new tab]`.
14
+ *
15
+ * Unlike triple-bracket links (`[[[page]]]`), single-bracket links
16
+ * require a full URL (starting with `http://`, `https://`, or `/`).
17
+ * The label text is required.
18
+ *
19
+ * Produces a `"link"` AST element with `type: "direct"`.
20
+ *
21
+ * @module
22
+ */
23
+ import type { Element, LinkLabel } from "@wdprlib/ast";
24
+ import type { InlineRule, ParseContext, RuleResult } from "../types";
25
+ import { hasClosingMarkerBeforeNewline } from "../types";
26
+
27
/**
 * Inline rule for `[url label]` single-bracket links.
 *
 * Starts on a `BRACKET_OPEN` (`[`) token. An optional `*` right after the
 * bracket requests a new tab; then the URL is read up to the first
 * whitespace and the label up to the closing `]`.
 *
 * The rule does not match when:
 * - there is no closing `]` before the end of the line
 * - the URL does not start with `http://`, `https://`, or `/`
 * - the label is empty after trimming
 */
export const linkSingleRule: InlineRule = {
  name: "linkSingle",
  startTokens: ["BRACKET_OPEN"],

  /**
   * Tries to read a single-bracket link starting at `ctx.pos`.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns On success, a single `"link"` element of type `"direct"` plus
   *   the number of tokens consumed (opening `[` through closing `]`);
   *   otherwise `{ success: false }`
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const afterOpener = ctx.pos + 1;

    // Bail out early unless a `]` appears before the line ends.
    if (!hasClosingMarkerBeforeNewline({ ...ctx, pos: afterOpener }, "BRACKET_CLOSE")) {
      return { success: false };
    }

    const { tokens } = ctx;
    let cursor = afterOpener;

    // A leading `*` marks the link as opening in a new tab.
    const opensNewTab = tokens[cursor]?.type === "STAR";
    if (opensNewTab) {
      cursor++;
    }

    // URL: everything up to the first whitespace (or terminator).
    let rawUrl = "";
    for (; cursor < tokens.length; cursor++) {
      const tok = tokens[cursor];
      if (!tok) break;
      const { type } = tok;
      if (
        type === "WHITESPACE" ||
        type === "BRACKET_CLOSE" ||
        type === "NEWLINE" ||
        type === "EOF"
      ) {
        break;
      }
      rawUrl += tok.value;
    }

    // Only http(s) URLs and site-relative paths are accepted here.
    const href = rawUrl.trim();
    if (!isValidUrl(href)) {
      return { success: false };
    }

    // Whitespace separating the URL from the label.
    while (tokens[cursor]?.type === "WHITESPACE") {
      cursor++;
    }

    // Label: everything up to the closing bracket (or terminator).
    let rawLabel = "";
    for (; cursor < tokens.length; cursor++) {
      const tok = tokens[cursor];
      if (!tok) break;
      const { type } = tok;
      if (type === "BRACKET_CLOSE" || type === "NEWLINE" || type === "EOF") {
        break;
      }
      rawLabel += tok.value;
    }

    // The closing bracket is mandatory and belongs to the link.
    if (tokens[cursor]?.type !== "BRACKET_CLOSE") {
      return { success: false };
    }
    cursor++;

    const labelText = rawLabel.trim();
    if (labelText.length === 0) {
      return { success: false };
    }

    const linkLabel: LinkLabel = { text: labelText };

    return {
      success: true,
      elements: [
        {
          element: "link",
          data: {
            type: "direct",
            link: href,
            extra: null,
            label: linkLabel,
            target: opensNewTab ? "new-tab" : null,
          },
        },
      ],
      // One token per cursor step, counted from the opening `[`.
      consumed: cursor - ctx.pos,
    };
  },
};
148
+
149
/**
 * Decides whether a URL may be used in a single-bracket link.
 *
 * Accepted forms are site-relative paths (leading `/`) and absolute
 * `http://` / `https://` URLs. Anything else — including the empty string —
 * is rejected.
 *
 * @param url - The trimmed URL string to check
 * @returns `true` if `url` starts with `/`, `http://`, or `https://`
 */
function isValidUrl(url: string): boolean {
  const acceptedPrefixes = ["/", "http://", "https://"];
  return acceptedPrefixes.some((prefix) => url.startsWith(prefix));
}