@wdprlib/parser 3.1.2 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +295 -118
- package/dist/index.js +272 -95
- package/package.json +5 -3
- package/src/index.ts +163 -0
- package/src/lexer/index.ts +20 -0
- package/src/lexer/lexer.ts +687 -0
- package/src/lexer/tokens.ts +141 -0
- package/src/parser/constants.ts +173 -0
- package/src/parser/depth.ts +251 -0
- package/src/parser/index.ts +18 -0
- package/src/parser/parse.ts +315 -0
- package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
- package/src/parser/postprocess/index.ts +15 -0
- package/src/parser/postprocess/spanStrip.ts +697 -0
- package/src/parser/preprocess/expr.ts +265 -0
- package/src/parser/preprocess/index.ts +38 -0
- package/src/parser/preprocess/typography.ts +67 -0
- package/src/parser/preprocess/utils.ts +250 -0
- package/src/parser/preprocess/whitespace.ts +111 -0
- package/src/parser/rules/block/align.ts +282 -0
- package/src/parser/rules/block/bibliography.ts +359 -0
- package/src/parser/rules/block/block-list.ts +689 -0
- package/src/parser/rules/block/blockquote.ts +238 -0
- package/src/parser/rules/block/center.ts +87 -0
- package/src/parser/rules/block/clear-float.ts +75 -0
- package/src/parser/rules/block/code.ts +187 -0
- package/src/parser/rules/block/collapsible.ts +337 -0
- package/src/parser/rules/block/comment.ts +73 -0
- package/src/parser/rules/block/content-separator.ts +79 -0
- package/src/parser/rules/block/definition-list.ts +270 -0
- package/src/parser/rules/block/div.ts +400 -0
- package/src/parser/rules/block/embed-block.ts +153 -0
- package/src/parser/rules/block/footnoteblock.ts +200 -0
- package/src/parser/rules/block/heading.ts +142 -0
- package/src/parser/rules/block/horizontal-rule.ts +61 -0
- package/src/parser/rules/block/html.ts +222 -0
- package/src/parser/rules/block/iframe.ts +239 -0
- package/src/parser/rules/block/iftags.ts +150 -0
- package/src/parser/rules/block/include.ts +179 -0
- package/src/parser/rules/block/index.ts +127 -0
- package/src/parser/rules/block/list.ts +244 -0
- package/src/parser/rules/block/math.ts +183 -0
- package/src/parser/rules/block/module/backlinks/index.ts +31 -0
- package/src/parser/rules/block/module/backlinks/types.ts +21 -0
- package/src/parser/rules/block/module/categories/index.ts +34 -0
- package/src/parser/rules/block/module/categories/types.ts +21 -0
- package/src/parser/rules/block/module/css/index.ts +37 -0
- package/src/parser/rules/block/module/iftags/condition.ts +109 -0
- package/src/parser/rules/block/module/iftags/index.ts +26 -0
- package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
- package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
- package/src/parser/rules/block/module/iftags/types.ts +63 -0
- package/src/parser/rules/block/module/include/index.ts +20 -0
- package/src/parser/rules/block/module/include/resolve.ts +556 -0
- package/src/parser/rules/block/module/index.ts +122 -0
- package/src/parser/rules/block/module/join/index.ts +34 -0
- package/src/parser/rules/block/module/join/types.ts +23 -0
- package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
- package/src/parser/rules/block/module/listpages/extract.ts +410 -0
- package/src/parser/rules/block/module/listpages/index.ts +83 -0
- package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
- package/src/parser/rules/block/module/listpages/parser.ts +106 -0
- package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
- package/src/parser/rules/block/module/listpages/types.ts +513 -0
- package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
- package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
- package/src/parser/rules/block/module/listusers/extract.ts +45 -0
- package/src/parser/rules/block/module/listusers/index.ts +36 -0
- package/src/parser/rules/block/module/listusers/parser.ts +54 -0
- package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
- package/src/parser/rules/block/module/listusers/types.ts +93 -0
- package/src/parser/rules/block/module/mapping.ts +61 -0
- package/src/parser/rules/block/module/page-tree/index.ts +38 -0
- package/src/parser/rules/block/module/page-tree/types.ts +29 -0
- package/src/parser/rules/block/module/rate/index.ts +28 -0
- package/src/parser/rules/block/module/rate/types.ts +19 -0
- package/src/parser/rules/block/module/resolve.ts +411 -0
- package/src/parser/rules/block/module/types-common.ts +59 -0
- package/src/parser/rules/block/module/types.ts +61 -0
- package/src/parser/rules/block/module/utils.ts +43 -0
- package/src/parser/rules/block/module/walk.ts +380 -0
- package/src/parser/rules/block/module.ts +164 -0
- package/src/parser/rules/block/orphan-li.ts +177 -0
- package/src/parser/rules/block/paragraph.ts +157 -0
- package/src/parser/rules/block/table-block.ts +726 -0
- package/src/parser/rules/block/table.ts +441 -0
- package/src/parser/rules/block/tabview.ts +331 -0
- package/src/parser/rules/block/toc.ts +129 -0
- package/src/parser/rules/block/utils.ts +615 -0
- package/src/parser/rules/index.ts +49 -0
- package/src/parser/rules/inline/anchor-name.ts +154 -0
- package/src/parser/rules/inline/anchor.ts +327 -0
- package/src/parser/rules/inline/bibcite.ts +153 -0
- package/src/parser/rules/inline/bold.ts +86 -0
- package/src/parser/rules/inline/color.ts +140 -0
- package/src/parser/rules/inline/comment.ts +90 -0
- package/src/parser/rules/inline/equation-ref.ts +115 -0
- package/src/parser/rules/inline/expr.ts +526 -0
- package/src/parser/rules/inline/footnote.ts +223 -0
- package/src/parser/rules/inline/guillemet.ts +64 -0
- package/src/parser/rules/inline/html.ts +132 -0
- package/src/parser/rules/inline/image.ts +328 -0
- package/src/parser/rules/inline/index.ts +150 -0
- package/src/parser/rules/inline/italic.ts +74 -0
- package/src/parser/rules/inline/line-break.ts +326 -0
- package/src/parser/rules/inline/link-anchor.ts +147 -0
- package/src/parser/rules/inline/link-single.ts +164 -0
- package/src/parser/rules/inline/link-star.ts +134 -0
- package/src/parser/rules/inline/link-triple.ts +267 -0
- package/src/parser/rules/inline/math-inline.ts +126 -0
- package/src/parser/rules/inline/monospace.ts +78 -0
- package/src/parser/rules/inline/raw.ts +262 -0
- package/src/parser/rules/inline/size.ts +244 -0
- package/src/parser/rules/inline/span.ts +424 -0
- package/src/parser/rules/inline/strikethrough.ts +115 -0
- package/src/parser/rules/inline/subscript.ts +84 -0
- package/src/parser/rules/inline/superscript.ts +84 -0
- package/src/parser/rules/inline/text.ts +84 -0
- package/src/parser/rules/inline/underline.ts +127 -0
- package/src/parser/rules/inline/user.ts +147 -0
- package/src/parser/rules/inline/utils.ts +344 -0
- package/src/parser/rules/types.ts +252 -0
- package/src/parser/rules/utils.ts +155 -0
- package/src/parser/toc.ts +130 -0
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Parses the various Wikidot line-break syntaxes.
|
|
4
|
+
*
|
|
5
|
+
* Wikidot supports three distinct mechanisms for producing `<br />` elements:
|
|
6
|
+
*
|
|
7
|
+
* 1. Implicit newline: a single `NEWLINE` token within a paragraph
|
|
8
|
+
* becomes a `<br />`, unless it precedes a block-level element
|
|
9
|
+
* (heading, list, blockquote, etc.) or another newline (paragraph break).
|
|
10
|
+
*
|
|
11
|
+
* 2. Backslash at end of line: `\` followed by newline. The preprocessor
|
|
12
|
+
* converts `\\\n` to a `BACKSLASH_BREAK` token (U+E000), which this
|
|
13
|
+
* rule then handles. Wikidot preserves a space after the line break
|
|
14
|
+
* in this case.
|
|
15
|
+
*
|
|
16
|
+
* 3. Underscore at end of line: ` _` followed by newline, or `_` at the
|
|
17
|
+
* start of a line followed by newline. This is a more explicit
|
|
18
|
+
* line-break syntax.
|
|
19
|
+
*
|
|
20
|
+
* Line-break elements are marked with `_preservedTrailingBreak`
|
|
21
|
+
* when the break was explicitly requested (backslash or underscore syntax),
|
|
22
|
+
* so the paragraph postprocessor knows not to strip trailing breaks.
|
|
23
|
+
*
|
|
24
|
+
* The newline rule suppresses line-breaks in several situations to avoid
|
|
25
|
+
* spurious `<br />` elements before block-level constructs.
|
|
26
|
+
*
|
|
27
|
+
* @module
|
|
28
|
+
*/
|
|
29
|
+
import type { Element } from "@wdprlib/ast";
|
|
30
|
+
import type { InlineRule, ParseContext, RuleResult } from "../types";
|
|
31
|
+
import type { TokenType } from "../../../lexer";
|
|
32
|
+
|
|
33
|
+
/**
 * Token types that can open a block-level construct.
 *
 * The newline line-break rule consults this list: when the first
 * non-whitespace token after a NEWLINE has one of these types, the
 * line break is a candidate for suppression.
 */
const BLOCK_START_TOKENS: Array<TokenType> = [
  // >
  "BLOCKQUOTE_MARKER",
  // *
  "LIST_BULLET",
  // #
  "LIST_NUMBER",
  // + ++ +++
  "HEADING_MARKER",
  // ----
  "HR_MARKER",
  // ||
  "TABLE_MARKER",
];
|
|
48
|
+
|
|
49
|
+
/**
 * Reports whether a token type is listed in {@link BLOCK_START_TOKENS}.
 *
 * @param type - The token type to look up
 * @returns `true` when `type` is one of the block-start token types,
 *   `false` otherwise
 */
function isBlockStartToken(type: TokenType): boolean {
  return BLOCK_START_TOKENS.some((candidate) => candidate === type);
}
|
|
58
|
+
|
|
59
|
+
/**
 * Inline rule for implicit newline-to-line-break conversion.
 *
 * A single `NEWLINE` token within inline content normally becomes a
 * `"line-break"` element. The newline is always consumed, but no element
 * is emitted when the first non-whitespace token after it is:
 *
 * - absent or `EOF` (end of input)
 * - another `NEWLINE` (a paragraph break, not a line break)
 * - a `BACKSLASH_BREAK` (the backslash rule emits the break instead)
 * - a block-start token flagged `lineStart` (blockquote, list, heading,
 *   horizontal rule, or table marker)
 *
 * Heading markers are validated further, since a marker that cannot open
 * a heading is treated as inline content: the marker must be at most 6
 * characters long and be followed by whitespace, optionally with a single
 * `STAR` token in between. Other block-start tokens are accepted on the
 * `lineStart` flag alone.
 */
export const newlineLineBreakRule: InlineRule = {
  name: "newlineLineBreak",
  startTokens: ["NEWLINE"],

  /**
   * Attempts to convert a NEWLINE token into a line-break element.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns `{ success: false }` when the current token is not a NEWLINE;
   *   otherwise a successful result consuming one token, carrying either a
   *   `"line-break"` element or an empty array (when the break is suppressed)
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const currentTok = ctx.tokens[ctx.pos];
    if (!currentTok || currentTok.type !== "NEWLINE") {
      return { success: false };
    }

    // Locate the first non-whitespace token after the newline. Every
    // suppression check below looks at this one token, so a single scan
    // is enough.
    let nextPos = ctx.pos + 1;
    while (ctx.tokens[nextPos]?.type === "WHITESPACE") {
      nextPos++;
    }
    const nextMeaningfulToken = ctx.tokens[nextPos];

    // End of input, paragraph break, or a break the backslash rule will
    // produce itself: consume the newline without emitting anything.
    if (
      !nextMeaningfulToken ||
      nextMeaningfulToken.type === "EOF" ||
      nextMeaningfulToken.type === "NEWLINE" ||
      nextMeaningfulToken.type === "BACKSLASH_BREAK"
    ) {
      return { success: true, elements: [], consumed: 1 };
    }

    // Block-start tokens only count when they sit at an actual line start.
    let isValidBlock =
      isBlockStartToken(nextMeaningfulToken.type) && Boolean(nextMeaningfulToken.lineStart);

    // A heading marker must also form a well-formed heading opener.
    if (isValidBlock && nextMeaningfulToken.type === "HEADING_MARKER") {
      const afterPos = nextPos + 1;
      const afterMarker = ctx.tokens[afterPos];
      if (nextMeaningfulToken.value.length > 6) {
        // Too many marker characters to be a heading.
        isValidBlock = false;
      } else if (afterMarker?.type === "STAR") {
        // Marker + STAR form: whitespace must follow the star.
        if (ctx.tokens[afterPos + 1]?.type !== "WHITESPACE") {
          isValidBlock = false;
        }
      } else if (afterMarker?.type !== "WHITESPACE") {
        isValidBlock = false;
      }
    }

    return {
      success: true,
      elements: isValidBlock ? [] : [{ element: "line-break" }],
      consumed: 1,
    };
  },
};
|
|
162
|
+
|
|
163
|
+
/**
 * Inline rule for line breaks signalled by a `BACKSLASH_BREAK` token.
 *
 * Two token shapes are recognised:
 * - `WHITESPACE + BACKSLASH_BREAK`: emits a line-break followed by a
 *   single-space text element, consuming both tokens
 * - a lone `BACKSLASH_BREAK`: emits only a line-break, consuming one token
 *
 * In the whitespace-prefixed shape the trailing space is left out when the
 * tokens that follow are `WHITESPACE + UNDERSCORE + NEWLINE/EOF`; those
 * tokens are not consumed here.
 *
 * Every line-break element produced carries `_preservedTrailingBreak = true`.
 */
export const backslashLineBreakRule: InlineRule = {
  name: "backslashLineBreak",
  startTokens: ["WHITESPACE", "BACKSLASH_BREAK"],

  /**
   * Attempts to parse a backslash line break at the current position.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns A successful result with the line-break element (and possibly
   *   a trailing space), or `{ success: false }` when neither shape matches
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const { tokens, pos } = ctx;
    const head = tokens[pos];
    if (!head) {
      return { success: false };
    }

    switch (head.type) {
      case "BACKSLASH_BREAK": {
        // Bare break token: nothing but the flagged line-break.
        const brk: any = { element: "line-break", _preservedTrailingBreak: true };
        return { success: true, elements: [brk], consumed: 1 };
      }

      case "WHITESPACE": {
        if (tokens[pos + 1]?.type !== "BACKSLASH_BREAK") {
          return { success: false };
        }

        // Look past the break for an underscore line-break (" _" + end of line).
        const terminatorType = tokens[pos + 4]?.type;
        const underscoreBreakFollows =
          tokens[pos + 2]?.type === "WHITESPACE" &&
          tokens[pos + 3]?.type === "UNDERSCORE" &&
          (terminatorType === "NEWLINE" || terminatorType === "EOF");

        const brk: any = { element: "line-break", _preservedTrailingBreak: true };
        return {
          success: true,
          // The space is dropped when an underscore break comes next.
          elements: underscoreBreakFollows ? [brk] : [brk, { element: "text", data: " " }],
          consumed: 2,
        };
      }

      default:
        return { success: false };
    }
  },
};
|
|
254
|
+
|
|
255
|
+
/**
 * Inline rule for underscore line breaks.
 *
 * Recognises two token shapes, each ending in `NEWLINE` or `EOF`:
 * - `WHITESPACE + UNDERSCORE + terminator` (three tokens consumed)
 * - `UNDERSCORE` flagged `lineStart` + terminator (two tokens consumed)
 *
 * The terminator is consumed together with the underscore, so it is not
 * left in the stream for another rule to process.
 *
 * The emitted line-break element carries `_preservedTrailingBreak = true`.
 */
export const underscoreLineBreakRule: InlineRule = {
  name: "underscoreLineBreak",
  startTokens: ["WHITESPACE", "UNDERSCORE"],

  /**
   * Attempts to parse an underscore line break at the current position.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns A successful result with a `"line-break"` element,
   *   or `{ success: false }` if neither shape matches
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const { tokens, pos } = ctx;
    const head = tokens[pos];
    if (!head) {
      return { success: false };
    }

    // Number of tokens up to and including the underscore; 0 means no match.
    let prefixWidth = 0;
    if (head.type === "WHITESPACE") {
      if (tokens[pos + 1]?.type === "UNDERSCORE") {
        prefixWidth = 2;
      }
    } else if (head.type === "UNDERSCORE" && head.lineStart) {
      prefixWidth = 1;
    }
    if (prefixWidth === 0) {
      return { success: false };
    }

    // The underscore must be the last thing on its line.
    const terminator = tokens[pos + prefixWidth];
    if (!terminator || (terminator.type !== "NEWLINE" && terminator.type !== "EOF")) {
      return { success: false };
    }

    const brk: any = { element: "line-break", _preservedTrailingBreak: true };
    return {
      success: true,
      elements: [brk],
      consumed: prefixWidth + 1, // prefix + NEWLINE/EOF
    };
  },
};
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Parses the Wikidot anchor link syntax: `[#anchor-name Label text]`
|
|
4
|
+
* and the "fake link" variant `[# Label text]`.
|
|
5
|
+
*
|
|
6
|
+
* An anchor link creates a hyperlink that targets a named anchor on
|
|
7
|
+
* the same page. The link's `href` is set to `#normalized-anchor-name`.
|
|
8
|
+
*
|
|
9
|
+
* The "fake link" variant (`[# Label]`) has no anchor name and generates
|
|
10
|
+
* a link with `href="javascript:;"`. This is used in Wikidot for
|
|
11
|
+
* interactive elements like collapsible blocks where the link serves
|
|
12
|
+
* as a click target rather than navigation.
|
|
13
|
+
*
|
|
14
|
+
* Anchor names are normalized to lowercase with spaces replaced by hyphens.
|
|
15
|
+
*
|
|
16
|
+
* The opening delimiter is tokenized as `BRACKET_ANCHOR` (`[#`) by the
|
|
17
|
+
* lexer, distinguishing it from regular bracket links.
|
|
18
|
+
*
|
|
19
|
+
* Produces a `"link"` AST element with `type: "anchor"`.
|
|
20
|
+
*
|
|
21
|
+
* @module
|
|
22
|
+
*/
|
|
23
|
+
import type { Element, LinkLabel } from "@wdprlib/ast";
|
|
24
|
+
import type { InlineRule, ParseContext, RuleResult } from "../types";
|
|
25
|
+
import { hasClosingMarkerBeforeNewline } from "../types";
|
|
26
|
+
|
|
27
|
+
/**
 * Inline rule for `[#anchor Label]` anchor links.
 *
 * Starts on a `BRACKET_ANCHOR` token. Reads an optional anchor name (all
 * tokens up to the first whitespace), skips the separating whitespace,
 * then reads the label up to the closing `BRACKET_CLOSE`.
 *
 * The rule does not match when:
 * - `hasClosingMarkerBeforeNewline` reports no `BRACKET_CLOSE` ahead
 * - the label is empty after trimming
 */
export const linkAnchorRule: InlineRule = {
  name: "linkAnchor",
  startTokens: ["BRACKET_ANCHOR"],

  /**
   * Attempts to parse an anchor link at the current position.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns A successful result with a `"link"` element of type `"anchor"`,
   *   or `{ success: false }`
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    // Bail out early unless a closing bracket is reported ahead.
    if (!hasClosingMarkerBeforeNewline({ ...ctx, pos: ctx.pos + 1 }, "BRACKET_CLOSE")) {
      return { success: false };
    }

    const { tokens } = ctx;
    // Cursor starts just past the opening token; the consumed count is
    // derived from how far it has moved.
    let cursor = ctx.pos + 1;

    // Anchor name: everything up to whitespace, "]", or end of line/input.
    let anchor = "";
    for (; cursor < tokens.length; cursor++) {
      const tok = tokens[cursor];
      if (
        !tok ||
        tok.type === "WHITESPACE" ||
        tok.type === "BRACKET_CLOSE" ||
        tok.type === "NEWLINE" ||
        tok.type === "EOF"
      ) {
        break;
      }
      anchor += tok.value;
    }

    // Separator between anchor name and label.
    while (tokens[cursor]?.type === "WHITESPACE") {
      cursor++;
    }

    // Label: everything up to "]" or end of line/input.
    let label = "";
    for (; cursor < tokens.length; cursor++) {
      const tok = tokens[cursor];
      if (
        !tok ||
        tok.type === "BRACKET_CLOSE" ||
        tok.type === "NEWLINE" ||
        tok.type === "EOF"
      ) {
        break;
      }
      label += tok.value;
    }

    // The label must be terminated by the closing bracket.
    if (tokens[cursor]?.type !== "BRACKET_CLOSE") {
      return { success: false };
    }
    cursor++;

    const labelText = label.trim();
    if (!labelText) {
      return { success: false };
    }

    // An empty anchor name yields the "fake link" target; otherwise the
    // normalized name is used as a fragment.
    const anchorName = anchor.trim();
    const href = anchorName ? `#${normalizeAnchor(anchorName)}` : "javascript:;";
    const linkLabel: LinkLabel = { text: labelText };

    return {
      success: true,
      elements: [
        {
          element: "link",
          data: {
            type: "anchor",
            link: href,
            extra: null,
            label: linkLabel,
            target: null,
          },
        },
      ],
      consumed: cursor - ctx.pos,
    };
  },
};
|
|
135
|
+
|
|
136
|
+
/**
 * Normalizes an anchor name for use in a URL fragment.
 *
 * The name is lowercased and every run of whitespace is collapsed into
 * a single hyphen.
 *
 * @param anchor - The raw anchor name from the markup
 * @returns The lowercased, hyphen-separated anchor name
 */
function normalizeAnchor(anchor: string): string {
  const lowered = anchor.toLowerCase();
  // Splitting on whitespace runs and re-joining with "-" replaces each run once.
  return lowered.split(/\s+/).join("-");
}
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Parses the Wikidot single-bracket link syntax: `[url label]`.
|
|
4
|
+
*
|
|
5
|
+
* Single-bracket links create hyperlinks to external URLs or
|
|
6
|
+
* site-relative paths. The URL and label are separated by whitespace.
|
|
7
|
+
*
|
|
8
|
+
* Supported URL formats:
|
|
9
|
+
* - Absolute URLs: `[https://example.com/ Label]`
|
|
10
|
+
* - Relative paths: `[/some-page Label]`
|
|
11
|
+
*
|
|
12
|
+
* An optional `*` prefix on the URL opens the link in a new tab:
|
|
13
|
+
* `[*https://example.com/ Opens in new tab]`.
|
|
14
|
+
*
|
|
15
|
+
* Unlike triple-bracket links (`[[[page]]]`), single-bracket links
|
|
16
|
+
* require a full URL (starting with `http://`, `https://`, or `/`).
|
|
17
|
+
* The label text is required.
|
|
18
|
+
*
|
|
19
|
+
* Produces a `"link"` AST element with `type: "direct"`.
|
|
20
|
+
*
|
|
21
|
+
* @module
|
|
22
|
+
*/
|
|
23
|
+
import type { Element, LinkLabel } from "@wdprlib/ast";
|
|
24
|
+
import type { InlineRule, ParseContext, RuleResult } from "../types";
|
|
25
|
+
import { hasClosingMarkerBeforeNewline } from "../types";
|
|
26
|
+
|
|
27
|
+
/**
 * Inline rule for `[url label]` single-bracket links.
 *
 * Starts on a `BRACKET_OPEN` token. An optional leading `STAR` token
 * requests a new-tab target. The URL is every token up to the first
 * whitespace; the label is everything after the separating whitespace
 * up to the closing `BRACKET_CLOSE`.
 *
 * The rule does not match when:
 * - `hasClosingMarkerBeforeNewline` reports no `BRACKET_CLOSE` ahead
 * - `isValidUrl` rejects the collected URL
 * - the label is empty after trimming
 */
export const linkSingleRule: InlineRule = {
  name: "linkSingle",
  startTokens: ["BRACKET_OPEN"],

  /**
   * Attempts to parse a single-bracket link at the current position.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns A successful result with a `"link"` element of type `"direct"`,
   *   or `{ success: false }`
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    // Bail out early unless a closing bracket is reported ahead.
    if (!hasClosingMarkerBeforeNewline({ ...ctx, pos: ctx.pos + 1 }, "BRACKET_CLOSE")) {
      return { success: false };
    }

    const { tokens } = ctx;
    // Cursor starts just past "["; the consumed count is derived from it.
    let cursor = ctx.pos + 1;

    // Optional "*" prefix selects the new-tab target.
    const newTab = tokens[cursor]?.type === "STAR";
    if (newTab) {
      cursor++;
    }

    // URL: everything up to whitespace, "]", or end of line/input.
    let url = "";
    for (; cursor < tokens.length; cursor++) {
      const tok = tokens[cursor];
      if (
        !tok ||
        tok.type === "WHITESPACE" ||
        tok.type === "BRACKET_CLOSE" ||
        tok.type === "NEWLINE" ||
        tok.type === "EOF"
      ) {
        break;
      }
      url += tok.value;
    }

    const href = url.trim();
    if (!isValidUrl(href)) {
      return { success: false };
    }

    // Separator between URL and label.
    while (tokens[cursor]?.type === "WHITESPACE") {
      cursor++;
    }

    // Label: everything up to "]" or end of line/input.
    let label = "";
    for (; cursor < tokens.length; cursor++) {
      const tok = tokens[cursor];
      if (
        !tok ||
        tok.type === "BRACKET_CLOSE" ||
        tok.type === "NEWLINE" ||
        tok.type === "EOF"
      ) {
        break;
      }
      label += tok.value;
    }

    // The label must be terminated by the closing bracket.
    if (tokens[cursor]?.type !== "BRACKET_CLOSE") {
      return { success: false };
    }
    cursor++;

    const labelText = label.trim();
    if (!labelText) {
      return { success: false };
    }

    const linkLabel: LinkLabel = { text: labelText };

    return {
      success: true,
      elements: [
        {
          element: "link",
          data: {
            type: "direct",
            link: href,
            extra: null,
            label: linkLabel,
            target: newTab ? "new-tab" : null,
          },
        },
      ],
      consumed: cursor - ctx.pos,
    };
  },
};
|
|
148
|
+
|
|
149
|
+
/**
 * Decides whether a URL may be used in a single-bracket link.
 *
 * A URL is accepted when it begins with `/`, `http://`, or `https://`
 * (matched case-sensitively). Everything else, including the empty
 * string, is rejected.
 *
 * @param url - The trimmed URL string to validate
 * @returns `true` if the URL starts with one of the accepted prefixes
 */
function isValidUrl(url: string): boolean {
  const acceptedPrefixes = ["/", "http://", "https://"];
  // An empty string starts with none of the prefixes, so it is rejected too.
  return acceptedPrefixes.some((prefix) => url.startsWith(prefix));
}
|