@wdprlib/parser 3.1.2 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +295 -118
- package/dist/index.js +272 -95
- package/package.json +5 -3
- package/src/index.ts +163 -0
- package/src/lexer/index.ts +20 -0
- package/src/lexer/lexer.ts +687 -0
- package/src/lexer/tokens.ts +141 -0
- package/src/parser/constants.ts +173 -0
- package/src/parser/depth.ts +251 -0
- package/src/parser/index.ts +18 -0
- package/src/parser/parse.ts +315 -0
- package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
- package/src/parser/postprocess/index.ts +15 -0
- package/src/parser/postprocess/spanStrip.ts +697 -0
- package/src/parser/preprocess/expr.ts +265 -0
- package/src/parser/preprocess/index.ts +38 -0
- package/src/parser/preprocess/typography.ts +67 -0
- package/src/parser/preprocess/utils.ts +250 -0
- package/src/parser/preprocess/whitespace.ts +111 -0
- package/src/parser/rules/block/align.ts +282 -0
- package/src/parser/rules/block/bibliography.ts +359 -0
- package/src/parser/rules/block/block-list.ts +689 -0
- package/src/parser/rules/block/blockquote.ts +238 -0
- package/src/parser/rules/block/center.ts +87 -0
- package/src/parser/rules/block/clear-float.ts +75 -0
- package/src/parser/rules/block/code.ts +187 -0
- package/src/parser/rules/block/collapsible.ts +337 -0
- package/src/parser/rules/block/comment.ts +73 -0
- package/src/parser/rules/block/content-separator.ts +79 -0
- package/src/parser/rules/block/definition-list.ts +270 -0
- package/src/parser/rules/block/div.ts +400 -0
- package/src/parser/rules/block/embed-block.ts +153 -0
- package/src/parser/rules/block/footnoteblock.ts +200 -0
- package/src/parser/rules/block/heading.ts +142 -0
- package/src/parser/rules/block/horizontal-rule.ts +61 -0
- package/src/parser/rules/block/html.ts +222 -0
- package/src/parser/rules/block/iframe.ts +239 -0
- package/src/parser/rules/block/iftags.ts +150 -0
- package/src/parser/rules/block/include.ts +179 -0
- package/src/parser/rules/block/index.ts +127 -0
- package/src/parser/rules/block/list.ts +244 -0
- package/src/parser/rules/block/math.ts +183 -0
- package/src/parser/rules/block/module/backlinks/index.ts +31 -0
- package/src/parser/rules/block/module/backlinks/types.ts +21 -0
- package/src/parser/rules/block/module/categories/index.ts +34 -0
- package/src/parser/rules/block/module/categories/types.ts +21 -0
- package/src/parser/rules/block/module/css/index.ts +37 -0
- package/src/parser/rules/block/module/iftags/condition.ts +109 -0
- package/src/parser/rules/block/module/iftags/index.ts +26 -0
- package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
- package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
- package/src/parser/rules/block/module/iftags/types.ts +63 -0
- package/src/parser/rules/block/module/include/index.ts +20 -0
- package/src/parser/rules/block/module/include/resolve.ts +556 -0
- package/src/parser/rules/block/module/index.ts +122 -0
- package/src/parser/rules/block/module/join/index.ts +34 -0
- package/src/parser/rules/block/module/join/types.ts +23 -0
- package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
- package/src/parser/rules/block/module/listpages/extract.ts +410 -0
- package/src/parser/rules/block/module/listpages/index.ts +83 -0
- package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
- package/src/parser/rules/block/module/listpages/parser.ts +106 -0
- package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
- package/src/parser/rules/block/module/listpages/types.ts +513 -0
- package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
- package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
- package/src/parser/rules/block/module/listusers/extract.ts +45 -0
- package/src/parser/rules/block/module/listusers/index.ts +36 -0
- package/src/parser/rules/block/module/listusers/parser.ts +54 -0
- package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
- package/src/parser/rules/block/module/listusers/types.ts +93 -0
- package/src/parser/rules/block/module/mapping.ts +61 -0
- package/src/parser/rules/block/module/page-tree/index.ts +38 -0
- package/src/parser/rules/block/module/page-tree/types.ts +29 -0
- package/src/parser/rules/block/module/rate/index.ts +28 -0
- package/src/parser/rules/block/module/rate/types.ts +19 -0
- package/src/parser/rules/block/module/resolve.ts +411 -0
- package/src/parser/rules/block/module/types-common.ts +59 -0
- package/src/parser/rules/block/module/types.ts +61 -0
- package/src/parser/rules/block/module/utils.ts +43 -0
- package/src/parser/rules/block/module/walk.ts +380 -0
- package/src/parser/rules/block/module.ts +164 -0
- package/src/parser/rules/block/orphan-li.ts +177 -0
- package/src/parser/rules/block/paragraph.ts +157 -0
- package/src/parser/rules/block/table-block.ts +726 -0
- package/src/parser/rules/block/table.ts +441 -0
- package/src/parser/rules/block/tabview.ts +331 -0
- package/src/parser/rules/block/toc.ts +129 -0
- package/src/parser/rules/block/utils.ts +615 -0
- package/src/parser/rules/index.ts +49 -0
- package/src/parser/rules/inline/anchor-name.ts +154 -0
- package/src/parser/rules/inline/anchor.ts +327 -0
- package/src/parser/rules/inline/bibcite.ts +153 -0
- package/src/parser/rules/inline/bold.ts +86 -0
- package/src/parser/rules/inline/color.ts +140 -0
- package/src/parser/rules/inline/comment.ts +90 -0
- package/src/parser/rules/inline/equation-ref.ts +115 -0
- package/src/parser/rules/inline/expr.ts +526 -0
- package/src/parser/rules/inline/footnote.ts +223 -0
- package/src/parser/rules/inline/guillemet.ts +64 -0
- package/src/parser/rules/inline/html.ts +132 -0
- package/src/parser/rules/inline/image.ts +328 -0
- package/src/parser/rules/inline/index.ts +150 -0
- package/src/parser/rules/inline/italic.ts +74 -0
- package/src/parser/rules/inline/line-break.ts +326 -0
- package/src/parser/rules/inline/link-anchor.ts +147 -0
- package/src/parser/rules/inline/link-single.ts +164 -0
- package/src/parser/rules/inline/link-star.ts +134 -0
- package/src/parser/rules/inline/link-triple.ts +267 -0
- package/src/parser/rules/inline/math-inline.ts +126 -0
- package/src/parser/rules/inline/monospace.ts +78 -0
- package/src/parser/rules/inline/raw.ts +262 -0
- package/src/parser/rules/inline/size.ts +244 -0
- package/src/parser/rules/inline/span.ts +424 -0
- package/src/parser/rules/inline/strikethrough.ts +115 -0
- package/src/parser/rules/inline/subscript.ts +84 -0
- package/src/parser/rules/inline/superscript.ts +84 -0
- package/src/parser/rules/inline/text.ts +84 -0
- package/src/parser/rules/inline/underline.ts +127 -0
- package/src/parser/rules/inline/user.ts +147 -0
- package/src/parser/rules/inline/utils.ts +344 -0
- package/src/parser/rules/types.ts +252 -0
- package/src/parser/rules/utils.ts +155 -0
- package/src/parser/toc.ts +130 -0
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Provides the two lowest-priority inline rules: `textRule` and `fallbackRule`.
|
|
4
|
+
*
|
|
5
|
+
* These rules act as catch-alls that convert unrecognized tokens into
|
|
6
|
+
* plain `"text"` AST elements, ensuring no token is ever silently dropped
|
|
7
|
+
* during inline parsing.
|
|
8
|
+
*
|
|
9
|
+
* `textRule` handles `TEXT` and `WHITESPACE` tokens specifically and is
|
|
10
|
+
* included in the main `inlineRules` array as the last entry before
|
|
11
|
+
* the fallback.
|
|
12
|
+
*
|
|
13
|
+
* `fallbackRule` has an empty `startTokens` array, which means it matches
|
|
14
|
+
* ANY token type. It is exported separately as `inlineFallbackRule` and
|
|
15
|
+
* is NOT included in the `inlineRules` array to prevent it from
|
|
16
|
+
* short-circuiting more specific rules. Instead, it is invoked explicitly
|
|
17
|
+
* by the parser when no other rule matches.
|
|
18
|
+
*
|
|
19
|
+
* @module
|
|
20
|
+
*/
|
|
21
|
+
import type { Element } from "@wdprlib/ast";
|
|
22
|
+
import type { InlineRule, ParseContext, RuleResult } from "../types";
|
|
23
|
+
import { currentToken } from "../types";
|
|
24
|
+
|
|
25
|
+
/**
 * Inline rule that turns plain `TEXT` and `WHITESPACE` tokens into
 * `"text"` AST elements.
 *
 * The rule never fails: whatever token is current is emitted verbatim.
 * It is meant to sit at the end of the inline rule list so that every
 * formatting and structural rule gets a chance to match first.
 */
export const textRule: InlineRule = {
  name: "text",
  startTokens: ["TEXT", "WHITESPACE"],

  /**
   * Emits the current token's value as a single text element.
   *
   * @param ctx - Parse context holding the token stream and current position
   * @returns A successful result with one `"text"` element; exactly one token is consumed
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const { value } = currentToken(ctx);
    const textElement: Element = { element: "text", data: value };

    return { success: true, elements: [textElement], consumed: 1 };
  },
};
|
|
54
|
+
|
|
55
|
+
/**
 * Catch-all inline rule for tokens that no other rule accepted.
 *
 * An empty `startTokens` list is the convention for "matches any token
 * type". The rule emits the token's raw value as a `"text"` element so
 * that nothing in the token stream is silently lost. It is intended to be
 * invoked explicitly as a last resort rather than registered in the main
 * `inlineRules` array.
 */
export const fallbackRule: InlineRule = {
  name: "fallback",
  // Empty list: applies to every token type.
  startTokens: [],

  /**
   * Emits the current token, whatever its type, as literal text.
   *
   * @param ctx - Parse context holding the token stream and current position
   * @returns A successful result with one `"text"` element; exactly one token is consumed
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const { value } = currentToken(ctx);
    const literal: Element = { element: "text", data: value };

    return { success: true, elements: [literal], consumed: 1 };
  },
};
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Parses the Wikidot underline formatting syntax: `__text__`.
|
|
4
|
+
*
|
|
5
|
+
* Underline text is delimited by double underscores. Unlike most
|
|
6
|
+
* inline formatting markers (bold, italic, etc.) which require the
|
|
7
|
+
* closing marker on the same line, underline markers can span
|
|
8
|
+
* multiple lines within the same paragraph. The closing marker
|
|
9
|
+
* must appear before a paragraph break (blank line).
|
|
10
|
+
*
|
|
11
|
+
* Single newlines within underlined content are converted to
|
|
12
|
+
* `<br />` elements, matching Wikidot's multiline underline behavior.
|
|
13
|
+
*
|
|
14
|
+
* If no closing `__` is found before a paragraph break, the opening
|
|
15
|
+
* marker is emitted as literal text.
|
|
16
|
+
*
|
|
17
|
+
* Empty underline (`____`) is silently discarded by Wikidot (produces
|
|
18
|
+
* no output).
|
|
19
|
+
*
|
|
20
|
+
* Renders as a `<u>` element in HTML.
|
|
21
|
+
*
|
|
22
|
+
* Produces a `"container"` AST element with `type: "underline"`.
|
|
23
|
+
*
|
|
24
|
+
* @module
|
|
25
|
+
*/
|
|
26
|
+
import type { Element } from "@wdprlib/ast";
|
|
27
|
+
import type { InlineRule, ParseContext, RuleResult } from "../types";
|
|
28
|
+
import { currentToken, hasClosingMarkerBeforeParagraphBreak } from "../types";
|
|
29
|
+
import { parseInlineUntil } from "./utils";
|
|
30
|
+
|
|
31
|
+
/**
 * Inline rule for parsing `__underline__` formatting.
 *
 * Triggered by an `UNDERLINE_MARKER` token (`__`). The closing marker is
 * looked up with {@link hasClosingMarkerBeforeParagraphBreak} rather than a
 * single-line check, because underline may span several lines of the same
 * paragraph. Each NEWLINE between the markers becomes a `"line-break"`
 * element.
 *
 * When no closing marker is found before a paragraph break, the opening
 * `__` is emitted as literal text and only that one token is consumed.
 */
export const underlineRule: InlineRule = {
  name: "underline",
  startTokens: ["UNDERLINE_MARKER"],

  /**
   * Attempts to parse underline formatting at the current position.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns Always a successful result containing one of:
   *   a `"container"` element with `type: "underline"`;
   *   an empty element list when nothing was produced between the markers
   *   (e.g. `____`); or a single `"text"` element holding the opening
   *   marker when it is unmatched.
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const startToken = currentToken(ctx);

    // Without a closing marker in this paragraph the opener is just text.
    if (!hasClosingMarkerBeforeParagraphBreak({ ...ctx, pos: ctx.pos + 1 }, "UNDERLINE_MARKER")) {
      return {
        success: true,
        elements: [{ element: "text", data: startToken.value }],
        consumed: 1,
      };
    }

    const children: Element[] = [];
    let pos = ctx.pos + 1;
    let consumed = 1; // opening marker

    while (pos < ctx.tokens.length) {
      const token = ctx.tokens[pos];
      if (!token || token.type === "EOF") break;

      // Closing marker: consume it and finish.
      if (token.type === "UNDERLINE_MARKER") {
        consumed++;
        break;
      }

      // A single newline inside underline renders as a line break.
      if (token.type === "NEWLINE") {
        children.push({ element: "line-break" });
        pos++;
        consumed++;
        continue;
      }

      // Parse one line's worth of inline content; parseInlineUntil stops at
      // NEWLINE or at the closing marker.
      const result = parseInlineUntil({ ...ctx, pos }, "UNDERLINE_MARKER");
      if (result.consumed > 0) {
        // Progress is measured in consumed tokens, not produced elements:
        // a nested construct may legitimately consume tokens and emit
        // nothing, and those tokens must not be re-read as literal text.
        children.push(...result.elements);
        pos += result.consumed;
        consumed += result.consumed;
      } else {
        // Safety net: guarantee forward progress if nothing was consumed.
        children.push({ element: "text", data: token.value });
        pos++;
        consumed++;
      }
    }

    // Nothing between the markers (e.g. `____`): emit no output at all.
    if (children.length === 0) {
      return {
        success: true,
        elements: [],
        consumed,
      };
    }

    return {
      success: true,
      elements: [
        {
          element: "container",
          data: {
            type: "underline",
            attributes: {},
            elements: children,
          },
        },
      ],
      consumed,
    };
  },
};
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Parses the Wikidot user reference syntax: `[[user name]]` and
|
|
4
|
+
* `[[*user name]]`.
|
|
5
|
+
*
|
|
6
|
+
* A user reference displays a linked username (typically linking to
|
|
7
|
+
* the user's profile page). The variant with a star prefix (`[[*user]]`)
|
|
8
|
+
* also displays the user's avatar alongside the username.
|
|
9
|
+
*
|
|
10
|
+
* Wikidot syntax:
|
|
11
|
+
* - `[[user some-user]]` -- displays username as a link
|
|
12
|
+
* - `[[*user some-user]]` -- displays avatar and username
|
|
13
|
+
*
|
|
14
|
+
* Note: Wikidot requires no whitespace immediately after `[[`. This
|
|
15
|
+
* means `[[ user name]]` is invalid, but `[[user name]]` and
|
|
16
|
+
* `[[*user name]]` are valid.
|
|
17
|
+
*
|
|
18
|
+
* The username may contain any characters except `]]` and newlines.
|
|
19
|
+
* Leading/trailing whitespace around the username is trimmed.
|
|
20
|
+
*
|
|
21
|
+
* Produces a `"user"` AST element with `data.name` (the username)
|
|
22
|
+
* and `data["show-avatar"]` (boolean).
|
|
23
|
+
*
|
|
24
|
+
* @module
|
|
25
|
+
*/
|
|
26
|
+
import type { Element } from "@wdprlib/ast";
|
|
27
|
+
import type { InlineRule, ParseContext, RuleResult } from "../types";
|
|
28
|
+
import { currentToken } from "../types";
|
|
29
|
+
|
|
30
|
+
/**
 * Inline rule for `[[user name]]` and `[[*user name]]` references.
 *
 * Starts on a `BLOCK_OPEN` (`[[`) token, accepts an optional `*` (avatar
 * flag), requires the keyword `user` (case-insensitive), and then gathers
 * everything up to the closing `]]` as the username.
 *
 * The rule reports failure when:
 * - whitespace directly follows `[[`
 * - the keyword is missing or is not `user`
 * - the trimmed username is empty
 * - the username is not terminated by `]]` on the same line
 */
export const userRule: InlineRule = {
  name: "user",
  startTokens: ["BLOCK_OPEN"],

  /**
   * Attempts to parse a user reference at the current position.
   *
   * @param ctx - Parse context with token stream and current position
   * @returns A successful result with one `"user"` element (`data.name`,
   *   `data["show-avatar"]`), or `{ success: false }`
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    if (currentToken(ctx).type !== "BLOCK_OPEN") {
      return { success: false };
    }

    const { tokens } = ctx;
    // `cursor` walks the stream; the consumed count is derived from it at the end.
    let cursor = ctx.pos + 1;

    // `[[ user]]` is invalid: no whitespace may directly follow `[[`.
    if (tokens[cursor]?.type === "WHITESPACE") {
      return { success: false };
    }

    // Optional `*` requests the avatar.
    const showAvatar = tokens[cursor]?.type === "STAR";
    if (showAvatar) {
      cursor++;
    }

    // Whitespace is tolerated between `*` and the keyword.
    while (tokens[cursor]?.type === "WHITESPACE") {
      cursor++;
    }

    // The keyword must be a TEXT/IDENTIFIER token spelling `user`.
    const keyword = tokens[cursor];
    const isNameLike = keyword?.type === "TEXT" || keyword?.type === "IDENTIFIER";
    if (!keyword || !isNameLike || keyword.value.toLowerCase() !== "user") {
      return { success: false };
    }
    cursor++;

    while (tokens[cursor]?.type === "WHITESPACE") {
      cursor++;
    }

    // Gather raw token values up to `]]`, end of line, or end of input.
    const parts: string[] = [];
    for (; cursor < tokens.length; cursor++) {
      const piece = tokens[cursor];
      if (
        !piece ||
        piece.type === "BLOCK_CLOSE" ||
        piece.type === "NEWLINE" ||
        piece.type === "EOF"
      ) {
        break;
      }
      parts.push(piece.value);
    }

    const username = parts.join("").trim();
    if (!username) {
      return { success: false };
    }

    // The username must be closed by `]]`.
    if (tokens[cursor]?.type !== "BLOCK_CLOSE") {
      return { success: false };
    }
    cursor++;

    return {
      success: true,
      elements: [
        {
          element: "user",
          data: {
            name: username,
            "show-avatar": showAvatar,
          },
        },
      ],
      consumed: cursor - ctx.pos,
    };
  },
};
|
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
import type { TokenType, Token } from "../../../lexer";
|
|
2
|
+
import type { Element } from "@wdprlib/ast";
|
|
3
|
+
import type { ParseContext, InlineRule } from "../types";
|
|
4
|
+
import {
|
|
5
|
+
BLOCK_START_TOKENS,
|
|
6
|
+
INDENT_ACCEPTING_BLOCK_NAMES,
|
|
7
|
+
KNOWN_BLOCK_NAMES,
|
|
8
|
+
} from "../../constants";
|
|
9
|
+
import { parseBlockName } from "../utils";
|
|
10
|
+
|
|
11
|
+
/**
 * Tells whether the block bracket at `tokenPos` (`BLOCK_OPEN` or
 * `BLOCK_END_OPEN`) is followed by a block name listed in
 * `ctx.scope.excludedBlockNames`.
 *
 * @param ctx - Parse context providing the token stream and current scope
 * @param tokenPos - Index of the candidate block bracket token
 * @returns `true` only when the scope has a non-empty exclusion set, the
 *   token is a block bracket, and the parsed name is in that set
 */
function isExcludedBlockToken(ctx: ParseContext, tokenPos: number): boolean {
  const excludedNames = ctx.scope.excludedBlockNames;
  if (!excludedNames?.size) return false;

  const bracketType = ctx.tokens[tokenPos]?.type;
  if (bracketType !== "BLOCK_OPEN" && bracketType !== "BLOCK_END_OPEN") return false;

  const parsed = parseBlockName(ctx, tokenPos + 1);
  return parsed !== null && excludedNames.has(parsed.name);
}
|
|
23
|
+
|
|
24
|
+
/**
 * Tells whether the block bracket at `tokenPos` names a block that is not
 * in `KNOWN_BLOCK_NAMES` (e.g. `[[foo]]`). Such brackets are meant to stay
 * inside the paragraph instead of ending it.
 *
 * Align markers (`[[=]]`, `[[==]]`) are a special case: the token after
 * the bracket is `EQUALS`, so `parseBlockName` yields no name, yet they are
 * still real block boundaries and are therefore reported as known.
 *
 * @param ctx - Parse context providing the token stream
 * @param tokenPos - Index of the candidate block bracket token
 * @returns `false` for non-bracket tokens, known names and align markers;
 *   `true` for unrecognized names or when no name can be parsed
 */
function isUnknownBlockToken(ctx: ParseContext, tokenPos: number): boolean {
  const bracketType = ctx.tokens[tokenPos]?.type;
  if (bracketType !== "BLOCK_OPEN" && bracketType !== "BLOCK_END_OPEN") return false;

  const parsed = parseBlockName(ctx, tokenPos + 1);
  if (parsed !== null) {
    return !KNOWN_BLOCK_NAMES.has(parsed.name);
  }

  // No name could be parsed: only an EQUALS token (align marker) still
  // counts as a recognized block; anything else is treated as inline.
  return ctx.tokens[tokenPos + 1]?.type !== "EQUALS";
}
|
|
47
|
+
|
|
48
|
+
/**
 * Tells whether the block bracket at `tokenPos` names a block listed in
 * `INDENT_ACCEPTING_BLOCK_NAMES`, i.e. one whose rule tolerates leading
 * whitespace before the opener (`requiresLineStart: false`).
 *
 * The paragraph boundary check uses this so that a `\n<indent>[[name]]`
 * sequence ends the paragraph only when the block dispatcher would really
 * consume the indented bracket; otherwise the paragraph would be split and
 * the bracket left behind as literal text in a new paragraph.
 *
 * @param ctx - Parse context providing the token stream
 * @param tokenPos - Index of the candidate block bracket token
 * @returns `true` when the token is a block bracket whose parsed name is
 *   in the indent-accepting set
 */
function isIndentAcceptingBlock(ctx: ParseContext, tokenPos: number): boolean {
  const bracketType = ctx.tokens[tokenPos]?.type;
  if (bracketType !== "BLOCK_OPEN" && bracketType !== "BLOCK_END_OPEN") return false;

  const parsed = parseBlockName(ctx, tokenPos + 1);
  return parsed !== null && INDENT_ACCEPTING_BLOCK_NAMES.has(parsed.name);
}
|
|
65
|
+
|
|
66
|
+
/**
 * Outcome of an inline parsing pass such as {@link parseInlineUntil}.
 */
export interface InlineParseResult {
  /** AST elements produced by the pass, in the order they were parsed. */
  elements: Element[];
  /** Number of tokens the pass consumed, counted from its starting position. */
  consumed: number;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Decides whether an inline rule may be tried on a token.
 *
 * @param rule - The inline rule under consideration
 * @param token - The token (only its `type` is inspected)
 * @returns `true` when the rule lists the token's type in `startTokens`,
 *   or when `startTokens` is empty (the fallback convention: match anything)
 */
export function canApplyInlineRule(rule: InlineRule, token: { type: TokenType }): boolean {
  const { startTokens } = rule;
  return startTokens.length === 0 || startTokens.includes(token.type);
}
|
|
83
|
+
|
|
84
|
+
/**
 * Returns the first token at or after index `from` that is not WHITESPACE
 * (or `undefined` when the stream ends first).
 */
function firstNonWhitespaceToken(tokens: readonly Token[], from: number): Token | undefined {
  let index = from;
  while (tokens[index]?.type === "WHITESPACE") {
    index++;
  }
  return tokens[index];
}

/**
 * True when the token at `openPos` is `[[/` and the block name that follows
 * is `span`; a stray `[[/span]]` is handled inline, not as a block start.
 */
function startsOrphanCloseSpan(ctx: ParseContext, openPos: number): boolean {
  if (ctx.tokens[openPos]?.type !== "BLOCK_END_OPEN") return false;
  const nameToken = firstNonWhitespaceToken(ctx.tokens, openPos + 1);
  return nameToken?.type === "IDENTIFIER" && nameToken.value.toLowerCase() === "span";
}

/**
 * True when the token at `openPos` is `[[` followed by `#`, i.e. an anchor
 * name (`[[# name]]`), which is inline rather than a block start.
 */
function startsAnchorName(ctx: ParseContext, openPos: number): boolean {
  if (ctx.tokens[openPos]?.type !== "BLOCK_OPEN") return false;
  const hashToken = firstNonWhitespaceToken(ctx.tokens, openPos + 1);
  return hashToken?.type === "HASH" || (hashToken?.type === "TEXT" && hashToken.value === "#");
}

/**
 * True when the `[[` at `openPos` cannot open a block: either `[[>X` / `[[<X`
 * where X is not `]]`, or a `[[footnoteblock]]` seen after the scope already
 * recorded one as parsed.
 */
function startsInvalidBlockOpen(ctx: ParseContext, openPos: number): boolean {
  if (ctx.tokens[openPos]?.type !== "BLOCK_OPEN") return false;

  const afterOpen = openPos + 1;
  const firstAfter = ctx.tokens[afterOpen];
  // `[[>]]` and `[[<]]` are valid align blocks; `[[>toc]]` etc. are not.
  if (firstAfter?.type === "TEXT" && (firstAfter.value === ">" || firstAfter.value === "<")) {
    const secondAfter = ctx.tokens[afterOpen + 1];
    if (secondAfter && secondAfter.type !== "BLOCK_CLOSE") {
      return true;
    }
  }

  // A second (or later) `[[footnoteblock]]` is not treated as a block start.
  const blockNameToken = firstNonWhitespaceToken(ctx.tokens, afterOpen);
  return (
    blockNameToken !== undefined &&
    (blockNameToken.type === "TEXT" || blockNameToken.type === "IDENTIFIER") &&
    blockNameToken.value.toLowerCase() === "footnoteblock" &&
    Boolean(ctx.scope.footnoteBlockParsed)
  );
}

/**
 * True when the HEADING_MARKER at `markerPos` would not form a heading:
 * more than six plus signs, or no whitespace after the marker (or after an
 * optional `*` that follows the marker).
 */
function startsInvalidHeading(ctx: ParseContext, markerPos: number): boolean {
  const marker = ctx.tokens[markerPos];
  if (marker?.type !== "HEADING_MARKER") return false;
  if (marker.value.length > 6) return true;

  const afterMarker = ctx.tokens[markerPos + 1];
  if (afterMarker?.type === "STAR") {
    // `+*` pattern: the whitespace must come after the star.
    return ctx.tokens[markerPos + 2]?.type !== "WHITESPACE";
  }
  return afterMarker?.type !== "WHITESPACE";
}

/**
 * Parses inline content starting at `ctx.pos` until a token of type
 * `endType` is reached.
 *
 * Standard mode (any real token type as `endType`): parsing stops, without
 * consuming the stop token, at the first `endType` token, NEWLINE, or EOF.
 *
 * Paragraph mode (`endType` is the pseudo-type `"PARAGRAPH_BREAK"`): single
 * NEWLINEs are handed to the inline rules, and parsing stops at
 * - the scope's `blockCloseCondition`, when one is set and it matches,
 * - a NEWLINE followed (after optional whitespace) by another NEWLINE, EOF,
 *   or the end of the stream,
 * - a NEWLINE followed by a token that genuinely starts a block.
 * In the last two cases the NEWLINE is consumed (both NEWLINEs for a
 * paragraph break).
 *
 * Rules whose `startTokens` contain `endType` are skipped so that a rule
 * cannot recurse into itself through this function. A token that no rule
 * accepts is emitted as a `"text"` element.
 *
 * @param ctx - Parse context; `ctx.pos` is the first token to parse
 * @param endType - Token type that terminates parsing, or `"PARAGRAPH_BREAK"`
 * @returns The parsed elements and the number of tokens consumed
 */
export function parseInlineUntil(ctx: ParseContext, endType: TokenType): InlineParseResult {
  const nodes: Element[] = [];
  let consumed = 0;
  let pos = ctx.pos;

  // Paragraph mode handles NEWLINEs inline instead of stopping at them.
  const paragraphMode = endType === ("PARAGRAPH_BREAK" as TokenType);

  const { inlineRules } = ctx;

  while (pos < ctx.tokens.length) {
    const token = ctx.tokens[pos];
    if (!token || token.type === "EOF") {
      break;
    }

    // Let the paragraph parser respect the enclosing block's close condition.
    if (paragraphMode && ctx.scope.blockCloseCondition) {
      const checkCtx: ParseContext = { ...ctx, pos };
      if (ctx.scope.blockCloseCondition(checkCtx)) {
        break;
      }
    }

    if (token.type === "NEWLINE") {
      // Standard mode: a NEWLINE always ends the run.
      if (!paragraphMode) {
        break;
      }

      // Paragraph mode: find the first non-whitespace token on the next line.
      let lookAhead = 1;
      while (ctx.tokens[pos + lookAhead]?.type === "WHITESPACE") {
        lookAhead++;
      }
      const nextPos = pos + lookAhead;
      const nextMeaningfulToken = ctx.tokens[nextPos];

      // Constructs that look like block starts but must stay inline.
      const isOrphanCloseSpan = startsOrphanCloseSpan(ctx, nextPos);
      const isAnchorName = startsAnchorName(ctx, nextPos);
      const isInvalidBlockOpen = startsInvalidBlockOpen(ctx, nextPos);
      const isInvalidHeading = startsInvalidHeading(ctx, nextPos);

      const isBlockBracket =
        nextMeaningfulToken?.type === "BLOCK_OPEN" ||
        nextMeaningfulToken?.type === "BLOCK_END_OPEN";

      // Block names excluded by the current scope (e.g. nested collapsible).
      const isExcludedBlock = isBlockBracket && isExcludedBlockToken(ctx, nextPos);

      // `[[foo]]` with an unknown name is inline text, not a paragraph boundary.
      const isUnknownBlock = isBlockBracket && isUnknownBlockToken(ctx, nextPos);

      // Most block starts need the strict `lineStart` flag (no leading
      // whitespace). The exception is `[[...]]` blocks whose rules accept
      // indentation: for those the flag is relaxed, because the look-ahead
      // above already skipped the NEWLINE and leading WHITESPACE. Blocks
      // outside INDENT_ACCEPTING_BLOCK_NAMES keep the strict check, since the
      // dispatcher would reject the indented bracket and leave literal text.
      const isIndentedBlockOpener = isBlockBracket && isIndentAcceptingBlock(ctx, nextPos);

      const isBlockStart =
        nextMeaningfulToken !== undefined &&
        BLOCK_START_TOKENS.includes(nextMeaningfulToken.type) &&
        (nextMeaningfulToken.lineStart || isIndentedBlockOpener) &&
        !isOrphanCloseSpan &&
        !isAnchorName &&
        !isInvalidBlockOpen &&
        !isInvalidHeading &&
        !isExcludedBlock &&
        !isUnknownBlock;

      if (
        !nextMeaningfulToken ||
        nextMeaningfulToken.type === "NEWLINE" ||
        nextMeaningfulToken.type === "EOF" ||
        isBlockStart
      ) {
        // When the upcoming block's rule declares
        // `preservesPrecedingLineBreak`, the NEWLINE before it is kept as a
        // line break, tagged so later stages can recognize it.
        if (isBlockStart && nodes.length > 0) {
          const shouldPreserve = ctx.blockRules.some(
            (rule) => rule.preservesPrecedingLineBreak && rule.isStartPattern?.(ctx, nextPos),
          );
          if (shouldPreserve) {
            const preservedBreak: Element & { _preservedTrailingBreak?: boolean } = {
              element: "line-break",
            };
            preservedBreak._preservedTrailingBreak = true;
            nodes.push(preservedBreak);
          }
        }

        // Consume the NEWLINE (and the second one of a paragraph break), then stop.
        consumed++;
        if (nextMeaningfulToken?.type === "NEWLINE") {
          consumed++;
        }
        break;
      }
      // Otherwise the NEWLINE is ordinary inline content; fall through.
    }

    if (token.type === endType) {
      break;
    }

    const inlineCtx: ParseContext = {
      ...ctx,
      pos,
    };

    let matched = false;
    for (const rule of inlineRules) {
      // Skip the rule that starts on the end type to avoid infinite recursion.
      if (rule.startTokens.includes(endType)) {
        continue;
      }
      if (canApplyInlineRule(rule, token)) {
        const result = rule.parse(inlineCtx);
        if (result.success) {
          nodes.push(...result.elements);
          consumed += result.consumed;
          pos += result.consumed;
          matched = true;
          break;
        }
      }
    }

    if (!matched) {
      // No rule accepted the token: keep it as literal text.
      nodes.push({ element: "text", data: token.value });
      consumed++;
      pos++;
    }
  }

  return { elements: nodes, consumed };
}
|
|
324
|
+
|
|
325
|
+
/**
 * Gathers the tokens from `ctx.pos` up to, but not including, the next
 * NEWLINE or EOF token (or the end of the token array).
 *
 * @param ctx - Parse context; only `tokens` and `pos` are read
 * @returns The collected tokens and how many there are (`consumed`)
 */
export function collectUntilNewline(ctx: ParseContext): { tokens: Token[]; consumed: number } {
  const collected: Token[] = [];

  for (let index = ctx.pos; index < ctx.tokens.length; index++) {
    const candidate = ctx.tokens[index];
    if (!candidate || candidate.type === "NEWLINE" || candidate.type === "EOF") {
      break;
    }
    collected.push(candidate);
  }

  // One token is consumed per collected token, so the count is the length.
  return { tokens: collected, consumed: collected.length };
}
|