@wdprlib/parser 3.1.2 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +295 -118
- package/dist/index.js +272 -95
- package/package.json +5 -3
- package/src/index.ts +163 -0
- package/src/lexer/index.ts +20 -0
- package/src/lexer/lexer.ts +687 -0
- package/src/lexer/tokens.ts +141 -0
- package/src/parser/constants.ts +173 -0
- package/src/parser/depth.ts +251 -0
- package/src/parser/index.ts +18 -0
- package/src/parser/parse.ts +315 -0
- package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
- package/src/parser/postprocess/index.ts +15 -0
- package/src/parser/postprocess/spanStrip.ts +697 -0
- package/src/parser/preprocess/expr.ts +265 -0
- package/src/parser/preprocess/index.ts +38 -0
- package/src/parser/preprocess/typography.ts +67 -0
- package/src/parser/preprocess/utils.ts +250 -0
- package/src/parser/preprocess/whitespace.ts +111 -0
- package/src/parser/rules/block/align.ts +282 -0
- package/src/parser/rules/block/bibliography.ts +359 -0
- package/src/parser/rules/block/block-list.ts +689 -0
- package/src/parser/rules/block/blockquote.ts +238 -0
- package/src/parser/rules/block/center.ts +87 -0
- package/src/parser/rules/block/clear-float.ts +75 -0
- package/src/parser/rules/block/code.ts +187 -0
- package/src/parser/rules/block/collapsible.ts +337 -0
- package/src/parser/rules/block/comment.ts +73 -0
- package/src/parser/rules/block/content-separator.ts +79 -0
- package/src/parser/rules/block/definition-list.ts +270 -0
- package/src/parser/rules/block/div.ts +400 -0
- package/src/parser/rules/block/embed-block.ts +153 -0
- package/src/parser/rules/block/footnoteblock.ts +200 -0
- package/src/parser/rules/block/heading.ts +142 -0
- package/src/parser/rules/block/horizontal-rule.ts +61 -0
- package/src/parser/rules/block/html.ts +222 -0
- package/src/parser/rules/block/iframe.ts +239 -0
- package/src/parser/rules/block/iftags.ts +150 -0
- package/src/parser/rules/block/include.ts +179 -0
- package/src/parser/rules/block/index.ts +127 -0
- package/src/parser/rules/block/list.ts +244 -0
- package/src/parser/rules/block/math.ts +183 -0
- package/src/parser/rules/block/module/backlinks/index.ts +31 -0
- package/src/parser/rules/block/module/backlinks/types.ts +21 -0
- package/src/parser/rules/block/module/categories/index.ts +34 -0
- package/src/parser/rules/block/module/categories/types.ts +21 -0
- package/src/parser/rules/block/module/css/index.ts +37 -0
- package/src/parser/rules/block/module/iftags/condition.ts +109 -0
- package/src/parser/rules/block/module/iftags/index.ts +26 -0
- package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
- package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
- package/src/parser/rules/block/module/iftags/types.ts +63 -0
- package/src/parser/rules/block/module/include/index.ts +20 -0
- package/src/parser/rules/block/module/include/resolve.ts +556 -0
- package/src/parser/rules/block/module/index.ts +122 -0
- package/src/parser/rules/block/module/join/index.ts +34 -0
- package/src/parser/rules/block/module/join/types.ts +23 -0
- package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
- package/src/parser/rules/block/module/listpages/extract.ts +410 -0
- package/src/parser/rules/block/module/listpages/index.ts +83 -0
- package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
- package/src/parser/rules/block/module/listpages/parser.ts +106 -0
- package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
- package/src/parser/rules/block/module/listpages/types.ts +513 -0
- package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
- package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
- package/src/parser/rules/block/module/listusers/extract.ts +45 -0
- package/src/parser/rules/block/module/listusers/index.ts +36 -0
- package/src/parser/rules/block/module/listusers/parser.ts +54 -0
- package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
- package/src/parser/rules/block/module/listusers/types.ts +93 -0
- package/src/parser/rules/block/module/mapping.ts +61 -0
- package/src/parser/rules/block/module/page-tree/index.ts +38 -0
- package/src/parser/rules/block/module/page-tree/types.ts +29 -0
- package/src/parser/rules/block/module/rate/index.ts +28 -0
- package/src/parser/rules/block/module/rate/types.ts +19 -0
- package/src/parser/rules/block/module/resolve.ts +411 -0
- package/src/parser/rules/block/module/types-common.ts +59 -0
- package/src/parser/rules/block/module/types.ts +61 -0
- package/src/parser/rules/block/module/utils.ts +43 -0
- package/src/parser/rules/block/module/walk.ts +380 -0
- package/src/parser/rules/block/module.ts +164 -0
- package/src/parser/rules/block/orphan-li.ts +177 -0
- package/src/parser/rules/block/paragraph.ts +157 -0
- package/src/parser/rules/block/table-block.ts +726 -0
- package/src/parser/rules/block/table.ts +441 -0
- package/src/parser/rules/block/tabview.ts +331 -0
- package/src/parser/rules/block/toc.ts +129 -0
- package/src/parser/rules/block/utils.ts +615 -0
- package/src/parser/rules/index.ts +49 -0
- package/src/parser/rules/inline/anchor-name.ts +154 -0
- package/src/parser/rules/inline/anchor.ts +327 -0
- package/src/parser/rules/inline/bibcite.ts +153 -0
- package/src/parser/rules/inline/bold.ts +86 -0
- package/src/parser/rules/inline/color.ts +140 -0
- package/src/parser/rules/inline/comment.ts +90 -0
- package/src/parser/rules/inline/equation-ref.ts +115 -0
- package/src/parser/rules/inline/expr.ts +526 -0
- package/src/parser/rules/inline/footnote.ts +223 -0
- package/src/parser/rules/inline/guillemet.ts +64 -0
- package/src/parser/rules/inline/html.ts +132 -0
- package/src/parser/rules/inline/image.ts +328 -0
- package/src/parser/rules/inline/index.ts +150 -0
- package/src/parser/rules/inline/italic.ts +74 -0
- package/src/parser/rules/inline/line-break.ts +326 -0
- package/src/parser/rules/inline/link-anchor.ts +147 -0
- package/src/parser/rules/inline/link-single.ts +164 -0
- package/src/parser/rules/inline/link-star.ts +134 -0
- package/src/parser/rules/inline/link-triple.ts +267 -0
- package/src/parser/rules/inline/math-inline.ts +126 -0
- package/src/parser/rules/inline/monospace.ts +78 -0
- package/src/parser/rules/inline/raw.ts +262 -0
- package/src/parser/rules/inline/size.ts +244 -0
- package/src/parser/rules/inline/span.ts +424 -0
- package/src/parser/rules/inline/strikethrough.ts +115 -0
- package/src/parser/rules/inline/subscript.ts +84 -0
- package/src/parser/rules/inline/superscript.ts +84 -0
- package/src/parser/rules/inline/text.ts +84 -0
- package/src/parser/rules/inline/underline.ts +127 -0
- package/src/parser/rules/inline/user.ts +147 -0
- package/src/parser/rules/inline/utils.ts +344 -0
- package/src/parser/rules/types.ts +252 -0
- package/src/parser/rules/utils.ts +155 -0
- package/src/parser/toc.ts +130 -0
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Parses the Wikidot star-link syntax: `[*url label]`.
|
|
4
|
+
*
|
|
5
|
+
* A star link is a convenience syntax for creating links that open
|
|
6
|
+
* in a new tab/window. The `[*` opening delimiter is tokenized as
|
|
7
|
+
* `BRACKET_STAR` by the lexer.
|
|
8
|
+
*
|
|
9
|
+
* Unlike the regular single-bracket link, the star link does not
|
|
10
|
+
* require a specific URL scheme -- any non-empty URL is accepted.
|
|
11
|
+
* If no label text is provided, the URL itself is used as the display text.
|
|
12
|
+
*
|
|
13
|
+
* The link always has `target: "new-tab"` regardless of the URL content.
|
|
14
|
+
*
|
|
15
|
+
* Wikidot syntax examples:
|
|
16
|
+
* - `[*https://example.com/ Visit Example]` -- with label
|
|
17
|
+
* - `[*https://example.com/]` -- URL used as label
|
|
18
|
+
*
|
|
19
|
+
* Produces a `"link"` AST element with `type: "direct"` and
|
|
20
|
+
* `target: "new-tab"`.
|
|
21
|
+
*
|
|
22
|
+
* @module
|
|
23
|
+
*/
|
|
24
|
+
import type { Element, LinkLabel } from "@wdprlib/ast";
|
|
25
|
+
import type { InlineRule, ParseContext, RuleResult } from "../types";
|
|
26
|
+
import { hasClosingMarkerBeforeNewline } from "../types";
|
|
27
|
+
|
|
28
|
+
/**
 * Inline rule for star links written as `[*url label]`.
 *
 * The rule starts on a `BRACKET_STAR` token. Everything up to the first
 * whitespace token is the URL; everything after the whitespace run and up
 * to the closing `BRACKET_CLOSE` is the label. An empty label falls back
 * to the URL. The emitted link is always `type: "direct"` with
 * `target: "new-tab"`.
 *
 * The rule reports `{ success: false }` when:
 * - `hasClosingMarkerBeforeNewline` finds no `BRACKET_CLOSE` after the opener
 * - the collected URL is empty after trimming
 * - the token that ends the label is not a `BRACKET_CLOSE`
 */
export const linkStarRule: InlineRule = {
  name: "linkStar",
  startTokens: ["BRACKET_STAR"],

  /**
   * Tries to read a star link starting at `ctx.pos`.
   *
   * @param ctx - Parse context holding the token stream and current position
   * @returns A `"link"` element plus the number of tokens consumed, or
   *   `{ success: false }` when the syntax does not match
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const { tokens } = ctx;
    const afterOpener = ctx.pos + 1;

    // Bail out early when the helper finds no `]` after the opener.
    if (!hasClosingMarkerBeforeNewline({ ...ctx, pos: afterOpener }, "BRACKET_CLOSE")) {
      return { success: false };
    }

    // `cursor` is the only position tracker; the consumed count is derived
    // from it at the end (cursor - ctx.pos), which includes the `[*` token.
    let cursor = afterOpener;

    // URL: concatenate token values until whitespace or a terminator.
    let rawUrl = "";
    for (; cursor < tokens.length; cursor++) {
      const tok = tokens[cursor];
      if (!tok) break;
      const kind = tok.type;
      if (
        kind === "WHITESPACE" ||
        kind === "BRACKET_CLOSE" ||
        kind === "NEWLINE" ||
        kind === "EOF"
      ) {
        break;
      }
      rawUrl += tok.value;
    }

    const href = rawUrl.trim();
    if (href === "") {
      return { success: false };
    }

    // Drop the whitespace run that separates URL and label.
    while (tokens[cursor]?.type === "WHITESPACE") {
      cursor++;
    }

    // Label: concatenate token values (whitespace included) until a terminator.
    let rawLabel = "";
    for (; cursor < tokens.length; cursor++) {
      const tok = tokens[cursor];
      if (!tok) break;
      const kind = tok.type;
      if (kind === "BRACKET_CLOSE" || kind === "NEWLINE" || kind === "EOF") {
        break;
      }
      rawLabel += tok.value;
    }

    // The label must be terminated by the closing bracket itself.
    if (tokens[cursor]?.type !== "BRACKET_CLOSE") {
      return { success: false };
    }
    cursor++;

    // An empty label means the URL doubles as the display text.
    const linkLabel: LinkLabel = { text: rawLabel.trim() || href };

    return {
      success: true,
      elements: [
        {
          element: "link",
          data: {
            type: "direct",
            link: href,
            extra: null,
            label: linkLabel,
            target: "new-tab",
          },
        },
      ],
      consumed: cursor - ctx.pos,
    };
  },
};
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Parses the Wikidot triple-bracket link syntax: `[[[target | label]]]`.
|
|
4
|
+
*
|
|
5
|
+
* Triple-bracket links are Wikidot's primary page-linking mechanism.
|
|
6
|
+
* They support several target formats:
|
|
7
|
+
*
|
|
8
|
+
* - Page links: `[[[page-name]]]` or `[[[page-name | Label]]]`
|
|
9
|
+
* - Category pages: `[[[category:page-name]]]` (display shows text after colon)
|
|
10
|
+
* - Anchor links: `[[[#anchor-name]]]`
|
|
11
|
+
* - External URLs: `[[[https://example.com | Label]]]`
|
|
12
|
+
* - Interwiki links: `[[[wikipedia:Article]]]` (for known prefixes)
|
|
13
|
+
*
|
|
14
|
+
* Special syntax:
|
|
15
|
+
* - `[[[*target]]]` -- `*` prefix is stripped from target; for external URLs,
|
|
16
|
+
* adds `target="_blank"` (new tab)
|
|
17
|
+
* - `[[[*|label]]]` -- links to root `/` with the given label
|
|
18
|
+
* - `[[[page|]]]` -- empty label after pipe defaults to the page name
|
|
19
|
+
*
|
|
20
|
+
* Multi-line support: a single newline within the link is converted to
|
|
21
|
+
* a space (in both target and label portions), but a double newline
|
|
22
|
+
* (paragraph break) or a newline directly before `]]]` invalidates the link.
|
|
23
|
+
*
|
|
24
|
+
* When the opening `[[[` has no valid closing `]]]`, it falls through
|
|
25
|
+
* as literal text rather than failing.
|
|
26
|
+
*
|
|
27
|
+
* Produces a `"link"` AST element with an appropriate `type` field
|
|
28
|
+
* (`"page"`, `"anchor"`, `"direct"`, or `"interwiki"`).
|
|
29
|
+
*
|
|
30
|
+
* @module
|
|
31
|
+
*/
|
|
32
|
+
import type { Element, LinkType, LinkLocation, LinkLabel } from "@wdprlib/ast";
|
|
33
|
+
import type { InlineRule, ParseContext, RuleResult } from "../types";
|
|
34
|
+
import { currentToken } from "../types";
|
|
35
|
+
|
|
36
|
+
/**
 * Looks ahead from `startPos` for a `LINK_CLOSE` (`]]]`) token that can
 * legally terminate a triple-bracket link.
 *
 * The scan walks forward token by token and answers:
 * - `true` as soon as a `LINK_CLOSE` is reached
 * - `false` on an `EOF` token, a missing token, or the end of the stream
 * - `false` when a `NEWLINE` is immediately followed by another `NEWLINE`
 *   (paragraph break) or by `LINK_CLOSE`
 *
 * Note: newlines are not counted. Any number of non-adjacent single
 * newlines passes this check; only the two newline patterns above reject.
 *
 * @param ctx - The current parse context (only `ctx.tokens` is read)
 * @param startPos - Token index where scanning starts (just after `[[[`)
 * @returns Whether an acceptable closing `]]]` lies ahead
 */
function hasClosingLinkMarker(ctx: ParseContext, startPos: number): boolean {
  const { tokens } = ctx;

  for (let i = startPos; i < tokens.length; i++) {
    const tok = tokens[i];
    if (!tok) {
      return false;
    }

    switch (tok.type) {
      case "EOF":
        return false;
      case "LINK_CLOSE":
        return true;
      case "NEWLINE": {
        // A newline may not be followed by a second newline or by `]]]`.
        const following = tokens[i + 1]?.type;
        if (following === "NEWLINE" || following === "LINK_CLOSE") {
          return false;
        }
        break;
      }
      default:
        break;
    }
  }

  return false;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Inline rule for triple-bracket links: `[[[target | label]]]`.
 *
 * The rule starts on a `LINK_OPEN` token. Token values up to the first
 * `PIPE` form the target; values after it form the label. `NEWLINE`
 * tokens inside the link contribute a single space. The target string is
 * then classified by `determineLinkTypeAndLocation`.
 *
 * The rule never reports failure. When the link is rejected it consumes
 * only the opener and emits its text literally. That happens when:
 * - `hasClosingLinkMarker` finds no acceptable `]]]`
 * - a pipe is present but the target is empty (`[[[|text]]]`)
 * - the target contains two or more consecutive `#` (`[[[page##anchor]]]`)
 *
 * Other behaviour visible in the code:
 * - A leading `*` is removed from the target before classification and
 *   yields `target: "new-tab"` only when the link type is `"direct"`.
 * - With a pipe and an empty label, the star-stripped target is displayed.
 * - Without a pipe, a target containing `:` displays only the text after
 *   the first colon, unless it starts with `http` or `*`.
 */
export const linkTripleRule: InlineRule = {
  name: "linkTriple",
  startTokens: ["LINK_OPEN"],

  /**
   * Tries to read a triple-bracket link starting at `ctx.pos`.
   *
   * @param ctx - Parse context holding the token stream and current position
   * @returns A `"link"` element, or a one-token `"text"` element holding
   *   the opener when the link syntax is rejected
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const opener = currentToken(ctx);

    // Shared fallback: emit the opener verbatim and consume just that token.
    const asLiteralOpener = (): RuleResult<Element> => ({
      success: true,
      elements: [{ element: "text", data: opener.value }],
      consumed: 1,
    });

    if (!hasClosingLinkMarker(ctx, ctx.pos + 1)) {
      return asLiteralOpener();
    }

    // Gather raw target and label text up to the closing marker.
    let rawTarget = "";
    let rawLabel = "";
    let sawPipe = false;
    let cursor = ctx.pos + 1;

    for (; cursor < ctx.tokens.length; cursor++) {
      const tok = ctx.tokens[cursor];
      if (!tok || tok.type === "LINK_CLOSE" || tok.type === "EOF") {
        break;
      }

      // Only the first pipe separates target from label; later pipes are
      // ordinary label text.
      if (tok.type === "PIPE" && !sawPipe) {
        sawPipe = true;
        continue;
      }

      // A newline inside the link is folded into a single space.
      const piece = tok.type === "NEWLINE" ? " " : tok.value;
      if (sawPipe) {
        rawLabel += piece;
      } else {
        rawTarget += piece;
      }
    }

    // Tokens consumed so far (opener included), plus the closer if present.
    const consumed = cursor - ctx.pos + (ctx.tokens[cursor]?.type === "LINK_CLOSE" ? 1 : 0);

    const targetText = rawTarget.trim();

    // `[[[|label]]]` — a pipe with nothing in front of it is not a link.
    if (sawPipe && targetText === "") {
      return asLiteralOpener();
    }

    // Runs of `#` (e.g. `[[[home###|Home]]]`) are rejected as invalid syntax.
    if (/#{2,}/.test(targetText)) {
      return asLiteralOpener();
    }

    // A leading `*` is not part of the destination.
    // NOTE(review): for `[[[*|label]]]` this leaves an empty target, which is
    // classified as a page link with an empty page name; the module docs
    // describe that case as linking to root `/` — confirm downstream handling.
    const starred = targetText.startsWith("*");
    const resolvedTarget = starred ? targetText.slice(1) : targetText;

    const { linkType, link } = determineLinkTypeAndLocation(resolvedTarget);
    const labelText = rawLabel.trim();

    let shown: string;
    if (sawPipe) {
      // `[[[page|]]]` — an empty label falls back to the star-stripped target.
      shown = labelText || resolvedTarget;
    } else {
      // Without a pipe the display text comes from the unstripped target, so
      // a leading `*` stays visible. Targets like `system:Recent Changes`
      // show only the part after the first colon.
      // NOTE(review): the `http` test is a bare prefix check, so a target such
      // as `httpd:page` keeps its full text — confirm this is intended.
      const colonAt = targetText.indexOf(":");
      const showsSuffixOnly = colonAt !== -1 && !targetText.startsWith("http") && !starred;
      shown = showsSuffixOnly ? targetText.slice(colonAt + 1).trim() : targetText;
    }

    const label: LinkLabel = { text: shown };

    return {
      success: true,
      elements: [
        {
          element: "link",
          data: {
            type: linkType,
            link,
            extra: null,
            label,
            target: starred && linkType === "direct" ? "new-tab" : null,
          },
        },
      ],
      consumed,
    };
  },
};
|
|
225
|
+
|
|
226
|
+
/**
 * Interwiki prefixes this parser recognizes.
 *
 * A target of the form `prefix:rest` whose lower-cased prefix is in this
 * set (for example `wikipedia:Article`) is classified as an interwiki
 * link instead of a category page link.
 */
const INTERWIKI_PREFIXES = new Set(["wikipedia", "google", "dictionary", "wikidot"]);

/**
 * Classifies a triple-bracket link target and builds its location value.
 *
 * Checks run in this order; the first match wins:
 * 1. Starts with `#` -> `"anchor"`, location is the target string
 * 2. Starts with `http://` or `https://` (case-sensitive) -> `"direct"`,
 *    location is the target string
 * 3. Contains no `/`, has a colon after at least one character, and the
 *    lower-cased text before the first colon is a known interwiki
 *    prefix -> `"interwiki"`, location is the target string
 * 4. Anything else -> `"page"`, location is `{ site: null, page: target }`
 *    (this covers category pages such as `system:Recent Changes`)
 *
 * @param target - The trimmed link target with any leading `*` removed
 * @returns The link type together with its location data
 */
function determineLinkTypeAndLocation(target: string): { linkType: LinkType; link: LinkLocation } {
  if (target.startsWith("#")) {
    return { linkType: "anchor", link: target };
  }

  if (/^https?:\/\//.test(target)) {
    return { linkType: "direct", link: target };
  }

  // Interwiki candidates need a non-empty prefix and no slash anywhere.
  const separatorAt = target.indexOf(":");
  const couldBeInterwiki = separatorAt > 0 && !target.includes("/");
  if (couldBeInterwiki && INTERWIKI_PREFIXES.has(target.slice(0, separatorAt).toLowerCase())) {
    return { linkType: "interwiki", link: target };
  }

  // Default: a page on the current site.
  return { linkType: "page", link: { site: null, page: target } };
}
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Parses the Wikidot inline math syntax: `[[$ LaTeX $]]`.
|
|
4
|
+
*
|
|
5
|
+
* Inline math renders a LaTeX expression inline with the surrounding
|
|
6
|
+
* text (as opposed to the block-level `[[math]]` which produces a
|
|
7
|
+
* display-mode equation).
|
|
8
|
+
*
|
|
9
|
+
* The LaTeX source is captured as-is between the `$` delimiters and
|
|
10
|
+
* stored in the AST for later rendering by a LaTeX engine (e.g. KaTeX
|
|
11
|
+
* or MathJax).
|
|
12
|
+
*
|
|
13
|
+
* Newlines are NOT allowed within inline math; if a `NEWLINE` token is
|
|
14
|
+
* encountered before the closing `$]]`, the parse fails.
|
|
15
|
+
*
|
|
16
|
+
* Wikidot syntax: `[[$ E = mc^2 $]]`
|
|
17
|
+
*
|
|
18
|
+
* Produces a `"math-inline"` AST element with `data["latex-source"]`
|
|
19
|
+
* containing the trimmed LaTeX string.
|
|
20
|
+
*
|
|
21
|
+
* @module
|
|
22
|
+
*/
|
|
23
|
+
import type { Element } from "@wdprlib/ast";
|
|
24
|
+
import type { InlineRule, ParseContext, RuleResult } from "../types";
|
|
25
|
+
import { currentToken } from "../types";
|
|
26
|
+
|
|
27
|
+
/**
 * Inline rule for inline math written as `[[$ LaTeX $]]`.
 *
 * The rule starts on a `BLOCK_OPEN` token and requires the very next token
 * to be a `TEXT` token whose value is exactly `$`. Whitespace tokens after
 * that `$` are skipped, then token values are concatenated until a `$`
 * `TEXT` token directly followed by `BLOCK_CLOSE` is reached. The
 * collected source is trimmed and stored under `data["latex-source"]`.
 *
 * The rule reports `{ success: false }` when:
 * - the current token is not `BLOCK_OPEN`
 * - the token after the opener is not a `$` text token
 * - a `NEWLINE` token appears before the closing sequence
 * - the token stream ends without a `$` + `BLOCK_CLOSE` pair
 */
export const mathInlineRule: InlineRule = {
  name: "math-inline",
  startTokens: ["BLOCK_OPEN"],

  /**
   * Tries to read inline math starting at `ctx.pos`.
   *
   * @param ctx - Parse context holding the token stream and current position
   * @returns A `"math-inline"` element plus the number of tokens consumed,
   *   or `{ success: false }` when the syntax does not match
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    if (currentToken(ctx).type !== "BLOCK_OPEN") {
      return { success: false };
    }

    const { tokens } = ctx;

    // True when the token at `index` is a TEXT token holding exactly "$".
    const isDollarAt = (index: number): boolean =>
      tokens[index]?.type === "TEXT" && tokens[index]?.value === "$";

    // `cursor` is the only position tracker; the consumed count is derived
    // from it at the end (cursor - ctx.pos), which includes the opener.
    let cursor = ctx.pos + 1;

    // Opening `$` must directly follow `[[`.
    if (!isDollarAt(cursor)) {
      return { success: false };
    }
    cursor++;

    // Leading whitespace is not part of the LaTeX source.
    while (tokens[cursor]?.type === "WHITESPACE") {
      cursor++;
    }

    // Accumulate source text up to (not including) the closing `$]]`.
    let source = "";
    for (; cursor < tokens.length; cursor++) {
      const tok = tokens[cursor];
      if (!tok) {
        break;
      }
      // Inline math must stay on one line.
      if (tok.type === "NEWLINE") {
        return { success: false };
      }
      if (isDollarAt(cursor) && tokens[cursor + 1]?.type === "BLOCK_CLOSE") {
        break;
      }
      source += tok.value;
    }

    // The loop may also end by running out of tokens, so re-verify that the
    // cursor really sits on `$` followed by `]]`.
    if (!isDollarAt(cursor)) {
      return { success: false };
    }
    cursor++;

    if (tokens[cursor]?.type !== "BLOCK_CLOSE") {
      return { success: false };
    }
    cursor++;

    return {
      success: true,
      elements: [
        {
          element: "math-inline",
          data: {
            "latex-source": source.trim(),
          },
        },
      ],
      consumed: cursor - ctx.pos,
    };
  },
};
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Parses the Wikidot monospace (teletype) formatting syntax: `{{text}}`.
|
|
4
|
+
*
|
|
5
|
+
* Monospace text is delimited by double curly braces. The opening and
|
|
6
|
+
* closing markers must appear on the same line. If no closing `}}`
|
|
7
|
+
* is found before a newline, the opening marker is emitted as literal text.
|
|
8
|
+
*
|
|
9
|
+
* Note: the opening marker is `MONO_MARKER` (`{{`) and the closing marker
|
|
10
|
+
* is `MONO_CLOSE` (`}}`). These are distinct token types because `{` and
|
|
11
|
+
* `}` have different lexer significance in some contexts.
|
|
12
|
+
*
|
|
13
|
+
* Monospace is a "container" element, meaning it can nest other inline
|
|
14
|
+
* formatting within its body. It renders as a `<tt>` element in HTML.
|
|
15
|
+
*
|
|
16
|
+
* Wikidot syntax: `{{monospace text}}`
|
|
17
|
+
*
|
|
18
|
+
* Produces a `"container"` AST element with `type: "monospace"`.
|
|
19
|
+
*
|
|
20
|
+
* @module
|
|
21
|
+
*/
|
|
22
|
+
import type { Element } from "@wdprlib/ast";
|
|
23
|
+
import type { InlineRule, ParseContext, RuleResult } from "../types";
|
|
24
|
+
import { currentToken, hasClosingMarkerBeforeNewline } from "../types";
|
|
25
|
+
import { parseInlineUntil } from "./utils";
|
|
26
|
+
|
|
27
|
+
/**
 * Inline rule for monospace formatting written as `{{text}}`.
 *
 * The rule starts on a `MONO_MARKER` token. It first asks
 * `hasClosingMarkerBeforeNewline` whether a `MONO_CLOSE` follows the
 * opener. If so, the tokens between the markers are parsed as inline
 * content via `parseInlineUntil` and wrapped in a `"container"` element
 * with `type: "monospace"`.
 *
 * The rule never reports failure: without a closing marker it emits the
 * opener's text literally and consumes only that token.
 */
export const monospaceRule: InlineRule = {
  name: "monospace",
  startTokens: ["MONO_MARKER"],

  /**
   * Tries to read monospace formatting starting at `ctx.pos`.
   *
   * @param ctx - Parse context holding the token stream and current position
   * @returns A `"container"` element with `type: "monospace"`, or a
   *   one-token `"text"` element holding the unmatched opener
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const opener = currentToken(ctx);

    // Builds a fresh context positioned on the first token after `{{`.
    const pastOpener = (): ParseContext => ({ ...ctx, pos: ctx.pos + 1 });

    const closes = hasClosingMarkerBeforeNewline(pastOpener(), "MONO_CLOSE");
    if (!closes) {
      // Unmatched opener: pass it through as plain text.
      return {
        success: true,
        elements: [{ element: "text", data: opener.value }],
        consumed: 1,
      };
    }

    // Nested inline formatting is parsed up to the closing marker.
    const body = parseInlineUntil(pastOpener(), "MONO_CLOSE");

    const container: Element = {
      element: "container",
      data: {
        type: "monospace",
        attributes: {},
        elements: body.elements,
      },
    };

    return {
      success: true,
      elements: [container],
      // opener + inner tokens + closer
      consumed: 1 + body.consumed + 1,
    };
  },
};
|