@wdprlib/parser 3.1.2 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +295 -118
- package/dist/index.js +272 -95
- package/package.json +5 -3
- package/src/index.ts +163 -0
- package/src/lexer/index.ts +20 -0
- package/src/lexer/lexer.ts +687 -0
- package/src/lexer/tokens.ts +141 -0
- package/src/parser/constants.ts +173 -0
- package/src/parser/depth.ts +251 -0
- package/src/parser/index.ts +18 -0
- package/src/parser/parse.ts +315 -0
- package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
- package/src/parser/postprocess/index.ts +15 -0
- package/src/parser/postprocess/spanStrip.ts +697 -0
- package/src/parser/preprocess/expr.ts +265 -0
- package/src/parser/preprocess/index.ts +38 -0
- package/src/parser/preprocess/typography.ts +67 -0
- package/src/parser/preprocess/utils.ts +250 -0
- package/src/parser/preprocess/whitespace.ts +111 -0
- package/src/parser/rules/block/align.ts +282 -0
- package/src/parser/rules/block/bibliography.ts +359 -0
- package/src/parser/rules/block/block-list.ts +689 -0
- package/src/parser/rules/block/blockquote.ts +238 -0
- package/src/parser/rules/block/center.ts +87 -0
- package/src/parser/rules/block/clear-float.ts +75 -0
- package/src/parser/rules/block/code.ts +187 -0
- package/src/parser/rules/block/collapsible.ts +337 -0
- package/src/parser/rules/block/comment.ts +73 -0
- package/src/parser/rules/block/content-separator.ts +79 -0
- package/src/parser/rules/block/definition-list.ts +270 -0
- package/src/parser/rules/block/div.ts +400 -0
- package/src/parser/rules/block/embed-block.ts +153 -0
- package/src/parser/rules/block/footnoteblock.ts +200 -0
- package/src/parser/rules/block/heading.ts +142 -0
- package/src/parser/rules/block/horizontal-rule.ts +61 -0
- package/src/parser/rules/block/html.ts +222 -0
- package/src/parser/rules/block/iframe.ts +239 -0
- package/src/parser/rules/block/iftags.ts +150 -0
- package/src/parser/rules/block/include.ts +179 -0
- package/src/parser/rules/block/index.ts +127 -0
- package/src/parser/rules/block/list.ts +244 -0
- package/src/parser/rules/block/math.ts +183 -0
- package/src/parser/rules/block/module/backlinks/index.ts +31 -0
- package/src/parser/rules/block/module/backlinks/types.ts +21 -0
- package/src/parser/rules/block/module/categories/index.ts +34 -0
- package/src/parser/rules/block/module/categories/types.ts +21 -0
- package/src/parser/rules/block/module/css/index.ts +37 -0
- package/src/parser/rules/block/module/iftags/condition.ts +109 -0
- package/src/parser/rules/block/module/iftags/index.ts +26 -0
- package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
- package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
- package/src/parser/rules/block/module/iftags/types.ts +63 -0
- package/src/parser/rules/block/module/include/index.ts +20 -0
- package/src/parser/rules/block/module/include/resolve.ts +556 -0
- package/src/parser/rules/block/module/index.ts +122 -0
- package/src/parser/rules/block/module/join/index.ts +34 -0
- package/src/parser/rules/block/module/join/types.ts +23 -0
- package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
- package/src/parser/rules/block/module/listpages/extract.ts +410 -0
- package/src/parser/rules/block/module/listpages/index.ts +83 -0
- package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
- package/src/parser/rules/block/module/listpages/parser.ts +106 -0
- package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
- package/src/parser/rules/block/module/listpages/types.ts +513 -0
- package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
- package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
- package/src/parser/rules/block/module/listusers/extract.ts +45 -0
- package/src/parser/rules/block/module/listusers/index.ts +36 -0
- package/src/parser/rules/block/module/listusers/parser.ts +54 -0
- package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
- package/src/parser/rules/block/module/listusers/types.ts +93 -0
- package/src/parser/rules/block/module/mapping.ts +61 -0
- package/src/parser/rules/block/module/page-tree/index.ts +38 -0
- package/src/parser/rules/block/module/page-tree/types.ts +29 -0
- package/src/parser/rules/block/module/rate/index.ts +28 -0
- package/src/parser/rules/block/module/rate/types.ts +19 -0
- package/src/parser/rules/block/module/resolve.ts +411 -0
- package/src/parser/rules/block/module/types-common.ts +59 -0
- package/src/parser/rules/block/module/types.ts +61 -0
- package/src/parser/rules/block/module/utils.ts +43 -0
- package/src/parser/rules/block/module/walk.ts +380 -0
- package/src/parser/rules/block/module.ts +164 -0
- package/src/parser/rules/block/orphan-li.ts +177 -0
- package/src/parser/rules/block/paragraph.ts +157 -0
- package/src/parser/rules/block/table-block.ts +726 -0
- package/src/parser/rules/block/table.ts +441 -0
- package/src/parser/rules/block/tabview.ts +331 -0
- package/src/parser/rules/block/toc.ts +129 -0
- package/src/parser/rules/block/utils.ts +615 -0
- package/src/parser/rules/index.ts +49 -0
- package/src/parser/rules/inline/anchor-name.ts +154 -0
- package/src/parser/rules/inline/anchor.ts +327 -0
- package/src/parser/rules/inline/bibcite.ts +153 -0
- package/src/parser/rules/inline/bold.ts +86 -0
- package/src/parser/rules/inline/color.ts +140 -0
- package/src/parser/rules/inline/comment.ts +90 -0
- package/src/parser/rules/inline/equation-ref.ts +115 -0
- package/src/parser/rules/inline/expr.ts +526 -0
- package/src/parser/rules/inline/footnote.ts +223 -0
- package/src/parser/rules/inline/guillemet.ts +64 -0
- package/src/parser/rules/inline/html.ts +132 -0
- package/src/parser/rules/inline/image.ts +328 -0
- package/src/parser/rules/inline/index.ts +150 -0
- package/src/parser/rules/inline/italic.ts +74 -0
- package/src/parser/rules/inline/line-break.ts +326 -0
- package/src/parser/rules/inline/link-anchor.ts +147 -0
- package/src/parser/rules/inline/link-single.ts +164 -0
- package/src/parser/rules/inline/link-star.ts +134 -0
- package/src/parser/rules/inline/link-triple.ts +267 -0
- package/src/parser/rules/inline/math-inline.ts +126 -0
- package/src/parser/rules/inline/monospace.ts +78 -0
- package/src/parser/rules/inline/raw.ts +262 -0
- package/src/parser/rules/inline/size.ts +244 -0
- package/src/parser/rules/inline/span.ts +424 -0
- package/src/parser/rules/inline/strikethrough.ts +115 -0
- package/src/parser/rules/inline/subscript.ts +84 -0
- package/src/parser/rules/inline/superscript.ts +84 -0
- package/src/parser/rules/inline/text.ts +84 -0
- package/src/parser/rules/inline/underline.ts +127 -0
- package/src/parser/rules/inline/user.ts +147 -0
- package/src/parser/rules/inline/utils.ts +344 -0
- package/src/parser/rules/types.ts +252 -0
- package/src/parser/rules/utils.ts +155 -0
- package/src/parser/toc.ts +130 -0
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Block rule for `[[li]]...[[/li]]` appearing outside of any `[[ul]]`/`[[ol]]` block.
|
|
4
|
+
*
|
|
5
|
+
* When `[[li]]` is used without an enclosing list block, Wikidot does NOT
|
|
6
|
+
* create a list item. Instead, it treats the tags as literal text and
|
|
7
|
+
* renders the body content without `<p>` wrapping, using `<br />` for
|
|
8
|
+
* newlines.
|
|
9
|
+
*
|
|
10
|
+
* Example input:
|
|
11
|
+
* ```
|
|
12
|
+
* [[li]]
|
|
13
|
+
* Baz
|
|
14
|
+
* [[/li]]
|
|
15
|
+
* ```
|
|
16
|
+
*
|
|
17
|
+
* Rendered output:
|
|
18
|
+
* ```
|
|
19
|
+
* [[li]]<br />Baz<br />[[/li]]
|
|
20
|
+
* ```
|
|
21
|
+
*
|
|
22
|
+
* This rule exists to correctly consume and reproduce that output. Without
|
|
23
|
+
* it, the parser would try to match `[[li]]` against other block rules
|
|
24
|
+
* and potentially produce incorrect results.
|
|
25
|
+
*
|
|
26
|
+
* If no `[[/li]]` closing tag is found, the rule fails.
|
|
27
|
+
*
|
|
28
|
+
* @module
|
|
29
|
+
*/
|
|
30
|
+
import type { Element } from "@wdprlib/ast";
|
|
31
|
+
import type { BlockRule, ParseContext, RuleResult } from "../types";
|
|
32
|
+
import { currentToken } from "../types";
|
|
33
|
+
import { parseBlockName } from "./utils";
|
|
34
|
+
|
|
35
|
+
/**
 * Checks whether a `[[li` opener begins at token index `pos`.
 *
 * The token at `pos` must be `BLOCK_OPEN` and the block name parsed right
 * after it must be exactly `"li"`; any other name (for example `li_`) is
 * rejected. The closing `]]` is NOT examined here — the caller verifies it.
 *
 * @param ctx - Parse context holding the token stream.
 * @param pos - Index of the token expected to be `BLOCK_OPEN`.
 * @returns `{ consumed }` covering `BLOCK_OPEN` plus the name tokens, or
 *   `null` when the tokens do not form a `[[li` opener.
 */
function isLiOpen(ctx: ParseContext, pos: number): { consumed: number } | null {
  const head = ctx.tokens[pos];
  if (head?.type !== "BLOCK_OPEN") return null;

  const parsed = parseBlockName(ctx, pos + 1);
  // The leading 1 accounts for the BLOCK_OPEN token itself.
  return parsed && parsed.name === "li" ? { consumed: 1 + parsed.consumed } : null;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Checks whether a `[[/li]]` closing tag begins at token index `pos`.
 *
 * The token at `pos` must be `BLOCK_END_OPEN` and the block name parsed
 * right after it must be exactly `"li"`. A following `BLOCK_CLOSE` token is
 * included in the consumed count when present; its absence does not cause
 * the match to fail.
 *
 * @param ctx - Parse context holding the token stream.
 * @param pos - Index of the token expected to be `BLOCK_END_OPEN`.
 * @returns `{ consumed }` covering `BLOCK_END_OPEN`, the name tokens and the
 *   optional `BLOCK_CLOSE`, or `null` when there is no match.
 */
function isLiClose(ctx: ParseContext, pos: number): { consumed: number } | null {
  if (ctx.tokens[pos]?.type !== "BLOCK_END_OPEN") return null;

  const parsed = parseBlockName(ctx, pos + 1);
  if (!parsed) return null;
  if (parsed.name !== "li") return null;

  // BLOCK_END_OPEN (1 token) + name tokens.
  const tagLength = 1 + parsed.consumed;
  const hasTerminator = ctx.tokens[pos + tagLength]?.type === "BLOCK_CLOSE";
  return { consumed: hasTerminator ? tagLength + 1 : tagLength };
}
|
|
68
|
+
|
|
69
|
+
/**
 * Block rule for an orphaned `[[li]]...[[/li]]` pair, i.e. one that is not
 * inside a list block.
 *
 * Nothing is turned into a list item. The opening and closing tags are
 * re-emitted as literal text elements, every `NEWLINE` token in the body
 * becomes a `line-break` element, `WHITESPACE` tokens flagged `lineStart`
 * are dropped, and every other token is emitted as text with its raw value.
 *
 * The rule fails (after pushing an `unclosed-block` warning diagnostic) when
 * the token stream ends before a `[[/li]]` is found.
 */
export const orphanLiRule: BlockRule = {
  name: "orphan-li",
  startTokens: ["BLOCK_OPEN"],
  requiresLineStart: false,

  parse(ctx: ParseContext): RuleResult<Element> {
    const openToken = currentToken(ctx);
    if (openToken.type !== "BLOCK_OPEN") {
      return { success: false };
    }

    // Only the exact name `li` qualifies (not `li_`).
    const opening = isLiOpen(ctx, ctx.pos);
    if (!opening) {
      return { success: false };
    }

    // `cursor` walks the token stream; the consumed count is always the
    // distance between `cursor` and `start`.
    const start = ctx.pos;
    let cursor = start + opening.consumed;

    // The opener must be terminated by `]]`.
    if (ctx.tokens[cursor]?.type !== "BLOCK_CLOSE") {
      return { success: false };
    }
    cursor += 1;

    // The opening tag is reproduced verbatim as three text elements.
    const elements: Element[] = [
      { element: "text", data: "[[" },
      { element: "text", data: "li" },
      { element: "text", data: "]]" },
    ];
    let closed = false;

    // Target output for "[[li]]\nBaz\n[[/li]]" is
    // `[[li]]<br />Baz<br />[[/li]]`: each NEWLINE in the body yields one
    // line-break, and nothing extra is inserted around the closing tag.
    while (cursor < ctx.tokens.length) {
      const token = ctx.tokens[cursor];
      if (!token || token.type === "EOF") break;

      const closing = isLiClose(ctx, cursor);
      if (closing) {
        // The closing tag is reproduced verbatim as well.
        elements.push(
          { element: "text", data: "[[/" },
          { element: "text", data: "li" },
          { element: "text", data: "]]" },
        );
        cursor += closing.consumed;
        // A newline directly after the closing tag is swallowed silently.
        if (ctx.tokens[cursor]?.type === "NEWLINE") {
          cursor += 1;
        }
        closed = true;
        break;
      }

      if (token.type === "NEWLINE") {
        // Newlines inside the body render as <br />.
        elements.push({ element: "line-break" });
      } else if (!(token.type === "WHITESPACE" && token.lineStart)) {
        // Anything except line-leading whitespace is passed through as text.
        elements.push({ element: "text", data: token.value });
      }
      cursor += 1;
    }

    // Without a closing tag the rule must fail, otherwise it would swallow
    // the rest of the document.
    if (!closed) {
      ctx.diagnostics.push({
        severity: "warning",
        code: "unclosed-block",
        message: "Missing closing tag [[/li]] for [[li]]",
        position: openToken.position,
      });
      return { success: false };
    }

    return {
      success: true,
      elements,
      consumed: cursor - start,
    };
  },
};
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Paragraph rule
|
|
3
|
+
*
|
|
4
|
+
* Collects inline content until paragraph break (double newline) or end of input.
|
|
5
|
+
* Line breaks within paragraphs are handled by the newlineLineBreakRule.
|
|
6
|
+
*/
|
|
7
|
+
import type { Element } from "@wdprlib/ast";
|
|
8
|
+
import type { BlockRule, ParseContext, RuleResult } from "../types";
|
|
9
|
+
import { parseInlineUntil } from "../inline/utils";
|
|
10
|
+
|
|
11
|
+
/**
 * Tells whether `node` is a closeSpan marker: a `container` element whose
 * data is a `span` carrying a `_closeSpan` key in its attributes object.
 */
function isCloseSpanMarker(node: Element): boolean {
  if (node.element !== "container") return false;

  const data = node.data;
  if (!data || typeof data !== "object") return false;
  if (!("type" in data) || data.type !== "span") return false;
  if (!("attributes" in data)) return false;

  const attributes = data.attributes;
  if (typeof attributes !== "object" || !attributes) return false;
  return "_closeSpan" in attributes;
}

/**
 * Resolves closeSpan markers in a list of inline elements.
 *
 * Each marker is removed from the output. If anything has been collected
 * before the marker, all of it is wrapped into a single attribute-less span
 * container that replaces it; a marker with nothing before it is dropped.
 * Falsy entries in the input are skipped.
 *
 * @param elements - Inline elements, possibly containing closeSpan markers.
 * @returns A new array with the markers resolved.
 */
function processCloseSpanMarkers(elements: Element[]): Element[] {
  const output: Element[] = [];

  for (const node of elements) {
    if (!node) continue;

    if (!isCloseSpanMarker(node)) {
      output.push(node);
      continue;
    }

    // Marker found: everything gathered so far moves inside one span.
    // The marker itself is never emitted.
    if (output.length > 0) {
      const wrapped = output.splice(0);
      output.push({
        element: "container",
        data: {
          type: "span",
          attributes: {},
          elements: wrapped,
        },
      });
    }
  }

  return output;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Fallback block rule that gathers inline content into a paragraph.
 *
 * Inline content is collected by `parseInlineContent`, closeSpan markers are
 * resolved, and the element list is then trimmed in this order: trailing
 * line-breaks, trailing whitespace-only text, leading line-breaks. The rule
 * fails when nothing is left.
 *
 * Wikidot behavior:
 * - Single newline → <br> (handled by newlineLineBreakRule)
 * - Blank line (double newline) → new paragraph
 * - Text directly followed by a line-start `COLON` token (a definition
 *   list) is emitted unwrapped, followed by a line-break, instead of being
 *   placed in a paragraph container.
 */
export const paragraphRule: BlockRule = {
  name: "paragraph",
  startTokens: [], // matches anything not matched by other rules
  requiresLineStart: false,

  parse(ctx: ParseContext): RuleResult<Element> {
    // Inline parsing includes NEWLINEs and ends at a paragraph break.
    const inline = parseInlineContent(ctx);

    // Nothing parsed: no paragraph to emit.
    if (inline.elements.length === 0) {
      return { success: false };
    }

    // Resolve closeSpan markers (used for split spans).
    const elements = processCloseSpanMarkers(inline.elements);

    // Drop line-breaks at the end of the paragraph. A break flagged
    // `_preservedTrailingBreak` is kept: the flag is cleared and trimming
    // stops there.
    let tail = elements[elements.length - 1];
    while (tail?.element === "line-break") {
      // The flag is an ad-hoc property that the Element type does not declare.
      const flagged = tail as any;
      if (flagged._preservedTrailingBreak) {
        delete flagged._preservedTrailingBreak;
        break;
      }
      elements.pop();
      tail = elements[elements.length - 1];
    }

    // Drop whitespace-only text nodes at the end.
    for (;;) {
      const candidate = elements[elements.length - 1];
      const isBlankText =
        candidate?.element === "text" &&
        "data" in candidate &&
        typeof candidate.data === "string" &&
        candidate.data.trim() === "";
      if (!isBlankText) break;
      elements.pop();
    }

    // Drop line-breaks at the start.
    while (elements[0]?.element === "line-break") {
      elements.shift();
    }

    if (elements.length === 0) {
      return { success: false };
    }

    // Wikidot does not wrap text lines in <p> when they sit immediately
    // before a definition list, so look at the token following this run.
    const following = ctx.tokens[ctx.pos + inline.consumed];
    if (following?.type === "COLON" && following.lineStart) {
      return {
        success: true,
        elements: [...elements, { element: "line-break" }],
        consumed: inline.consumed,
      };
    }

    const paragraph: Element = {
      element: "container",
      data: {
        type: "paragraph",
        attributes: {},
        elements,
      },
    };
    return {
      success: true,
      elements: [paragraph],
      consumed: inline.consumed,
    };
  },
};
|
|
146
|
+
|
|
147
|
+
/**
 * Collects the inline elements of one paragraph.
 *
 * Delegates to the shared inline parser, passing `"PARAGRAPH_BREAK"` as the
 * stop marker.
 *
 * @param ctx - Parse context positioned at the start of the paragraph.
 * @returns The parsed elements and the number of tokens consumed.
 */
function parseInlineContent(ctx: ParseContext): { elements: Element[]; consumed: number } {
  // Cast kept from the original: the accepted stop-token type is declared
  // outside this file's visible code.
  const stopAt = "PARAGRAPH_BREAK" as any;
  return parseInlineUntil(ctx, stopAt);
}
|