@wdprlib/parser 3.1.2 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +295 -118
- package/dist/index.js +272 -95
- package/package.json +5 -3
- package/src/index.ts +163 -0
- package/src/lexer/index.ts +20 -0
- package/src/lexer/lexer.ts +687 -0
- package/src/lexer/tokens.ts +141 -0
- package/src/parser/constants.ts +173 -0
- package/src/parser/depth.ts +251 -0
- package/src/parser/index.ts +18 -0
- package/src/parser/parse.ts +315 -0
- package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
- package/src/parser/postprocess/index.ts +15 -0
- package/src/parser/postprocess/spanStrip.ts +697 -0
- package/src/parser/preprocess/expr.ts +265 -0
- package/src/parser/preprocess/index.ts +38 -0
- package/src/parser/preprocess/typography.ts +67 -0
- package/src/parser/preprocess/utils.ts +250 -0
- package/src/parser/preprocess/whitespace.ts +111 -0
- package/src/parser/rules/block/align.ts +282 -0
- package/src/parser/rules/block/bibliography.ts +359 -0
- package/src/parser/rules/block/block-list.ts +689 -0
- package/src/parser/rules/block/blockquote.ts +238 -0
- package/src/parser/rules/block/center.ts +87 -0
- package/src/parser/rules/block/clear-float.ts +75 -0
- package/src/parser/rules/block/code.ts +187 -0
- package/src/parser/rules/block/collapsible.ts +337 -0
- package/src/parser/rules/block/comment.ts +73 -0
- package/src/parser/rules/block/content-separator.ts +79 -0
- package/src/parser/rules/block/definition-list.ts +270 -0
- package/src/parser/rules/block/div.ts +400 -0
- package/src/parser/rules/block/embed-block.ts +153 -0
- package/src/parser/rules/block/footnoteblock.ts +200 -0
- package/src/parser/rules/block/heading.ts +142 -0
- package/src/parser/rules/block/horizontal-rule.ts +61 -0
- package/src/parser/rules/block/html.ts +222 -0
- package/src/parser/rules/block/iframe.ts +239 -0
- package/src/parser/rules/block/iftags.ts +150 -0
- package/src/parser/rules/block/include.ts +179 -0
- package/src/parser/rules/block/index.ts +127 -0
- package/src/parser/rules/block/list.ts +244 -0
- package/src/parser/rules/block/math.ts +183 -0
- package/src/parser/rules/block/module/backlinks/index.ts +31 -0
- package/src/parser/rules/block/module/backlinks/types.ts +21 -0
- package/src/parser/rules/block/module/categories/index.ts +34 -0
- package/src/parser/rules/block/module/categories/types.ts +21 -0
- package/src/parser/rules/block/module/css/index.ts +37 -0
- package/src/parser/rules/block/module/iftags/condition.ts +109 -0
- package/src/parser/rules/block/module/iftags/index.ts +26 -0
- package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
- package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
- package/src/parser/rules/block/module/iftags/types.ts +63 -0
- package/src/parser/rules/block/module/include/index.ts +20 -0
- package/src/parser/rules/block/module/include/resolve.ts +556 -0
- package/src/parser/rules/block/module/index.ts +122 -0
- package/src/parser/rules/block/module/join/index.ts +34 -0
- package/src/parser/rules/block/module/join/types.ts +23 -0
- package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
- package/src/parser/rules/block/module/listpages/extract.ts +410 -0
- package/src/parser/rules/block/module/listpages/index.ts +83 -0
- package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
- package/src/parser/rules/block/module/listpages/parser.ts +106 -0
- package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
- package/src/parser/rules/block/module/listpages/types.ts +513 -0
- package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
- package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
- package/src/parser/rules/block/module/listusers/extract.ts +45 -0
- package/src/parser/rules/block/module/listusers/index.ts +36 -0
- package/src/parser/rules/block/module/listusers/parser.ts +54 -0
- package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
- package/src/parser/rules/block/module/listusers/types.ts +93 -0
- package/src/parser/rules/block/module/mapping.ts +61 -0
- package/src/parser/rules/block/module/page-tree/index.ts +38 -0
- package/src/parser/rules/block/module/page-tree/types.ts +29 -0
- package/src/parser/rules/block/module/rate/index.ts +28 -0
- package/src/parser/rules/block/module/rate/types.ts +19 -0
- package/src/parser/rules/block/module/resolve.ts +411 -0
- package/src/parser/rules/block/module/types-common.ts +59 -0
- package/src/parser/rules/block/module/types.ts +61 -0
- package/src/parser/rules/block/module/utils.ts +43 -0
- package/src/parser/rules/block/module/walk.ts +380 -0
- package/src/parser/rules/block/module.ts +164 -0
- package/src/parser/rules/block/orphan-li.ts +177 -0
- package/src/parser/rules/block/paragraph.ts +157 -0
- package/src/parser/rules/block/table-block.ts +726 -0
- package/src/parser/rules/block/table.ts +441 -0
- package/src/parser/rules/block/tabview.ts +331 -0
- package/src/parser/rules/block/toc.ts +129 -0
- package/src/parser/rules/block/utils.ts +615 -0
- package/src/parser/rules/index.ts +49 -0
- package/src/parser/rules/inline/anchor-name.ts +154 -0
- package/src/parser/rules/inline/anchor.ts +327 -0
- package/src/parser/rules/inline/bibcite.ts +153 -0
- package/src/parser/rules/inline/bold.ts +86 -0
- package/src/parser/rules/inline/color.ts +140 -0
- package/src/parser/rules/inline/comment.ts +90 -0
- package/src/parser/rules/inline/equation-ref.ts +115 -0
- package/src/parser/rules/inline/expr.ts +526 -0
- package/src/parser/rules/inline/footnote.ts +223 -0
- package/src/parser/rules/inline/guillemet.ts +64 -0
- package/src/parser/rules/inline/html.ts +132 -0
- package/src/parser/rules/inline/image.ts +328 -0
- package/src/parser/rules/inline/index.ts +150 -0
- package/src/parser/rules/inline/italic.ts +74 -0
- package/src/parser/rules/inline/line-break.ts +326 -0
- package/src/parser/rules/inline/link-anchor.ts +147 -0
- package/src/parser/rules/inline/link-single.ts +164 -0
- package/src/parser/rules/inline/link-star.ts +134 -0
- package/src/parser/rules/inline/link-triple.ts +267 -0
- package/src/parser/rules/inline/math-inline.ts +126 -0
- package/src/parser/rules/inline/monospace.ts +78 -0
- package/src/parser/rules/inline/raw.ts +262 -0
- package/src/parser/rules/inline/size.ts +244 -0
- package/src/parser/rules/inline/span.ts +424 -0
- package/src/parser/rules/inline/strikethrough.ts +115 -0
- package/src/parser/rules/inline/subscript.ts +84 -0
- package/src/parser/rules/inline/superscript.ts +84 -0
- package/src/parser/rules/inline/text.ts +84 -0
- package/src/parser/rules/inline/underline.ts +127 -0
- package/src/parser/rules/inline/user.ts +147 -0
- package/src/parser/rules/inline/utils.ts +344 -0
- package/src/parser/rules/types.ts +252 -0
- package/src/parser/rules/utils.ts +155 -0
- package/src/parser/toc.ts +130 -0
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
import type { Element, PageRef, VariableMap } from "@wdprlib/ast";
|
|
2
|
+
import type { BlockRule, ParseContext, RuleResult } from "../types";
|
|
3
|
+
import { currentToken } from "../types";
|
|
4
|
+
import { parseBlockName } from "./utils";
|
|
5
|
+
|
|
6
|
+
/**
 * Split an include target into its site and page components.
 *
 * Recognised shapes:
 * - `page`          -> `{ site: null, page: "page" }`
 * - `:site:page`    -> `{ site: "site", page: "page" }`
 * - `fragment:name` -> `{ site: null, page: "fragment:name" }`
 *
 * A target that starts with `:` but contains no second `:` is returned
 * whole as the page name with `site: null`.
 *
 * @param target - Raw include target text.
 * @returns The page reference; `site` is `null` unless the `:site:page`
 *   form was used.
 */
function parsePageRef(target: string): PageRef {
  // Anything not starting with ":" is a plain page name (category colons included).
  if (target[0] !== ":") {
    return { site: null, page: target };
  }

  // Look for the colon that terminates the site name, skipping the leading one.
  const separator = target.indexOf(":", 1);
  if (separator === -1) {
    return { site: null, page: target };
  }

  return {
    site: target.slice(1, separator),
    page: target.slice(separator + 1),
  };
}
|
|
27
|
+
|
|
28
|
+
/**
 * Parse include variables from the collected argument token values.
 *
 * Token values are concatenated into segments; an entry equal to `"|"`
 * ends the current segment. A segment contributes a variable when it
 * contains `=`: the text before the first `=` is the key and the text
 * after it is the value, both trimmed. Segments without `=` or with an
 * empty key are ignored. When a key repeats, the last occurrence wins.
 *
 * @param tokens - Argument token values, with `"|"` marking separators.
 * @returns Map of variable name to value.
 */
function parseVariables(tokens: string[]): VariableMap {
  const vars: VariableMap = {};

  // Single place that turns one "key=value" segment into a map entry; used
  // both when a separator is hit and for the trailing segment.
  const addSegment = (segment: string): void => {
    const eqIndex = segment.indexOf("=");
    if (eqIndex === -1) {
      // Covers empty / whitespace-only segments as well as bare words.
      return;
    }
    const key = segment.slice(0, eqIndex).trim();
    if (key) {
      vars[key] = segment.slice(eqIndex + 1).trim();
    }
  };

  let current = "";
  for (const token of tokens) {
    if (token === "|") {
      addSegment(current);
      current = "";
    } else {
      current += token;
    }
  }

  // The last segment has no trailing separator.
  addSegment(current);

  return vars;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Block rule for `[[include target key=value | key=value]]`.
 *
 * The rule matches a BLOCK_OPEN token followed by the block name
 * `include` (case-insensitive) and fails when `ctx.settings.enablePageSyntax`
 * is falsy. Everything up to the closing BLOCK_CLOSE is read as:
 * - the target: the token values up to the first WHITESPACE (once the
 *   target is non-empty) or PIPE token;
 * - the arguments: the values of all remaining tokens, handed to
 *   `parseVariables()`.
 * NEWLINE tokens inside the block are skipped and contribute nothing.
 * A single NEWLINE directly after the closing token is consumed as well.
 *
 * On success one `include` element is produced with an empty `elements`
 * array, the parsed `location` and `variables`.
 */
export const includeRule: BlockRule = {
  name: "include",
  startTokens: ["BLOCK_OPEN"],
  requiresLineStart: false,

  parse(ctx: ParseContext): RuleResult<Element> {
    if (currentToken(ctx).type !== "BLOCK_OPEN") {
      return { success: false };
    }

    const tokens = ctx.tokens;
    // Index of the next unread token; starts just past BLOCK_OPEN.
    let cursor = ctx.pos + 1;

    // Block name must be "include".
    const blockName = parseBlockName(ctx, cursor);
    if (!blockName || blockName.name.toLowerCase() !== "include") {
      return { success: false };
    }

    // Page syntax disabled (e.g., forum-post mode)
    if (!ctx.settings.enablePageSyntax) {
      return { success: false };
    }

    cursor += blockName.consumed;

    // Whitespace between the block name and the target.
    while (tokens[cursor]?.type === "WHITESPACE") {
      cursor++;
    }

    const argTokens: string[] = [];
    let target = "";
    let readingTarget = true;

    // Walk up to (not past) the closing token, splitting target from arguments.
    for (; cursor < tokens.length; cursor++) {
      const token = tokens[cursor];
      if (!token || token.type === "BLOCK_CLOSE") {
        break;
      }

      // Newlines are allowed in include arguments and are simply dropped.
      if (token.type === "NEWLINE") {
        continue;
      }

      if (!readingTarget) {
        argTokens.push(token.value);
        continue;
      }

      switch (token.type) {
        case "WHITESPACE":
          // Whitespace only terminates the target once something was read.
          if (target) {
            readingTarget = false;
          }
          break;
        case "PIPE":
          // A separator right after the target starts the argument list.
          readingTarget = false;
          argTokens.push("|");
          break;
        default:
          target += token.value;
      }
    }

    // Expect ]]
    if (tokens[cursor]?.type !== "BLOCK_CLOSE") {
      return { success: false };
    }
    cursor++;

    // Swallow one newline directly after the block.
    if (tokens[cursor]?.type === "NEWLINE") {
      cursor++;
    }

    if (!target) {
      return { success: false };
    }

    return {
      success: true,
      elements: [
        {
          element: "include",
          data: {
            "paragraph-safe": false,
            variables: parseVariables(argTokens),
            location: parsePageRef(target),
            elements: [],
          },
        },
      ],
      // Every token between the rule's start position and the cursor was consumed.
      consumed: cursor - ctx.pos,
    };
  },
};
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Central registry for all block-level parser rules.
|
|
4
|
+
*
|
|
5
|
+
* This module imports every block rule, re-exports each one individually
|
|
6
|
+
* (for selective use), and assembles them into the ordered `blockRules`
|
|
7
|
+
* array that the main parser iterates through.
|
|
8
|
+
*
|
|
9
|
+
* Rule ordering matters: rules earlier in the array are tried first.
|
|
10
|
+
* Line-start-only rules (comments, headings, lists, etc.) are naturally
|
|
11
|
+
* filtered by the `requiresLineStart` flag, but among rules that share
|
|
12
|
+
* a start token (e.g. BLOCK_OPEN), position in this array determines
|
|
13
|
+
* priority. For example, `codeBlockRule` is tried before `collapsibleRule`
|
|
14
|
+
* because both start with BLOCK_OPEN but code blocks should be matched
|
|
15
|
+
* first.
|
|
16
|
+
*
|
|
17
|
+
* The paragraph rule is deliberately excluded from the array -- it serves
|
|
18
|
+
* as a fallback and is exported separately as `blockFallbackRule`.
|
|
19
|
+
*
|
|
20
|
+
* @module
|
|
21
|
+
*/
|
|
22
|
+
import type { BlockRule } from "../types";
|
|
23
|
+
import { headingRule } from "./heading";
|
|
24
|
+
import { horizontalRuleRule } from "./horizontal-rule";
|
|
25
|
+
import { listRule } from "./list";
|
|
26
|
+
import { blockListRule } from "./block-list";
|
|
27
|
+
import { blockquoteRule } from "./blockquote";
|
|
28
|
+
import { definitionListRule } from "./definition-list";
|
|
29
|
+
import { paragraphRule } from "./paragraph";
|
|
30
|
+
import { divRule } from "./div";
|
|
31
|
+
import { codeBlockRule } from "./code";
|
|
32
|
+
import { collapsibleRule } from "./collapsible";
|
|
33
|
+
import { tableRule } from "./table";
|
|
34
|
+
import { tableBlockRule } from "./table-block";
|
|
35
|
+
import { moduleRule } from "./module";
|
|
36
|
+
import { footnoteBlockRule } from "./footnoteblock";
|
|
37
|
+
import { blockCommentRule } from "./comment";
|
|
38
|
+
import { centerRule } from "./center";
|
|
39
|
+
import { contentSeparatorRule } from "./content-separator";
|
|
40
|
+
import { clearFloatRule } from "./clear-float";
|
|
41
|
+
import { alignRule } from "./align";
|
|
42
|
+
import { tabviewRule } from "./tabview";
|
|
43
|
+
import { includeRule } from "./include";
|
|
44
|
+
import { mathBlockRule } from "./math";
|
|
45
|
+
import { htmlBlockRule } from "./html";
|
|
46
|
+
import { embedBlockRule } from "./embed-block";
|
|
47
|
+
import { iframeRule } from "./iframe";
|
|
48
|
+
import { iftagsRule } from "./iftags";
|
|
49
|
+
import { tocRule } from "./toc";
|
|
50
|
+
import { orphanLiRule } from "./orphan-li";
|
|
51
|
+
import { bibliographyRule } from "./bibliography";
|
|
52
|
+
|
|
53
|
+
export { headingRule } from "./heading";
|
|
54
|
+
export { horizontalRuleRule } from "./horizontal-rule";
|
|
55
|
+
export { listRule } from "./list";
|
|
56
|
+
export { blockListRule } from "./block-list";
|
|
57
|
+
export { blockquoteRule } from "./blockquote";
|
|
58
|
+
export { definitionListRule } from "./definition-list";
|
|
59
|
+
export { paragraphRule } from "./paragraph";
|
|
60
|
+
export { divRule } from "./div";
|
|
61
|
+
export { codeBlockRule } from "./code";
|
|
62
|
+
export { collapsibleRule } from "./collapsible";
|
|
63
|
+
export { tableRule } from "./table";
|
|
64
|
+
export { tableBlockRule } from "./table-block";
|
|
65
|
+
export { moduleRule } from "./module";
|
|
66
|
+
export { footnoteBlockRule } from "./footnoteblock";
|
|
67
|
+
export { blockCommentRule } from "./comment";
|
|
68
|
+
export { centerRule } from "./center";
|
|
69
|
+
export { contentSeparatorRule } from "./content-separator";
|
|
70
|
+
export { clearFloatRule } from "./clear-float";
|
|
71
|
+
export { alignRule } from "./align";
|
|
72
|
+
export { tabviewRule } from "./tabview";
|
|
73
|
+
export { includeRule } from "./include";
|
|
74
|
+
export { mathBlockRule } from "./math";
|
|
75
|
+
export { htmlBlockRule } from "./html";
|
|
76
|
+
export { embedBlockRule } from "./embed-block";
|
|
77
|
+
export { iframeRule } from "./iframe";
|
|
78
|
+
export { iftagsRule } from "./iftags";
|
|
79
|
+
export { tocRule } from "./toc";
|
|
80
|
+
export { orphanLiRule } from "./orphan-li";
|
|
81
|
+
export { bibliographyRule } from "./bibliography";
|
|
82
|
+
|
|
83
|
+
/**
 * All block rules in priority order.
 *
 * The main parser tries each rule in sequence for the current token.
 * Rules with `requiresLineStart: true` are only attempted when the token
 * is at line start, so their position relative to non-line-start rules
 * is less critical. Among rules that share the same `startTokens` entry,
 * earlier position wins -- do not reorder entries without checking which
 * rules share a start token.
 *
 * The paragraph rule is intentionally absent -- it is used as a fallback
 * when no other rule matches (see `blockFallbackRule`).
 */
export const blockRules: BlockRule[] = [
  blockCommentRule,
  clearFloatRule,
  contentSeparatorRule,
  centerRule,
  headingRule,
  horizontalRuleRule,
  tableRule,
  tableBlockRule,
  listRule,
  blockListRule,
  orphanLiRule,
  blockquoteRule,
  definitionListRule,
  codeBlockRule,
  collapsibleRule,
  tocRule,
  footnoteBlockRule,
  moduleRule,
  alignRule,
  tabviewRule,
  includeRule,
  mathBlockRule,
  htmlBlockRule,
  embedBlockRule,
  iframeRule,
  iftagsRule,
  bibliographyRule,
  divRule,
  // paragraphRule is not included - used as fallback
];
|
|
126
|
+
|
|
127
|
+
export { paragraphRule as blockFallbackRule };
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Block rule for Wikidot marker-based lists (`* item`, `# item`).
|
|
4
|
+
*
|
|
5
|
+
* Wikidot lists use leading `*` (bullet) or `#` (numbered) markers at the
|
|
6
|
+
* start of a line. Nesting is achieved by prepending spaces:
|
|
7
|
+
*
|
|
8
|
+
* ```
|
|
9
|
+
* * Item 1
|
|
10
|
+
* * Nested bullet
|
|
11
|
+
* # Nested numbered
|
|
12
|
+
* * Item 2
|
|
13
|
+
* ```
|
|
14
|
+
*
|
|
15
|
+
* The depth of each item is determined by the number of leading spaces
|
|
16
|
+
* before the marker. Mixed bullet/numbered lists are supported: when the
|
|
17
|
+
* list type changes at the same depth, a new sub-list is created.
|
|
18
|
+
*
|
|
19
|
+
* The flat depth-annotated items are converted into a recursive tree by
|
|
20
|
+
* `processDepths()`, then transformed into nested `list` AST elements
|
|
21
|
+
* by `buildListElement()`.
|
|
22
|
+
*
|
|
23
|
+
* Maximum nesting depth is capped at `MAX_LIST_DEPTH` (20).
|
|
24
|
+
*
|
|
25
|
+
* @module
|
|
26
|
+
*/
|
|
27
|
+
import type { Element, ListData, ListItem, ListType } from "@wdprlib/ast";
|
|
28
|
+
import type { BlockRule, ParseContext, RuleResult } from "../types";
|
|
29
|
+
import { currentToken } from "../types";
|
|
30
|
+
import { parseInlineUntil } from "../inline/utils";
|
|
31
|
+
import { processDepths, type DepthList } from "../../depth";
|
|
32
|
+
|
|
33
|
+
/**
 * Safety limit for list nesting depth.
 * Items deeper than this are not parsed, preventing stack overflow on
 * deeply nested or adversarial input. `listRule` compares this against
 * the length of an item's leading whitespace token and stops collecting
 * items once it is exceeded.
 */
const MAX_LIST_DEPTH = 20;

/** Internal discriminated type for bullet vs numbered items during parsing. */
type InternalListType = "bullet" | "numbered";

/**
 * Default list type used as the top-level placeholder in
 * `processDepths()`. The actual type of each sub-list is determined
 * by its first item's marker.
 */
const GENERIC_LIST_TYPE: InternalListType = "bullet";
|
|
49
|
+
|
|
50
|
+
/**
 * Block rule for marker-based lists (`* ` bullet, `# ` numbered).
 *
 * How a list is read:
 * 1. The current token must be at line start and be a LIST_BULLET or
 *    LIST_NUMBER token (the first item may not be indented).
 * 2. Consecutive list lines are collected. For each line the depth is the
 *    length of an optional leading WHITESPACE token, the type comes from
 *    the marker token, and the content is parsed inline up to NEWLINE.
 * 3. The flat items go through `processDepths()` (types compared with
 *    `===`) to produce nested trees.
 * 4. Each tree becomes a `list` element via `buildListElement()`.
 *
 * Collection stops at the first line that is not a list item or whose
 * depth exceeds `MAX_LIST_DEPTH`.
 */
export const listRule: BlockRule = {
  name: "list",
  startTokens: ["LIST_BULLET", "LIST_NUMBER"],
  requiresLineStart: true,

  parse(ctx: ParseContext): RuleResult<Element> {
    const first = currentToken(ctx);

    // Wikidot: list must start with list marker directly (no leading whitespace)
    const startsWithMarker = first.type === "LIST_BULLET" || first.type === "LIST_NUMBER";
    if (!first.lineStart || !startsWithMarker) {
      return { success: false };
    }

    const tokens = ctx.tokens;
    const collected: { depth: number; ltype: InternalListType; value: Element[] }[] = [];
    // Index of the next unread token; consumed count is derived from it at the end.
    let cursor = ctx.pos;

    while (cursor < tokens.length) {
      const lead = tokens[cursor];
      if (!lead?.lineStart) {
        break;
      }

      // Leading whitespace sets the depth (one level per character).
      const lineStartPos = cursor;
      let depth = 0;
      if (lead.type === "WHITESPACE") {
        depth = lead.value.length;
        cursor++;
      }

      const marker = tokens[cursor];
      if (!marker || !(marker.type === "LIST_BULLET" || marker.type === "LIST_NUMBER")) {
        // Not a list line: give back the whitespace taken for depth detection.
        if (depth > 0) {
          cursor = lineStartPos;
        }
        break;
      }

      if (depth > MAX_LIST_DEPTH) {
        // NOTE(review): unlike the non-marker case above, the leading
        // whitespace token stays consumed here -- confirm this is intended.
        break;
      }

      const ltype: InternalListType = marker.type === "LIST_BULLET" ? "bullet" : "numbered";

      // Step over the marker and the optional whitespace that follows it.
      cursor++;
      if (tokens[cursor]?.type === "WHITESPACE") {
        cursor++;
      }

      // Item content runs to the end of the line.
      const inline = parseInlineUntil({ ...ctx, pos: cursor }, "NEWLINE");
      cursor += inline.consumed;

      if (tokens[cursor]?.type === "NEWLINE") {
        cursor++;
      }

      collected.push({ depth, ltype, value: inline.elements });
    }

    // No items parsed - rule fails
    if (collected.length === 0) {
      return { success: false };
    }

    // Nest by depth; a type change at the same depth starts a new list.
    const trees = processDepths<InternalListType, Element[]>(
      GENERIC_LIST_TYPE,
      collected,
      (left, right) => left === right,
    );

    return {
      success: true,
      elements: trees.map((tree) => buildListElement(tree.ltype, tree.list)),
      consumed: cursor - ctx.pos,
    };
  },
};
|
|
178
|
+
|
|
179
|
+
/**
 * Maps the parser-internal list type onto the AST's {@link ListType}.
 *
 * @param ltype - Internal "bullet" or "numbered".
 * @returns `"numbered"` for numbered lists, `"bullet"` for anything else.
 */
function toListType(ltype: InternalListType): ListType {
  if (ltype === "numbered") {
    return "numbered";
  }
  return "bullet";
}
|
|
188
|
+
|
|
189
|
+
/**
 * Wraps a depth tree from `processDepths()` in a `list` AST element.
 *
 * @param topLtype - The list type for the top-level list.
 * @param list - The depth tree of items and sub-lists.
 * @returns A `list` element whose data is built by `buildListData()`.
 */
function buildListElement(
  topLtype: InternalListType,
  list: DepthList<InternalListType, Element[]>,
): Element {
  const data = buildListData(topLtype, list);
  return { element: "list", data };
}
|
|
206
|
+
|
|
207
|
+
/**
 * Recursively turns a depth tree into a {@link ListData} payload.
 *
 * Nodes of kind `"item"` become `"elements"` list items carrying the
 * node's value; every other node becomes a `"sub-list"` item whose data
 * is built recursively from the node's own type and children.
 *
 * @param topLtype - List type for this level.
 * @param list - The depth tree nodes at this level.
 * @returns The list data for this level, with empty `attributes`.
 */
function buildListData(
  topLtype: InternalListType,
  list: DepthList<InternalListType, Element[]>,
): ListData {
  const items: ListItem[] = [];

  for (const node of list) {
    if (node.kind !== "item") {
      // Nested level: recurse with the sub-list's own type.
      items.push({
        "item-type": "sub-list",
        element: "list",
        data: buildListData(node.ltype, node.children),
      });
      continue;
    }

    // Leaf: the already-parsed inline elements of one list line.
    items.push({
      "item-type": "elements",
      attributes: {},
      elements: node.value,
    });
  }

  const type = toListType(topLtype);
  return { type, attributes: {}, items };
}
|