@wdprlib/parser 3.1.2 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +295 -118
- package/dist/index.js +272 -95
- package/package.json +5 -3
- package/src/index.ts +163 -0
- package/src/lexer/index.ts +20 -0
- package/src/lexer/lexer.ts +687 -0
- package/src/lexer/tokens.ts +141 -0
- package/src/parser/constants.ts +173 -0
- package/src/parser/depth.ts +251 -0
- package/src/parser/index.ts +18 -0
- package/src/parser/parse.ts +315 -0
- package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
- package/src/parser/postprocess/index.ts +15 -0
- package/src/parser/postprocess/spanStrip.ts +697 -0
- package/src/parser/preprocess/expr.ts +265 -0
- package/src/parser/preprocess/index.ts +38 -0
- package/src/parser/preprocess/typography.ts +67 -0
- package/src/parser/preprocess/utils.ts +250 -0
- package/src/parser/preprocess/whitespace.ts +111 -0
- package/src/parser/rules/block/align.ts +282 -0
- package/src/parser/rules/block/bibliography.ts +359 -0
- package/src/parser/rules/block/block-list.ts +689 -0
- package/src/parser/rules/block/blockquote.ts +238 -0
- package/src/parser/rules/block/center.ts +87 -0
- package/src/parser/rules/block/clear-float.ts +75 -0
- package/src/parser/rules/block/code.ts +187 -0
- package/src/parser/rules/block/collapsible.ts +337 -0
- package/src/parser/rules/block/comment.ts +73 -0
- package/src/parser/rules/block/content-separator.ts +79 -0
- package/src/parser/rules/block/definition-list.ts +270 -0
- package/src/parser/rules/block/div.ts +400 -0
- package/src/parser/rules/block/embed-block.ts +153 -0
- package/src/parser/rules/block/footnoteblock.ts +200 -0
- package/src/parser/rules/block/heading.ts +142 -0
- package/src/parser/rules/block/horizontal-rule.ts +61 -0
- package/src/parser/rules/block/html.ts +222 -0
- package/src/parser/rules/block/iframe.ts +239 -0
- package/src/parser/rules/block/iftags.ts +150 -0
- package/src/parser/rules/block/include.ts +179 -0
- package/src/parser/rules/block/index.ts +127 -0
- package/src/parser/rules/block/list.ts +244 -0
- package/src/parser/rules/block/math.ts +183 -0
- package/src/parser/rules/block/module/backlinks/index.ts +31 -0
- package/src/parser/rules/block/module/backlinks/types.ts +21 -0
- package/src/parser/rules/block/module/categories/index.ts +34 -0
- package/src/parser/rules/block/module/categories/types.ts +21 -0
- package/src/parser/rules/block/module/css/index.ts +37 -0
- package/src/parser/rules/block/module/iftags/condition.ts +109 -0
- package/src/parser/rules/block/module/iftags/index.ts +26 -0
- package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
- package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
- package/src/parser/rules/block/module/iftags/types.ts +63 -0
- package/src/parser/rules/block/module/include/index.ts +20 -0
- package/src/parser/rules/block/module/include/resolve.ts +556 -0
- package/src/parser/rules/block/module/index.ts +122 -0
- package/src/parser/rules/block/module/join/index.ts +34 -0
- package/src/parser/rules/block/module/join/types.ts +23 -0
- package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
- package/src/parser/rules/block/module/listpages/extract.ts +410 -0
- package/src/parser/rules/block/module/listpages/index.ts +83 -0
- package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
- package/src/parser/rules/block/module/listpages/parser.ts +106 -0
- package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
- package/src/parser/rules/block/module/listpages/types.ts +513 -0
- package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
- package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
- package/src/parser/rules/block/module/listusers/extract.ts +45 -0
- package/src/parser/rules/block/module/listusers/index.ts +36 -0
- package/src/parser/rules/block/module/listusers/parser.ts +54 -0
- package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
- package/src/parser/rules/block/module/listusers/types.ts +93 -0
- package/src/parser/rules/block/module/mapping.ts +61 -0
- package/src/parser/rules/block/module/page-tree/index.ts +38 -0
- package/src/parser/rules/block/module/page-tree/types.ts +29 -0
- package/src/parser/rules/block/module/rate/index.ts +28 -0
- package/src/parser/rules/block/module/rate/types.ts +19 -0
- package/src/parser/rules/block/module/resolve.ts +411 -0
- package/src/parser/rules/block/module/types-common.ts +59 -0
- package/src/parser/rules/block/module/types.ts +61 -0
- package/src/parser/rules/block/module/utils.ts +43 -0
- package/src/parser/rules/block/module/walk.ts +380 -0
- package/src/parser/rules/block/module.ts +164 -0
- package/src/parser/rules/block/orphan-li.ts +177 -0
- package/src/parser/rules/block/paragraph.ts +157 -0
- package/src/parser/rules/block/table-block.ts +726 -0
- package/src/parser/rules/block/table.ts +441 -0
- package/src/parser/rules/block/tabview.ts +331 -0
- package/src/parser/rules/block/toc.ts +129 -0
- package/src/parser/rules/block/utils.ts +615 -0
- package/src/parser/rules/index.ts +49 -0
- package/src/parser/rules/inline/anchor-name.ts +154 -0
- package/src/parser/rules/inline/anchor.ts +327 -0
- package/src/parser/rules/inline/bibcite.ts +153 -0
- package/src/parser/rules/inline/bold.ts +86 -0
- package/src/parser/rules/inline/color.ts +140 -0
- package/src/parser/rules/inline/comment.ts +90 -0
- package/src/parser/rules/inline/equation-ref.ts +115 -0
- package/src/parser/rules/inline/expr.ts +526 -0
- package/src/parser/rules/inline/footnote.ts +223 -0
- package/src/parser/rules/inline/guillemet.ts +64 -0
- package/src/parser/rules/inline/html.ts +132 -0
- package/src/parser/rules/inline/image.ts +328 -0
- package/src/parser/rules/inline/index.ts +150 -0
- package/src/parser/rules/inline/italic.ts +74 -0
- package/src/parser/rules/inline/line-break.ts +326 -0
- package/src/parser/rules/inline/link-anchor.ts +147 -0
- package/src/parser/rules/inline/link-single.ts +164 -0
- package/src/parser/rules/inline/link-star.ts +134 -0
- package/src/parser/rules/inline/link-triple.ts +267 -0
- package/src/parser/rules/inline/math-inline.ts +126 -0
- package/src/parser/rules/inline/monospace.ts +78 -0
- package/src/parser/rules/inline/raw.ts +262 -0
- package/src/parser/rules/inline/size.ts +244 -0
- package/src/parser/rules/inline/span.ts +424 -0
- package/src/parser/rules/inline/strikethrough.ts +115 -0
- package/src/parser/rules/inline/subscript.ts +84 -0
- package/src/parser/rules/inline/superscript.ts +84 -0
- package/src/parser/rules/inline/text.ts +84 -0
- package/src/parser/rules/inline/underline.ts +127 -0
- package/src/parser/rules/inline/user.ts +147 -0
- package/src/parser/rules/inline/utils.ts +344 -0
- package/src/parser/rules/types.ts +252 -0
- package/src/parser/rules/utils.ts +155 -0
- package/src/parser/toc.ts +130 -0
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Block rule for Wikidot definition lists written with the `: key : value` syntax.
|
|
4
|
+
*
|
|
5
|
+
* Each item starts at the beginning of a line with a COLON, followed by
|
|
6
|
+
* mandatory whitespace, the key (term), a second COLON, and then the value
|
|
7
|
+
* (definition). Multiple consecutive items form a single `<dl>` block.
|
|
8
|
+
*
|
|
9
|
+
* ```
|
|
10
|
+
* : Apple : A fruit that grows on trees.
|
|
11
|
+
* : Banana : A yellow curved fruit.
|
|
12
|
+
* ```
|
|
13
|
+
*
|
|
14
|
+
* Key parsing details:
|
|
15
|
+
* - Whitespace after the first colon is required (`": key"` not `":key"`).
|
|
16
|
+
* - The key portion supports inline markup (bold, links, etc.).
|
|
17
|
+
* - The value continues until a double newline, a new definition entry, or
|
|
18
|
+
* the end of the document.
|
|
19
|
+
* - A single newline within the value does NOT break the entry -- parsing
|
|
20
|
+
* continues on the next line.
|
|
21
|
+
*
|
|
22
|
+
* @module
|
|
23
|
+
*/
|
|
24
|
+
import type { Element, DefinitionListItem } from "@wdprlib/ast";
|
|
25
|
+
import type { BlockRule, ParseContext, RuleResult } from "../types";
|
|
26
|
+
import { parseInlineUntil } from "../inline/utils";
|
|
27
|
+
|
|
28
|
+
/**
 * Intermediate shape of a single `: key : value` entry. It is filled in
 * while scanning tokens and later mapped onto the AST's
 * {@link DefinitionListItem} representation.
 */
interface ParsedDefinitionItem {
  /** Source text of the key with surrounding whitespace trimmed; emitted as `key_string` in the AST. */
  keyString: string;
  /** Inline elements parsed from the key (term) portion of the entry. */
  key: Element[];
  /** Inline elements parsed from the value (definition) portion of the entry. */
  value: Element[];
}
|
|
40
|
+
|
|
41
|
+
/**
 * Parses a single definition list entry of the form `: key : value`.
 *
 * `startPos` must point at a line-start COLON token. The function then
 * consumes, in order:
 * 1. the leading COLON,
 * 2. at least one WHITESPACE token (mandatory),
 * 3. the key, up to the next COLON on the same line,
 * 4. optional WHITESPACE after the second COLON,
 * 5. the value, until one of:
 *    - a NEWLINE followed by another NEWLINE (paragraph break),
 *    - a NEWLINE followed by a line-start COLON (next entry),
 *    - a NEWLINE followed by EOF / end of the token array,
 *    - EOF.
 *    In the NEWLINE cases exactly one NEWLINE is consumed.
 *
 * Key and value content is delegated to {@link parseInlineUntil} (stop
 * token COLON for the key, NEWLINE for the value). Whenever that helper
 * yields no elements, or reports that it consumed no tokens, the current
 * token is stepped over individually so the scan always advances.
 *
 * @param ctx - Parse context; only `ctx.tokens` is read directly here.
 * @param startPos - Token index of the expected line-start COLON.
 * @returns The parsed item and the number of tokens consumed, or `null`
 *   when the tokens at `startPos` do not form a definition entry.
 */
function parseDefinitionItem(
  ctx: ParseContext,
  startPos: number,
): { item: ParsedDefinitionItem; consumed: number } | null {
  let pos = startPos;

  // Expect COLON at line start
  const colonToken = ctx.tokens[pos];
  if (!colonToken || colonToken.type !== "COLON" || !colonToken.lineStart) {
    return null;
  }
  pos++;

  // Wikidot requires whitespace after first colon: ": key : value"
  if (ctx.tokens[pos]?.type !== "WHITESPACE") {
    return null;
  }

  // Skip whitespace after first colon
  while (ctx.tokens[pos]?.type === "WHITESPACE") {
    pos++;
  }

  // Collect key tokens until second COLON
  const keyTokens: string[] = [];
  const keyNodes: Element[] = [];
  let foundSecondColon = false;

  while (pos < ctx.tokens.length) {
    const token = ctx.tokens[pos];
    if (!token || token.type === "NEWLINE" || token.type === "EOF") {
      // The key must be terminated by a COLON on the same line.
      break;
    }
    if (token.type === "COLON") {
      foundSecondColon = true;
      pos++;
      break;
    }

    // Parse inline content for key
    const inlineCtx: ParseContext = { ...ctx, pos };
    const result = parseInlineUntil(inlineCtx, "COLON");
    // Require forward progress: elements without consumed tokens would
    // otherwise leave `pos` unchanged and repeat this iteration forever.
    if (result.elements.length > 0 && result.consumed > 0) {
      keyNodes.push(...result.elements);
      // Collect the raw text of every token the inline parser covered
      for (const covered of ctx.tokens.slice(pos, pos + result.consumed)) {
        keyTokens.push(covered.value);
      }
      pos += result.consumed;
    } else {
      // Nothing usable was parsed: keep the token's text and step over it.
      keyTokens.push(token.value);
      pos++;
    }
  }

  if (!foundSecondColon) {
    return null;
  }

  // Skip whitespace after second colon
  while (ctx.tokens[pos]?.type === "WHITESPACE") {
    pos++;
  }

  // Parse value (rest of line, can continue across single newlines)
  const valueNodes: Element[] = [];
  while (pos < ctx.tokens.length) {
    const token = ctx.tokens[pos];
    if (!token || token.type === "EOF") {
      break;
    }

    // Check for end of definition (double newline, new definition, or end of input)
    if (token.type === "NEWLINE") {
      const nextToken = ctx.tokens[pos + 1];
      const startsNextEntry = nextToken?.type === "COLON" && nextToken.lineStart;
      const endsBlock =
        !nextToken || nextToken.type === "NEWLINE" || nextToken.type === "EOF";
      if (startsNextEntry || endsBlock) {
        // Consume only this NEWLINE; whatever follows belongs to the caller.
        pos++;
        break;
      }
      // Single newline: the entry continues on the next line.
      // NOTE(review): the NEWLINE itself is handed to parseInlineUntil below
      // with NEWLINE as the stop token. If that helper returns nothing in
      // this position, the newline is skipped by the fallback branch rather
      // than emitted as a line break -- confirm this is intended.
    }

    // Parse inline content
    const inlineCtx: ParseContext = { ...ctx, pos };
    const result = parseInlineUntil(inlineCtx, "NEWLINE");
    // Same forward-progress requirement as in the key loop.
    if (result.elements.length > 0 && result.consumed > 0) {
      valueNodes.push(...result.elements);
      pos += result.consumed;
    } else {
      pos++;
    }
  }

  // Trim surrounding whitespace from the raw key text
  const keyString = keyTokens.join("").trim();

  // Remove trailing whitespace-only text nodes from key
  while (keyNodes.length > 0) {
    const lastNode = keyNodes[keyNodes.length - 1];
    if (
      lastNode &&
      lastNode.element === "text" &&
      typeof lastNode.data === "string" &&
      lastNode.data.trim() === ""
    ) {
      keyNodes.pop();
    } else {
      break;
    }
  }

  return {
    item: {
      keyString,
      key: keyNodes,
      value: valueNodes,
    },
    // Every step above advances `pos` by exactly the tokens it consumed.
    consumed: pos - startPos,
  };
}
|
|
199
|
+
|
|
200
|
+
/**
 * Block rule that turns a run of `: key : value` lines into one
 * `definition-list` element.
 *
 * How it works:
 * - Starting at `ctx.pos`, call `parseDefinitionItem()` for as long as the
 *   current token is a line-start COLON and the call succeeds.
 * - WHITESPACE tokens that directly follow an entry are consumed as well.
 * - If no entry could be parsed, the rule reports failure.
 * - Otherwise each entry is mapped to the AST {@link DefinitionListItem}
 *   shape (`key_string`, `key`, `value`) and a single element is emitted
 *   together with the total number of tokens consumed.
 */
export const definitionListRule: BlockRule = {
  name: "definitionList",
  startTokens: ["COLON"],
  requiresLineStart: true,

  parse(ctx: ParseContext): RuleResult<Element> {
    const collected: ParsedDefinitionItem[] = [];
    let cursor = ctx.pos;

    for (;;) {
      // An entry can only begin at a COLON token flagged as line start;
      // a missing token or any other token type ends the list.
      const head = ctx.tokens[cursor];
      if (!head || head.type !== "COLON" || !head.lineStart) {
        break;
      }

      const entry = parseDefinitionItem(ctx, cursor);
      if (!entry) {
        break;
      }
      collected.push(entry.item);
      cursor += entry.consumed;

      // Step over whitespace that separates this entry from the next one
      while (ctx.tokens[cursor]?.type === "WHITESPACE") {
        cursor++;
      }
    }

    if (collected.length === 0) {
      return { success: false };
    }

    // Rename fields to the AST's DefinitionListItem layout
    const definitionItems: DefinitionListItem[] = collected.map(
      ({ keyString, key, value }) => ({ key_string: keyString, key, value }),
    );

    return {
      success: true,
      elements: [{ element: "definition-list", data: definitionItems }],
      // The cursor only ever moves forward by the tokens that were consumed.
      consumed: cursor - ctx.pos,
    };
  },
};
|
|
@@ -0,0 +1,400 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Block rule for Wikidot `[[div]]` and `[[div_]]` container blocks.
|
|
4
|
+
*
|
|
5
|
+
* `[[div]]` wraps its body content in a `<div>` element, with full
|
|
6
|
+
* paragraph processing for the body. `[[div_]]` (paragraph strip mode)
|
|
7
|
+
* unwraps the first and last paragraphs so their content appears directly
|
|
8
|
+
* inside the `<div>`, while middle paragraphs keep their `<p>` wrappers.
|
|
9
|
+
*
|
|
10
|
+
* Both variants accept HTML attributes (class, style, id, etc.) on the
|
|
11
|
+
* opening tag.
|
|
12
|
+
*
|
|
13
|
+
* Wikidot-specific edge cases:
|
|
14
|
+
* - The opening `]]` MUST be followed by a NEWLINE for the block to be
|
|
15
|
+
* recognised. `[[div]]inline[[/div]]` is NOT a valid div -- it becomes
|
|
16
|
+
* a failed div (see `consumeFailedDiv()`).
|
|
17
|
+
* - When a div fails, everything from the opening `[[div]]` through the
|
|
18
|
+
* last `[[/div]]` is collected as a single paragraph of text/line-break
|
|
19
|
+
* elements. Blank lines within that span are silently removed.
|
|
20
|
+
* - `[[div_]]` uses `unwrapEdgeParagraphs()` to strip paragraph
|
|
21
|
+
* wrappers from the first and last elements.
|
|
22
|
+
*
|
|
23
|
+
* @module
|
|
24
|
+
*/
|
|
25
|
+
import type { Element } from "@wdprlib/ast";
|
|
26
|
+
import type { BlockRule, ParseContext, RuleResult } from "../types";
|
|
27
|
+
import { currentToken } from "../types";
|
|
28
|
+
import { parseBlockName, parseAttributes, parseBlocksUntil } from "./utils";
|
|
29
|
+
|
|
30
|
+
/**
 * Block rule for `[[div]]`/`[[div_]]` container blocks.
 *
 * `requiresLineStart` is `false` because nested `[[div_]]` inside another
 * `[[div_]]` may appear after inline content.
 *
 * Parsing steps:
 * 1. Match `[[`, the block name (`div` or `div_`), optional attributes and `]]`.
 * 2. If `]]` is not directly followed by a NEWLINE, hand over to
 *    `consumeFailedDiv()`.
 * 3. Refuse to open when the enclosing scope's `divClosesBudget` is `0`.
 * 4. Parse the body with `parseBlocksUntil()` using a `[[/div]]` close
 *    condition; for `div_` the first and last paragraphs are unwrapped.
 * 5. Consume the closing `[[/div]]` (plus one trailing NEWLINE) only when
 *    the tag at that position really is a `div` close tag; otherwise push
 *    an `unclosed-block` warning and leave the following tokens untouched
 *    so that an enclosing block can still see its own close tag.
 */
export const divRule: BlockRule = {
  name: "div",
  startTokens: ["BLOCK_OPEN"],
  requiresLineStart: false, // Allow nested [[div_]] inside [[div_]]

  parse(ctx: ParseContext): RuleResult<Element> {
    const openToken = currentToken(ctx);
    if (openToken.type !== "BLOCK_OPEN") {
      return { success: false };
    }

    // `pos` is the only cursor; the consumed count is derived from it at the end.
    let pos = ctx.pos + 1;

    // Parse block name
    const nameResult = parseBlockName(ctx, pos);
    if (!nameResult) {
      return { success: false };
    }

    const blockName = nameResult.name;
    // Check if it's a div or div_
    if (blockName !== "div" && blockName !== "div_") {
      return { success: false };
    }

    // div_ means paragraph strip (edge paragraphs lose their wrapper)
    const paragraphStrip = blockName === "div_";

    pos += nameResult.consumed;

    // Parse attributes
    const attrResult = parseAttributes(ctx, pos);
    pos += attrResult.consumed;

    // Expect ]]
    if (ctx.tokens[pos]?.type !== "BLOCK_CLOSE") {
      return { success: false };
    }
    pos++;

    // Wikidot: [[div]] must be followed by newline to be recognized as block
    // [[div]]inline[[/div]] is NOT recognized as div
    // When this fails, Wikidot consumes everything up to the last [[/div]]
    // as text in a single paragraph (blank lines are ignored)
    if (ctx.tokens[pos]?.type !== "NEWLINE") {
      return consumeFailedDiv(ctx);
    }

    // Wikidot matches [[div]]/[[/div]] pairs from outside-in. When there are
    // more opens than closes, the innermost excess opens become text. We enforce
    // this with a "closes budget": the number of additional nested divs that can
    // open. When budget reaches 0, this div cannot open.
    if (ctx.scope.divClosesBudget === 0) {
      return { success: false };
    }

    // Step over the NEWLINE that follows the opening tag
    pos++;

    // Record opening tag position for diagnostics
    const openPosition = openToken.position;

    // Calculate closes budget for nested divs in the body: inherit the
    // enclosing budget minus one, or, when no budget is set yet, count the
    // [[/div]] tags from the body start onwards and subtract 1 (for self).
    let bodyBudget: number | undefined;
    if (ctx.scope.divClosesBudget !== undefined) {
      bodyBudget = ctx.scope.divClosesBudget - 1;
    } else {
      const closesInScope = countDivCloses(ctx, pos);
      bodyBudget = closesInScope > 0 ? closesInScope - 1 : 0;
    }

    // Returns the parsed block name when `[[/div` starts at index `at`,
    // otherwise `null`.
    const divCloseNameAt = (checkCtx: ParseContext, at: number) => {
      if (checkCtx.tokens[at]?.type !== "BLOCK_END_OPEN") {
        return null;
      }
      const closeNameResult = parseBlockName(checkCtx, at + 1);
      return closeNameResult?.name === "div" ? closeNameResult : null;
    };

    // Close condition for [[/div]]
    const closeCondition = (checkCtx: ParseContext): boolean =>
      divCloseNameAt(checkCtx, checkCtx.pos) !== null;

    const bodyCtx: ParseContext = {
      ...ctx,
      pos,
      scope: { ...ctx.scope, divClosesBudget: bodyBudget },
    };

    // Both variants parse the body as blocks; div_ additionally unwraps the
    // first/last paragraphs (Wikidot: blank lines create <p> for middle
    // blocks only).
    const bodyResult = parseBlocksUntil(bodyCtx, closeCondition);
    pos += bodyResult.consumed;
    const children: Element[] = paragraphStrip
      ? unwrapEdgeParagraphs(bodyResult.elements)
      : bodyResult.elements;

    // Only a verified [[/div]] counts as this block's close tag. A different
    // close tag at this position belongs to some other block and must be
    // neither consumed here nor allowed to suppress the warning.
    const closeNameResult = divCloseNameAt(ctx, pos);
    if (closeNameResult === null) {
      ctx.diagnostics.push({
        severity: "warning",
        code: "unclosed-block",
        message: `Missing closing tag [[/div]] for [[${blockName}]]`,
        position: openPosition,
      });
    } else {
      // Consume [[/div]]
      pos += 1 + closeNameResult.consumed;
      if (ctx.tokens[pos]?.type === "BLOCK_CLOSE") {
        pos++;
      }
      // ...and a single NEWLINE directly after it
      if (ctx.tokens[pos]?.type === "NEWLINE") {
        pos++;
      }
    }

    return {
      success: true,
      elements: [
        {
          element: "container",
          data: {
            type: "div",
            attributes: attrResult.attrs,
            elements: children,
          },
        },
      ],
      consumed: pos - ctx.pos,
    };
  },
};
|
|
191
|
+
|
|
192
|
+
/**
 * Tallies the `[[/div]]` close tags found between `startPos` and the first
 * EOF token (or the end of the token array, whichever comes first).
 * `divRule` uses the result to derive the nesting budget for a div body.
 *
 * @param ctx - Parse context supplying the token array.
 * @param startPos - Index of the first token to inspect.
 * @returns Number of `BLOCK_END_OPEN` tokens whose block name is `div`.
 */
function countDivCloses(ctx: ParseContext, startPos: number): number {
  let closes = 0;
  let index = startPos;

  while (index < ctx.tokens.length) {
    const tok = ctx.tokens[index];
    if (!tok || tok.type === "EOF") {
      return closes;
    }

    // A close tag counts only when the name right after `[[/` is "div".
    const isDivClose =
      tok.type === "BLOCK_END_OPEN" && parseBlockName(ctx, index + 1)?.name === "div";
    if (isDivClose) {
      closes += 1;
    }
    index += 1;
  }

  return closes;
}
|
|
210
|
+
|
|
211
|
+
/**
 * Fallback for a `[[div]]`/`[[div_]]` opening tag whose `]]` is not
 * followed by a NEWLINE, i.e. one that cannot open a block.
 *
 * The function works in three phases:
 * 1. Scan forward from `ctx.pos` and remember the LAST `[[/div]]`. The scan
 *    ends at EOF, or earlier at a later line-start `[[div]]`/`[[div_]]`
 *    whose `]]` is followed by NEWLINE or EOF (that one is left to be
 *    parsed as its own block).
 * 2. Push an `inline-block-element` error for every `[[div]]`/`[[div_]]`
 *    opening before that last close tag whose token carries a position.
 * 3. Turn every token from `ctx.pos` through the last `[[/div]]` into
 *    paragraph content: a single NEWLINE becomes a `line-break`, blank
 *    lines are dropped, any other token becomes a `text` element holding
 *    its raw value. One NEWLINE directly after the range is consumed too.
 *
 * When no `[[/div]]` is found the rule fails without consuming anything.
 *
 * @param ctx - Parse context, positioned at the opening `[[div...]]` tag.
 * @returns A paragraph container with text/line-break elements, or failure.
 */
function consumeFailedDiv(ctx: ParseContext): RuleResult<Element> {
  const start = ctx.pos;
  const isDivOpenName = (name: string | undefined): boolean =>
    name === "div" || name === "div_";

  // Phase 1: locate the last [[/div]] before the next valid div block.
  let lastClosePos = -1;
  let lastCloseConsumed = 0;

  for (let scanPos = start; scanPos < ctx.tokens.length; scanPos++) {
    const scanned = ctx.tokens[scanPos];
    if (!scanned || scanned.type === "EOF") break;

    // A later line-start [[div]]/[[div_]] that is a valid block opening
    // ends the scan (the failed div at `start` itself is skipped).
    if (scanPos > start && scanned.type === "BLOCK_OPEN" && scanned.lineStart) {
      const openName = parseBlockName(ctx, scanPos + 1);
      if (openName && isDivOpenName(openName.name)) {
        const attrsStart = scanPos + 1 + openName.consumed;
        const tagEnd = attrsStart + parseAttributes(ctx, attrsStart).consumed;
        if (ctx.tokens[tagEnd]?.type === "BLOCK_CLOSE") {
          const afterTag = ctx.tokens[tagEnd + 1]?.type;
          if (afterTag === "NEWLINE" || afterTag === "EOF") {
            break;
          }
        }
      }
    }

    if (scanned.type === "BLOCK_END_OPEN") {
      const closeName = parseBlockName(ctx, scanPos + 1);
      if (closeName?.name === "div") {
        // Remember where this close tag starts and how many tokens it
        // spans: `[[/` + name, plus `]]` when present.
        const hasBlockClose =
          ctx.tokens[scanPos + 1 + closeName.consumed]?.type === "BLOCK_CLOSE";
        lastClosePos = scanPos;
        lastCloseConsumed = 1 + closeName.consumed + (hasBlockClose ? 1 : 0);
      }
    }
  }

  if (lastClosePos === -1) {
    // No [[/div]] found, fall back to normal failure
    return { success: false };
  }

  // Phase 2: report every inline [[div]]/[[div_]] opening in the absorbed
  // range, starting with the one at `start`.
  for (let diagPos = start; diagPos < lastClosePos; diagPos++) {
    const candidate = ctx.tokens[diagPos];
    if (candidate?.type !== "BLOCK_OPEN") continue;

    const openName = parseBlockName(ctx, diagPos + 1);
    if (!openName || !isDivOpenName(openName.name) || !candidate.position) continue;

    ctx.diagnostics.push({
      severity: "error",
      code: "inline-block-element",
      message: `[[${openName.name}]] must be followed by a newline to be a block element`,
      position: candidate.position,
    });
  }

  // Phase 3: flatten everything up to and including the last [[/div]].
  const elements: Element[] = [];
  const endPos = lastClosePos + lastCloseConsumed;
  let pos = start;

  while (pos < endPos && pos < ctx.tokens.length) {
    const current = ctx.tokens[pos];
    if (!current || current.type === "EOF") break;

    if (current.type !== "NEWLINE") {
      elements.push({ element: "text", data: current.value });
      pos++;
      continue;
    }

    // A blank line is NEWLINE+NEWLINE or NEWLINE+WHITESPACE...+NEWLINE.
    let peekPos = pos + 1;
    while (ctx.tokens[peekPos]?.type === "WHITESPACE") peekPos++;

    if (ctx.tokens[peekPos]?.type !== "NEWLINE") {
      // Single newline → line-break
      elements.push({ element: "line-break" });
      pos++;
      continue;
    }

    // Blank line — drop the whole run of newlines and whitespace
    while (ctx.tokens[pos]?.type === "NEWLINE" || ctx.tokens[pos]?.type === "WHITESPACE") {
      pos++;
    }
  }

  // Consume trailing newline after [[/div]] if present
  if (ctx.tokens[pos]?.type === "NEWLINE") {
    pos++;
  }

  return {
    success: true,
    elements: [
      {
        element: "container",
        data: {
          type: "paragraph",
          attributes: {},
          elements,
        },
      },
    ],
    // `pos` advanced once per consumed token, starting from `start`.
    consumed: pos - start,
  };
}
|
|
351
|
+
|
|
352
|
+
/**
 * Paragraph-strip step used for `[[div_]]` bodies.
 *
 * If the first element is a paragraph container it is replaced, in place,
 * by its own child elements. The same is then done for whatever element is
 * last after that first replacement. Elements in between are left as they
 * are, so middle paragraphs keep their wrapper.
 *
 * @param elements - Block elements produced by body parsing.
 * @returns The input array itself when it is empty; otherwise a new array
 *   with the edge paragraphs unwrapped. The input is never mutated.
 */
function unwrapEdgeParagraphs(elements: Element[]): Element[] {
  if (elements.length === 0) return elements;

  const unwrapped = elements.slice();

  // Leading paragraph: splice its children in where the wrapper was
  const head = unwrapped[0];
  if (isParagraphContainer(head)) {
    const headChildren = (head as any).data.elements as Element[];
    unwrapped.splice(0, 1, ...headChildren);
  }

  // Trailing paragraph: the index is taken after the splice above, since
  // that may have changed the array's length (possibly down to zero)
  const tailIndex = unwrapped.length - 1;
  const tail = unwrapped[tailIndex];
  if (tailIndex >= 0 && isParagraphContainer(tail)) {
    const tailChildren = (tail as any).data.elements as Element[];
    unwrapped.splice(tailIndex, 1, ...tailChildren);
  }

  return unwrapped;
}
|
|
383
|
+
|
|
384
|
+
/**
 * Tells whether an element has the shape
 * `{ element: "container", data: { type: "paragraph", ... } }`.
 *
 * @param el - Element to test, or `undefined`.
 * @returns `true` only for a container element whose `data` is a non-null
 *   object with a `type` property equal to `"paragraph"`.
 */
function isParagraphContainer(el: Element | undefined): boolean {
  if (el === undefined || el.element !== "container") {
    return false;
  }

  // `data` must be a real object before `in` can be applied to it
  const payload = el.data;
  if (typeof payload !== "object" || payload === null) {
    return false;
  }

  return "type" in payload && payload.type === "paragraph";
}
|