@wdprlib/parser 3.1.1 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +312 -121
- package/dist/index.js +289 -98
- package/package.json +5 -3
- package/src/index.ts +163 -0
- package/src/lexer/index.ts +20 -0
- package/src/lexer/lexer.ts +687 -0
- package/src/lexer/tokens.ts +141 -0
- package/src/parser/constants.ts +173 -0
- package/src/parser/depth.ts +251 -0
- package/src/parser/index.ts +18 -0
- package/src/parser/parse.ts +315 -0
- package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
- package/src/parser/postprocess/index.ts +15 -0
- package/src/parser/postprocess/spanStrip.ts +697 -0
- package/src/parser/preprocess/expr.ts +265 -0
- package/src/parser/preprocess/index.ts +38 -0
- package/src/parser/preprocess/typography.ts +67 -0
- package/src/parser/preprocess/utils.ts +250 -0
- package/src/parser/preprocess/whitespace.ts +111 -0
- package/src/parser/rules/block/align.ts +282 -0
- package/src/parser/rules/block/bibliography.ts +359 -0
- package/src/parser/rules/block/block-list.ts +689 -0
- package/src/parser/rules/block/blockquote.ts +238 -0
- package/src/parser/rules/block/center.ts +87 -0
- package/src/parser/rules/block/clear-float.ts +75 -0
- package/src/parser/rules/block/code.ts +187 -0
- package/src/parser/rules/block/collapsible.ts +337 -0
- package/src/parser/rules/block/comment.ts +73 -0
- package/src/parser/rules/block/content-separator.ts +79 -0
- package/src/parser/rules/block/definition-list.ts +270 -0
- package/src/parser/rules/block/div.ts +400 -0
- package/src/parser/rules/block/embed-block.ts +153 -0
- package/src/parser/rules/block/footnoteblock.ts +200 -0
- package/src/parser/rules/block/heading.ts +142 -0
- package/src/parser/rules/block/horizontal-rule.ts +61 -0
- package/src/parser/rules/block/html.ts +222 -0
- package/src/parser/rules/block/iframe.ts +239 -0
- package/src/parser/rules/block/iftags.ts +150 -0
- package/src/parser/rules/block/include.ts +179 -0
- package/src/parser/rules/block/index.ts +127 -0
- package/src/parser/rules/block/list.ts +244 -0
- package/src/parser/rules/block/math.ts +183 -0
- package/src/parser/rules/block/module/backlinks/index.ts +31 -0
- package/src/parser/rules/block/module/backlinks/types.ts +21 -0
- package/src/parser/rules/block/module/categories/index.ts +34 -0
- package/src/parser/rules/block/module/categories/types.ts +21 -0
- package/src/parser/rules/block/module/css/index.ts +37 -0
- package/src/parser/rules/block/module/iftags/condition.ts +109 -0
- package/src/parser/rules/block/module/iftags/index.ts +26 -0
- package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
- package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
- package/src/parser/rules/block/module/iftags/types.ts +63 -0
- package/src/parser/rules/block/module/include/index.ts +20 -0
- package/src/parser/rules/block/module/include/resolve.ts +556 -0
- package/src/parser/rules/block/module/index.ts +122 -0
- package/src/parser/rules/block/module/join/index.ts +34 -0
- package/src/parser/rules/block/module/join/types.ts +23 -0
- package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
- package/src/parser/rules/block/module/listpages/extract.ts +410 -0
- package/src/parser/rules/block/module/listpages/index.ts +83 -0
- package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
- package/src/parser/rules/block/module/listpages/parser.ts +106 -0
- package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
- package/src/parser/rules/block/module/listpages/types.ts +513 -0
- package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
- package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
- package/src/parser/rules/block/module/listusers/extract.ts +45 -0
- package/src/parser/rules/block/module/listusers/index.ts +36 -0
- package/src/parser/rules/block/module/listusers/parser.ts +54 -0
- package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
- package/src/parser/rules/block/module/listusers/types.ts +93 -0
- package/src/parser/rules/block/module/mapping.ts +61 -0
- package/src/parser/rules/block/module/page-tree/index.ts +38 -0
- package/src/parser/rules/block/module/page-tree/types.ts +29 -0
- package/src/parser/rules/block/module/rate/index.ts +28 -0
- package/src/parser/rules/block/module/rate/types.ts +19 -0
- package/src/parser/rules/block/module/resolve.ts +411 -0
- package/src/parser/rules/block/module/types-common.ts +59 -0
- package/src/parser/rules/block/module/types.ts +61 -0
- package/src/parser/rules/block/module/utils.ts +43 -0
- package/src/parser/rules/block/module/walk.ts +380 -0
- package/src/parser/rules/block/module.ts +164 -0
- package/src/parser/rules/block/orphan-li.ts +177 -0
- package/src/parser/rules/block/paragraph.ts +157 -0
- package/src/parser/rules/block/table-block.ts +726 -0
- package/src/parser/rules/block/table.ts +441 -0
- package/src/parser/rules/block/tabview.ts +331 -0
- package/src/parser/rules/block/toc.ts +129 -0
- package/src/parser/rules/block/utils.ts +615 -0
- package/src/parser/rules/index.ts +49 -0
- package/src/parser/rules/inline/anchor-name.ts +154 -0
- package/src/parser/rules/inline/anchor.ts +327 -0
- package/src/parser/rules/inline/bibcite.ts +153 -0
- package/src/parser/rules/inline/bold.ts +86 -0
- package/src/parser/rules/inline/color.ts +140 -0
- package/src/parser/rules/inline/comment.ts +90 -0
- package/src/parser/rules/inline/equation-ref.ts +115 -0
- package/src/parser/rules/inline/expr.ts +526 -0
- package/src/parser/rules/inline/footnote.ts +223 -0
- package/src/parser/rules/inline/guillemet.ts +64 -0
- package/src/parser/rules/inline/html.ts +132 -0
- package/src/parser/rules/inline/image.ts +328 -0
- package/src/parser/rules/inline/index.ts +150 -0
- package/src/parser/rules/inline/italic.ts +74 -0
- package/src/parser/rules/inline/line-break.ts +326 -0
- package/src/parser/rules/inline/link-anchor.ts +147 -0
- package/src/parser/rules/inline/link-single.ts +164 -0
- package/src/parser/rules/inline/link-star.ts +134 -0
- package/src/parser/rules/inline/link-triple.ts +267 -0
- package/src/parser/rules/inline/math-inline.ts +126 -0
- package/src/parser/rules/inline/monospace.ts +78 -0
- package/src/parser/rules/inline/raw.ts +262 -0
- package/src/parser/rules/inline/size.ts +244 -0
- package/src/parser/rules/inline/span.ts +424 -0
- package/src/parser/rules/inline/strikethrough.ts +115 -0
- package/src/parser/rules/inline/subscript.ts +84 -0
- package/src/parser/rules/inline/superscript.ts +84 -0
- package/src/parser/rules/inline/text.ts +84 -0
- package/src/parser/rules/inline/underline.ts +127 -0
- package/src/parser/rules/inline/user.ts +147 -0
- package/src/parser/rules/inline/utils.ts +344 -0
- package/src/parser/rules/types.ts +252 -0
- package/src/parser/rules/utils.ts +155 -0
- package/src/parser/toc.ts +130 -0
|
@@ -0,0 +1,424 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Parses the Wikidot span block syntax: `[[span attributes]]content[[/span]]`
|
|
4
|
+
* and its paragraph-strip variant `[[span_]]`.
|
|
5
|
+
*
|
|
6
|
+
* A span wraps inline content in an HTML `<span>` element with arbitrary
|
|
7
|
+
* attributes (class, style, id, etc.). It supports multiline content
|
|
8
|
+
* where single newlines become `<br />` elements.
|
|
9
|
+
*
|
|
10
|
+
* Blank lines (double newlines) within spans trigger special behavior:
|
|
11
|
+
*
|
|
12
|
+
* Regular span (`[[span]]`):
|
|
13
|
+
* - Blank lines split the content into separate spans, each placed in
|
|
14
|
+
* its own paragraph. Segments after the first are marked with
|
|
15
|
+
* `_splitByBlankLine: true` for postprocessing.
|
|
16
|
+
*
|
|
17
|
+
* Paragraph-strip span (`[[span_]]`):
|
|
18
|
+
* - Content before a blank line gets `_paragraphStrip: true`, indicating
|
|
19
|
+
* it should merge with the surrounding paragraph.
|
|
20
|
+
* - Content after a blank line gets `_escapedFromParagraph: true`,
|
|
21
|
+
* indicating it should appear outside any paragraph wrapper.
|
|
22
|
+
* - An empty `[[span_]][[/span]]` produces a marker with
|
|
23
|
+
* `_emptyParagraphStrip: true` that absorbs adjacent line breaks.
|
|
24
|
+
*
|
|
25
|
+
* Spans support nesting -- a `[[span]]` inside another `[[span]]` will
|
|
26
|
+
* correctly find its own `[[/span]]` closing tag.
|
|
27
|
+
*
|
|
28
|
+
* The `closeSpanRule` handles orphaned `[[/span]]` tags that result
|
|
29
|
+
* from paragraph-break splitting. These wrap preceding inline content
|
|
30
|
+
* into a span, matching Wikidot's behavior.
|
|
31
|
+
*
|
|
32
|
+
* @module
|
|
33
|
+
*/
|
|
34
|
+
import type { Element } from "@wdprlib/ast";
|
|
35
|
+
import type { InlineRule, ParseContext, RuleResult } from "../types";
|
|
36
|
+
import { currentToken } from "../types";
|
|
37
|
+
import { inlineRules } from "../index";
|
|
38
|
+
import { parseBlockName } from "../utils";
|
|
39
|
+
import { parseAttributes } from "../block/utils";
|
|
40
|
+
import { canApplyInlineRule } from "./utils";
|
|
41
|
+
|
|
42
|
+
/**
 * Returns the index of the first token at or after `from` whose type is
 * neither `WHITESPACE` nor `NEWLINE`. Running off the end of the token
 * array also stops the scan (the missing token has no type).
 */
function skipBlankRun(ctx: ParseContext, from: number): number {
  let index = from;
  for (;;) {
    const kind = ctx.tokens[index]?.type;
    if (kind !== "WHITESPACE" && kind !== "NEWLINE") {
      return index;
    }
    index++;
  }
}

/**
 * Removes every leading and every trailing `line-break` element from
 * `list`, mutating it in place.
 */
function trimEdgeLineBreaks(list: Element[]): void {
  while (list[0]?.element === "line-break") {
    list.shift();
  }
  while (list[list.length - 1]?.element === "line-break") {
    list.pop();
  }
}

/**
 * Inline rule for `[[span attributes]]content[[/span]]` and its
 * paragraph-strip variant `[[span_ attributes]]content[[/span]]`.
 *
 * Starts on a `BLOCK_OPEN` token. After the block name and attributes it
 * requires a `BLOCK_CLOSE`, then collects inline content (trying every
 * rule in `inlineRules`, so nested spans are handled by recursion) until
 * a `[[/span` closing tag is reached.
 *
 * The rule does not match when:
 * - the current token is not `BLOCK_OPEN`
 * - the block name is missing or is neither `span` nor `span_`
 * - the attributes are not followed by `]]`
 * - no `[[/span` closing tag is found before EOF (a warning diagnostic
 *   with code `unclosed-block` is recorded in that case)
 */
export const spanRule: InlineRule = {
  name: "span",
  startTokens: ["BLOCK_OPEN"],

  /**
   * Tries to parse a span starting at `ctx.pos`.
   *
   * @param ctx - Parse context holding the token stream, the current
   *              position and the diagnostics sink
   * @returns On success, one or more `"container"` elements of type
   *          `"span"` plus the number of tokens consumed; otherwise
   *          `{ success: false }`
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const opener = currentToken(ctx);
    if (opener.type !== "BLOCK_OPEN") {
      return { success: false };
    }

    // Block name directly after `[[`.
    const header = parseBlockName(ctx, ctx.pos + 1);
    if (!header) {
      return { success: false };
    }
    const blockName = header.name;
    const isStrip = blockName === "span_";
    if (!isStrip && blockName !== "span") {
      return { success: false };
    }

    // A single cursor is enough: the consumed count is always the
    // distance between the cursor and the starting position.
    let cursor = ctx.pos + 1 + header.consumed;

    const parsedAttrs = parseAttributes(ctx, cursor);
    cursor += parsedAttrs.consumed;

    // The opening tag must be terminated by `]]`.
    if (ctx.tokens[cursor]?.type !== "BLOCK_CLOSE") {
      return { success: false };
    }
    cursor++;

    let inner: Element[] = []; // content of the segment currently being built
    const escaped: Element[] = []; // span_ only: content after a blank line
    const segments: Element[][] = []; // span only: finished segments split by blank lines
    let closed = false;
    let pastBlankLine = false; // span_ only: a blank line has been seen

    while (cursor < ctx.tokens.length) {
      const token = ctx.tokens[cursor];
      if (!token || token.type === "EOF") {
        break;
      }

      // Closing tag `[[/span` (the trailing `]]` is consumed when present).
      if (token.type === "BLOCK_END_OPEN") {
        const closer = parseBlockName(ctx, cursor + 1);
        if (closer?.name === "span") {
          cursor += 1 + closer.consumed;
          if (ctx.tokens[cursor]?.type === "BLOCK_CLOSE") {
            cursor++;
          }
          closed = true;
          break;
        }
      }

      // Destination for anything produced by this iteration. It is only
      // used on paths that do not change `inner` / `pastBlankLine` first.
      const sink = pastBlankLine ? escaped : inner;

      if (token.type === "NEWLINE") {
        // A NEWLINE followed (ignoring whitespace) by another NEWLINE is a
        // blank line, i.e. a paragraph break.
        let peek = cursor + 1;
        while (ctx.tokens[peek]?.type === "WHITESPACE") {
          peek++;
        }

        if (ctx.tokens[peek]?.type === "NEWLINE") {
          if (isStrip) {
            // span_: everything from here on is collected separately.
            pastBlankLine = true;
          } else if (inner.length > 0) {
            // span: close the current segment and start a fresh one.
            segments.push(inner);
            inner = [];
          }
          // Swallow the newline plus any following whitespace/newlines.
          cursor = skipBlankRun(ctx, cursor + 1);
          continue;
        }

        // Lone newline becomes a line-break element.
        sink.push({ element: "line-break" });
        cursor++;
        // Drop indentation at the start of the following line.
        while (ctx.tokens[cursor]?.type === "WHITESPACE" && ctx.tokens[cursor]?.lineStart) {
          cursor++;
        }
        continue;
      }

      // Whitespace at the start of a line is dropped; whitespace between
      // words falls through and is handled like any other token.
      if (token.type === "WHITESPACE" && token.lineStart) {
        cursor++;
        continue;
      }

      // Give every applicable inline rule a chance. Nested spans are
      // matched here and consume their own opening and closing tags.
      const nestedCtx: ParseContext = { ...ctx, pos: cursor };
      let advanced = false;
      for (const rule of inlineRules) {
        if (!canApplyInlineRule(rule, token)) {
          continue;
        }
        const outcome = rule.parse(nestedCtx);
        if (!outcome.success) {
          continue;
        }
        sink.push(...outcome.elements);
        cursor += outcome.consumed;
        advanced = true;
        break;
      }

      if (!advanced) {
        // No rule matched: keep the raw token value as plain text.
        sink.push({ element: "text", data: token.value });
        cursor++;
      }
    }

    // Without a closing tag this is not a span at all.
    if (!closed) {
      ctx.diagnostics.push({
        severity: "warning",
        code: "unclosed-block",
        message: `Missing closing tag [[/span]] for [[${blockName}]]`,
        position: opener.position,
      });
      return { success: false };
    }

    const consumed = cursor - ctx.pos;

    if (isStrip) {
      // span_ (paragraph-strip mode):
      // - content before a blank line -> span marked `_paragraphStrip`
      // - content after a blank line  -> span marked `_escapedFromParagraph`
      // - nothing at all              -> empty span marked `_emptyParagraphStrip`
      trimEdgeLineBreaks(inner);
      trimEdgeLineBreaks(escaped);

      const stripped: Element[] = [];

      if (inner.length > 0) {
        stripped.push({
          element: "container",
          data: {
            type: "span",
            attributes: parsedAttrs.attrs,
            elements: inner,
            _paragraphStrip: true,
          },
        });
      }

      if (escaped.length > 0) {
        // The escaped part carries no attributes of its own.
        stripped.push({
          element: "container",
          data: {
            type: "span",
            attributes: {},
            elements: escaped,
            _escapedFromParagraph: true,
          },
        });
      }

      if (stripped.length === 0) {
        stripped.push({
          element: "container",
          data: {
            type: "span",
            attributes: {},
            elements: [],
            _emptyParagraphStrip: true,
          },
        });
      }

      return { success: true, elements: stripped, consumed };
    }

    if (segments.length === 0) {
      // Regular span without any paragraph split: a single container.
      return {
        success: true,
        elements: [
          {
            element: "container",
            data: {
              type: "span",
              attributes: parsedAttrs.attrs,
              elements: inner,
            },
          },
        ],
        consumed,
      };
    }

    // Regular span split by blank lines: one span per segment. Only the
    // first keeps the attributes; later ones are flagged with
    // `_splitByBlankLine` so postprocessing can identify them.
    if (inner.length > 0) {
      segments.push(inner);
    }

    const pieces: Element[] = [];
    segments.forEach((segment, index) => {
      pieces.push({
        element: "container",
        data: {
          type: "span",
          attributes: index === 0 ? parsedAttrs.attrs : {},
          elements: segment,
          _splitByBlankLine: index > 0,
        },
      });
    });

    return { success: true, elements: pieces, consumed };
  },
};
|
|
358
|
+
|
|
359
|
+
/**
 * Inline rule for a `[[/span]]` closing tag that has no opening tag in
 * the current inline context (for example because a blank line split
 * the span across paragraphs).
 *
 * Starts on a `BLOCK_END_OPEN` token and matches only when the block
 * name that follows is exactly `span`. Instead of real content it emits
 * an empty span container flagged with `_closeSpan: true`, a marker that
 * tells later processing to wrap the preceding inline content in a span.
 */
export const closeSpanRule: InlineRule = {
  name: "closeSpan",
  startTokens: ["BLOCK_END_OPEN"],

  /**
   * Tries to parse an orphaned `[[/span]]` at `ctx.pos`.
   *
   * @param ctx - Parse context holding the token stream and current position
   * @returns A successful result carrying the `_closeSpan` marker element,
   *          or `{ success: false }` when the tokens are not `[[/span`
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    if (currentToken(ctx).type !== "BLOCK_END_OPEN") {
      return { success: false };
    }

    const closer = parseBlockName(ctx, ctx.pos + 1);
    if (closer?.name !== "span") {
      return { success: false };
    }

    // `[[/` counts as one token, followed by the name tokens.
    const nameLength = 1 + closer.consumed;
    // The trailing `]]` is optional: it is consumed only when present.
    const hasTerminator = ctx.tokens[ctx.pos + nameLength]?.type === "BLOCK_CLOSE";

    return {
      success: true,
      elements: [
        {
          element: "container",
          data: {
            type: "span",
            attributes: {},
            elements: [],
            _closeSpan: true,
          },
        },
      ],
      consumed: hasTerminator ? nameLength + 1 : nameLength,
    };
  },
};
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Parses the Wikidot strikethrough formatting syntax: `--text--`.
|
|
4
|
+
*
|
|
5
|
+
* Strikethrough text is delimited by double hyphens. However, the `--`
|
|
6
|
+
* token has dual meaning in Wikidot: it can be either a strikethrough
|
|
7
|
+
* marker or an em-dash. The disambiguation rule is:
|
|
8
|
+
*
|
|
9
|
+
* - If a matching closing `--` is found on the same line AND the closing
|
|
10
|
+
* marker is NOT preceded by whitespace, it is treated as strikethrough.
|
|
11
|
+
* - Otherwise, the `--` is converted to an em-dash character (U+2014).
|
|
12
|
+
*
|
|
13
|
+
* This means `--word--` produces strikethrough, but `-- word --` produces
|
|
14
|
+
* two em-dashes with "word" between them.
|
|
15
|
+
*
|
|
16
|
+
* Produces a `"container"` AST element with `type: "strikethrough"`,
|
|
17
|
+
* or a `"text"` element containing the em-dash character.
|
|
18
|
+
*
|
|
19
|
+
* @module
|
|
20
|
+
*/
|
|
21
|
+
import type { Element } from "@wdprlib/ast";
|
|
22
|
+
import type { InlineRule, ParseContext, RuleResult } from "../types";
|
|
23
|
+
import { parseInlineUntil } from "./utils";
|
|
24
|
+
|
|
25
|
+
/**
 * Decides whether the `--` at `ctx.pos` opens a real strikethrough.
 *
 * Walks the tokens after the opening marker until it meets the next
 * `STRIKE_MARKER`, a `NEWLINE`, an `EOF`, or the end of the token array.
 * The pair is valid only if a `STRIKE_MARKER` is reached first and the
 * token immediately before it is not `WHITESPACE`; this is what tells
 * `--text--` (strikethrough) apart from `-- text --` (em-dashes).
 *
 * @param ctx - Parse context positioned at the opening `--` marker
 * @returns `true` when a valid closing marker exists on the same line
 */
function isValidStrikethrough(ctx: ParseContext): boolean {
  let afterWhitespace = false;

  for (let index = ctx.pos + 1; index < ctx.tokens.length; index++) {
    const token = ctx.tokens[index];
    if (!token) {
      return false;
    }

    switch (token.type) {
      case "NEWLINE":
      case "EOF":
        // The line ended without a closing marker.
        return false;
      case "STRIKE_MARKER":
        // A closing marker directly after whitespace does not count.
        return !afterWhitespace;
      default:
        afterWhitespace = token.type === "WHITESPACE";
    }
  }

  return false;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Inline rule for `--strikethrough--`, with an em-dash fallback.
 *
 * Starts on a `STRIKE_MARKER` token (`--`). When `isValidStrikethrough`
 * confirms a usable closing marker, the tokens between the two markers
 * are parsed as inline content and wrapped in a `strikethrough`
 * container. Otherwise the single `--` token is replaced by an em-dash
 * character (U+2014).
 */
export const strikethroughRule: InlineRule = {
  name: "strikethrough",
  startTokens: ["STRIKE_MARKER"],

  /**
   * Produces either a strikethrough container or an em-dash text node.
   * This rule always succeeds.
   *
   * @param ctx - Parse context holding the token stream and current position
   * @returns A successful result with a `"container"` element of type
   *          `"strikethrough"`, or with a `"text"` element holding the
   *          em-dash character
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    if (isValidStrikethrough(ctx)) {
      // Parse everything after the opening marker up to the closing one.
      const inner = parseInlineUntil({ ...ctx, pos: ctx.pos + 1 }, "STRIKE_MARKER");

      return {
        success: true,
        elements: [
          {
            element: "container",
            data: {
              type: "strikethrough",
              attributes: {},
              elements: inner.elements,
            },
          },
        ],
        // opening marker + content + closing marker
        consumed: 1 + inner.consumed + 1,
      };
    }

    // No valid pair: the lone `--` is rendered as an em-dash.
    return {
      success: true,
      elements: [{ element: "text", data: "\u2014" }],
      consumed: 1,
    };
  },
};
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Parses the Wikidot subscript formatting syntax: `,,text,,`.
|
|
4
|
+
*
|
|
5
|
+
* Subscript text is delimited by double commas. The opening and closing
|
|
6
|
+
* markers must appear on the same line. If no closing `,,` is found
|
|
7
|
+
* before a newline, the opening marker is emitted as literal text.
|
|
8
|
+
*
|
|
9
|
+
* Empty subscript (`,,,,`) is silently discarded by Wikidot (produces
|
|
10
|
+
* no output), matching the behavior of bold and superscript.
|
|
11
|
+
*
|
|
12
|
+
* Renders as a `<sub>` element in HTML.
|
|
13
|
+
*
|
|
14
|
+
* Produces a `"container"` AST element with `type: "subscript"`.
|
|
15
|
+
*
|
|
16
|
+
* @module
|
|
17
|
+
*/
|
|
18
|
+
import type { Element } from "@wdprlib/ast";
|
|
19
|
+
import type { InlineRule, ParseContext, RuleResult } from "../types";
|
|
20
|
+
import { currentToken, hasClosingMarkerBeforeNewline } from "../types";
|
|
21
|
+
import { parseInlineUntil } from "./utils";
|
|
22
|
+
|
|
23
|
+
/**
 * Inline rule for `,,subscript,,`.
 *
 * Starts on a `SUB_MARKER` token (`,,`). If
 * `hasClosingMarkerBeforeNewline` reports no closing marker, the opening
 * token is emitted unchanged as text. Otherwise the content between the
 * markers is parsed as inline content and wrapped in a `subscript`
 * container; empty content (`,,,,`) yields no elements at all.
 */
export const subscriptRule: InlineRule = {
  name: "subscript",
  startTokens: ["SUB_MARKER"],

  /**
   * Parses subscript formatting at `ctx.pos`. This rule always succeeds.
   *
   * @param ctx - Parse context holding the token stream and current position
   * @returns A successful result containing a `"container"` element with
   *          `type: "subscript"`, no elements (for `,,,,`), or a text
   *          element with the raw marker when it is unmatched
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const marker = currentToken(ctx);
    const contentStart = ctx.pos + 1;

    const isPaired = hasClosingMarkerBeforeNewline({ ...ctx, pos: contentStart }, "SUB_MARKER");
    if (!isPaired) {
      // Unmatched marker: keep it as literal text.
      return {
        success: true,
        elements: [{ element: "text", data: marker.value }],
        consumed: 1,
      };
    }

    const inner = parseInlineUntil({ ...ctx, pos: contentStart }, "SUB_MARKER");

    // Empty subscript is dropped; both markers are still consumed.
    const elements: Element[] =
      inner.elements.length === 0
        ? []
        : [
            {
              element: "container",
              data: {
                type: "subscript",
                attributes: {},
                elements: inner.elements,
              },
            },
          ];

    return {
      success: true,
      elements,
      // opening marker + content + closing marker
      consumed: 1 + inner.consumed + 1,
    };
  },
};
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Parses the Wikidot superscript formatting syntax: `^^text^^`.
|
|
4
|
+
*
|
|
5
|
+
* Superscript text is delimited by double carets. The opening and
|
|
6
|
+
* closing markers must appear on the same line. If no closing `^^`
|
|
7
|
+
* is found before a newline, the opening marker is emitted as literal text.
|
|
8
|
+
*
|
|
9
|
+
* Empty superscript (`^^^^`) is silently discarded by Wikidot (produces
|
|
10
|
+
* no output), matching the behavior of bold and subscript.
|
|
11
|
+
*
|
|
12
|
+
* Renders as a `<sup>` element in HTML.
|
|
13
|
+
*
|
|
14
|
+
* Produces a `"container"` AST element with `type: "superscript"`.
|
|
15
|
+
*
|
|
16
|
+
* @module
|
|
17
|
+
*/
|
|
18
|
+
import type { Element } from "@wdprlib/ast";
|
|
19
|
+
import type { InlineRule, ParseContext, RuleResult } from "../types";
|
|
20
|
+
import { currentToken, hasClosingMarkerBeforeNewline } from "../types";
|
|
21
|
+
import { parseInlineUntil } from "./utils";
|
|
22
|
+
|
|
23
|
+
/**
 * Inline rule for `^^superscript^^`.
 *
 * Starts on a `SUPER_MARKER` token (`^^`). If
 * `hasClosingMarkerBeforeNewline` reports no closing marker, the opening
 * token is emitted unchanged as text. Otherwise the content between the
 * markers is parsed as inline content and wrapped in a `superscript`
 * container; empty content (`^^^^`) yields no elements at all.
 */
export const superscriptRule: InlineRule = {
  name: "superscript",
  startTokens: ["SUPER_MARKER"],

  /**
   * Parses superscript formatting at `ctx.pos`. This rule always succeeds.
   *
   * @param ctx - Parse context holding the token stream and current position
   * @returns A successful result containing a `"container"` element with
   *          `type: "superscript"`, no elements (for `^^^^`), or a text
   *          element with the raw marker when it is unmatched
   */
  parse(ctx: ParseContext): RuleResult<Element> {
    const marker = currentToken(ctx);
    const contentStart = ctx.pos + 1;

    const isPaired = hasClosingMarkerBeforeNewline({ ...ctx, pos: contentStart }, "SUPER_MARKER");
    if (!isPaired) {
      // Unmatched marker: keep it as literal text.
      return {
        success: true,
        elements: [{ element: "text", data: marker.value }],
        consumed: 1,
      };
    }

    const inner = parseInlineUntil({ ...ctx, pos: contentStart }, "SUPER_MARKER");

    // Empty superscript is dropped; both markers are still consumed.
    const elements: Element[] =
      inner.elements.length === 0
        ? []
        : [
            {
              element: "container",
              data: {
                type: "superscript",
                attributes: {},
                elements: inner.elements,
              },
            },
          ];

    return {
      success: true,
      elements,
      // opening marker + content + closing marker
      consumed: 1 + inner.consumed + 1,
    };
  },
};
|