@wdprlib/parser 3.1.1 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +312 -121
- package/dist/index.js +289 -98
- package/package.json +5 -3
- package/src/index.ts +163 -0
- package/src/lexer/index.ts +20 -0
- package/src/lexer/lexer.ts +687 -0
- package/src/lexer/tokens.ts +141 -0
- package/src/parser/constants.ts +173 -0
- package/src/parser/depth.ts +251 -0
- package/src/parser/index.ts +18 -0
- package/src/parser/parse.ts +315 -0
- package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
- package/src/parser/postprocess/index.ts +15 -0
- package/src/parser/postprocess/spanStrip.ts +697 -0
- package/src/parser/preprocess/expr.ts +265 -0
- package/src/parser/preprocess/index.ts +38 -0
- package/src/parser/preprocess/typography.ts +67 -0
- package/src/parser/preprocess/utils.ts +250 -0
- package/src/parser/preprocess/whitespace.ts +111 -0
- package/src/parser/rules/block/align.ts +282 -0
- package/src/parser/rules/block/bibliography.ts +359 -0
- package/src/parser/rules/block/block-list.ts +689 -0
- package/src/parser/rules/block/blockquote.ts +238 -0
- package/src/parser/rules/block/center.ts +87 -0
- package/src/parser/rules/block/clear-float.ts +75 -0
- package/src/parser/rules/block/code.ts +187 -0
- package/src/parser/rules/block/collapsible.ts +337 -0
- package/src/parser/rules/block/comment.ts +73 -0
- package/src/parser/rules/block/content-separator.ts +79 -0
- package/src/parser/rules/block/definition-list.ts +270 -0
- package/src/parser/rules/block/div.ts +400 -0
- package/src/parser/rules/block/embed-block.ts +153 -0
- package/src/parser/rules/block/footnoteblock.ts +200 -0
- package/src/parser/rules/block/heading.ts +142 -0
- package/src/parser/rules/block/horizontal-rule.ts +61 -0
- package/src/parser/rules/block/html.ts +222 -0
- package/src/parser/rules/block/iframe.ts +239 -0
- package/src/parser/rules/block/iftags.ts +150 -0
- package/src/parser/rules/block/include.ts +179 -0
- package/src/parser/rules/block/index.ts +127 -0
- package/src/parser/rules/block/list.ts +244 -0
- package/src/parser/rules/block/math.ts +183 -0
- package/src/parser/rules/block/module/backlinks/index.ts +31 -0
- package/src/parser/rules/block/module/backlinks/types.ts +21 -0
- package/src/parser/rules/block/module/categories/index.ts +34 -0
- package/src/parser/rules/block/module/categories/types.ts +21 -0
- package/src/parser/rules/block/module/css/index.ts +37 -0
- package/src/parser/rules/block/module/iftags/condition.ts +109 -0
- package/src/parser/rules/block/module/iftags/index.ts +26 -0
- package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
- package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
- package/src/parser/rules/block/module/iftags/types.ts +63 -0
- package/src/parser/rules/block/module/include/index.ts +20 -0
- package/src/parser/rules/block/module/include/resolve.ts +556 -0
- package/src/parser/rules/block/module/index.ts +122 -0
- package/src/parser/rules/block/module/join/index.ts +34 -0
- package/src/parser/rules/block/module/join/types.ts +23 -0
- package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
- package/src/parser/rules/block/module/listpages/extract.ts +410 -0
- package/src/parser/rules/block/module/listpages/index.ts +83 -0
- package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
- package/src/parser/rules/block/module/listpages/parser.ts +106 -0
- package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
- package/src/parser/rules/block/module/listpages/types.ts +513 -0
- package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
- package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
- package/src/parser/rules/block/module/listusers/extract.ts +45 -0
- package/src/parser/rules/block/module/listusers/index.ts +36 -0
- package/src/parser/rules/block/module/listusers/parser.ts +54 -0
- package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
- package/src/parser/rules/block/module/listusers/types.ts +93 -0
- package/src/parser/rules/block/module/mapping.ts +61 -0
- package/src/parser/rules/block/module/page-tree/index.ts +38 -0
- package/src/parser/rules/block/module/page-tree/types.ts +29 -0
- package/src/parser/rules/block/module/rate/index.ts +28 -0
- package/src/parser/rules/block/module/rate/types.ts +19 -0
- package/src/parser/rules/block/module/resolve.ts +411 -0
- package/src/parser/rules/block/module/types-common.ts +59 -0
- package/src/parser/rules/block/module/types.ts +61 -0
- package/src/parser/rules/block/module/utils.ts +43 -0
- package/src/parser/rules/block/module/walk.ts +380 -0
- package/src/parser/rules/block/module.ts +164 -0
- package/src/parser/rules/block/orphan-li.ts +177 -0
- package/src/parser/rules/block/paragraph.ts +157 -0
- package/src/parser/rules/block/table-block.ts +726 -0
- package/src/parser/rules/block/table.ts +441 -0
- package/src/parser/rules/block/tabview.ts +331 -0
- package/src/parser/rules/block/toc.ts +129 -0
- package/src/parser/rules/block/utils.ts +615 -0
- package/src/parser/rules/index.ts +49 -0
- package/src/parser/rules/inline/anchor-name.ts +154 -0
- package/src/parser/rules/inline/anchor.ts +327 -0
- package/src/parser/rules/inline/bibcite.ts +153 -0
- package/src/parser/rules/inline/bold.ts +86 -0
- package/src/parser/rules/inline/color.ts +140 -0
- package/src/parser/rules/inline/comment.ts +90 -0
- package/src/parser/rules/inline/equation-ref.ts +115 -0
- package/src/parser/rules/inline/expr.ts +526 -0
- package/src/parser/rules/inline/footnote.ts +223 -0
- package/src/parser/rules/inline/guillemet.ts +64 -0
- package/src/parser/rules/inline/html.ts +132 -0
- package/src/parser/rules/inline/image.ts +328 -0
- package/src/parser/rules/inline/index.ts +150 -0
- package/src/parser/rules/inline/italic.ts +74 -0
- package/src/parser/rules/inline/line-break.ts +326 -0
- package/src/parser/rules/inline/link-anchor.ts +147 -0
- package/src/parser/rules/inline/link-single.ts +164 -0
- package/src/parser/rules/inline/link-star.ts +134 -0
- package/src/parser/rules/inline/link-triple.ts +267 -0
- package/src/parser/rules/inline/math-inline.ts +126 -0
- package/src/parser/rules/inline/monospace.ts +78 -0
- package/src/parser/rules/inline/raw.ts +262 -0
- package/src/parser/rules/inline/size.ts +244 -0
- package/src/parser/rules/inline/span.ts +424 -0
- package/src/parser/rules/inline/strikethrough.ts +115 -0
- package/src/parser/rules/inline/subscript.ts +84 -0
- package/src/parser/rules/inline/superscript.ts +84 -0
- package/src/parser/rules/inline/text.ts +84 -0
- package/src/parser/rules/inline/underline.ts +127 -0
- package/src/parser/rules/inline/user.ts +147 -0
- package/src/parser/rules/inline/utils.ts +344 -0
- package/src/parser/rules/types.ts +252 -0
- package/src/parser/rules/utils.ts +155 -0
- package/src/parser/toc.ts +130 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Block rule for the Wikidot HTML block: `[[html]]...[[/html]]`.
|
|
4
|
+
*
|
|
5
|
+
* An HTML block embeds raw HTML that Wikidot renders inside a sandboxed
|
|
6
|
+
* `<iframe>`. The content between the tags is captured verbatim (no inline
|
|
7
|
+
* parsing) and stored as an `html` element in the AST.
|
|
8
|
+
*
|
|
9
|
+
* Supported attributes on the opening tag:
|
|
10
|
+
* - `style` -- applied to the containing iframe. Other attributes are
|
|
11
|
+
* parsed but only `style` is used by Wikidot's renderer.
|
|
12
|
+
*
|
|
13
|
+
* The raw content is also pushed into `ctx.htmlBlocks` for document-level
|
|
14
|
+
* enumeration.
|
|
15
|
+
*
|
|
16
|
+
* If no `[[/html]]` closing tag is found, the rule fails and the opening
|
|
17
|
+
* tag falls through to text rendering (matching Wikidot behaviour).
|
|
18
|
+
*
|
|
19
|
+
* @module
|
|
20
|
+
*/
|
|
21
|
+
import type { Element } from "@wdprlib/ast";
|
|
22
|
+
import type { BlockRule, ParseContext, RuleResult } from "../types";
|
|
23
|
+
import { currentToken } from "../types";
|
|
24
|
+
import { parseBlockName, parseAttributesRaw } from "./utils";
|
|
25
|
+
|
|
26
|
+
/**
 * Reports whether a complete `[[/html]]` close tag occurs at or after
 * index `from` in `ctx.tokens`.
 *
 * A close tag counts only when `BLOCK_END_OPEN` is followed by the block
 * name `html` (case-insensitive), optional `WHITESPACE` tokens, and then
 * `BLOCK_CLOSE`. The scan gives up at the first missing token or `EOF`.
 *
 * @param ctx - Parse context whose token stream is inspected (not advanced).
 * @param from - Index of the first token to examine.
 * @returns `true` if such a close tag is found before `EOF`, else `false`.
 */
export function lookaheadHasHtmlClose(ctx: ParseContext, from: number): boolean {
  const { tokens } = ctx;
  let index = from;

  while (index < tokens.length) {
    const candidate = tokens[index];
    if (!candidate || candidate.type === "EOF") {
      break;
    }

    if (candidate.type === "BLOCK_END_OPEN") {
      const parsedName = parseBlockName(ctx, index + 1);
      if (parsedName && parsedName.name.toLowerCase() === "html") {
        // Step over the name, then any padding, and demand the closing `]]`.
        let cursor = index + 1 + parsedName.consumed;
        while (tokens[cursor]?.type === "WHITESPACE") {
          cursor += 1;
        }
        if (tokens[cursor]?.type === "BLOCK_CLOSE") {
          return true;
        }
      }
    }

    index += 1;
  }

  return false;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Block rule that recognises `[[html]]...[[/html]]`.
 *
 * The body is not parsed: the values of the tokens between the opening and
 * closing tags are concatenated, trimmed, pushed onto `ctx.htmlBlocks`, and
 * returned in an `html` element. A `style` attribute on the opening tag, if
 * present, is copied onto the element's data.
 *
 * When `ctx.settings.allowHtmlBlocks` is `false` the block is still consumed
 * but yields no element and is not recorded in `ctx.htmlBlocks`; an
 * `html-block-disabled` info diagnostic is emitted instead.
 */
export const htmlBlockRule: BlockRule = {
  name: "html",
  startTokens: ["BLOCK_OPEN"],
  requiresLineStart: false,

  parse(ctx: ParseContext): RuleResult<Element> {
    const openToken = currentToken(ctx);
    if (openToken.type !== "BLOCK_OPEN") {
      return { success: false };
    }

    const { tokens } = ctx;
    const start = ctx.pos;
    // Single cursor; the number of consumed tokens is always `cursor - start`.
    let cursor = start + 1;

    // Opening tag: name must be `html` (case-insensitive).
    const blockName = parseBlockName(ctx, cursor);
    if (blockName?.name.toLowerCase() !== "html") {
      return { success: false };
    }
    cursor += blockName.consumed;

    // Attributes are all parsed, but only `style` is carried forward.
    const parsedAttrs = parseAttributesRaw(ctx, cursor);
    cursor += parsedAttrs.consumed;
    const style = parsedAttrs.attrs.style;

    // A malformed opener (no `]]`) fails the rule regardless of settings.
    if (tokens[cursor]?.type !== "BLOCK_CLOSE") {
      return { success: false };
    }
    cursor += 1;

    const disabled = ctx.settings.allowHtmlBlocks === false;
    // Only relevant when disabled: decides whether a blank line ends the scan.
    const hasCloseAhead = disabled && lookaheadHasHtmlClose(ctx, cursor);

    // Index just past the `]]` of a complete `[[/html]]` starting at `at`,
    // or -1 when no complete close tag starts there. Whitespace between the
    // name and `]]` is tolerated; a close tag without `]]` is not accepted.
    const closeTagEnd = (at: number): number => {
      if (tokens[at]?.type !== "BLOCK_END_OPEN") {
        return -1;
      }
      const closeName = parseBlockName(ctx, at + 1);
      if (closeName?.name.toLowerCase() !== "html") {
        return -1;
      }
      let probe = at + 1 + closeName.consumed;
      while (tokens[probe]?.type === "WHITESPACE") {
        probe += 1;
      }
      return tokens[probe]?.type === "BLOCK_CLOSE" ? probe + 1 : -1;
    };

    // Emits the "disabled" diagnostic and reports everything scanned so far
    // as consumed, with no elements.
    const ignoredBecauseDisabled = (): RuleResult<Element> => {
      ctx.diagnostics.push({
        severity: "info",
        code: "html-block-disabled",
        message: "[[html]] block ignored: disabled by settings",
        position: openToken.position,
      });
      return { success: true, elements: [], consumed: cursor - start };
    };

    let contents = "";
    let afterClose = -1;

    while (cursor < tokens.length) {
      const token = tokens[cursor];
      if (!token || token.type === "EOF") {
        break;
      }

      // Disabled and never closed: stop at the first blank line so later
      // paragraphs are not swallowed.
      const atBlankLine = token.type === "NEWLINE" && tokens[cursor + 1]?.type === "NEWLINE";
      if (disabled && !hasCloseAhead && atBlankLine) {
        break;
      }

      afterClose = closeTagEnd(cursor);
      if (afterClose !== -1) {
        break;
      }

      // The body is thrown away when disabled, so do not bother building it.
      if (!disabled) {
        contents += token.value;
      }
      cursor += 1;
    }

    if (afterClose === -1) {
      // No close tag: always warn. Enabled blocks then fail the rule;
      // disabled blocks still swallow what was scanned.
      ctx.diagnostics.push({
        severity: "warning",
        code: "unclosed-block",
        message: "Missing closing tag [[/html]] for [[html]]",
        position: openToken.position,
      });
      return disabled ? ignoredBecauseDisabled() : { success: false };
    }

    // Jump past `[[/html]]` and one trailing newline, if any.
    cursor = afterClose;
    if (tokens[cursor]?.type === "NEWLINE") {
      cursor += 1;
    }

    if (disabled) {
      return ignoredBecauseDisabled();
    }

    contents = contents.trim();
    ctx.htmlBlocks.push(contents);

    return {
      success: true,
      elements: [
        {
          element: "html",
          data: {
            contents,
            ...(style && { style }),
          },
        },
      ],
      consumed: cursor - start,
    };
  },
};
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Block rule for the Wikidot iframe block: `[[iframe URL attributes]]`.
|
|
4
|
+
*
|
|
5
|
+
* The `[[iframe]]` tag is a self-closing block that embeds an external
|
|
6
|
+
* page in an `<iframe>`. The first argument after the block name is the
|
|
7
|
+
* URL, followed by optional attributes.
|
|
8
|
+
*
|
|
9
|
+
* Security measures:
|
|
10
|
+
* - Only `http://` and `https://` URLs are accepted.
|
|
11
|
+
* - `javascript:`, `data:`, and `vbscript:` schemes are rejected.
|
|
12
|
+
* - URL normalisation strips whitespace and control characters to prevent
|
|
13
|
+
* evasion via character insertion.
|
|
14
|
+
* - Only a specific set of HTML attributes is allowed (Wikidot filters
|
|
15
|
+
* out `id` but permits `class`).
|
|
16
|
+
*
|
|
17
|
+
* Allowed attributes: `align`, `class`, `frameborder`, `height`,
|
|
18
|
+
* `scrolling`, `style`, `width`.
|
|
19
|
+
*
|
|
20
|
+
* @module
|
|
21
|
+
*/
|
|
22
|
+
import type { AttributeMap, Element } from "@wdprlib/ast";
|
|
23
|
+
import type { BlockRule, ParseContext, RuleResult } from "../types";
|
|
24
|
+
import { currentToken } from "../types";
|
|
25
|
+
import { parseBlockName } from "./utils";
|
|
26
|
+
|
|
27
|
+
/**
 * Lower-case attribute names that `[[iframe]]` keeps; any attribute whose
 * name is not in this set is discarded during parsing. `class` is listed,
 * `id` is not.
 */
const ALLOWED_IFRAME_ATTRS = new Set<string>(
  ["align", "class", "frameborder", "height", "scrolling", "style", "width"],
);
|
|
40
|
+
|
|
41
|
+
/**
 * Produces the form of a URL used for scheme checks: every whitespace
 * character and every control character in U+0000--U+001F or
 * U+007F--U+009F is deleted, and the remainder is lower-cased.
 *
 * @param url - The raw URL string.
 * @returns The stripped, lower-cased URL.
 */
function normalizeUrl(url: string): string {
  const withoutHiddenChars = url.replace(/[\s\u0000-\u001f\u007f-\u009f]/g, "");
  return withoutHiddenChars.toLowerCase();
}
|
|
52
|
+
|
|
53
|
+
/**
 * Checks whether a URL starts with one of the rejected schemes
 * `javascript:`, `data:` or `vbscript:` (compared case-insensitively).
 *
 * @param normalizedUrl - The URL after {@link normalizeUrl} processing.
 * @returns `true` if the URL begins with a rejected scheme.
 */
function isDangerousUrl(normalizedUrl: string): boolean {
  const lowered = normalizedUrl.toLowerCase();
  const rejectedPrefixes = ["javascript:", "data:", "vbscript:"];
  return rejectedPrefixes.some((prefix) => lowered.startsWith(prefix));
}
|
|
63
|
+
|
|
64
|
+
/**
 * Block rule for the self-closing `[[iframe URL ...attributes]]` tag.
 *
 * Steps:
 * 1. Require `BLOCK_OPEN` followed by the block name `iframe`.
 * 2. Read the URL: token values up to whitespace, a newline, or `]]`.
 * 3. Reject the tag unless the normalised URL is free of dangerous schemes
 *    and starts with `http://` or `https://`.
 * 4. Read `key=value` / `key="value"` pairs, keeping only keys listed in
 *    `ALLOWED_IFRAME_ATTRS` (stored lower-cased).
 * 5. Require `]]`, then swallow one trailing newline if present.
 * 6. Return an `iframe` element carrying the raw `url` and `attributes`.
 *
 * Any rejection returns `{ success: false }` without consuming tokens.
 */
export const iframeRule: BlockRule = {
  name: "iframe",
  startTokens: ["BLOCK_OPEN"],
  requiresLineStart: false,

  parse(ctx: ParseContext): RuleResult<Element> {
    if (currentToken(ctx).type !== "BLOCK_OPEN") {
      return { success: false };
    }

    const { tokens } = ctx;
    const start = ctx.pos;
    // Single cursor; the number of consumed tokens is always `cursor - start`.
    let cursor = start + 1;

    // Advances the cursor past a run of WHITESPACE tokens.
    const skipWhitespace = (): void => {
      while (tokens[cursor]?.type === "WHITESPACE") {
        cursor += 1;
      }
    };

    // Joins token values from the cursor until whitespace, a newline, `]]`,
    // or the end of the stream, leaving the cursor on the stopping token.
    const readBareWord = (): string => {
      let word = "";
      while (cursor < tokens.length) {
        const piece = tokens[cursor];
        if (
          !piece ||
          piece.type === "BLOCK_CLOSE" ||
          piece.type === "WHITESPACE" ||
          piece.type === "NEWLINE"
        ) {
          break;
        }
        word += piece.value;
        cursor += 1;
      }
      return word;
    };

    // Block name
    const blockName = parseBlockName(ctx, cursor);
    if (blockName?.name.toLowerCase() !== "iframe") {
      return { success: false };
    }
    cursor += blockName.consumed;

    // URL (first argument)
    skipWhitespace();
    const url = readBareWord();
    if (!url) {
      return { success: false };
    }

    // Scheme checks run on the normalised form; the element keeps the raw URL.
    // Relative URLs and any non-http(s) scheme are rejected here.
    const normalizedUrl = normalizeUrl(url);
    if (isDangerousUrl(normalizedUrl) || !/^https?:\/\//i.test(normalizedUrl)) {
      return { success: false };
    }

    // Attributes
    const attributes: AttributeMap = {};

    while (cursor < tokens.length) {
      const token = tokens[cursor];
      if (!token || token.type === "BLOCK_CLOSE" || token.type === "NEWLINE") {
        break;
      }

      // Every remaining token kind advances by one: whitespace and stray
      // tokens are skipped, IDENTIFIER/TEXT become a candidate key.
      cursor += 1;
      if (token.type !== "IDENTIFIER" && token.type !== "TEXT") {
        continue;
      }
      const key = token.value;

      // A key that is not followed by `=` is silently dropped.
      skipWhitespace();
      if (tokens[cursor]?.type !== "EQUALS") {
        continue;
      }
      cursor += 1;
      skipWhitespace();

      let value: string;
      const first = tokens[cursor];
      if (first?.type === "QUOTED_STRING") {
        // Strip the surrounding quote characters.
        value = first.value.slice(1, -1);
        cursor += 1;
      } else {
        value = readBareWord();
      }

      // Whitelist filter: names outside ALLOWED_IFRAME_ATTRS are discarded.
      const attrName = key.toLowerCase();
      if (ALLOWED_IFRAME_ATTRS.has(attrName)) {
        attributes[attrName] = value;
      }
    }

    // Closing `]]` is mandatory.
    if (tokens[cursor]?.type !== "BLOCK_CLOSE") {
      return { success: false };
    }
    cursor += 1;

    // One trailing newline belongs to the tag.
    if (tokens[cursor]?.type === "NEWLINE") {
      cursor += 1;
    }

    return {
      success: true,
      elements: [
        {
          element: "iframe",
          data: {
            url,
            attributes,
          },
        },
      ],
      consumed: cursor - start,
    };
  },
};
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Block rule for Wikidot conditional tag blocks: `[[iftags]]...[[/iftags]]`.
|
|
4
|
+
*
|
|
5
|
+
* The `[[iftags]]` construct conditionally includes or excludes its body
|
|
6
|
+
* content based on the page's tags. The condition expression is everything
|
|
7
|
+
* between the block name and `]]`, e.g.:
|
|
8
|
+
*
|
|
9
|
+
* ```
|
|
10
|
+
* [[iftags +scp -tale]]
|
|
11
|
+
* This content only shows if the page has tag "scp" and not "tale".
|
|
12
|
+
* [[/iftags]]
|
|
13
|
+
* ```
|
|
14
|
+
*
|
|
15
|
+
* The condition string is stored as-is in the AST; actual evaluation is
|
|
16
|
+
* performed at render time based on the page's tag set.
|
|
17
|
+
*
|
|
18
|
+
* Body content is parsed as normal block-level markup using
|
|
19
|
+
* {@link parseBlocksUntil}.
|
|
20
|
+
*
|
|
21
|
+
* @module
|
|
22
|
+
*/
|
|
23
|
+
import type { Element } from "@wdprlib/ast";
|
|
24
|
+
import type { BlockRule, ParseContext, RuleResult } from "../types";
|
|
25
|
+
import { currentToken } from "../types";
|
|
26
|
+
import { parseBlockName, parseBlocksUntil } from "./utils";
|
|
27
|
+
|
|
28
|
+
/**
 * Block rule for `[[iftags condition]]...[[/iftags]]`.
 *
 * Produces an `if-tags` element holding the trimmed condition text (the
 * token values between the block name and `]]`) and the body elements
 * returned by `parseBlocksUntil`.
 *
 * The rule fails, consuming nothing, when the opener is not
 * `[[iftags ...]]` terminated by `]]` on the same line. A missing
 * `[[/iftags]]` does not fail the rule: an `unclosed-block` warning is
 * recorded and whatever body was parsed is returned.
 */
export const iftagsRule: BlockRule = {
  name: "iftags",
  startTokens: ["BLOCK_OPEN"],
  requiresLineStart: false,

  parse(ctx: ParseContext): RuleResult<Element> {
    const openToken = currentToken(ctx);
    if (openToken.type !== "BLOCK_OPEN") {
      return { success: false };
    }

    let pos = ctx.pos + 1;
    let consumed = 1;

    // Parse block name
    const nameResult = parseBlockName(ctx, pos);
    if (!nameResult || nameResult.name.toLowerCase() !== "iftags") {
      return { success: false };
    }
    pos += nameResult.consumed;
    consumed += nameResult.consumed;

    // Skip whitespace between the name and the condition
    while (ctx.tokens[pos]?.type === "WHITESPACE") {
      pos++;
      consumed++;
    }

    // Parse condition (tag expressions): everything up to `]]` or end of line
    let condition = "";
    while (pos < ctx.tokens.length) {
      const token = ctx.tokens[pos];
      if (!token || token.type === "BLOCK_CLOSE" || token.type === "NEWLINE") {
        break;
      }
      condition += token.value;
      pos++;
      consumed++;
    }

    // Expect ]]
    if (ctx.tokens[pos]?.type !== "BLOCK_CLOSE") {
      return { success: false };
    }
    pos++;
    consumed++;

    // Skip newline after opening tag
    if (ctx.tokens[pos]?.type === "NEWLINE") {
      pos++;
      consumed++;
    }

    // Body parsing stops at `[[/` followed by the block name `iftags`.
    const closeCondition = (checkCtx: ParseContext): boolean => {
      const token = checkCtx.tokens[checkCtx.pos];
      if (token?.type === "BLOCK_END_OPEN") {
        const closeNameResult = parseBlockName(checkCtx, checkCtx.pos + 1);
        if (closeNameResult?.name.toLowerCase() === "iftags") {
          return true;
        }
      }
      return false;
    };

    // Parse body on a shallow copy of the context positioned after the opener
    const bodyCtx: ParseContext = { ...ctx, pos };
    const bodyResult = parseBlocksUntil(bodyCtx, closeCondition);
    consumed += bodyResult.consumed;
    pos += bodyResult.consumed;

    if (ctx.tokens[pos]?.type === "BLOCK_END_OPEN") {
      // Consume [[/iftags]]
      pos++;
      consumed++;
      const closeNameResult = parseBlockName(ctx, pos);
      if (closeNameResult) {
        pos += closeNameResult.consumed;
        consumed += closeNameResult.consumed;
      }

      // The close condition above does not look past the name, so
      // `[[/iftags ]]` ends the body too. Accept whitespace before `]]`
      // so that the trailing ` ]]` is not left behind for later rules.
      // The whitespace is consumed only when `]]` really follows; otherwise
      // nothing extra is taken.
      // NOTE(review): assumes parseBlockName stops before trailing
      // whitespace -- confirm against its implementation.
      let closePos = pos;
      while (ctx.tokens[closePos]?.type === "WHITESPACE") {
        closePos++;
      }
      if (ctx.tokens[closePos]?.type === "BLOCK_CLOSE") {
        consumed += closePos - pos + 1;
        pos = closePos + 1;
      }

      if (ctx.tokens[pos]?.type === "NEWLINE") {
        pos++;
        consumed++;
      }
    } else {
      // Body parsing ended without reaching a close tag
      ctx.diagnostics.push({
        severity: "warning",
        code: "unclosed-block",
        message: "Missing closing tag [[/iftags]] for [[iftags]]",
        position: openToken.position,
      });
    }

    condition = condition.trim();

    return {
      success: true,
      elements: [
        {
          element: "if-tags",
          data: {
            condition,
            elements: bodyResult.elements,
          },
        },
      ],
      consumed,
    };
  },
};
|