@wdprlib/render 1.4.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +126 -393
- package/dist/index.js +117 -384
- package/package.json +5 -3
- package/src/context.ts +422 -0
- package/src/elements/bibliography.ts +123 -0
- package/src/elements/clear-float.ts +27 -0
- package/src/elements/code.ts +49 -0
- package/src/elements/collapsible.ts +105 -0
- package/src/elements/color.ts +32 -0
- package/src/elements/container.ts +302 -0
- package/src/elements/date.ts +59 -0
- package/src/elements/embed-block.ts +327 -0
- package/src/elements/embed.ts +166 -0
- package/src/elements/expr.ts +102 -0
- package/src/elements/footnote.ts +76 -0
- package/src/elements/html.ts +79 -0
- package/src/elements/iframe.ts +44 -0
- package/src/elements/iftags.ts +118 -0
- package/src/elements/image.ts +154 -0
- package/src/elements/include.ts +43 -0
- package/src/elements/index.ts +35 -0
- package/src/elements/line-break.ts +22 -0
- package/src/elements/link.ts +201 -0
- package/src/elements/list.ts +241 -0
- package/src/elements/math.ts +177 -0
- package/src/elements/module/backlinks.ts +28 -0
- package/src/elements/module/categories.ts +27 -0
- package/src/elements/module/index.ts +67 -0
- package/src/elements/module/join.ts +33 -0
- package/src/elements/module/listpages.ts +27 -0
- package/src/elements/module/listusers.ts +27 -0
- package/src/elements/module/page-tree.ts +27 -0
- package/src/elements/module/rate.ts +44 -0
- package/src/elements/tab-view.ts +75 -0
- package/src/elements/table.ts +101 -0
- package/src/elements/text.ts +57 -0
- package/src/elements/toc.ts +147 -0
- package/src/elements/user.ts +79 -0
- package/src/escape.ts +829 -0
- package/src/hash.ts +62 -0
- package/src/index.ts +26 -0
- package/src/libs/highlighter/engine.ts +352 -0
- package/src/libs/highlighter/index.ts +70 -0
- package/src/libs/highlighter/languages/cpp.ts +345 -0
- package/src/libs/highlighter/languages/css.ts +104 -0
- package/src/libs/highlighter/languages/diff.ts +154 -0
- package/src/libs/highlighter/languages/dtd.ts +99 -0
- package/src/libs/highlighter/languages/html.ts +59 -0
- package/src/libs/highlighter/languages/java.ts +251 -0
- package/src/libs/highlighter/languages/javascript.ts +213 -0
- package/src/libs/highlighter/languages/php.ts +433 -0
- package/src/libs/highlighter/languages/python.ts +308 -0
- package/src/libs/highlighter/languages/ruby.ts +360 -0
- package/src/libs/highlighter/languages/sql.ts +125 -0
- package/src/libs/highlighter/languages/xml.ts +68 -0
- package/src/libs/highlighter/types.ts +44 -0
- package/src/render.ts +231 -0
- package/src/types.ts +140 -0
package/src/escape.ts
ADDED
|
@@ -0,0 +1,829 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* HTML, CSS, URL, and attribute sanitization utilities for the render pipeline.
|
|
4
|
+
*
|
|
5
|
+
* Every piece of user-supplied content that flows into the HTML output must
|
|
6
|
+
* pass through one of these functions to prevent Cross-Site Scripting (XSS)
|
|
7
|
+
* and CSS injection attacks.
|
|
8
|
+
*
|
|
9
|
+
* The module provides several layers of defense:
|
|
10
|
+
* - Text escaping ({@link escapeHtml}, {@link escapeAttr}, {@link escapeJsString})
|
|
11
|
+
* - URL scheme blocking ({@link isDangerousUrl}) against `javascript:`, `data:`, `vbscript:`
|
|
12
|
+
* - Attribute allowlisting ({@link isSafeAttribute}) to block event handlers (`on*`)
|
|
13
|
+
* - CSS value sanitization ({@link isDangerousCssValue}, {@link sanitizeStyleValue})
|
|
14
|
+
* with normalization to defeat CSS escape/comment bypass techniques
|
|
15
|
+
* - Composite attribute sanitization ({@link sanitizeAttributes}) combining all checks
|
|
16
|
+
*
|
|
17
|
+
* @module
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
/**
 * Escape the three HTML-special characters (`&`, `<`, `>`) in text content.
 *
 * Suitable for text nodes. For attribute values, use {@link escapeAttr}
 * which additionally escapes quotation marks.
 *
 * `&` is replaced first so that the ampersands introduced by the `&lt;`
 * and `&gt;` entities are not escaped a second time.
 *
 * @param text - The raw text to escape.
 * @returns The escaped string safe for embedding in HTML text content.
 */
export function escapeHtml(text: string): string {
  return text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
}
|
|
32
|
+
|
|
33
|
+
/**
 * Escape a string for safe use inside an HTML attribute value.
 *
 * Stricter than {@link escapeHtml}: in addition to `&`, `<`, and `>`,
 * this also escapes both double and single quotes to prevent attribute
 * breakout regardless of which quote character delimits the attribute.
 *
 * `&` is replaced first so that the ampersands introduced by the other
 * entities are not escaped a second time.
 *
 * @param value - The raw attribute value to escape.
 * @returns The escaped string safe for embedding in an HTML attribute.
 */
export function escapeAttr(value: string): string {
  return value
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}
|
|
51
|
+
|
|
52
|
+
/**
 * Neutralise closing `</style` sequences in CSS that will be embedded
 * inside a `<style>` element.
 *
 * Without this, CSS containing `</style><script>...` would terminate the
 * style element early and let the remainder be parsed as HTML. Each
 * case-insensitive occurrence of `</style` is rewritten to `<\/style`,
 * so the HTML parser no longer sees an end tag.
 *
 * @param css - The raw CSS text to sanitize.
 * @returns The CSS text with every `</style` occurrence rewritten.
 */
export function escapeStyleContent(css: string): string {
  const closingTagStart = /<\/style/gi;
  const neutralised = "<\\/style";
  return css.replace(closingTagStart, neutralised);
}
|
|
67
|
+
|
|
68
|
+
/**
 * Escape a value so it can sit inside a JavaScript string literal that is
 * itself placed in an HTML attribute (e.g. `onclick="fn('...')"`).
 *
 * Characters that could terminate the JS string or the surrounding
 * attribute are rewritten as backslash escapes: backslash, both quote
 * characters, `<`, `>`, `&`, CR, LF, and the Unicode line/paragraph
 * separators (U+2028/U+2029).
 *
 * @param value - The raw string to escape.
 * @returns The escaped string safe for use inside a JS string literal in HTML.
 */
export function escapeJsString(value: string): string {
  // Applied in order; the backslash rule must run first so that the
  // backslashes emitted by the later rules are not doubled.
  const substitutions: ReadonlyArray<readonly [RegExp, string]> = [
    [/\\/g, "\\\\"],
    [/'/g, "\\x27"],
    [/"/g, "\\x22"],
    [/</g, "\\x3c"],
    [/>/g, "\\x3e"],
    [/&/g, "\\x26"],
    [/\n/g, "\\n"],
    [/\r/g, "\\r"],
    [/\u2028/g, "\\u2028"],
    [/\u2029/g, "\\u2029"],
  ];

  let escaped = value;
  for (const [pattern, replacement] of substitutions) {
    escaped = escaped.replace(pattern, replacement);
  }
  return escaped;
}
|
|
93
|
+
|
|
94
|
+
/**
 * HTML attribute names that may be rendered as-is.
 *
 * Based on the attributes that Wikidot permits users to set via markup.
 * Event handler attributes (`on*`) are rejected separately in
 * {@link isSafeAttribute}, and the `aria-*` / `data-*` families are
 * accepted there by prefix, so neither appears in this list.
 */
const SAFE_ATTRIBUTES = new Set<string>([
  // a - b
  "accept", "align", "alt", "autocapitalize", "autoplay",
  "background", "bgcolor", "border", "buffered",
  // c - d
  "checked", "cite", "class", "cols", "colspan", "contenteditable", "controls", "coords",
  "datetime", "decoding", "default", "dir", "dirname", "disabled", "download", "draggable",
  // f - l
  "for", "form",
  "headers", "height", "hidden", "high", "href", "hreflang",
  "id", "inputmode", "ismap", "itemprop",
  "kind",
  "label", "lang", "list", "loop", "low",
  // m - r
  "max", "maxlength", "min", "minlength", "multiple", "muted",
  "name",
  "optimum",
  "pattern", "placeholder", "poster", "preload",
  "readonly", "required", "reversed", "role", "rows", "rowspan",
  // s - w
  "scope", "selected", "shape", "size", "sizes", "span", "spellcheck",
  "src", "srclang", "srcset", "start", "step", "style",
  "tabindex", "target", "title", "translate", "type",
  "usemap",
  "value",
  "width", "wrap",
]);
|
|
187
|
+
|
|
188
|
+
/**
 * Check whether an HTML attribute name is safe to include in rendered output.
 *
 * The check applies three rules in order:
 * 1. Block all event handlers (`on*` prefix) unconditionally
 * 2. Allow accessibility (`aria-*`) and custom data (`data-*`) attributes,
 *    provided the name contains only characters that can legally appear
 *    in an HTML attribute name
 * 3. Allow only attributes in the `SAFE_ATTRIBUTES` allowlist
 *
 * Rule 2 validates the whole name rather than just the prefix: a name
 * such as `data-x onmouseover=alert(1)` or `data-x"><script>` starts with
 * an allowed prefix but would break out of the attribute if written into
 * markup, so names containing whitespace, quotes, `<`, `>`, `/`, `=`, or
 * control characters are rejected.
 *
 * @param name - The attribute name to validate (case-insensitive).
 * @returns `true` if the attribute is safe to render.
 */
export function isSafeAttribute(name: string): boolean {
  const lower = name.toLowerCase();

  // Block all event handlers
  if (lower.startsWith("on")) return false;

  // Allow aria-* and data-* prefixes, but only for syntactically valid names
  if (lower.startsWith("aria-") || lower.startsWith("data-")) {
    return !/[\s"'<>\/=\u0000-\u001f\u007f-\u009f]/.test(lower);
  }

  // Exact-match allowlist; entries contain no special characters
  return SAFE_ATTRIBUTES.has(lower);
}
|
|
207
|
+
|
|
208
|
+
/**
 * Report whether a URL starts with a blocked scheme
 * (`javascript:`, `data:`, or `vbscript:`).
 *
 * All whitespace and control characters (U+0000-U+001F, U+007F-U+009F)
 * are removed before the scheme is tested, so obfuscated forms such as
 * `"java\nscript:"` or `"java\x00script:"` are still recognised. The
 * scheme comparison is case-insensitive.
 *
 * @param value - The URL string to check.
 * @returns `true` if the URL uses a dangerous scheme and should be blocked.
 */
export function isDangerousUrl(value: string): boolean {
  const ignorableChars = /[\s\u0000-\u001f\u007f-\u009f]/g;
  const blockedScheme = /^(javascript|data|vbscript):/i;

  const compacted = value.replace(ignorableChars, "");
  return blockedScheme.test(compacted);
}
|
|
223
|
+
|
|
224
|
+
/**
 * The CSS Level 4 named colors, followed by the special keywords
 * `transparent`, `currentcolor`, `inherit`, `initial`, and `unset`.
 *
 * {@link isValidCssColor} looks values up here (after lowercasing) so
 * that color names can be accepted without allowing arbitrary CSS
 * expressions.
 */
const CSS_NAMED_COLORS = new Set<string>([
  // a - c
  "aliceblue", "antiquewhite", "aqua", "aquamarine", "azure",
  "beige", "bisque", "black", "blanchedalmond", "blue", "blueviolet", "brown", "burlywood",
  "cadetblue", "chartreuse", "chocolate", "coral", "cornflowerblue", "cornsilk", "crimson", "cyan",
  // d
  "darkblue", "darkcyan", "darkgoldenrod", "darkgray", "darkgreen", "darkgrey",
  "darkkhaki", "darkmagenta", "darkolivegreen", "darkorange", "darkorchid", "darkred",
  "darksalmon", "darkseagreen", "darkslateblue", "darkslategray", "darkslategrey",
  "darkturquoise", "darkviolet", "deeppink", "deepskyblue", "dimgray", "dimgrey", "dodgerblue",
  // f - k
  "firebrick", "floralwhite", "forestgreen", "fuchsia",
  "gainsboro", "ghostwhite", "gold", "goldenrod", "gray", "green", "greenyellow", "grey",
  "honeydew", "hotpink",
  "indianred", "indigo", "ivory",
  "khaki",
  // l
  "lavender", "lavenderblush", "lawngreen", "lemonchiffon", "lightblue", "lightcoral",
  "lightcyan", "lightgoldenrodyellow", "lightgray", "lightgreen", "lightgrey", "lightpink",
  "lightsalmon", "lightseagreen", "lightskyblue", "lightslategray", "lightslategrey",
  "lightsteelblue", "lightyellow", "lime", "limegreen", "linen",
  // m - o
  "magenta", "maroon", "mediumaquamarine", "mediumblue", "mediumorchid", "mediumpurple",
  "mediumseagreen", "mediumslateblue", "mediumspringgreen", "mediumturquoise",
  "mediumvioletred", "midnightblue", "mintcream", "mistyrose", "moccasin",
  "navajowhite", "navy",
  "oldlace", "olive", "olivedrab", "orange", "orangered", "orchid",
  // p - r
  "palegoldenrod", "palegreen", "paleturquoise", "palevioletred", "papayawhip",
  "peachpuff", "peru", "pink", "plum", "powderblue", "purple",
  "rebeccapurple", "red", "rosybrown", "royalblue",
  // s - y
  "saddlebrown", "salmon", "sandybrown", "seagreen", "seashell", "sienna", "silver",
  "skyblue", "slateblue", "slategray", "slategrey", "snow", "springgreen", "steelblue",
  "tan", "teal", "thistle", "tomato", "turquoise",
  "violet",
  "wheat", "white", "whitesmoke",
  "yellow", "yellowgreen",
  // Special values
  "transparent", "currentcolor", "inherit", "initial", "unset",
]);
|
|
387
|
+
|
|
388
|
+
/**
 * Decide whether a string is an acceptable CSS color value.
 *
 * The input is trimmed and lowercased, then accepted if it is:
 * - a name present in `CSS_NAMED_COLORS`;
 * - hex notation: `#RGB`, `#RGBA`, `#RRGGBB`, or `#RRGGBBAA`;
 * - `rgb()` / `rgba()` with three 1-3 digit integers and an optional alpha;
 * - `hsl()` / `hsla()` with a 1-3 digit hue, two percentages, and an
 *   optional alpha.
 *
 * The optional alpha must be `0`, `1`, or a decimal such as `0.5` / `.5`.
 * Everything else -- including semicolons, `url()`, and `expression()` --
 * is rejected.
 *
 * @param color - The CSS color value to validate.
 * @returns `true` if the value is a recognized safe color format.
 */
export function isValidCssColor(color: string): boolean {
  const candidate = color.trim().toLowerCase();

  if (candidate === "") return false;
  if (CSS_NAMED_COLORS.has(candidate)) return true;

  // Hex colors: #RGB, #RGBA, #RRGGBB, #RRGGBBAA
  const isShortHex = /^#[0-9a-f]{3}([0-9a-f])?$/.test(candidate);
  if (isShortHex) return true;
  const isLongHex = /^#[0-9a-f]{6}([0-9a-f]{2})?$/.test(candidate);
  if (isLongHex) return true;

  // Split the function name from its argument list first, so the argument
  // patterns below do not need repeated `\s*` quantifiers.
  const call = /^(rgba?|hsla?)\(([^)]*)\)$/.exec(candidate);
  if (call === null) return false;

  const fnName = call[1]!;
  // Whitespace is removed only around the comma-separated parts, never
  // from inside a token.
  const compactArgs = call[2]!
    .split(",")
    .map((part) => part.trim())
    .join(",");

  const argPattern = fnName.startsWith("rgb")
    ? /^\d{1,3},\d{1,3},\d{1,3}(,(0|1|0?\.\d+))?$/
    : /^\d{1,3},\d{1,3}%,\d{1,3}%(,(0|1|0?\.\d+))?$/;
  return argPattern.test(compactArgs);
}
|
|
435
|
+
|
|
436
|
+
/**
 * Return a CSS color value if it validates, otherwise a fallback.
 *
 * Validation is delegated to {@link isValidCssColor}. A valid color is
 * returned exactly as passed in (it is not trimmed or lowercased); an
 * invalid one is replaced by `fallback`.
 *
 * @param color - The CSS color value to sanitize.
 * @param fallback - The value to return if validation fails (default `"inherit"`).
 * @returns The original color if valid, otherwise the fallback.
 */
export function sanitizeCssColor(color: string, fallback = "inherit"): string {
  if (isValidCssColor(color)) {
    return color;
  }
  return fallback;
}
|
|
450
|
+
|
|
451
|
+
/**
 * Reduce a CSS value to a canonical form for pattern matching.
 *
 * Attackers can disguise dangerous patterns with CSS comments
 * (`/* ... *\/`), escape sequences (`\75rl` for `url`), and line
 * continuations. This function undoes those so that the checks in
 * {@link isDangerousCssValue} see a single canonical spelling.
 *
 * Steps, in order:
 * 1. remove `/* ... *\/` comments;
 * 2. remove line continuations (backslash followed by a newline);
 * 3. decode hex escapes (`\` + 1-6 hex digits + optional whitespace char);
 *    code point 0 and values above U+10FFFF decode to nothing;
 * 4. unwrap remaining `\char` escapes to `char`;
 * 5. remove all whitespace and control characters;
 * 6. lowercase (last, so decoded uppercase letters are folded too).
 *
 * @param value - The raw CSS property value.
 * @returns The normalized, lowercase, whitespace-free representation.
 */
function normalizeCssValue(value: string): string {
  const decodeHexEscape = (_match: string, digits: string): string => {
    const codePoint = Number.parseInt(digits, 16);
    if (codePoint <= 0 || codePoint > 0x10ffff) return "";
    return String.fromCodePoint(codePoint);
  };

  return value
    .replace(/\/\*[\s\S]*?\*\//g, "")
    .replace(/\\(?:\r\n|[\n\r\f])/g, "")
    .replace(/\\([0-9a-f]{1,6})\s?/gi, decodeHexEscape)
    .replace(/\\(.)/g, "$1")
    .replace(/[\s\u0000-\u001f\u007f-\u009f]/g, "")
    .toLowerCase();
}
|
|
489
|
+
|
|
490
|
+
/**
 * Allowlist check for the raw text found inside a `url(...)` token.
 *
 * Wikidot itself allows arbitrary URLs (including `javascript:` and
 * `expression()`) in `style` attributes; that cannot be matched without
 * re-introducing XSS, so only the following forms are accepted:
 *
 * - empty (after removing quotes) — treated as harmless;
 * - `#fragment` — same-document reference;
 * - `./path`, `../path`, `/path` — relative to the document;
 * - `//host/...`, `http://`, `https://` — loaded over the network;
 * - `data:image/{png,jpeg,jpg,gif,webp}` — inline raster image. The MIME
 *   subtype is everything up to the first `;` or `,` (or end of string)
 *   and must match exactly, so `data:image/png+xml` and
 *   `data:image/svg+xml` are rejected.
 *
 * Anything else (`javascript:`, `vbscript:`, other `data:` types, bare
 * relative names) is rejected.
 *
 * The input is assumed to come from a normalised CSS value (escapes,
 * comments, whitespace and control chars removed, lowercased), so the
 * only pre-processing done here is removing one layer of matching
 * surrounding `"` or `'` quotes.
 *
 * @param rawUrl - The text between `url(` and its closing `)`.
 * @returns `true` if the URL is one of the accepted forms.
 */
function isUrlAllowed(rawUrl: string): boolean {
  const wrappedIn = (quote: string): boolean =>
    rawUrl.length >= 2 && rawUrl.startsWith(quote) && rawUrl.endsWith(quote);
  const url = wrappedIn('"') || wrappedIn("'") ? rawUrl.slice(1, -1) : rawUrl;

  // Empty url() is treated as harmless (Wikidot pass-through)
  if (url === "") return true;

  // Fragment, path-relative, protocol-relative, root-relative, http(s)
  const safePrefixes = ["#", "./", "../", "//", "/", "http://", "https://"];
  if (safePrefixes.some((prefix) => url.startsWith(prefix))) return true;

  // data: URLs — only raster image MIME types, matched exactly.
  const dataImagePrefix = "data:image/";
  if (!url.startsWith(dataImagePrefix)) return false;

  const subtype = url.slice(dataImagePrefix.length).split(/[;,]/, 1)[0] ?? "";
  return ["png", "jpeg", "jpg", "gif", "webp"].includes(subtype);
}
|
|
564
|
+
|
|
565
|
+
/**
 * Yield the inner text of every `url(...)` occurrence in a normalised
 * CSS value (the parentheses themselves are excluded).
 *
 * While scanning for the closing `)`, nested parentheses are counted and
 * `"` / `'` quoted regions are skipped, so a `)` inside a quoted URL
 * (e.g. `url("https://example.com/a)b.png")`) does not end the token
 * early.
 *
 * Each yielded record is `{ inner, malformed }`. `malformed` is `true`
 * when the end of the string is reached before the `url(` is closed; in
 * that case `inner` is everything after `url(`, iteration stops, and the
 * caller should treat the value as dangerous (fail-closed).
 *
 * @param normalized - A CSS value already passed through normalisation.
 */
function* iterateUrls(normalized: string): Generator<{ inner: string; malformed: boolean }> {
  const opener = "url(";
  let cursor = 0;

  for (;;) {
    if (cursor >= normalized.length) return;

    const start = normalized.indexOf(opener, cursor);
    if (start < 0) return;

    const bodyStart = start + opener.length;
    let openParens = 1;
    let activeQuote: string | null = null;
    let pos = bodyStart;

    // Advance until the matching `)` has been consumed or input runs out.
    for (; pos < normalized.length && openParens > 0; pos++) {
      const c = normalized[pos];
      if (activeQuote !== null) {
        // Inside a quoted region only the matching quote is significant.
        if (c === activeQuote) activeQuote = null;
        continue;
      }
      switch (c) {
        case '"':
        case "'":
          activeQuote = c;
          break;
        case "(":
          openParens++;
          break;
        case ")":
          openParens--;
          break;
      }
    }

    if (openParens > 0) {
      // Unclosed url(
      yield { inner: normalized.slice(bodyStart), malformed: true };
      return;
    }

    // `pos` is one past the closing paren, so the body ends at `pos - 1`.
    yield { inner: normalized.slice(bodyStart, pos - 1), malformed: false };
    cursor = pos;
  }
}
|
|
611
|
+
|
|
612
|
+
/**
 * Check whether a CSS property value contains dangerous patterns that
 * could enable script execution or external resource loading.
 *
 * The value is normalized via `normalizeCssValue()` to resolve CSS
 * escapes and comments, then checked against a blocklist:
 * - `url(...)` -- only allowed when the inner URL passes
 *   {@link isUrlAllowed} (raster images, http(s), relative paths).
 *   Malformed `url(` (no closing paren) is treated as dangerous.
 * - `expression()` -- blocks IE's CSS expression evaluation
 * - `-moz-binding` -- blocks Firefox XBL binding injection
 * - `behavior:` -- blocks IE behavior attachment
 * - `@import` -- blocks external stylesheet loading
 *
 * The checks are run on two normalized forms of the value: one with
 * comments removed and one with comment bodies kept. The normalizer's
 * comment removal is not aware of quoted strings, so text between a
 * slash-star inside one string and a star-slash inside a later string
 * is dropped by it even though a browser still parses that text, e.g.
 * `url("/*"),url(javascript:alert(1)),url("*\/")`. Checking the
 * comment-retaining form as well closes that gap. The value is reported
 * dangerous if either form matches (fail-closed), which also means a
 * dangerous pattern inside a genuine comment causes rejection.
 *
 * @param value - The CSS property value to check.
 * @returns `true` if the value contains a dangerous pattern and should be removed.
 */
export function isDangerousCssValue(value: string): boolean {
  const hasDangerousPattern = (normalized: string): boolean => {
    for (const { inner, malformed } of iterateUrls(normalized)) {
      if (malformed) return true;
      if (!isUrlAllowed(inner)) return true;
    }

    // Block expression() (IE)
    if (normalized.includes("expression(")) return true;

    // Block -moz-binding (Firefox)
    if (normalized.includes("-moz-binding")) return true;

    // Block behavior (IE)
    if (normalized.includes("behavior:")) return true;

    // Block @import (can load external stylesheets)
    if (normalized.includes("@import")) return true;

    return false;
  };

  // Form 1: comments removed (catches comment-split keywords).
  if (hasDangerousPattern(normalizeCssValue(value))) return true;

  // Form 2: comment bodies kept. Separating the two characters of each
  // comment opener with a space stops the normalizer from recognising
  // any comment; the space is discarded again by its whitespace removal.
  if (value.includes("/*")) {
    const commentsKept = normalizeCssValue(value.replace(/\/\*/g, "/ *"));
    if (hasDangerousPattern(commentsKept)) return true;
  }

  return false;
}
|
|
651
|
+
|
|
652
|
+
/**
 * Split a CSS style attribute value into individual declarations,
 * respecting parentheses, quoted strings, and backslash escapes.
 *
 * A simple `split(";")` would corrupt declarations whose value
 * contains `;` inside a `url(...)` invocation, e.g. a base64 data URL
 * passed via a CSS custom property:
 *
 * ```css
 * --logo: url(data:image/png;base64,iVBORw0KGgo...)
 * ```
 *
 * This walker only splits on `;` when not inside `(...)` and not inside
 * a `"..."` / `'...'` string. To stay in step with how a CSS tokenizer
 * reads the same text:
 * - a backslash and the character after it are copied verbatim, so an
 *   escaped quote (`\"`) does not open or close a string and an escaped
 *   `;` does not split;
 * - an unescaped newline inside a string ends the string (CSS treats
 *   that as a bad string and resumes normal tokenizing).
 *
 * @param style - The raw `style` attribute value.
 * @returns The declarations in order, without the separating semicolons.
 *   Empty segments between adjacent semicolons are included; a trailing
 *   empty segment is not.
 */
function splitDeclarations(style: string): string[] {
  const out: string[] = [];
  let buf = "";
  let parenDepth = 0;
  let quoteChar: string | null = null;

  for (let i = 0; i < style.length; i++) {
    const ch = style.charAt(i);

    if (ch === "\\") {
      // Copy the escape introducer and the escaped character untouched.
      buf += ch;
      if (i + 1 < style.length) {
        i++;
        buf += style.charAt(i);
        // Backslash + CRLF is a single line continuation.
        if (style.charAt(i) === "\r" && style.charAt(i + 1) === "\n") {
          i++;
          buf += "\n";
        }
      }
      continue;
    }
    if (quoteChar !== null) {
      buf += ch;
      if (ch === quoteChar || ch === "\n" || ch === "\r" || ch === "\f") {
        quoteChar = null;
      }
      continue;
    }
    if (ch === '"' || ch === "'") {
      quoteChar = ch;
      buf += ch;
      continue;
    }
    if (ch === "(") {
      parenDepth++;
      buf += ch;
      continue;
    }
    if (ch === ")") {
      if (parenDepth > 0) parenDepth--;
      buf += ch;
      continue;
    }
    if (ch === ";" && parenDepth === 0) {
      out.push(buf);
      buf = "";
      continue;
    }
    buf += ch;
  }
  if (buf.length > 0) out.push(buf);
  return out;
}

/**
 * Sanitize a `style` attribute value by removing dangerous declarations
 * while preserving safe ones.
 *
 * Splits the value into individual declarations (see
 * `splitDeclarations`), then drops any declaration that:
 * - has no `:` separator;
 * - has a value rejected by {@link isDangerousCssValue};
 * - has a property name that, after normalization, contains a quote,
 *   `;`, parenthesis, or brace (never valid in a property name, and a
 *   sign that declaration splitting was subverted);
 * - has a normalized property name starting with `-moz-binding` or
 *   equal to `behavior`.
 *
 * Surviving declarations are emitted with their original text (trimmed)
 * and joined with `;`. If the input ended with a semicolon, the output
 * will too (matching Wikidot's pass-through behavior for user-authored
 * styles).
 *
 * @param style - The raw `style` attribute value.
 * @returns The sanitized style string with dangerous declarations removed,
 *   or an empty string if nothing is safe.
 */
export function sanitizeStyleValue(style: string): string {
  // Remember if original ends with semicolon (Wikidot preserves this)
  const endsWithSemicolon = style.trimEnd().endsWith(";");

  // Split by semicolon (respecting parens/quotes/escapes) into declarations
  const declarations = splitDeclarations(style)
    .map((d) => d.trim())
    .filter(Boolean);
  const safe: string[] = [];

  for (const decl of declarations) {
    const colonIdx = decl.indexOf(":");
    if (colonIdx === -1) continue;

    const property = decl.slice(0, colonIdx).trim();
    const value = decl.slice(colonIdx + 1).trim();

    // Skip if value contains dangerous patterns
    if (isDangerousCssValue(value)) continue;

    // Skip dangerous properties. CSS allows escape sequences inside
    // property names too (e.g. `-mo\7a-binding` → `-moz-binding`), so we
    // run them through the same normaliser as values before matching.
    const normalisedProperty = normalizeCssValue(property);
    // Fail closed on names no real property can have; such a "name" can
    // carry a blocked property behind a stray quote or semicolon.
    if (/["';(){}]/.test(normalisedProperty)) continue;
    if (normalisedProperty.startsWith("-moz-binding")) continue;
    if (normalisedProperty === "behavior") continue;

    // Keep original format (Wikidot outputs input as is)
    safe.push(decl);
  }

  if (safe.length === 0) return "";

  // Preserve original trailing semicolon format
  return endsWithSemicolon ? safe.join(";") + ";" : safe.join(";");
}
|
|
758
|
+
|
|
759
|
+
/**
 * Check that a string has the shape of a safe email address.
 *
 * The pattern is intentionally narrow: it covers most real-world
 * addresses while excluding characters that could enable injection when
 * the address is placed in a `mailto:` link.
 *
 * - local part: letters, digits, `.`, `_`, `+`, `-`
 * - domain: letters, digits, `.`, `-`, ending in `.` plus at least two letters
 *
 * The percent character (`%`) is excluded on purpose: `mailto:` URLs are
 * percent-decoded, so an address like `a%0d%0abcc%3aevil@example.com`
 * would decode to a BCC header injection. Spaces, colons, and angle
 * brackets are likewise not accepted.
 *
 * @param email - The email string to validate.
 * @returns `true` if the email matches the safe pattern.
 */
export function isValidEmail(email: string): boolean {
  const safeAddress = /^[a-zA-Z0-9._+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/;
  return safeAddress.test(email);
}
|
|
781
|
+
|
|
782
|
+
/**
 * HTML attribute names whose values the browser interprets as URLs.
 *
 * {@link sanitizeAttributes} runs the value of any attribute listed here
 * through {@link isDangerousUrl} and drops the attribute when the check
 * reports a dangerous scheme.
 */
const URL_ATTRIBUTES = new Set<string>([
  "href", "src", "action", "formaction", "srcset", "poster", "background",
]);
|
|
796
|
+
|
|
797
|
+
/**
 * Build a new attribute map that contains only the entries passing every
 * safety check.
 *
 * Per attribute, in order:
 * 1. the name must pass {@link isSafeAttribute}, otherwise it is dropped;
 * 2. if the lowercased name is in `URL_ATTRIBUTES` and the value is
 *    flagged by {@link isDangerousUrl}, it is dropped;
 * 3. a `style` attribute has its value replaced by the result of
 *    {@link sanitizeStyleValue}, and is dropped if that result is empty;
 * 4. any other attribute is copied through unchanged.
 *
 * Keys are written to the result with their original casing. The input
 * map is not modified.
 *
 * @param attributes - The raw attribute name-value map to sanitize.
 * @returns A new map containing only the safe attributes and their (possibly sanitized) values.
 */
export function sanitizeAttributes(attributes: Record<string, string>): Record<string, string> {
  const cleaned: Record<string, string> = {};

  for (const [name, rawValue] of Object.entries(attributes)) {
    if (!isSafeAttribute(name)) continue;

    const canonicalName = name.toLowerCase();

    // URL-bearing attributes must not use a dangerous scheme
    const carriesUrl = URL_ATTRIBUTES.has(canonicalName);
    if (carriesUrl && isDangerousUrl(rawValue)) continue;

    if (canonicalName !== "style") {
      cleaned[name] = rawValue;
      continue;
    }

    // Inline styles are filtered declaration by declaration; an empty
    // result means nothing safe was left, so the attribute is omitted.
    const filteredStyle = sanitizeStyleValue(rawValue);
    if (filteredStyle) {
      cleaned[name] = filteredStyle;
    }
  }

  return cleaned;
}
|