@wdprlib/render 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/dist/index.cjs +11 -387
  2. package/dist/index.js +2 -378
  3. package/package.json +5 -3
  4. package/src/context.ts +422 -0
  5. package/src/elements/bibliography.ts +123 -0
  6. package/src/elements/clear-float.ts +27 -0
  7. package/src/elements/code.ts +49 -0
  8. package/src/elements/collapsible.ts +105 -0
  9. package/src/elements/color.ts +32 -0
  10. package/src/elements/container.ts +302 -0
  11. package/src/elements/date.ts +59 -0
  12. package/src/elements/embed-block.ts +327 -0
  13. package/src/elements/embed.ts +166 -0
  14. package/src/elements/expr.ts +102 -0
  15. package/src/elements/footnote.ts +76 -0
  16. package/src/elements/html.ts +79 -0
  17. package/src/elements/iframe.ts +44 -0
  18. package/src/elements/iftags.ts +118 -0
  19. package/src/elements/image.ts +154 -0
  20. package/src/elements/include.ts +43 -0
  21. package/src/elements/index.ts +35 -0
  22. package/src/elements/line-break.ts +22 -0
  23. package/src/elements/link.ts +201 -0
  24. package/src/elements/list.ts +241 -0
  25. package/src/elements/math.ts +177 -0
  26. package/src/elements/module/backlinks.ts +28 -0
  27. package/src/elements/module/categories.ts +27 -0
  28. package/src/elements/module/index.ts +67 -0
  29. package/src/elements/module/join.ts +33 -0
  30. package/src/elements/module/listpages.ts +27 -0
  31. package/src/elements/module/listusers.ts +27 -0
  32. package/src/elements/module/page-tree.ts +27 -0
  33. package/src/elements/module/rate.ts +44 -0
  34. package/src/elements/tab-view.ts +75 -0
  35. package/src/elements/table.ts +101 -0
  36. package/src/elements/text.ts +57 -0
  37. package/src/elements/toc.ts +147 -0
  38. package/src/elements/user.ts +79 -0
  39. package/src/escape.ts +829 -0
  40. package/src/hash.ts +62 -0
  41. package/src/index.ts +26 -0
  42. package/src/libs/highlighter/engine.ts +352 -0
  43. package/src/libs/highlighter/index.ts +70 -0
  44. package/src/libs/highlighter/languages/cpp.ts +345 -0
  45. package/src/libs/highlighter/languages/css.ts +104 -0
  46. package/src/libs/highlighter/languages/diff.ts +154 -0
  47. package/src/libs/highlighter/languages/dtd.ts +99 -0
  48. package/src/libs/highlighter/languages/html.ts +59 -0
  49. package/src/libs/highlighter/languages/java.ts +251 -0
  50. package/src/libs/highlighter/languages/javascript.ts +213 -0
  51. package/src/libs/highlighter/languages/php.ts +433 -0
  52. package/src/libs/highlighter/languages/python.ts +308 -0
  53. package/src/libs/highlighter/languages/ruby.ts +360 -0
  54. package/src/libs/highlighter/languages/sql.ts +125 -0
  55. package/src/libs/highlighter/languages/xml.ts +68 -0
  56. package/src/libs/highlighter/types.ts +44 -0
  57. package/src/render.ts +231 -0
  58. package/src/types.ts +140 -0
package/src/escape.ts ADDED
@@ -0,0 +1,829 @@
1
+ /**
2
+ *
3
+ * HTML, CSS, URL, and attribute sanitization utilities for the render pipeline.
4
+ *
5
+ * Every piece of user-supplied content that flows into the HTML output must
6
+ * pass through one of these functions to prevent Cross-Site Scripting (XSS)
7
+ * and CSS injection attacks.
8
+ *
9
+ * The module provides several layers of defense:
10
+ * - Text escaping ({@link escapeHtml}, {@link escapeAttr}, {@link escapeJsString})
11
+ * - URL scheme blocking ({@link isDangerousUrl}) against `javascript:`, `data:`, `vbscript:`
12
+ * - Attribute allowlisting ({@link isSafeAttribute}) to block event handlers (`on*`)
13
+ * - CSS value sanitization ({@link isDangerousCssValue}, {@link sanitizeStyleValue})
14
+ * with normalization to defeat CSS escape/comment bypass techniques
15
+ * - Composite attribute sanitization ({@link sanitizeAttributes}) combining all checks
16
+ *
17
+ * @module
18
+ */
19
+
20
/**
 * Neutralise `&`, `<`, and `>` so arbitrary text can be placed inside an
 * HTML text node without being interpreted as markup.
 *
 * Quotation marks are left untouched, so the result is only safe as text
 * content. Attribute values need {@link escapeAttr} instead.
 *
 * @param text - The raw text to escape.
 * @returns The text with the three markup-significant characters replaced
 *   by their named entities.
 */
export function escapeHtml(text: string): string {
  // One pass over the input; each special character maps to exactly one entity.
  return text.replace(/[&<>]/g, (ch) => {
    if (ch === "&") return "&amp;";
    return ch === "<" ? "&lt;" : "&gt;";
  });
}
32
+
33
/**
 * Make a string safe to embed as an HTML attribute value.
 *
 * On top of the characters handled by {@link escapeHtml} (`&`, `<`, `>`),
 * both `"` and `'` are replaced, so the result cannot terminate the
 * attribute whichever quote character delimits it.
 *
 * @param value - The raw attribute value to escape.
 * @returns The value with all five characters replaced by entities.
 */
export function escapeAttr(value: string): string {
  return value.replace(/[&<>"']/g, (ch) => {
    switch (ch) {
      case "&":
        return "&amp;";
      case "<":
        return "&lt;";
      case ">":
        return "&gt;";
      case '"':
        return "&quot;";
      default:
        // Only the single quote remains in the character class.
        return "&#39;";
    }
  });
}
51
+
52
/**
 * Prevent CSS text from closing the `<style>` element it is embedded in.
 *
 * Without this, CSS containing `</style><script>...` would end the style
 * element early and inject a script. Every `</style` sequence, in any
 * letter case, is rewritten to the lowercase form `<\/style`, which the
 * HTML parser does not recognise as an end tag.
 *
 * @param css - The raw CSS text to sanitize.
 * @returns The CSS text with every `</style` occurrence defused.
 */
export function escapeStyleContent(css: string): string {
  const closingStyleTag = /<\/style/gi;
  const defused = "<\\/style";
  return css.replace(closingStyleTag, () => defused);
}
67
+
68
/**
 * Escape a value so it can sit inside a JavaScript string literal that is
 * itself written into an HTML attribute (e.g. `onclick="fn('...')"`).
 *
 * Characters that could end the JS string or the surrounding attribute are
 * rewritten as JS escape sequences: backslash is doubled; quotes, angle
 * brackets and ampersand become `\xNN`; line feed and carriage return become
 * `\n` / `\r`; U+2028 and U+2029 become `\uNNNN`.
 *
 * @param value - The raw string to escape.
 * @returns The escaped string, safe inside a JS string literal in HTML.
 */
export function escapeJsString(value: string): string {
  // Single pass: every character is mapped independently, so the inserted
  // backslashes are never re-escaped.
  return value.replace(/[\\'"<>&\n\r\u2028\u2029]/g, (ch) => {
    switch (ch) {
      case "\\":
        return "\\\\";
      case "'":
        return "\\x27";
      case '"':
        return "\\x22";
      case "<":
        return "\\x3c";
      case ">":
        return "\\x3e";
      case "&":
        return "\\x26";
      case "\n":
        return "\\n";
      case "\r":
        return "\\r";
      case "\u2028":
        return "\\u2028";
      default:
        // Only U+2029 remains in the character class.
        return "\\u2029";
    }
  });
}
93
+
94
/**
 * Allowlist of HTML attribute names considered safe for rendering.
 *
 * Based on the attributes that Wikidot permits users to set via markup.
 * Event handler attributes (`on*`) are explicitly blocked in
 * {@link isSafeAttribute}. The `aria-*` and `data-*` prefixes are
 * allowed dynamically rather than being listed here.
 */
const SAFE_ATTRIBUTES = new Set([
  "accept",
  "align",
  "alt",
  "autocapitalize",
  "autoplay",
  "background",
  "bgcolor",
  "border",
  "buffered",
  "checked",
  "cite",
  "class",
  "cols",
  "colspan",
  "contenteditable",
  "controls",
  "coords",
  "datetime",
  "decoding",
  "default",
  "dir",
  "dirname",
  "disabled",
  "download",
  "draggable",
  "for",
  "form",
  "headers",
  "height",
  "hidden",
  "high",
  "href",
  "hreflang",
  "id",
  "inputmode",
  "ismap",
  "itemprop",
  "kind",
  "label",
  "lang",
  "list",
  "loop",
  "low",
  "max",
  "maxlength",
  "min",
  "minlength",
  "multiple",
  "muted",
  "name",
  "optimum",
  "pattern",
  "placeholder",
  "poster",
  "preload",
  "readonly",
  "required",
  "reversed",
  "role",
  "rows",
  "rowspan",
  "scope",
  "selected",
  "shape",
  "size",
  "sizes",
  "span",
  "spellcheck",
  "src",
  "srclang",
  "srcset",
  "start",
  "step",
  "style",
  "tabindex",
  "target",
  "title",
  "translate",
  "type",
  "usemap",
  "value",
  "width",
  "wrap",
]);

/**
 * Characters that must never appear in an attribute name: whitespace,
 * quotes, angle brackets, `/`, `=`, and C0/C1 control characters.
 *
 * If a name containing any of these were written into markup verbatim it
 * could terminate the attribute and start a new one
 * (e.g. `data-x" onmouseover="..."`).
 */
const UNSAFE_ATTRIBUTE_NAME_CHARS = /[\s"'<>\/=\u0000-\u001f\u007f-\u009f]/;

/**
 * Check whether an HTML attribute name is safe to include in rendered output.
 *
 * The check applies four rules in order:
 * 1. Reject names containing characters that are not legal in an HTML
 *    attribute name (whitespace, quotes, `<`, `>`, `/`, `=`, controls).
 *    This matters for the prefix rule below, which would otherwise accept
 *    any suffix after `aria-` / `data-`.
 * 2. Block all event handlers (`on*` prefix) unconditionally
 * 3. Allow accessibility (`aria-*`) and custom data (`data-*`) attributes
 * 4. Allow only attributes in the `SAFE_ATTRIBUTES` allowlist
 *
 * @param name - The attribute name to validate (case-insensitive).
 * @returns `true` if the attribute is safe to render.
 */
export function isSafeAttribute(name: string): boolean {
  const lower = name.toLowerCase();
  // Reject syntactically invalid names before any prefix-based allow rule.
  if (UNSAFE_ATTRIBUTE_NAME_CHARS.test(lower)) return false;
  // Block all event handlers
  if (lower.startsWith("on")) return false;
  // Allow aria-* and data-* prefixes
  if (lower.startsWith("aria-") || lower.startsWith("data-")) return true;
  return SAFE_ATTRIBUTES.has(lower);
}
207
+
208
/**
 * Report whether a URL begins with a script-capable scheme:
 * `javascript:`, `data:`, or `vbscript:` (matched case-insensitively).
 *
 * All whitespace and control characters (U+0000-U+001F, U+007F-U+009F) are
 * removed before the scheme is inspected, so obfuscated forms such as
 * `"java\nscript:"` or `"java\x00script:"` are recognised as well.
 *
 * @param value - The URL string to check.
 * @returns `true` if the URL uses one of the blocked schemes.
 */
export function isDangerousUrl(value: string): boolean {
  const ignorableChars = /[\s\u0000-\u001f\u007f-\u009f]/g;
  const blockedScheme = /^(javascript|data|vbscript):/i;

  const compact = value.replace(ignorableChars, "");
  return blockedScheme.test(compact);
}
223
+
224
/**
 * Keyword color values accepted by {@link isValidCssColor}.
 *
 * Holds the 148 CSS named colors, the color keywords `transparent` and
 * `currentcolor`, and the global keywords `inherit`, `initial` and `unset`.
 * All entries are lowercase; callers must lowercase before lookup.
 */
const CSS_NAMED_COLORS = new Set([
  // a
  "aliceblue", "antiquewhite", "aqua", "aquamarine", "azure",
  // b
  "beige", "bisque", "black", "blanchedalmond", "blue", "blueviolet", "brown", "burlywood",
  // c
  "cadetblue", "chartreuse", "chocolate", "coral", "cornflowerblue", "cornsilk", "crimson", "cyan",
  // d
  "darkblue", "darkcyan", "darkgoldenrod", "darkgray", "darkgreen", "darkgrey", "darkkhaki",
  "darkmagenta", "darkolivegreen", "darkorange", "darkorchid", "darkred", "darksalmon",
  "darkseagreen", "darkslateblue", "darkslategray", "darkslategrey", "darkturquoise",
  "darkviolet", "deeppink", "deepskyblue", "dimgray", "dimgrey", "dodgerblue",
  // f
  "firebrick", "floralwhite", "forestgreen", "fuchsia",
  // g
  "gainsboro", "ghostwhite", "gold", "goldenrod", "gray", "green", "greenyellow", "grey",
  // h
  "honeydew", "hotpink",
  // i
  "indianred", "indigo", "ivory",
  // k
  "khaki",
  // l
  "lavender", "lavenderblush", "lawngreen", "lemonchiffon", "lightblue", "lightcoral",
  "lightcyan", "lightgoldenrodyellow", "lightgray", "lightgreen", "lightgrey", "lightpink",
  "lightsalmon", "lightseagreen", "lightskyblue", "lightslategray", "lightslategrey",
  "lightsteelblue", "lightyellow", "lime", "limegreen", "linen",
  // m
  "magenta", "maroon", "mediumaquamarine", "mediumblue", "mediumorchid", "mediumpurple",
  "mediumseagreen", "mediumslateblue", "mediumspringgreen", "mediumturquoise",
  "mediumvioletred", "midnightblue", "mintcream", "mistyrose", "moccasin",
  // n
  "navajowhite", "navy",
  // o
  "oldlace", "olive", "olivedrab", "orange", "orangered", "orchid",
  // p
  "palegoldenrod", "palegreen", "paleturquoise", "palevioletred", "papayawhip", "peachpuff",
  "peru", "pink", "plum", "powderblue", "purple",
  // r
  "rebeccapurple", "red", "rosybrown", "royalblue",
  // s
  "saddlebrown", "salmon", "sandybrown", "seagreen", "seashell", "sienna", "silver", "skyblue",
  "slateblue", "slategray", "slategrey", "snow", "springgreen", "steelblue",
  // t
  "tan", "teal", "thistle", "tomato", "turquoise",
  // v
  "violet",
  // w
  "wheat", "white", "whitesmoke",
  // y
  "yellow", "yellowgreen",
  // Special values
  "transparent", "currentcolor", "inherit", "initial", "unset",
]);

/**
 * Decide whether a string is one of the CSS color forms this module accepts.
 *
 * After trimming and lowercasing, the value is accepted when it is:
 * - a keyword listed in `CSS_NAMED_COLORS`;
 * - hex notation with 3, 4, 6 or 8 hex digits after `#`;
 * - `rgb(...)` / `rgba(...)` with three comma-separated 1-3 digit integers
 *   and an optional alpha of `0`, `1`, or a decimal fraction such as `.5`
 *   or `0.25`;
 * - `hsl(...)` / `hsla(...)` with a 1-3 digit hue, two 1-3 digit
 *   percentages, and the same optional alpha.
 *
 * Anything else is rejected, including values containing semicolons,
 * `url()` or `expression()`.
 *
 * @param color - The CSS color value to validate.
 * @returns `true` if the value matches one of the accepted forms.
 */
export function isValidCssColor(color: string): boolean {
  const candidate = color.trim().toLowerCase();
  if (candidate === "") return false;

  if (CSS_NAMED_COLORS.has(candidate)) return true;

  // #RGB, #RGBA, #RRGGBB, #RRGGBBAA
  if (/^#(?:[0-9a-f]{3,4}|[0-9a-f]{6}|[0-9a-f]{8})$/.test(candidate)) return true;

  // Capture the function name and the raw argument text separately; the
  // arguments are validated after whitespace around commas is removed, which
  // keeps the patterns below free of repeated optional-whitespace groups.
  const call = /^(rgba?|hsla?)\(([^)]*)\)$/.exec(candidate);
  if (call === null) return false;

  const fnName = call[1]!;
  const compactArgs = call[2]!
    .split(",")
    .map((part) => part.trim())
    .join(",");

  const argPattern = fnName.startsWith("rgb")
    ? /^\d{1,3},\d{1,3},\d{1,3}(,(0|1|0?\.\d+))?$/
    : /^\d{1,3},\d{1,3}%,\d{1,3}%(,(0|1|0?\.\d+))?$/;
  return argPattern.test(compactArgs);
}
435
+
436
/**
 * Return a CSS color only if it passes {@link isValidCssColor}; otherwise
 * return a fallback.
 *
 * A valid color is returned exactly as passed in (it is not trimmed or
 * lowercased).
 *
 * @param color - The CSS color value to sanitize.
 * @param fallback - Value returned when validation fails (default `"inherit"`).
 * @returns `color` when it is valid, otherwise `fallback`.
 */
export function sanitizeCssColor(color: string, fallback = "inherit"): string {
  if (!isValidCssColor(color)) {
    return fallback;
  }
  return color;
}
450
+
451
/**
 * Normalize a CSS value by resolving escape sequences, optionally removing
 * comments, stripping whitespace and control characters, and lowercasing.
 *
 * This normalization is critical for security: attackers can use CSS
 * comments, escape sequences (`\75rl` for `url`), and line continuations
 * to disguise dangerous patterns. By normalizing first, the downstream
 * checks in {@link isDangerousCssValue} operate on a canonical
 * representation.
 *
 * @param value - The raw CSS property value.
 * @param stripComments - When `true` (the default) slash-star comments are
 *   removed first. Pass `false` to keep comment text in the result; this is
 *   needed because comment delimiters inside a quoted string are NOT
 *   comments to a browser, so stripping them can hide text the browser
 *   will actually see.
 * @returns The normalized, lowercase, whitespace-free representation.
 */
function normalizeCssValue(value: string, stripComments = true): string {
  let result = value;

  if (stripComments) {
    // Remove CSS comments: /* ... */
    result = result.replace(/\/\*[\s\S]*?\*\//g, "");
  }

  // Remove CSS line continuations: backslash followed by newline
  result = result.replace(/\\(?:\r\n|[\n\r\f])/g, "");

  // Decode CSS hex escapes: 1-6 hex digits with one optional trailing
  // whitespace character. Zero and out-of-range code points are dropped.
  result = result.replace(/\\([0-9a-f]{1,6})\s?/gi, (_match, hex: string) => {
    const code = Number.parseInt(hex, 16);
    return code > 0 && code <= 0x10ffff ? String.fromCodePoint(code) : "";
  });

  // Remove remaining backslash escapes: \char -> char
  result = result.replace(/\\(.)/g, "$1");

  // Remove whitespace and control characters
  result = result.replace(/[\s\u0000-\u001f\u007f-\u009f]/g, "");

  // Lowercase at the end to catch decoded uppercase chars (e.g., \55 -> U)
  return result.toLowerCase();
}

/**
 * Allowlist check for a raw URL string extracted from a `url(...)` token.
 *
 * Wikidot itself allows arbitrary URLs (including `javascript:` and
 * `expression()`) in `style` attributes, but we cannot match that
 * exactly without re-introducing XSS. The schemes permitted below are
 * the ones a CSS-side `url(...)` needs to actually fetch an image or
 * background:
 *
 * - `http://`, `https://`, `//host/...` — load over the network.
 * - `/path`, `./path`, `../path` — load relative to the document.
 * - `#fragment` — same-document reference.
 * - `data:image/{png,jpeg,jpg,gif,webp}` — inline raster image.
 *   SVG is excluded because SVG documents can embed `<script>` and
 *   event handlers.
 * - the empty string (an empty `url()`), treated as harmless.
 *
 * Everything else (`javascript:`, `vbscript:`, `data:text/...`,
 * `data:application/...`, `data:image/svg+xml`, bare relative names)
 * is rejected.
 *
 * The input is assumed to come from a normalised CSS value (escapes,
 * whitespace, control chars stripped and lowercased), so this function
 * only needs to handle surrounding `"` / `'` quotes.
 *
 * @param rawUrl - The text found between `url(` and its closing `)`.
 * @returns `true` if the URL matches one of the allowed forms.
 */
function isUrlAllowed(rawUrl: string): boolean {
  let url = rawUrl;

  // Strip a single layer of matched surrounding quotes if present
  if (url.length >= 2) {
    const first = url[0];
    const last = url[url.length - 1];
    if ((first === '"' && last === '"') || (first === "'" && last === "'")) {
      url = url.slice(1, -1);
    }
  }

  // Empty url() is treated as harmless (Wikidot pass-through)
  if (url === "") return true;

  // Fragment
  if (url.startsWith("#")) return true;

  // Path-relative
  if (url.startsWith("./") || url.startsWith("../")) return true;

  // Protocol-relative `//host/...`
  if (url.startsWith("//")) return true;

  // Root-relative `/path`
  if (url.startsWith("/")) return true;

  if (url.startsWith("http://") || url.startsWith("https://")) return true;

  // data: URLs — only raster image MIME types.
  // The MIME subtype is the text up to the first `;` (params) or `,`
  // (start of data), so `data:image/png+xml` or `data:image/pngsomething`
  // cannot sneak through the allowlist with a misleading prefix.
  if (url.startsWith("data:image/")) {
    const after = url.slice("data:image/".length);
    const sep = Math.min(
      after.indexOf(";") === -1 ? after.length : after.indexOf(";"),
      after.indexOf(",") === -1 ? after.length : after.indexOf(","),
    );
    const mime = after.slice(0, sep);
    if (mime === "png" || mime === "jpeg" || mime === "jpg" || mime === "gif" || mime === "webp") {
      return true;
    }
  }

  return false;
}

/**
 * Extract every `url(...)` invocation from a normalised CSS value and
 * yield each raw inner string (parentheses excluded).
 *
 * The walker tracks `"` and `'` quoted regions so that `)` inside a
 * quoted URL string (e.g. `url("https://example.com/a)b.png")`) does
 * not close the `url(` prematurely. Within a quoted region paren
 * tracking is suspended. Nested `(` increase the depth, so the yielded
 * text runs to the `)` that balances the opening `url(`.
 *
 * Yields `{ inner, malformed }` records. `malformed` is `true` when a
 * `url(` had no matching closing `)`, which the caller should treat as
 * dangerous (fail-closed); iteration stops after a malformed record.
 *
 * @param normalized - A value already passed through `normalizeCssValue`.
 */
function* iterateUrls(normalized: string): Generator<{ inner: string; malformed: boolean }> {
  let searchPos = 0;
  while (searchPos < normalized.length) {
    const idx = normalized.indexOf("url(", searchPos);
    if (idx === -1) return;

    let depth = 1;
    let quoteChar: string | null = null;
    let i = idx + 4;
    while (i < normalized.length && depth > 0) {
      const ch = normalized[i];
      if (quoteChar !== null) {
        if (ch === quoteChar) quoteChar = null;
      } else if (ch === '"' || ch === "'") {
        quoteChar = ch;
      } else if (ch === "(") {
        depth++;
      } else if (ch === ")") {
        depth--;
      }
      i++;
    }

    if (depth > 0) {
      // Unclosed url(
      yield { inner: normalized.slice(idx + 4), malformed: true };
      return;
    }

    // `i` is one past the closing paren, so the inner text ends at i - 1.
    yield { inner: normalized.slice(idx + 4, i - 1), malformed: false };
    searchPos = i;
  }
}

/**
 * Run the blocklist checks against one already-normalised CSS value.
 *
 * @param normalized - Output of `normalizeCssValue`.
 * @returns `true` if any `url(...)` is malformed or not allowed, or if the
 *   text contains `expression(`, `-moz-binding`, `behavior:` or `@import`.
 */
function containsDangerousCssPattern(normalized: string): boolean {
  for (const { inner, malformed } of iterateUrls(normalized)) {
    if (malformed) return true;
    if (!isUrlAllowed(inner)) return true;
  }

  // Block expression() (IE)
  if (normalized.includes("expression(")) return true;

  // Block -moz-binding (Firefox)
  if (normalized.includes("-moz-binding")) return true;

  // Block behavior (IE)
  if (normalized.includes("behavior:")) return true;

  // Block @import (can load external stylesheets)
  if (normalized.includes("@import")) return true;

  return false;
}

/**
 * Check whether a CSS property value contains dangerous patterns that
 * could enable script execution or external resource loading.
 *
 * The value is normalized via `normalizeCssValue()` to resolve CSS
 * escapes, then checked against a blocklist:
 * - `url(...)` -- only allowed when the inner URL passes
 *   {@link isUrlAllowed} (raster images, http(s), relative paths).
 *   Malformed `url(` (no closing paren) is treated as dangerous.
 * - `expression()` -- blocks IE's CSS expression evaluation
 * - `-moz-binding` -- blocks Firefox XBL binding injection
 * - `behavior:` -- blocks IE behavior attachment
 * - `@import` -- blocks external stylesheet loading
 *
 * The blocklist is evaluated twice: once with comments stripped (defeats
 * patterns split by a comment) and once with comment text kept. The second
 * pass is required because comment delimiters inside a quoted string are
 * ordinary string content to a browser; stripping them blindly would let
 * an attacker hide a payload between two such strings. The value is
 * dangerous if either pass finds a match.
 *
 * @param value - The CSS property value to check.
 * @returns `true` if the value contains a dangerous pattern and should be removed.
 */
export function isDangerousCssValue(value: string): boolean {
  if (containsDangerousCssPattern(normalizeCssValue(value))) return true;

  // Example caught only by this pass:
  //   url("/*"), url(javascript:alert(1)), url("*/")
  // Comment stripping would collapse it to url("") and hide the middle url().
  return containsDangerousCssPattern(normalizeCssValue(value, false));
}
651
+
652
/**
 * Split a CSS style attribute value into individual declarations,
 * respecting parentheses and quoted strings.
 *
 * A simple `split(";")` would corrupt declarations whose value
 * contains `;` inside a `url(...)` invocation, e.g. a base64 data URL
 * passed via a CSS custom property:
 *
 * ```css
 * --logo: url(data:image/png;base64,iVBORw0KGgo...)
 * ```
 *
 * This walker only splits on `;` when not inside `(...)` and not inside
 * a `"..."` / `'...'` string. Separators are not included in the output,
 * pieces are not trimmed, and a trailing piece is emitted only when it is
 * non-empty.
 *
 * @param style - The raw `style` attribute value.
 * @returns The declaration substrings in source order.
 */
function splitDeclarations(style: string): string[] {
  const out: string[] = [];
  let buf = "";
  let parenDepth = 0;
  let quoteChar: string | null = null;

  for (const ch of style) {
    if (quoteChar !== null) {
      // Inside a string: copy verbatim until the matching quote.
      buf += ch;
      if (ch === quoteChar) quoteChar = null;
      continue;
    }
    if (ch === '"' || ch === "'") {
      quoteChar = ch;
      buf += ch;
      continue;
    }
    if (ch === "(") {
      parenDepth++;
      buf += ch;
      continue;
    }
    if (ch === ")") {
      // A stray `)` never drives the depth negative.
      if (parenDepth > 0) parenDepth--;
      buf += ch;
      continue;
    }
    if (ch === ";" && parenDepth === 0) {
      out.push(buf);
      buf = "";
      continue;
    }
    buf += ch;
  }
  if (buf.length > 0) out.push(buf);
  return out;
}

/**
 * Sanitize a `style` attribute value by removing dangerous declarations
 * while preserving safe ones.
 *
 * Splits the value into individual declarations (on `;` outside
 * parentheses and quotes), checks each declaration's value via
 * {@link isDangerousCssValue}, and drops any that fail. Declarations
 * without a `:` are dropped. Also blocks the `-moz-binding` and
 * `behavior` property names directly, after decoding CSS escapes in the
 * property name.
 *
 * Surviving declarations are emitted with their original (trimmed) text,
 * joined by `;`. If the input ended with a semicolon, the output will too
 * (matching Wikidot's pass-through behavior for user-authored styles).
 *
 * @param style - The raw `style` attribute value.
 * @returns The sanitized style string with dangerous declarations removed,
 *   or an empty string if nothing is safe.
 */
export function sanitizeStyleValue(style: string): string {
  // Remember if original ends with semicolon (Wikidot preserves this)
  const endsWithSemicolon = style.trimEnd().endsWith(";");

  // Split by semicolon (respecting parens/quotes) into individual declarations
  const declarations = splitDeclarations(style)
    .map((d) => d.trim())
    .filter(Boolean);
  const safe: string[] = [];

  for (const decl of declarations) {
    const colonIdx = decl.indexOf(":");
    if (colonIdx === -1) continue;

    const property = decl.slice(0, colonIdx).trim();
    const value = decl.slice(colonIdx + 1).trim();

    // Skip if value contains dangerous patterns
    if (isDangerousCssValue(value)) continue;

    // Skip dangerous properties. CSS allows escape sequences inside
    // property names too (e.g. `-mo\7a-binding` → `-moz-binding`), so we
    // run them through the same normaliser as values before matching.
    const normalisedProperty = normalizeCssValue(property);
    if (normalisedProperty.startsWith("-moz-binding")) continue;
    if (normalisedProperty === "behavior") continue;

    // Keep original format (Wikidot outputs input as is)
    safe.push(decl);
  }

  if (safe.length === 0) return "";

  // Preserve original trailing semicolon format
  return endsWithSemicolon ? safe.join(";") + ";" : safe.join(";");
}
758
+
759
/**
 * Check that a string has the shape of a plain `local@domain.tld` address.
 *
 * The pattern is intentionally narrow so that the address can be placed in
 * a `mailto:` link without carrying injection payloads:
 * - local part: ASCII letters, digits, `.`, `_`, `+`, `-`
 * - domain: ASCII letters, digits, `.`, `-`, ending in a dot followed by
 *   at least two ASCII letters
 *
 * `%` is deliberately not accepted: `mailto:` URLs are percent-decoded, so
 * an input like `a%0d%0abcc%3aevil@example.com` would otherwise decode
 * into an injected BCC header. Spaces, colons and angle brackets are
 * rejected for the same reason.
 *
 * @param email - The email string to validate.
 * @returns `true` if the whole string matches the safe pattern.
 */
export function isValidEmail(email: string): boolean {
  const safeAddress = /^[a-zA-Z0-9._+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/;
  return safeAddress.test(email);
}
781
+
782
/**
 * Lowercase names of HTML attributes whose value the browser treats as a
 * URL. {@link sanitizeAttributes} runs {@link isDangerousUrl} on the value
 * of every attribute listed here before letting it through.
 */
const URL_ATTRIBUTES = new Set([
  // Link and resource targets
  "href", "src",
  // Form submission targets
  "action", "formaction",
  // Media and presentation resources
  "srcset", "poster", "background",
]);
796
+
797
/**
 * Build a filtered copy of an attribute map that keeps only entries which
 * pass every safety check.
 *
 * Each entry is handled as follows:
 * 1. Dropped if its name fails {@link isSafeAttribute} (event handlers,
 *    names outside the allowlist).
 * 2. Dropped if its lowercased name is in `URL_ATTRIBUTES` and
 *    {@link isDangerousUrl} flags the value.
 * 3. For `style` (any letter case), the value is replaced by the result of
 *    {@link sanitizeStyleValue}; the attribute is dropped if that result
 *    is empty.
 * 4. Otherwise copied through unchanged.
 *
 * Keys keep their original spelling; the input object is not modified.
 *
 * @param attributes - The raw attribute name-value map to sanitize.
 * @returns A new map containing only the safe attributes and their
 *   (possibly sanitized) values.
 */
export function sanitizeAttributes(attributes: Record<string, string>): Record<string, string> {
  const cleaned: Record<string, string> = {};

  for (const [name, rawValue] of Object.entries(attributes)) {
    if (!isSafeAttribute(name)) continue;

    const normalizedName = name.toLowerCase();

    // URL-bearing attributes must not use a script-capable scheme.
    if (URL_ATTRIBUTES.has(normalizedName) && isDangerousUrl(rawValue)) continue;

    if (normalizedName !== "style") {
      cleaned[name] = rawValue;
      continue;
    }

    // Inline styles are filtered declaration by declaration; an empty
    // result means nothing survived, so the attribute is omitted.
    const styleValue = sanitizeStyleValue(rawValue);
    if (styleValue) {
      cleaned[name] = styleValue;
    }
  }

  return cleaned;
}