@wdprlib/render 2.1.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. package/dist/index.cjs +2344 -1668
  2. package/dist/index.d.cts +15 -13
  3. package/dist/index.d.ts +15 -13
  4. package/dist/index.js +2375 -1699
  5. package/package.json +1 -1
  6. package/src/context/attributes.ts +14 -0
  7. package/src/context/bibliography.ts +109 -0
  8. package/src/context/counters.ts +51 -0
  9. package/src/context/image-urls.ts +31 -0
  10. package/src/context/index.ts +285 -0
  11. package/src/context/output.ts +17 -0
  12. package/src/context/page-urls.ts +81 -0
  13. package/src/context/style-slots.ts +29 -0
  14. package/src/context/urls.ts +2 -0
  15. package/src/elements/bibliography/block.ts +27 -0
  16. package/src/elements/bibliography/cite.ts +23 -0
  17. package/src/elements/bibliography/ids.ts +9 -0
  18. package/src/elements/bibliography/index.ts +9 -0
  19. package/src/elements/code/contents.ts +18 -0
  20. package/src/elements/code/index.ts +29 -0
  21. package/src/elements/collapsible/index.ts +31 -0
  22. package/src/elements/collapsible/labels.ts +35 -0
  23. package/src/elements/collapsible/link.ts +11 -0
  24. package/src/elements/collapsible/sections.ts +39 -0
  25. package/src/elements/container/attributes.ts +28 -0
  26. package/src/elements/container/header.ts +27 -0
  27. package/src/elements/container/index.ts +35 -0
  28. package/src/elements/container/string-container.ts +40 -0
  29. package/src/elements/container/string-types.ts +63 -0
  30. package/src/elements/container/wrappers.ts +32 -0
  31. package/src/elements/date/format.ts +20 -0
  32. package/src/elements/{date.ts → date/index.ts} +4 -29
  33. package/src/elements/date/output.ts +6 -0
  34. package/src/elements/embed/iframe.ts +8 -0
  35. package/src/elements/embed/index.ts +28 -0
  36. package/src/elements/embed/providers.ts +43 -0
  37. package/src/elements/embed/validation.ts +15 -0
  38. package/src/elements/embed-block/allowlist.ts +60 -0
  39. package/src/elements/embed-block/boolean-attributes.ts +38 -0
  40. package/src/elements/embed-block/iframe.ts +33 -0
  41. package/src/elements/embed-block/index.ts +31 -0
  42. package/src/elements/embed-block/sanitize-config.ts +22 -0
  43. package/src/elements/embed-block/sanitize.ts +44 -0
  44. package/src/elements/expr/branch.ts +29 -0
  45. package/src/elements/expr/index.ts +63 -0
  46. package/src/elements/expr/result.ts +19 -0
  47. package/src/elements/footnote/body.ts +11 -0
  48. package/src/elements/footnote/index.ts +35 -0
  49. package/src/elements/footnote/ref.ts +16 -0
  50. package/src/elements/html/attributes.ts +24 -0
  51. package/src/elements/html/index.ts +39 -0
  52. package/src/elements/html/url.ts +19 -0
  53. package/src/elements/iframe/attributes.ts +28 -0
  54. package/src/elements/iframe/index.ts +22 -0
  55. package/src/elements/iftags/condition.ts +42 -0
  56. package/src/elements/iftags/index.ts +39 -0
  57. package/src/elements/iftags/style-slot.ts +23 -0
  58. package/src/elements/iftags/tokens.ts +36 -0
  59. package/src/elements/image/alignment.ts +44 -0
  60. package/src/elements/image/attributes.ts +10 -0
  61. package/src/elements/image/img-attributes.ts +26 -0
  62. package/src/elements/image/index.ts +36 -0
  63. package/src/elements/image/link-href.ts +24 -0
  64. package/src/elements/image/link.ts +13 -0
  65. package/src/elements/image/source.ts +16 -0
  66. package/src/elements/{include.ts → include/index.ts} +5 -13
  67. package/src/elements/include/missing.ts +15 -0
  68. package/src/elements/link/anchor-name.ts +6 -0
  69. package/src/elements/link/anchor.ts +27 -0
  70. package/src/elements/link/attributes.ts +47 -0
  71. package/src/elements/link/index.ts +26 -0
  72. package/src/elements/link/label.ts +23 -0
  73. package/src/elements/link/target.ts +20 -0
  74. package/src/elements/list/attributes.ts +19 -0
  75. package/src/elements/list/definition-list.ts +16 -0
  76. package/src/elements/list/index.ts +48 -0
  77. package/src/elements/list/item-rendering.ts +38 -0
  78. package/src/elements/list/items.ts +61 -0
  79. package/src/elements/list/no-marker.ts +53 -0
  80. package/src/elements/list/paragraphs.ts +34 -0
  81. package/src/elements/list/trim.ts +38 -0
  82. package/src/elements/math/block.ts +29 -0
  83. package/src/elements/math/equation-ref.ts +12 -0
  84. package/src/elements/math/index.ts +14 -0
  85. package/src/elements/math/inline.ts +19 -0
  86. package/src/elements/math/latex.ts +27 -0
  87. package/src/elements/math/source.ts +18 -0
  88. package/src/elements/module/backlinks.ts +2 -1
  89. package/src/elements/module/categories.ts +2 -2
  90. package/src/elements/module/empty-container.ts +10 -0
  91. package/src/elements/module/index.ts +2 -4
  92. package/src/elements/module/join-markup.ts +10 -0
  93. package/src/elements/module/join.ts +2 -7
  94. package/src/elements/module/listpages.ts +2 -2
  95. package/src/elements/module/listusers.ts +2 -2
  96. package/src/elements/module/page-tree.ts +2 -2
  97. package/src/elements/module/rate-markup.ts +10 -0
  98. package/src/elements/module/rate.ts +4 -13
  99. package/src/elements/module/unknown.ts +11 -0
  100. package/src/elements/tab-view/ids.ts +16 -0
  101. package/src/elements/tab-view/index.ts +31 -0
  102. package/src/elements/tab-view/navigation.ts +15 -0
  103. package/src/elements/tab-view/panels.ts +16 -0
  104. package/src/elements/table/attributes.ts +23 -0
  105. package/src/elements/table/cell-attributes.ts +62 -0
  106. package/src/elements/table/cell.ts +13 -0
  107. package/src/elements/table/index.ts +27 -0
  108. package/src/elements/text/email.ts +20 -0
  109. package/src/elements/text/index.ts +11 -0
  110. package/src/elements/text/plain.ts +11 -0
  111. package/src/elements/text/raw.ts +20 -0
  112. package/src/elements/toc/body.ts +12 -0
  113. package/src/elements/toc/entries.ts +34 -0
  114. package/src/elements/toc/frame.ts +27 -0
  115. package/src/elements/toc/index.ts +17 -0
  116. package/src/elements/toc/link.ts +26 -0
  117. package/src/elements/user/index.ts +40 -0
  118. package/src/elements/user/markup.ts +34 -0
  119. package/src/elements/user/resolve.ts +6 -0
  120. package/src/escape/attribute-allowlists.ts +101 -0
  121. package/src/escape/attributes.ts +62 -0
  122. package/src/escape/css-color-functions.ts +18 -0
  123. package/src/escape/css-colors.ts +183 -0
  124. package/src/escape/css-danger.ts +22 -0
  125. package/src/escape/css-normalize.ts +54 -0
  126. package/src/escape/css-style.ts +78 -0
  127. package/src/escape/css-urls.ts +76 -0
  128. package/src/escape/css.ts +4 -0
  129. package/src/escape/email.ts +22 -0
  130. package/src/escape/html.ts +68 -0
  131. package/src/escape/index.ts +15 -0
  132. package/src/escape/url.ts +18 -0
  133. package/src/libs/highlighter/engine/end-pattern.ts +26 -0
  134. package/src/libs/highlighter/engine/html.ts +19 -0
  135. package/src/libs/highlighter/engine/index.ts +3 -0
  136. package/src/libs/highlighter/engine/keywords.ts +22 -0
  137. package/src/libs/highlighter/engine/parts.ts +36 -0
  138. package/src/libs/highlighter/engine/preprocess.ts +10 -0
  139. package/src/libs/highlighter/engine/render.ts +31 -0
  140. package/src/libs/highlighter/engine/token.ts +7 -0
  141. package/src/libs/highlighter/engine/tokenizer.ts +266 -0
  142. package/src/libs/highlighter/engine/utils.ts +38 -0
  143. package/src/render/collected-styles.ts +22 -0
  144. package/src/render/dispatch.ts +181 -0
  145. package/src/render/index.ts +28 -0
  146. package/src/render/primitives.ts +17 -0
  147. package/src/render/style-tag.ts +6 -0
  148. package/src/render/style.ts +15 -0
  149. package/src/types.ts +6 -2
  150. package/src/context.ts +0 -422
  151. package/src/elements/bibliography.ts +0 -123
  152. package/src/elements/code.ts +0 -49
  153. package/src/elements/collapsible.ts +0 -105
  154. package/src/elements/container.ts +0 -302
  155. package/src/elements/embed-block.ts +0 -327
  156. package/src/elements/embed.ts +0 -166
  157. package/src/elements/expr.ts +0 -102
  158. package/src/elements/footnote.ts +0 -76
  159. package/src/elements/html.ts +0 -79
  160. package/src/elements/iframe.ts +0 -44
  161. package/src/elements/iftags.ts +0 -118
  162. package/src/elements/image.ts +0 -154
  163. package/src/elements/link.ts +0 -201
  164. package/src/elements/list.ts +0 -241
  165. package/src/elements/math.ts +0 -177
  166. package/src/elements/tab-view.ts +0 -75
  167. package/src/elements/table.ts +0 -101
  168. package/src/elements/text.ts +0 -57
  169. package/src/elements/toc.ts +0 -147
  170. package/src/elements/user.ts +0 -79
  171. package/src/escape.ts +0 -829
  172. package/src/libs/highlighter/engine.ts +0 -352
  173. package/src/render.ts +0 -231
@@ -1,302 +0,0 @@
1
- /**
2
- *
3
- * Renderer for "container" AST elements -- the most general wrapper node
4
- * in the Wikidot AST.
5
- *
6
- * A container can represent many different HTML constructs depending on
7
- * its `type` discriminant:
8
- * - Headers (`h1`..`h6`) with optional TOC anchor IDs
9
- * - Text alignment wrappers (`left`, `center`, `right`, `justify`)
10
- * - Inline formatting (`bold`, `italics`, `underline`, `strikethrough`,
11
- * `superscript`, `subscript`, `monospace`, `mark`, `insertion`, `deletion`)
12
- * - Block containers (`paragraph`, `div`, `blockquote`, `span`)
13
- * - Visibility modifiers (`hidden`, `invisible`)
14
- * - Ruby annotations (`ruby`, `ruby-text`)
15
- * - Definition lists (`definition-list`, `definition-list-item`, etc.)
16
- * - Table sub-elements (`table-row`, `table-cell`)
17
- * - Size containers with inline `font-size` styling
18
- *
19
- * All attributes are sanitized before rendering to prevent XSS.
20
- *
21
- * @module
22
- */
23
-
24
- import type { ContainerData } from "@wdprlib/ast";
25
- import { isStringContainerType, isHeaderType, isAlignType } from "@wdprlib/ast";
26
- import type { RenderContext } from "../context";
27
- import { escapeAttr, sanitizeAttributes } from "../escape";
28
- import { renderElements } from "../render";
29
-
/**
 * Entry point for rendering a container node.
 *
 * The container's `type` selects one of three paths:
 * - header types are forwarded to `renderHeader`,
 * - alignment types become a `<div>` with an inline `text-align` style,
 * - plain string types are forwarded to `renderStringContainer`.
 *
 * If `type` matches none of the three guards, nothing is emitted.
 *
 * @param ctx - Render context that receives the generated HTML.
 * @param data - Container node: its type, attributes and child elements.
 */
export function renderContainer(ctx: RenderContext, data: ContainerData): void {
  const kind = data.type;

  if (isHeaderType(kind)) {
    const header = kind.header;
    renderHeader(ctx, header.level, header["has-toc"], data.attributes, data.elements);
  } else if (isAlignType(kind)) {
    ctx.push(`<div style="text-align: ${kind.align};">`);
    renderElements(ctx, data.elements);
    ctx.push("</div>");
  } else if (isStringContainerType(kind)) {
    renderStringContainer(ctx, kind, data.attributes, data.elements);
  }
}
59
-
/**
 * Emit a heading (`<hN>`) whose content is wrapped in a `<span>`.
 *
 * When `hasToc` is true, the opening tag carries an `id` obtained from
 * `ctx.generateId("toc", ctx.nextTocIndex())`, placed before the other
 * attributes. Otherwise no `id` is added by this function.
 *
 * @param ctx - Render context that receives the generated HTML.
 * @param level - Heading level used to build the tag name (`h${level}`).
 * @param hasToc - Whether a TOC anchor id should be generated.
 * @param attributes - Attribute map from the AST; sanitized by `renderAttrs`.
 * @param elements - Children rendered inside the inner `<span>`.
 */
function renderHeader(
  ctx: RenderContext,
  level: number,
  hasToc: boolean,
  attributes: Record<string, string>,
  elements: import("@wdprlib/ast").Element[],
): void {
  const tagName = `h${level}`;
  // The TOC index is only consumed when the heading actually takes part in the TOC.
  const idFragment = hasToc ? ` id="${ctx.generateId("toc", ctx.nextTocIndex())}"` : "";

  ctx.push(`<${tagName}${idFragment}${renderAttrs(attributes)}>`);
  ctx.push("<span>");
  renderElements(ctx, elements);
  ctx.push("</span>");
  ctx.push(`</${tagName}>`);
}
92
-
/**
 * Describes how a string-typed container is wrapped.
 *
 * `open` is everything that follows `<` in the opening tag (tag name plus any
 * fixed attributes), `close` is the closing tag name, and `withAttrs` says
 * whether the node's own attributes are appended to the opening tag.
 */
interface StringContainerWrapper {
  open: string;
  close: string;
  withAttrs: boolean;
}

/** Wrapper lookup for every string container type that emits a surrounding tag. */
const STRING_CONTAINER_WRAPPERS: ReadonlyMap<string, StringContainerWrapper> = new Map<
  string,
  StringContainerWrapper
>([
  ["paragraph", { open: "p", close: "p", withAttrs: true }],
  ["bold", { open: "strong", close: "strong", withAttrs: true }],
  ["italics", { open: "em", close: "em", withAttrs: true }],
  ["underline", { open: 'span style="text-decoration: underline;"', close: "span", withAttrs: true }],
  [
    "strikethrough",
    { open: 'span style="text-decoration: line-through;"', close: "span", withAttrs: true },
  ],
  ["superscript", { open: "sup", close: "sup", withAttrs: true }],
  ["subscript", { open: "sub", close: "sub", withAttrs: true }],
  ["monospace", { open: "tt", close: "tt", withAttrs: true }],
  ["span", { open: "span", close: "span", withAttrs: true }],
  ["div", { open: "div", close: "div", withAttrs: true }],
  ["blockquote", { open: "blockquote", close: "blockquote", withAttrs: true }],
  ["mark", { open: "mark", close: "mark", withAttrs: true }],
  ["insertion", { open: "ins", close: "ins", withAttrs: true }],
  ["deletion", { open: "del", close: "del", withAttrs: true }],
  ["hidden", { open: 'span style="display: none"', close: "span", withAttrs: true }],
  ["invisible", { open: 'span style="visibility: hidden"', close: "span", withAttrs: true }],
  ["ruby", { open: "ruby", close: "ruby", withAttrs: true }],
  ["ruby-text", { open: "rt", close: "rt", withAttrs: true }],
  ["definition-list", { open: "dl", close: "dl", withAttrs: false }],
  ["definition-list-key", { open: "dt", close: "dt", withAttrs: false }],
  ["definition-list-value", { open: "dd", close: "dd", withAttrs: false }],
  ["table-row", { open: "tr", close: "tr", withAttrs: true }],
  ["table-cell", { open: "td", close: "td", withAttrs: true }],
]);

/**
 * Render a container identified by a plain string type.
 *
 * Behaviour by type:
 * - `size` is delegated to `renderSizeContainer`.
 * - `div` with no children and no attributes emits nothing.
 * - Types listed in `STRING_CONTAINER_WRAPPERS` emit an opening tag, the
 *   children, then the closing tag. The definition-list tags are emitted
 *   without the node's attributes.
 * - Every other type (including `heading`, `collapsible` and
 *   `definition-list-item`) renders only its children, with no wrapper.
 *
 * @param ctx - Render context that receives the generated HTML.
 * @param type - Container type string (e.g. "paragraph", "bold", "div").
 * @param attributes - Attribute map from the AST; sanitized by `renderAttrs`.
 * @param elements - Child elements rendered inside the wrapper.
 */
function renderStringContainer(
  ctx: RenderContext,
  type: string,
  attributes: Record<string, string>,
  elements: import("@wdprlib/ast").Element[],
): void {
  if (type === "size") {
    renderSizeContainer(ctx, attributes, elements);
    return;
  }

  // An empty div without attributes is dropped entirely.
  if (type === "div" && elements.length === 0 && Object.keys(attributes).length === 0) {
    return;
  }

  const wrapper = STRING_CONTAINER_WRAPPERS.get(type);
  if (wrapper === undefined) {
    // Wrapper-less and unknown types: children only.
    renderElements(ctx, elements);
    return;
  }

  const attrText = wrapper.withAttrs ? renderAttrs(attributes) : "";
  ctx.push(`<${wrapper.open}${attrText}>`);
  renderElements(ctx, elements);
  ctx.push(`</${wrapper.close}>`);
}
251
-
/**
 * Render a `[[size]]` container as a `<span>` carrying the node's attributes.
 *
 * This function does not build or inspect a `font-size` declaration itself:
 * whatever is in `attributes` (including any `style` value) is passed through
 * `renderAttrs` unchanged. The previous implementation branched on whether
 * `style` contained `font-size`, but both branches emitted identical markup,
 * so the check, the unused `style` local and the shallow copy were removed.
 *
 * @param ctx - Render context that receives the generated HTML.
 * @param attributes - Attribute map from the AST; sanitized by `renderAttrs`.
 * @param elements - Child elements rendered inside the span.
 */
function renderSizeContainer(
  ctx: RenderContext,
  attributes: Record<string, string>,
  elements: import("@wdprlib/ast").Element[],
): void {
  ctx.push(`<span${renderAttrs(attributes)}>`);
  renderElements(ctx, elements);
  ctx.push("</span>");
}
280
-
/**
 * Turn an attribute map into the text that follows a tag name.
 *
 * The map is first passed through `sanitizeAttributes`. Keys starting with
 * `_` are then dropped, and each remaining pair is written as ` key="value"`
 * with the value passed through `escapeAttr`. An empty value is written as
 * ` key=""` without calling `escapeAttr`.
 *
 * @param attributes - Raw attribute map from the AST.
 * @returns Attribute text with a leading space per attribute, or `""` when
 *   nothing remains.
 */
function renderAttrs(attributes: Record<string, string>): string {
  return Object.entries(sanitizeAttributes(attributes))
    .filter(([name]) => !name.startsWith("_")) // internal attributes are never emitted
    .map(([name, value]) => ` ${name}="${value === "" ? "" : escapeAttr(value)}"`)
    .join("");
}
@@ -1,327 +0,0 @@
1
- /**
2
- *
3
- * Renderer for `[[embed]]...[[/embed]]` block-level embeds.
4
- *
5
- * Unlike inline embeds (which target specific providers like YouTube),
6
- * embed blocks contain raw HTML that the user provides. This module
7
- * validates and sanitizes that HTML through a multi-layer pipeline:
8
- *
9
- * 1. `sanitize-html` strips everything except a single `<iframe>` with
10
- * a limited set of safe attributes.
11
- * 2. The iframe's `src` URL must use HTTP or HTTPS.
12
- * 3. The hostname and path must match the configured allowlist (or the
13
- * allowlist can be set to `null` for Wikidot's "anyiframe" mode).
14
- *
15
- * If any validation step fails, a Wikidot-compatible error block is
16
- * rendered instead.
17
- *
18
- * @module
19
- */
20
-
21
- import type { EmbedBlockData } from "@wdprlib/ast";
22
- import type { Element } from "domhandler";
23
- import { parseDocument } from "htmlparser2";
24
- import sanitizeHtml from "sanitize-html";
25
- import type { RenderContext } from "../context";
26
-
/**
 * Names of HTML boolean attributes that are rewritten to the `attr="attr"`
 * form before embed markup is emitted (see `normalizeBooleanAttributes`).
 */
const BOOLEAN_ATTRIBUTES = [
  "allowfullscreen", "async", "autofocus", "autoplay",
  "checked", "controls",
  "default", "defer", "disabled",
  "formnovalidate",
  "hidden",
  "ismap",
  "loop",
  "multiple", "muted",
  "novalidate",
  "open",
  "readonly", "required", "reversed",
  "selected",
];
54
-
/**
 * One rule of the embed allowlist.
 *
 * An iframe URL satisfies the rule when its hostname matches `host` and,
 * if `pathPrefix` is given, its path starts with that prefix.
 */
export interface EmbedAllowlistEntry {
  /**
   * Hostname to accept. A leading `*.` makes it a wildcard, e.g.
   * `*.youtube.com`; without it the hostname must match exactly.
   */
  host: string;
  /** Path prefix the URL must start with, e.g. `/embed/`. Omit to accept any path. */
  pathPrefix?: string;
}
65
-
/**
 * Default allowlist for embed content (ported from Wikidot's default.php).
 * Only iframes whose `src` matches one of these host + path patterns are
 * rendered.
 *
 * Note: passing `null` instead of a list disables allowlist matching
 * (Wikidot's 'anyiframe' behavior). In that mode any iframe with an `http:`
 * or `https:` source is accepted. The scheme check and the sanitize-html
 * attribute filtering still apply, but the source is NOT limited to HTTPS.
 */
export const DEFAULT_EMBED_ALLOWLIST: EmbedAllowlistEntry[] | null = [
  // YouTube
  { host: "*.youtube.com", pathPrefix: "/embed/" },
  { host: "*.youtube-nocookie.com", pathPrefix: "/embed/" },
  // Vimeo
  { host: "player.vimeo.com", pathPrefix: "/video/" },
  // Google Maps
  { host: "*.google.com", pathPrefix: "/maps/embed" },
  // Google Calendar
  { host: "calendar.google.com", pathPrefix: "/calendar/embed" },
  // Spotify
  { host: "open.spotify.com", pathPrefix: "/embed/" },
  // SoundCloud
  { host: "w.soundcloud.com", pathPrefix: "/player/" },
  // CodePen
  { host: "codepen.io" },
];
90
-
/**
 * sanitize-html configuration for embed content.
 *
 * Keeps only `<iframe>` elements, restricted to the attributes listed below,
 * and permits the `https` and `http` URL schemes. The configuration is not
 * HTTPS-only: plain `http` sources pass this step.
 */
const SANITIZE_CONFIG: sanitizeHtml.IOptions = {
  allowedTags: ["iframe"],
  allowedAttributes: {
    iframe: [
      "class",
      "src",
      "style",
      "allow",
      "allowfullscreen",
      "frameborder",
      "height",
      "loading",
      "referrerpolicy",
      "sandbox",
      "title",
      "width",
    ],
  },
  allowedSchemes: ["https", "http"],
};
115
-
/**
 * Collect every `<iframe>` element found in an HTML string.
 *
 * The markup is parsed with `parseDocument` and traversed depth-first in
 * document order. Only nodes whose `type` is `"tag"` are inspected or
 * descended into, so iframes nested inside other tags are found as well.
 *
 * @param html - HTML string to scan (the caller passes sanitized markup).
 * @returns The iframe elements, in document order.
 */
function findIframes(html: string): Element[] {
  const document = parseDocument(html);
  const found: Element[] = [];

  const visit = (nodes: typeof document.children): void => {
    nodes.forEach((node) => {
      if (node.type !== "tag") {
        return;
      }
      if (node.name === "iframe") {
        found.push(node);
      }
      if (node.children) {
        visit(node.children);
      }
    });
  };

  visit(document.children);
  return found;
}
144
-
/**
 * Decide whether a hostname satisfies an allowlist host pattern.
 *
 * Comparison is case-insensitive. A pattern starting with `*.` accepts the
 * bare domain and any subdomain of it: `*.youtube.com` accepts `youtube.com`
 * and `www.youtube.com`, but not `evil-youtube.com`, because a subdomain must
 * be separated by a dot. Any other pattern must equal the hostname exactly.
 *
 * @param hostname - Hostname taken from the iframe `src` URL.
 * @param pattern - Allowlist host pattern.
 * @returns `true` when the hostname matches the pattern.
 */
function matchesHostPattern(hostname: string, pattern: string): boolean {
  const host = hostname.toLowerCase();
  const rule = pattern.toLowerCase();

  if (!rule.startsWith("*.")) {
    return host === rule;
  }

  // Strip the "*." marker; what remains is the domain the wildcard hangs off.
  const domain = rule.slice(2);
  return host === domain || host.endsWith(`.${domain}`);
}
169
-
/**
 * Test a URL against a single allowlist entry.
 *
 * The hostname must satisfy `entry.host` (see `matchesHostPattern`). If the
 * entry has a non-empty `pathPrefix`, the lower-cased path must start with
 * the lower-cased prefix. When the prefix does not end in `/`, the character
 * right after it must be `/`, `?` or `#`, or the path must end there, so
 * `/embed` does not accept `/embedX`.
 *
 * @param url - Parsed URL from the iframe `src` attribute.
 * @param entry - Allowlist entry to test against.
 * @returns `true` when both the host and the path condition hold.
 */
function matchesAllowlistEntry(url: URL, entry: EmbedAllowlistEntry): boolean {
  if (!matchesHostPattern(url.hostname, entry.host)) {
    return false;
  }
  if (!entry.pathPrefix) {
    // No path restriction on this entry.
    return true;
  }

  const path = url.pathname.toLowerCase();
  const prefix = entry.pathPrefix.toLowerCase();
  if (!path.startsWith(prefix)) {
    return false;
  }
  if (prefix.endsWith("/")) {
    // A trailing slash is already a boundary.
    return true;
  }

  const rest = path.slice(prefix.length);
  return rest === "" || /^[/?#]/.test(rest);
}
202
-
/**
 * Run raw embed HTML through the validation pipeline.
 *
 * 1. Sanitize the trimmed content with `sanitize-html` and `SANITIZE_CONFIG`.
 * 2. Require exactly one `<iframe>` in the result, with a non-empty `src`.
 * 3. Parse `src` as a URL. A protocol-relative value (`//host/...`) is
 *    resolved against `baseUrl`, or `https://localhost` when none is given.
 *    Anything that fails to parse is rejected.
 * 4. Require the `http:` or `https:` protocol.
 * 5. Unless `allowlist` is `null`, require at least one matching entry.
 *
 * @param content - Raw HTML content from the `[[embed]]` block.
 * @param allowlist - Host/path allowlist entries, or `null` to skip matching.
 * @param baseUrl - Optional base used only for protocol-relative `src` values.
 * @returns The sanitized HTML, or `null` if any step rejects the content.
 */
function validateAndSanitizeEmbed(
  content: string,
  allowlist: EmbedAllowlistEntry[] | null,
  baseUrl?: string,
): string | null {
  const cleaned = sanitizeHtml(content.trim(), SANITIZE_CONFIG);
  if (cleaned.trim() === "") {
    return null;
  }

  const frames = findIframes(cleaned);
  if (frames.length !== 1) {
    return null;
  }

  const source = frames[0]?.attribs.src?.trim();
  if (!source) {
    return null;
  }

  let target: URL;
  try {
    target = source.startsWith("//")
      ? new URL(source, baseUrl ?? "https://localhost")
      : new URL(source);
  } catch {
    return null;
  }

  const isWebScheme = target.protocol === "https:" || target.protocol === "http:";
  if (!isWebScheme) {
    return null;
  }

  // A null allowlist means "any HTTP(S) iframe"; otherwise one entry must match.
  if (allowlist !== null && !allowlist.some((entry) => matchesAllowlistEntry(target, entry))) {
    return null;
  }

  return cleaned;
}
273
-
/**
 * Normalize HTML boolean attributes to Wikidot's `attr="attr"` format.
 *
 * Both the minimized form (`attr`) and the empty form (`attr=""`) are
 * rewritten, case-insensitively, to ` attr="attr"` using the spelling from
 * the name list.
 *
 * Only attribute tokens inside start tags are touched. Quoted attribute
 * values are skipped as a unit and text outside tags is left alone. This
 * matters for security: rewriting a boolean name that appears inside a value
 * such as `title="x hidden onload=alert(1) "` would insert quotes into the
 * value, ending it early and turning the rest into real attributes.
 *
 * @param html - HTML string potentially containing boolean attributes.
 * @param attributeNames - Names to normalize; defaults to `BOOLEAN_ATTRIBUTES`.
 * @returns HTML with the listed boolean attributes in `attr="attr"` format.
 */
function normalizeBooleanAttributes(
  html: string,
  attributeNames: readonly string[] = BOOLEAN_ATTRIBUTES,
): string {
  const names = attributeNames.filter((name) => name !== "");
  if (names.length === 0) {
    return html;
  }

  // Map each lower-cased name to the spelling that should appear in the output.
  const canonicalByLowerName = new Map<string, string>();
  for (const name of names) {
    canonicalByLowerName.set(name.toLowerCase(), name);
  }

  // Escape regex metacharacters so a name can never alter the pattern.
  const alternation = names.map((name) => name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")).join("|");

  // A start tag: "<", a letter, then quoted strings or any character except ">".
  const startTag = /<[a-zA-Z](?:"[^"]*"|'[^']*'|[^"'>])*>/g;

  // Inside a tag: consume quoted values whole (left unchanged), or match a
  // boolean attribute that is either `name=""` or followed by whitespace, `>` or `/>`.
  const attributeToken = new RegExp(
    `"[^"]*"|'[^']*'|\\s(${alternation})(?:=""|(?=\\s|>|/>))`,
    "gi",
  );

  return html.replace(startTag, (tag) =>
    tag.replace(attributeToken, (token: string, name?: string) => {
      if (name === undefined) {
        // Quoted attribute value: keep as-is.
        return token;
      }
      const lower = name.toLowerCase();
      const canonical = canonicalByLowerName.get(lower) ?? lower;
      return ` ${canonical}="${canonical}"`;
    }),
  );
}
298
-
/**
 * Render an `[[embed]]...[[/embed]]` block.
 *
 * The raw HTML in `data.contents` goes through `validateAndSanitizeEmbed`.
 * If it is rejected, the Wikidot-compatible error block
 * `<div class="error-block">Sorry, no match for the embedded content.</div>`
 * is emitted. Otherwise the sanitized markup is emitted after its boolean
 * attributes have been normalized.
 *
 * The allowlist is `ctx.options.embedAllowlist` when that option is anything
 * other than `undefined`, and `DEFAULT_EMBED_ALLOWLIST` otherwise. An explicit
 * `null` is passed through on purpose: it selects "anyiframe" mode, in which
 * any HTTP(S) iframe source is accepted.
 *
 * @param ctx - Render context that receives the generated HTML.
 * @param data - Embed block data holding the raw HTML contents.
 */
export function renderEmbedBlock(ctx: RenderContext, data: EmbedBlockData): void {
  const { embedAllowlist, baseUrl } = ctx.options;
  // `??` is deliberately avoided: it would replace an explicit `null` with the default.
  const allowlist = embedAllowlist === undefined ? DEFAULT_EMBED_ALLOWLIST : embedAllowlist;

  const safeHtml = validateAndSanitizeEmbed(data.contents, allowlist, baseUrl);
  if (safeHtml !== null) {
    ctx.push(normalizeBooleanAttributes(safeHtml));
    return;
  }

  ctx.push('<div class="error-block">Sorry, no match for the embedded content.</div>');
}