@wdprlib/render 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/dist/index.cjs +11 -387
  2. package/dist/index.js +2 -378
  3. package/package.json +5 -3
  4. package/src/context.ts +422 -0
  5. package/src/elements/bibliography.ts +123 -0
  6. package/src/elements/clear-float.ts +27 -0
  7. package/src/elements/code.ts +49 -0
  8. package/src/elements/collapsible.ts +105 -0
  9. package/src/elements/color.ts +32 -0
  10. package/src/elements/container.ts +302 -0
  11. package/src/elements/date.ts +59 -0
  12. package/src/elements/embed-block.ts +327 -0
  13. package/src/elements/embed.ts +166 -0
  14. package/src/elements/expr.ts +102 -0
  15. package/src/elements/footnote.ts +76 -0
  16. package/src/elements/html.ts +79 -0
  17. package/src/elements/iframe.ts +44 -0
  18. package/src/elements/iftags.ts +118 -0
  19. package/src/elements/image.ts +154 -0
  20. package/src/elements/include.ts +43 -0
  21. package/src/elements/index.ts +35 -0
  22. package/src/elements/line-break.ts +22 -0
  23. package/src/elements/link.ts +201 -0
  24. package/src/elements/list.ts +241 -0
  25. package/src/elements/math.ts +177 -0
  26. package/src/elements/module/backlinks.ts +28 -0
  27. package/src/elements/module/categories.ts +27 -0
  28. package/src/elements/module/index.ts +67 -0
  29. package/src/elements/module/join.ts +33 -0
  30. package/src/elements/module/listpages.ts +27 -0
  31. package/src/elements/module/listusers.ts +27 -0
  32. package/src/elements/module/page-tree.ts +27 -0
  33. package/src/elements/module/rate.ts +44 -0
  34. package/src/elements/tab-view.ts +75 -0
  35. package/src/elements/table.ts +101 -0
  36. package/src/elements/text.ts +57 -0
  37. package/src/elements/toc.ts +147 -0
  38. package/src/elements/user.ts +79 -0
  39. package/src/escape.ts +829 -0
  40. package/src/hash.ts +62 -0
  41. package/src/index.ts +26 -0
  42. package/src/libs/highlighter/engine.ts +352 -0
  43. package/src/libs/highlighter/index.ts +70 -0
  44. package/src/libs/highlighter/languages/cpp.ts +345 -0
  45. package/src/libs/highlighter/languages/css.ts +104 -0
  46. package/src/libs/highlighter/languages/diff.ts +154 -0
  47. package/src/libs/highlighter/languages/dtd.ts +99 -0
  48. package/src/libs/highlighter/languages/html.ts +59 -0
  49. package/src/libs/highlighter/languages/java.ts +251 -0
  50. package/src/libs/highlighter/languages/javascript.ts +213 -0
  51. package/src/libs/highlighter/languages/php.ts +433 -0
  52. package/src/libs/highlighter/languages/python.ts +308 -0
  53. package/src/libs/highlighter/languages/ruby.ts +360 -0
  54. package/src/libs/highlighter/languages/sql.ts +125 -0
  55. package/src/libs/highlighter/languages/xml.ts +68 -0
  56. package/src/libs/highlighter/types.ts +44 -0
  57. package/src/render.ts +231 -0
  58. package/src/types.ts +140 -0
package/src/hash.ts ADDED
@@ -0,0 +1,62 @@
1
+ /**
2
+ *
3
+ * Pure-JavaScript hash functions for generating deterministic element IDs.
4
+ *
5
+ * These functions use FNV-1a internally and produce hex strings whose
6
+ * lengths match SHA-1 (40 chars) and MD5 (32 chars) for compatibility
7
+ * with Wikidot's ID generation patterns. Cryptographic security is not
8
+ * required; the hashes only need to be deterministic and well-distributed.
9
+ *
10
+ * `node:crypto` is intentionally avoided because `bunup`'s ESM build
11
+ * injects `createRequire` from `node:module`, which is incompatible
12
+ * with browser environments.
13
+ *
14
+ * @module
15
+ */
16
+
17
/**
 * Produce a deterministic identifier that is as long as a SHA-1 hex digest.
 *
 * The value is NOT a real SHA-1 digest: it is derived from `fnv1aHash` and
 * only shares SHA-1's 40-character hexadecimal length.
 *
 * @param input - The string to hash.
 * @returns A 40-character lowercase hex string.
 */
export function syncHashSha1(input: string): string {
  // Number of hex digits in a SHA-1 digest.
  const sha1HexLength = 40;
  return fnv1aHash(input, sha1HexLength);
}
26
+
27
/**
 * Produce a deterministic identifier that is as long as an MD5 hex digest.
 *
 * The value is NOT a real MD5 digest: it is derived from `fnv1aHash` and
 * only shares MD5's 32-character hexadecimal length.
 *
 * @param input - The string to hash.
 * @returns A 32-character lowercase hex string.
 */
export function syncHashMd5(input: string): string {
  // Number of hex digits in an MD5 digest.
  const md5HexLength = 32;
  return fnv1aHash(input, md5HexLength);
}
36
+
37
/**
 * Hash `input` with 32-bit FNV-1a and stretch the result to `hexLen` hex digits.
 *
 * One FNV-1a pass yields 32 bits, i.e. 8 hex digits. To reach longer outputs
 * the function performs `ceil(hexLen / 8)` passes; pass number `r` starts from
 * the FNV offset basis XOR-ed with `r`. The zero-padded 8-digit results are
 * concatenated in pass order and the concatenation is cut to `hexLen`.
 *
 * The hash is fed UTF-16 code units (`charCodeAt`), not bytes.
 *
 * @param input - The string to hash.
 * @param hexLen - Desired length of the output hex string (e.g. 32 or 40).
 * @returns A lowercase hex string of exactly `hexLen` characters.
 */
function fnv1aHash(input: string, hexLen: number): string {
  const offsetBasis = 0x811c9dc5;
  const prime = 0x01000193;
  const digitsPerPass = 8;

  const chunks: string[] = [];
  const passCount = Math.ceil(hexLen / digitsPerPass);

  for (let pass = 0; pass < passCount; pass += 1) {
    let acc = offsetBasis ^ pass;
    for (let at = 0; at < input.length; at += 1) {
      // XOR the code unit in first, then multiply: the "1a" ordering.
      acc = Math.imul(acc ^ input.charCodeAt(at), prime);
    }
    // `>>> 0` reinterprets the signed 32-bit accumulator as unsigned.
    chunks.push((acc >>> 0).toString(16).padStart(digitsPerPass, "0"));
  }

  return chunks.join("").substring(0, hexLen);
}
package/src/index.ts ADDED
@@ -0,0 +1,26 @@
1
+ /**
2
+ * HTML renderer for the Wikidot AST.
3
+ *
4
+ * Takes a `SyntaxTree` produced by `@wdprlib/parser` and serialises
5
+ * it to an HTML string. Page context, user resolution, and security
6
+ * settings (embed allowlists, iframe sandboxing) are configurable via
7
+ * {@link RenderOptions}.
8
+ *
9
+ * ```ts
10
+ * import { parse } from "@wdprlib/parser";
11
+ * import { renderToHtml } from "@wdprlib/render";
12
+ *
13
+ * const html = renderToHtml(parse("**hello**"));
14
+ * // => "<p><strong>hello</strong></p>"
15
+ * ```
16
+ *
17
+ * @packageDocumentation
18
+ */
19
+
20
+ export { renderToHtml } from "./render";
21
+ export type { RenderOptions, RenderResolvers, PageContext, ResolvedUser } from "./types";
22
+ export { DEFAULT_EMBED_ALLOWLIST } from "./elements/embed-block";
23
+
24
+ // Wikitext settings (re-exported from @wdprlib/ast)
25
+ export type { WikitextMode, WikitextSettings } from "@wdprlib/ast";
26
+ export { createSettings, DEFAULT_SETTINGS } from "@wdprlib/ast";
package/src/libs/highlighter/engine.ts ADDED
@@ -0,0 +1,352 @@
1
+ /**
2
+ *
3
+ * Tokenizer and renderer for the Text_Highlighter-compatible syntax
4
+ * highlighting engine. This is a faithful TypeScript port of the
5
+ * PEAR Text_Highlighter 0.5.1 PHP library's `_getToken` algorithm and
6
+ * HTML renderer.
7
+ *
8
+ * The engine processes source code through a state-machine-based tokenizer
9
+ * that assigns CSS class names to each token, then renders the tokens as
10
+ * `<span class="hl-*">` elements.
11
+ *
12
+ * @module
13
+ */
14
+
15
+ import type { LanguageDefinition } from "./types";
16
+
17
+ /** A single highlighted token with its CSS class and text content. */
18
+ interface Token {
19
+ /** CSS class name suffix (used as `hl-{class}`). */
20
+ class: string;
21
+ /** The literal text content of this token. */
22
+ content: string;
23
+ }
24
+
25
+ /**
26
+ * Tokenize source code using a language definition's state machine.
27
+ *
28
+ * This is a faithful port of PEAR Text_Highlighter's `_getToken` algorithm.
29
+ * The key difference from PHP is that JavaScript lacks `PREG_OFFSET_CAPTURE`,
30
+ * so capture group positions are computed from the match result.
31
+ *
32
+ * The input is preprocessed to normalize line endings, replace tabs with
33
+ * spaces, and ensure empty lines have at least one space character
34
+ * (matching PHP's behavior).
35
+ *
36
+ * @param def - The language definition describing the state machine.
37
+ * @param input - Raw source code string to tokenize.
38
+ * @returns Array of tokens, each with a CSS class and content string.
39
+ */
40
+ export function tokenize(def: LanguageDefinition, input: string): Token[] {
41
+ // Preprocess: same as PHP Html renderer's preprocess()
42
+ let str = input.replace(/\r\n/g, "\n");
43
+ // Replace empty lines with a space (PHP: preg_replace('~^$~m', " ", $str))
44
+ str = str.replace(/^$/gm, " ");
45
+ str = str.replace(/\t/g, " ");
46
+ // rtrim
47
+ str = str.replace(/\s+$/, "");
48
+
49
+ const len = str.length;
50
+ if (len === 0) return [];
51
+
52
+ let state = -1;
53
+ let pos = 0;
54
+ let lastinner = def.defClass;
55
+ let lastdelim = def.defClass;
56
+ let endpattern: RegExp | null = null;
57
+ const stateStack: {
58
+ state: number;
59
+ lastdelim: string;
60
+ lastinner: string;
61
+ endpattern: RegExp | null;
62
+ }[] = [];
63
+ const tokenStack: Token[] = [];
64
+ const result: Token[] = [];
65
+
66
+ function getToken(): Token | null {
67
+ if (tokenStack.length > 0) {
68
+ return tokenStack.pop()!;
69
+ }
70
+ if (pos >= len) {
71
+ return null;
72
+ }
73
+
74
+ // Check for end of current state
75
+ let endpos = -1;
76
+ let endmatch = "";
77
+ if (state !== -1 && endpattern) {
78
+ endpattern.lastIndex = pos;
79
+ const em = endpattern.exec(str);
80
+ if (em) {
81
+ endpos = em.index;
82
+ endmatch = em[0];
83
+ }
84
+ }
85
+
86
+ // Try to match patterns for current state
87
+ const reg = def.regs[state];
88
+ if (reg) {
89
+ reg.lastIndex = pos;
90
+ const m = reg.exec(str);
91
+
92
+ if (m) {
93
+ // Find which pattern (alternative) matched by checking capture groups
94
+ const countsArr = def.counts[state]!;
95
+ const statesArr = def.states[state]!;
96
+ const delimArr = def.delim[state]!;
97
+ const innerArr = def.inner[state]!;
98
+ let n = 1;
99
+ for (let i = 0; i < countsArr.length; i++) {
100
+ const count = countsArr[i]!;
101
+ if (n >= m.length) break;
102
+
103
+ // PHP: $m[$n][1] > -1 means the group captured something at a valid position
104
+ // JS: m[n] != null means the group participated in the match (including empty string captures)
105
+ if (m[n] != null && (endpos === -1 || m.index < endpos)) {
106
+ const matchStart = m.index;
107
+ const matchStr = m[n]!;
108
+
109
+ // Find actual position of this specific group within the match
110
+ // For alternation patterns, the matched group starts at m.index
111
+ // because only one alternative matches at a time
112
+ const groupStart = findGroupPosition(str, m, n, matchStart);
113
+
114
+ if (statesArr[i] !== -1) {
115
+ // State transition - push delimiter token
116
+ tokenStack.push({ class: delimArr[i]!, content: matchStr });
117
+ } else {
118
+ // Non-transitioning match
119
+ let inner = innerArr[i]!;
120
+
121
+ // Check parts first
122
+ const partDef = def.parts[state]?.[i];
123
+ if (partDef) {
124
+ const parts: Token[] = [];
125
+ let partpos = groupStart;
126
+ for (let j = 1; j <= count; j++) {
127
+ const subIdx = j + n;
128
+ if (subIdx >= m.length || m[subIdx] == null || m[subIdx] === "") continue;
129
+ const subStr = m[subIdx]!;
130
+ const subStart = str.indexOf(subStr, partpos);
131
+ if (subStart < 0) continue;
132
+ if (partDef[j]) {
133
+ if (subStart > partpos) {
134
+ parts.unshift({ class: inner, content: str.substring(partpos, subStart) });
135
+ }
136
+ parts.unshift({ class: partDef[j]!, content: subStr });
137
+ }
138
+ partpos = subStart + subStr.length;
139
+ }
140
+ if (partpos < groupStart + matchStr.length) {
141
+ parts.unshift({
142
+ class: inner,
143
+ content: str.substring(partpos, groupStart + matchStr.length),
144
+ });
145
+ }
146
+ tokenStack.push(...parts);
147
+ } else {
148
+ // Check keywords (fallback to state -1 if current state has no keyword def)
149
+ let kwDef = def.keywords[state]?.[i];
150
+ if (
151
+ !kwDef ||
152
+ kwDef === -1 ||
153
+ typeof kwDef !== "object" ||
154
+ Object.keys(kwDef).length === 0
155
+ ) {
156
+ kwDef = def.keywords[-1]?.[i];
157
+ }
158
+ if (kwDef && kwDef !== -1 && typeof kwDef === "object") {
159
+ for (const [group, re] of Object.entries(kwDef)) {
160
+ if ((re as RegExp).test(matchStr)) {
161
+ inner = def.kwmap[group] ?? inner;
162
+ break;
163
+ }
164
+ }
165
+ }
166
+ tokenStack.push({ class: inner, content: matchStr });
167
+ }
168
+ }
169
+
170
+ // Emit text before match (pushed after so it pops first)
171
+ if (groupStart > pos) {
172
+ tokenStack.push({ class: lastinner, content: str.substring(pos, groupStart) });
173
+ }
174
+
175
+ pos = groupStart + matchStr.length;
176
+
177
+ // Handle state transition
178
+ if (statesArr[i] !== -1) {
179
+ stateStack.push({ state, lastdelim, lastinner, endpattern });
180
+ lastinner = innerArr[i]!;
181
+ lastdelim = delimArr[i]!;
182
+ const prevState = state;
183
+ state = statesArr[i]!;
184
+
185
+ // Get end pattern for new state
186
+ const endRe = def.end[state];
187
+
188
+ // Handle substitution in end pattern (requires new RegExp)
189
+ if (def.subst[prevState]?.[i] && endRe) {
190
+ let epSource = endRe.source;
191
+ for (let k = 0; k <= count; k++) {
192
+ const subIdx = n + k;
193
+ if (subIdx >= m.length || m[subIdx] == null) break;
194
+ const quoted = escapeRegex(m[subIdx]!);
195
+ epSource = epSource.replace(`%${k}%`, quoted);
196
+ epSource = epSource.replace(`%b${k}%`, matchingBrackets(quoted));
197
+ }
198
+ endpattern = new RegExp(epSource, endRe.flags);
199
+ } else {
200
+ // Reuse existing RegExp object (no substitution needed)
201
+ endpattern = endRe ?? null;
202
+ }
203
+ }
204
+
205
+ return tokenStack.pop()!;
206
+ }
207
+ n += count + 1;
208
+ }
209
+ }
210
+ }
211
+
212
+ // Handle end of state
213
+ if (endpos > -1) {
214
+ // Always push delimiter token (even for zero-width matches) to match PHP behavior
215
+ tokenStack.push({ class: lastdelim, content: endmatch });
216
+ if (endpos > pos) {
217
+ tokenStack.push({ class: lastinner, content: str.substring(pos, endpos) });
218
+ }
219
+ const prev = stateStack.pop()!;
220
+ state = prev.state;
221
+ lastdelim = prev.lastdelim;
222
+ lastinner = prev.lastinner;
223
+ endpattern = prev.endpattern;
224
+ pos = endpos + endmatch.length;
225
+ if (tokenStack.length > 0) {
226
+ return tokenStack.pop()!;
227
+ }
228
+ // Zero-width end pattern with no preceding content: continue to next token
229
+ return getToken();
230
+ }
231
+
232
+ // No match - consume rest as default class
233
+ const p = pos;
234
+ pos = len;
235
+ return { class: lastinner, content: str.substring(p) };
236
+ }
237
+
238
+ let token: Token | null;
239
+ while ((token = getToken()) !== null) {
240
+ result.push(token);
241
+ }
242
+
243
+ return result;
244
+ }
245
+
246
+ /**
247
+ * Find the actual position of capture group `n` within the source string.
248
+ *
249
+ * For alternation patterns (`a|b|c`), the matched alternative starts at
250
+ * the overall match position (`m.index`). This function locates the
251
+ * capture group's substring within the source, searching from `matchStart`.
252
+ *
253
+ * @param str - The full source string.
254
+ * @param m - The regex match result.
255
+ * @param n - The capture group index.
256
+ * @param matchStart - The starting position of the overall match.
257
+ * @returns The position of the capture group within the source string.
258
+ */
259
+ function findGroupPosition(str: string, m: RegExpExecArray, n: number, matchStart: number): number {
260
+ // The overall match m[0] starts at m.index
261
+ // The capture group m[n] is a substring of m[0]
262
+ // Find where m[n] starts within the string, searching from matchStart
263
+ const groupStr = m[n]!;
264
+ const idx = str.indexOf(groupStr, matchStart);
265
+ return idx >= 0 ? idx : matchStart;
266
+ }
267
+
268
+ /**
269
+ * Render an array of tokens to HTML with `hl-*` class spans.
270
+ *
271
+ * This is a faithful port of Text_Highlighter's HTML renderer:
272
+ * - Adjacent tokens with the same class are merged into a single `<span>`.
273
+ * - All text is wrapped in spans (no unwrapped text nodes).
274
+ * - The output is wrapped in `<div class="hl-main"><pre>...</pre></div>`.
275
+ *
276
+ * @param tokens - Array of tokens produced by {@link tokenize}.
277
+ * @returns Complete HTML string for the highlighted code block.
278
+ */
279
+ export function renderTokens(tokens: Token[]): string {
280
+ if (tokens.length === 0) return "";
281
+
282
+ let html = "";
283
+ let lastClass = "";
284
+
285
+ for (const token of tokens) {
286
+ if (token.content.length === 0) continue;
287
+ const escaped = escapeHtml(token.content);
288
+ if (token.class !== lastClass) {
289
+ if (lastClass) {
290
+ html += "</span>";
291
+ }
292
+ html += `<span class="hl-${token.class}">`;
293
+ lastClass = token.class;
294
+ }
295
+ html += escaped;
296
+ }
297
+
298
+ if (lastClass) {
299
+ html += "</span>";
300
+ }
301
+
302
+ return `<div class="hl-main"><pre>${html}</pre></div>`;
303
+ }
304
+
305
+ /**
306
+ * Escape HTML special characters for use inside highlighted code spans.
307
+ *
308
+ * @param str - Raw text to escape.
309
+ * @returns HTML-safe string.
310
+ */
311
+ function escapeHtml(str: string): string {
312
+ return str
313
+ .replace(/&/g, "&amp;")
314
+ .replace(/</g, "&lt;")
315
+ .replace(/>/g, "&gt;")
316
+ .replace(/"/g, "&quot;");
317
+ }
318
+
319
+ /**
320
+ * Escape regex special characters in a string for safe use in `new RegExp()`.
321
+ *
322
+ * @param str - Raw string to escape.
323
+ * @returns Regex-safe string.
324
+ */
325
+ function escapeRegex(str: string): string {
326
+ return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
327
+ }
328
+
329
+ /**
330
+ * Swap bracket characters to their matching counterparts.
331
+ *
332
+ * Used for end-pattern substitution where the closing delimiter is the
333
+ * mirror of the opening delimiter (e.g., `<` becomes `>`).
334
+ *
335
+ * @param str - String containing bracket characters.
336
+ * @returns String with each bracket replaced by its counterpart.
337
+ */
338
+ function matchingBrackets(str: string): string {
339
+ return str.replace(/[()<>[\]{}]/g, (c) => {
340
+ const map: Record<string, string> = {
341
+ "(": ")",
342
+ ")": "(",
343
+ "<": ">",
344
+ ">": "<",
345
+ "[": "]",
346
+ "]": "[",
347
+ "{": "}",
348
+ "}": "{",
349
+ };
350
+ return map[c] ?? c;
351
+ });
352
+ }
package/src/libs/highlighter/index.ts ADDED
@@ -0,0 +1,70 @@
1
+ /**
2
+ *
3
+ * TypeScript port of PEAR Text_Highlighter 0.5.1 for server-side
4
+ * syntax highlighting of `[[code]]` blocks.
5
+ *
6
+ * Supports the same languages as Wikidot's original PHP implementation:
7
+ * CSS, C++, Diff, DTD, HTML, Java, JavaScript, PHP, Python, Ruby, SQL,
8
+ * and XML. Perl is excluded because its definition relies on PCRE-only
9
+ * regex features.
10
+ *
11
+ * Original: http://pear.php.net/package/Text_Highlighter
12
+ * Author: Andrey Demenev <demenev@on-line.jar.ru>
13
+ * Copyright: 2004 Andrey Demenev
14
+ * License: PHP License 3.0 (http://www.php.net/license/3_0.txt)
15
+ *
16
+ * @module
17
+ */
18
+
19
+ import type { LanguageDefinition } from "./types";
20
+ import { tokenize, renderTokens } from "./engine";
21
+ import { cssLang } from "./languages/css";
22
+ import { cppLang } from "./languages/cpp";
23
+ import { diffLang } from "./languages/diff";
24
+ import { dtdLang } from "./languages/dtd";
25
+ import { htmlLang } from "./languages/html";
26
+ import { javaLang } from "./languages/java";
27
+ import { javascriptLang } from "./languages/javascript";
28
+ import { phpLang } from "./languages/php";
29
+ import { pythonLang } from "./languages/python";
30
+ import { rubyLang } from "./languages/ruby";
31
+ import { sqlLang } from "./languages/sql";
32
+ import { xmlLang } from "./languages/xml";
33
+
34
/**
 * Lookup table from lowercase language name to its highlighter definition.
 *
 * `html` and `xhtml` both map to the same HTML definition. Perl is
 * deliberately absent (its definition needs PCRE-only regex features).
 */
const LANGUAGES: Record<string, LanguageDefinition> = Object.fromEntries<LanguageDefinition>([
  ["css", cssLang],
  ["cpp", cppLang],
  ["diff", diffLang],
  ["dtd", dtdLang],
  ["html", htmlLang],
  ["java", javaLang],
  ["javascript", javascriptLang],
  // perl: excluded (PCRE-only features)
  ["php", phpLang],
  ["python", pythonLang],
  ["ruby", rubyLang],
  ["sql", sqlLang],
  ["xml", xmlLang],
  // Alias: XHTML is highlighted with the HTML definition.
  ["xhtml", htmlLang],
]);
55
+
56
/**
 * Highlight source code using the Text_Highlighter-compatible engine.
 *
 * The language name is lowercased and looked up among the registry's own
 * keys only. Names that merely exist on `Object.prototype` (for example
 * `"constructor"` or `"toString"`) are treated as unsupported instead of
 * being handed to the tokenizer as if they were language definitions.
 *
 * @param code - Raw source code to highlight.
 * @param language - Programming language name (case-insensitive).
 * @returns HTML string with `hl-*` class spans wrapped in
 * `<div class="hl-main"><pre>...</pre></div>`, or `null` if the
 * language is not supported.
 */
export function highlight(code: string, language: string): string | null {
  const key = language.toLowerCase();
  // A plain `LANGUAGES[key]` also resolves inherited properties, which are
  // truthy but are not language definitions — guard with an own-property check.
  if (!Object.prototype.hasOwnProperty.call(LANGUAGES, key)) return null;
  const def = LANGUAGES[key];
  if (!def) return null;
  const tokens = tokenize(def, code);
  return renderTokens(tokens);
}