@wdprlib/parser 3.1.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/dist/index.cjs +295 -118
  2. package/dist/index.js +272 -95
  3. package/package.json +5 -3
  4. package/src/index.ts +163 -0
  5. package/src/lexer/index.ts +20 -0
  6. package/src/lexer/lexer.ts +687 -0
  7. package/src/lexer/tokens.ts +141 -0
  8. package/src/parser/constants.ts +173 -0
  9. package/src/parser/depth.ts +251 -0
  10. package/src/parser/index.ts +18 -0
  11. package/src/parser/parse.ts +315 -0
  12. package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
  13. package/src/parser/postprocess/index.ts +15 -0
  14. package/src/parser/postprocess/spanStrip.ts +697 -0
  15. package/src/parser/preprocess/expr.ts +265 -0
  16. package/src/parser/preprocess/index.ts +38 -0
  17. package/src/parser/preprocess/typography.ts +67 -0
  18. package/src/parser/preprocess/utils.ts +250 -0
  19. package/src/parser/preprocess/whitespace.ts +111 -0
  20. package/src/parser/rules/block/align.ts +282 -0
  21. package/src/parser/rules/block/bibliography.ts +359 -0
  22. package/src/parser/rules/block/block-list.ts +689 -0
  23. package/src/parser/rules/block/blockquote.ts +238 -0
  24. package/src/parser/rules/block/center.ts +87 -0
  25. package/src/parser/rules/block/clear-float.ts +75 -0
  26. package/src/parser/rules/block/code.ts +187 -0
  27. package/src/parser/rules/block/collapsible.ts +337 -0
  28. package/src/parser/rules/block/comment.ts +73 -0
  29. package/src/parser/rules/block/content-separator.ts +79 -0
  30. package/src/parser/rules/block/definition-list.ts +270 -0
  31. package/src/parser/rules/block/div.ts +400 -0
  32. package/src/parser/rules/block/embed-block.ts +153 -0
  33. package/src/parser/rules/block/footnoteblock.ts +200 -0
  34. package/src/parser/rules/block/heading.ts +142 -0
  35. package/src/parser/rules/block/horizontal-rule.ts +61 -0
  36. package/src/parser/rules/block/html.ts +222 -0
  37. package/src/parser/rules/block/iframe.ts +239 -0
  38. package/src/parser/rules/block/iftags.ts +150 -0
  39. package/src/parser/rules/block/include.ts +179 -0
  40. package/src/parser/rules/block/index.ts +127 -0
  41. package/src/parser/rules/block/list.ts +244 -0
  42. package/src/parser/rules/block/math.ts +183 -0
  43. package/src/parser/rules/block/module/backlinks/index.ts +31 -0
  44. package/src/parser/rules/block/module/backlinks/types.ts +21 -0
  45. package/src/parser/rules/block/module/categories/index.ts +34 -0
  46. package/src/parser/rules/block/module/categories/types.ts +21 -0
  47. package/src/parser/rules/block/module/css/index.ts +37 -0
  48. package/src/parser/rules/block/module/iftags/condition.ts +109 -0
  49. package/src/parser/rules/block/module/iftags/index.ts +26 -0
  50. package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
  51. package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
  52. package/src/parser/rules/block/module/iftags/types.ts +63 -0
  53. package/src/parser/rules/block/module/include/index.ts +20 -0
  54. package/src/parser/rules/block/module/include/resolve.ts +556 -0
  55. package/src/parser/rules/block/module/index.ts +122 -0
  56. package/src/parser/rules/block/module/join/index.ts +34 -0
  57. package/src/parser/rules/block/module/join/types.ts +23 -0
  58. package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
  59. package/src/parser/rules/block/module/listpages/extract.ts +410 -0
  60. package/src/parser/rules/block/module/listpages/index.ts +83 -0
  61. package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
  62. package/src/parser/rules/block/module/listpages/parser.ts +106 -0
  63. package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
  64. package/src/parser/rules/block/module/listpages/types.ts +513 -0
  65. package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
  66. package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
  67. package/src/parser/rules/block/module/listusers/extract.ts +45 -0
  68. package/src/parser/rules/block/module/listusers/index.ts +36 -0
  69. package/src/parser/rules/block/module/listusers/parser.ts +54 -0
  70. package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
  71. package/src/parser/rules/block/module/listusers/types.ts +93 -0
  72. package/src/parser/rules/block/module/mapping.ts +61 -0
  73. package/src/parser/rules/block/module/page-tree/index.ts +38 -0
  74. package/src/parser/rules/block/module/page-tree/types.ts +29 -0
  75. package/src/parser/rules/block/module/rate/index.ts +28 -0
  76. package/src/parser/rules/block/module/rate/types.ts +19 -0
  77. package/src/parser/rules/block/module/resolve.ts +411 -0
  78. package/src/parser/rules/block/module/types-common.ts +59 -0
  79. package/src/parser/rules/block/module/types.ts +61 -0
  80. package/src/parser/rules/block/module/utils.ts +43 -0
  81. package/src/parser/rules/block/module/walk.ts +380 -0
  82. package/src/parser/rules/block/module.ts +164 -0
  83. package/src/parser/rules/block/orphan-li.ts +177 -0
  84. package/src/parser/rules/block/paragraph.ts +157 -0
  85. package/src/parser/rules/block/table-block.ts +726 -0
  86. package/src/parser/rules/block/table.ts +441 -0
  87. package/src/parser/rules/block/tabview.ts +331 -0
  88. package/src/parser/rules/block/toc.ts +129 -0
  89. package/src/parser/rules/block/utils.ts +615 -0
  90. package/src/parser/rules/index.ts +49 -0
  91. package/src/parser/rules/inline/anchor-name.ts +154 -0
  92. package/src/parser/rules/inline/anchor.ts +327 -0
  93. package/src/parser/rules/inline/bibcite.ts +153 -0
  94. package/src/parser/rules/inline/bold.ts +86 -0
  95. package/src/parser/rules/inline/color.ts +140 -0
  96. package/src/parser/rules/inline/comment.ts +90 -0
  97. package/src/parser/rules/inline/equation-ref.ts +115 -0
  98. package/src/parser/rules/inline/expr.ts +526 -0
  99. package/src/parser/rules/inline/footnote.ts +223 -0
  100. package/src/parser/rules/inline/guillemet.ts +64 -0
  101. package/src/parser/rules/inline/html.ts +132 -0
  102. package/src/parser/rules/inline/image.ts +328 -0
  103. package/src/parser/rules/inline/index.ts +150 -0
  104. package/src/parser/rules/inline/italic.ts +74 -0
  105. package/src/parser/rules/inline/line-break.ts +326 -0
  106. package/src/parser/rules/inline/link-anchor.ts +147 -0
  107. package/src/parser/rules/inline/link-single.ts +164 -0
  108. package/src/parser/rules/inline/link-star.ts +134 -0
  109. package/src/parser/rules/inline/link-triple.ts +267 -0
  110. package/src/parser/rules/inline/math-inline.ts +126 -0
  111. package/src/parser/rules/inline/monospace.ts +78 -0
  112. package/src/parser/rules/inline/raw.ts +262 -0
  113. package/src/parser/rules/inline/size.ts +244 -0
  114. package/src/parser/rules/inline/span.ts +424 -0
  115. package/src/parser/rules/inline/strikethrough.ts +115 -0
  116. package/src/parser/rules/inline/subscript.ts +84 -0
  117. package/src/parser/rules/inline/superscript.ts +84 -0
  118. package/src/parser/rules/inline/text.ts +84 -0
  119. package/src/parser/rules/inline/underline.ts +127 -0
  120. package/src/parser/rules/inline/user.ts +147 -0
  121. package/src/parser/rules/inline/utils.ts +344 -0
  122. package/src/parser/rules/types.ts +252 -0
  123. package/src/parser/rules/utils.ts +155 -0
  124. package/src/parser/toc.ts +130 -0
@@ -0,0 +1,63 @@
1
+ /**
2
+ *
3
+ * Type definitions for the IfTags conditional rendering module.
4
+ *
5
+ * `[[iftags]]` is a Wikidot block that conditionally renders its content
6
+ * based on the current page's tags. The condition syntax supports required
7
+ * tags (`+tag`), forbidden tags (`-tag`), and optional tags (bare `tag`).
8
+ *
9
+ * @module
10
+ */
11
+
/**
 * Structured form of the condition in an `[[iftags +tag -tag ...]]` block.
 *
 * Every token of the condition string lands in one of three buckets, and the
 * buckets are evaluated independently — each one has to pass:
 * - `required` (`+tag`): every listed tag is on the page (AND)
 * - `forbidden` (`-tag`): none of the listed tags is on the page (AND)
 * - `optional` (bare `tag`): at least one listed tag is on the page (OR)
 *
 * @example
 * `[[iftags +fruit -admin component template]]` becomes:
 * ```
 * { required: ["fruit"], forbidden: ["admin"], optional: ["component", "template"] }
 * ```
 */
export interface TagCondition {
  /** `+tag` entries — all of them must be present on the page. */
  required: string[];

  /** `-tag` entries — all of them must be absent from the page. */
  forbidden: string[];

  /** Bare `tag` entries — at least one must be present (OR logic). */
  optional: string[];

  /**
   * `true` if the condition held a lone `+` (the prefix without a tag name).
   * Wikidot reads that as "require an unnamed tag", which no page can
   * satisfy, so a condition consisting only of `+` evaluates to `false`
   * (Hide Always).
   */
  hasEmptyRequired?: boolean;

  /**
   * `true` if the condition held a lone `-` (the prefix without a tag name).
   * Wikidot reads that as "forbid nothing", which always holds, so a
   * condition consisting only of `-` evaluates to `true` (Show Always).
   */
  hasEmptyForbidden?: boolean;
}
54
+
/**
 * Supplies the tags of the page currently being rendered.
 *
 * Invoked during the resolve phase whenever an `[[iftags]]` condition has to
 * be evaluated; the host application provides the implementation because it
 * is the one that knows the current page's tag list.
 *
 * @returns The tag names attached to the current page
 */
export type IfTagsResolver = () => string[];
@@ -0,0 +1,20 @@
1
+ /**
2
+ *
3
+ * Include directive resolution for Wikidot's `[[include page]]` syntax.
4
+ *
5
+ * The include system performs text-level macro expansion before parsing. Each
6
+ * `[[include page | var=value]]` directive is replaced with the fetched page's
7
+ * content (after variable substitution), allowing block structures like `[[div]]`
8
+ * to span across include boundaries.
9
+ *
10
+ * Supports:
11
+ * - Same-site includes: `[[include page-name]]`
12
+ * - Cross-site includes: `[[include :site-name:page-name]]`
13
+ * - Variable substitution: `[[include page | key=value]]` replaces `{$key}` in the included content
14
+ * - Iterative expansion with configurable iteration limit (Wikidot-compatible)
15
+ *
16
+ * @module
17
+ */
18
+
19
+ export { resolveIncludes, resolveIncludesAsync } from "./resolve";
20
+ export type { IncludeFetcher, AsyncIncludeFetcher, ResolveIncludesOptions } from "./resolve";
@@ -0,0 +1,556 @@
1
+ /**
2
+ *
3
+ * Text-level expansion of `[[include]]` directives.
4
+ *
5
+ * Unlike most Wikidot constructs that are handled during AST parsing, include
6
+ * directives are resolved as a text-level macro expansion BEFORE the main parse.
7
+ * This is necessary because included content may contain partial block structures
8
+ * (e.g., an opening `[[div]]` tag in one include and its closing `[[/div]]` in
9
+ * another) that must be visible to the parser as a single continuous text.
10
+ *
11
+ * The resolution process follows Wikidot's iterative (do-while) approach:
12
+ * 1. Scan the entire source text for `[[include page | var=val]]` patterns
13
+ * 2. Replace ALL matches in one pass (each fetched, variable-substituted)
14
+ * 3. Compare the result with the previous source
15
+ * 4. Repeat until no changes occur or `maxIterations` is reached
16
+ *
17
+ * This differs from a DFS recursive approach: each iteration expands one
18
+ * "layer" of includes across the whole source, rather than drilling into
19
+ * each include immediately. This allows patterns like inc-loop (where the
20
+ * same page is included with different variables across iterations) to work.
21
+ *
22
+ * @module
23
+ */
24
+
25
+ import type { PageRef, VariableMap, WikitextSettings } from "@wdprlib/ast";
26
+
/**
 * Synchronous page loader used while expanding `[[include]]` directives.
 * Yields the wikitext source of the referenced page, or `null` when no such
 * page exists.
 *
 * @security Page references originate from user-written wikitext.
 * Implementations should validate and sanitize them before using them in
 * database queries or file system access.
 */
export type IncludeFetcher = (pageRef: PageRef) => string | null;
36
+
/**
 * Promise-returning page loader used while expanding `[[include]]`
 * directives. Resolves to the wikitext source of the referenced page, or to
 * `null` when no such page exists.
 *
 * @security Page references originate from user-written wikitext.
 * Implementations should validate and sanitize them before using them in
 * database queries or file system access.
 */
export type AsyncIncludeFetcher = (pageRef: PageRef) => Promise<string | null>;
46
+
/**
 * Settings accepted by `resolveIncludes` and `resolveIncludesAsync`.
 */
export interface ResolveIncludesOptions {
  /**
   * Upper bound on the number of expansion passes (default: 10).
   *
   * One pass swaps every `[[include]]` directive in the current text for
   * its fetched content. Passes stop as soon as a pass leaves the text
   * unchanged, or once this many passes have run.
   */
  maxIterations?: number;
  /** Wikitext settings. When `enablePageSyntax` is false, nothing is expanded. */
  settings?: WikitextSettings;
}
62
+
/**
 * Replace every `[[include]]` directive in `source` with the included text.
 *
 * Expansion is iterative, the way Wikidot does it: one pass substitutes all
 * directives currently in the text with their fetched, variable-substituted
 * content, and passes repeat until the text stops changing or
 * `maxIterations` passes have run.
 *
 * Fetch results are memoized per call under a normalized page key, so the
 * supplied fetcher runs at most once per page. A fetcher that throws is
 * treated as if the page did not exist.
 *
 * @param source - Wikitext that may contain include directives
 * @param fetcher - Loads the source of a referenced page (`null` if absent)
 * @param options - Iteration limit and wikitext settings; when `settings` is
 *   given and its `enablePageSyntax` is falsy, `source` is returned untouched
 * @returns The wikitext with include directives expanded
 *
 * @example
 * ```ts
 * const expanded = resolveIncludes(source, fetcher);
 * const ast = parse(expanded);
 * ```
 */
export function resolveIncludes(
  source: string,
  fetcher: IncludeFetcher,
  options?: ResolveIncludesOptions,
): string {
  const settings = options?.settings;
  if (settings && !settings.enablePageSyntax) {
    return source;
  }

  const iterationLimit = options?.maxIterations ?? 10;
  const fetched = new Map<string, string | null>();

  const memoizedFetch: IncludeFetcher = (pageRef) => {
    const cacheKey = normalizePageKey(pageRef);
    if (fetched.has(cacheKey)) {
      // `null` is a legitimate cached value ("page does not exist").
      return fetched.get(cacheKey) as string | null;
    }

    let text: string | null = null;
    try {
      text = fetcher(pageRef);
    } catch {
      // Best effort: a failing fetch is reported as a missing page.
      text = null;
    }
    fetched.set(cacheKey, text);
    return text;
  };

  return expandIterative(source, memoizedFetch, iterationLimit);
}
106
+
/**
 * Promise-based counterpart of {@link resolveIncludes}.
 *
 * Expands every `[[include]]` directive using a fetcher that returns a
 * promise, so page sources can come from a database or another async store.
 * Fetch results are memoized per call under a normalized page key, and a
 * fetcher that throws or rejects is treated as if the page did not exist.
 *
 * @param source - Wikitext that may contain include directives
 * @param fetcher - Asynchronously loads a referenced page (`null` if absent)
 * @param options - Iteration limit and wikitext settings; when `settings` is
 *   given and its `enablePageSyntax` is falsy, `source` is returned untouched
 * @returns A promise of the wikitext with include directives expanded
 *
 * @example
 * ```ts
 * const expanded = await resolveIncludesAsync(source, async (ref) => {
 *   return await db.getPageContent(ref.page);
 * });
 * const ast = parse(expanded);
 * ```
 */
export async function resolveIncludesAsync(
  source: string,
  fetcher: AsyncIncludeFetcher,
  options?: ResolveIncludesOptions,
): Promise<string> {
  const settings = options?.settings;
  if (settings && !settings.enablePageSyntax) {
    return source;
  }

  const iterationLimit = options?.maxIterations ?? 10;
  const fetched = new Map<string, string | null>();

  const memoizedFetch: AsyncIncludeFetcher = async (pageRef) => {
    const cacheKey = normalizePageKey(pageRef);
    if (fetched.has(cacheKey)) {
      // `null` is a legitimate cached value ("page does not exist").
      return fetched.get(cacheKey) as string | null;
    }

    let text: string | null = null;
    try {
      text = await fetcher(pageRef);
    } catch {
      // Best effort: a failing fetch is reported as a missing page.
      text = null;
    }
    fetched.set(cacheKey, text);
    return text;
  };

  return expandIterativeAsync(source, memoizedFetch, iterationLimit);
}
150
+
/**
 * Recognizes the `[[include` opener, which Wikidot only honours at the
 * beginning of a line.
 *
 * - `m` lets `^` match after every line break, not just at offset 0.
 * - `i` accepts any letter case for the directive name.
 * - `g` allows repeated `exec` calls to walk through a source; because that
 *   makes the regex stateful (`lastIndex`), callers should work on a copy.
 * - The trailing `\s` is the separator between the directive name and its
 *   arguments.
 *
 * Only the opener is matched here. Where a directive ends is decided by
 * {@link scanIncludeDirectives}, which balances nested `[[ ... ]]` so block
 * markup inside a parameter value does not end the directive at the first
 * `]]`.
 */
const INCLUDE_OPEN_PATTERN = /^\[\[include\s/gim;
162
+
/** One `[[include ...]]` directive found in a source, with its bracket-balanced span. */
interface IncludeDirectiveMatch {
  /** Offset of the directive's opening `[[`. */
  start: number;
  /** Offset of the first character after the directive's closing `]]`. */
  end: number;
  /** The directive's arguments: everything after the `[[include ` opener, up to the closing `]]`. */
  inner: string;
}
172
+
/**
 * Decide whether the text scanned so far inside a directive (everything
 * after `[[include `, up to a candidate closing `]]`) contains an attribute
 * section.
 *
 * Attributes are present when there is a `|` anywhere, or when the page-name
 * token is followed by whitespace and then more non-whitespace text
 * (space-separated parameters such as `tmpl key=value`). A lone page name,
 * with or without surrounding whitespace, has no attributes — which lets the
 * caller keep a stray `]` after the closing `]]` out of the page reference.
 *
 * @param innerSoFar - Directive content preceding the candidate close
 * @returns `true` if an attribute section is present
 */
function hasAttributes(innerSoFar: string): boolean {
  if (innerSoFar.indexOf("|") !== -1) {
    return true;
  }
  // optional leading whitespace, the page-name token, a whitespace run, and
  // then at least one more non-whitespace character
  return /^\s*\S+\s+\S/.test(innerSoFar);
}
190
+
/**
 * Check whether `pos` is effectively at the end of its line: only spaces,
 * tabs or carriage returns may appear between `pos` and the next line feed
 * (or the end of the string).
 *
 * @param source - Text being scanned
 * @param pos - Offset to start looking from
 * @returns `true` if nothing but blank characters precedes the line end
 */
function isRestOfLineBlank(source: string, pos: number): boolean {
  let cursor = pos;
  while (cursor < source.length) {
    switch (source.charAt(cursor)) {
      case "\n":
        return true;
      case " ":
      case "\t":
      case "\r":
        cursor += 1;
        break;
      default:
        return false;
    }
  }
  // Ran off the end of the string without meeting visible text.
  return true;
}
203
+
/**
 * Locate the end of the include directive whose opening `[[` is at `start`.
 *
 * Walks the text from `start`, tracking two independent depths:
 *
 * - block depth, raised by `[[` and lowered by `]]`;
 * - link depth, raised by `[[[` and lowered by `]]]`. While a link is open,
 *   everything other than `[[[` / `]]]` is skipped as literal text, so a
 *   plain `[[` or `]]` inside a link never touches the block depth. This is
 *   what lets a link sit directly against the directive close, as in
 *   `...|cap=[[[link]]]]]`.
 *   NOTE(review): as a consequence, a `[[[` that is never closed keeps every
 *   later `]]` from being considered, so such a directive does not close.
 *
 * A `]]` that brings the block depth to zero or below is a candidate close.
 * If the directive has an attribute section (see `hasAttributes`), any `]`
 * characters immediately after the candidate are absorbed into the final
 * attribute value (`[[include foo |k=--]]]` keeps `--]`); without attributes
 * they are left outside so `[[include my-page]]]` still resolves `my-page`.
 * The candidate is accepted when it lies on the opener's own line, or when
 * only blank characters follow it on its line (the standalone `]]` ending a
 * multi-line directive). A mid-line `]]` on a continuation line is not
 * accepted, and scanning continues.
 *
 * @param source - Text being scanned
 * @param start - Offset of the directive's opening `[[`
 * @param contentStart - Offset just past the `[[include ` opener
 * @returns Offset just past the accepted close, or `-1` if there is none
 */
function findIncludeClose(source: string, start: number, contentStart: number): number {
  const openerLineEnd = source.indexOf("\n", start);
  let blockDepth = 0;
  let linkDepth = 0;
  let pos = start;

  while (pos < source.length) {
    if (source.startsWith("[[[", pos)) {
      linkDepth += 1;
      pos += 3;
      continue;
    }

    if (linkDepth > 0) {
      if (source.startsWith("]]]", pos)) {
        linkDepth -= 1;
        pos += 3;
      } else {
        // Literal link content.
        pos += 1;
      }
      continue;
    }

    if (source.startsWith("[[", pos)) {
      blockDepth += 1;
      pos += 2;
      continue;
    }

    if (!source.startsWith("]]", pos)) {
      pos += 1;
      continue;
    }

    // At a block-level `]]`.
    const closeStart = pos;
    blockDepth -= 1;
    pos += 2;
    if (blockDepth > 0) {
      continue;
    }

    // Greedy tail: trailing `]` belong to the last attribute value, but only
    // when there is an attribute section to own them. A bare `=` is not
    // taken as evidence of attributes because page names may contain `=`.
    if (hasAttributes(source.slice(contentStart, closeStart))) {
      while (pos < source.length && source[pos] === "]") {
        pos += 1;
      }
    }

    const closesOnOpenerLine = openerLineEnd === -1 || closeStart < openerLineEnd;
    if (closesOnOpenerLine || isRestOfLineBlank(source, pos)) {
      return pos;
    }
  }

  return -1;
}

/**
 * Collect every `[[include ...]]` directive in `source`, in source order.
 *
 * Openers are found with a private copy of `INCLUDE_OPEN_PATTERN` (so the
 * shared regex's `lastIndex` is never disturbed); the extent of each
 * directive is then determined by bracket balancing rather than by the first
 * `]]`, so parameter values may contain nested block markup such as
 * `[[span]]...[[/span]]` or triple-bracket links.
 *
 * An opener for which no acceptable close exists is left untouched, and the
 * search resumes just past its `[[` so that a later, well-formed directive
 * can still be found.
 *
 * @param source - Text to scan
 * @returns The located directives; `inner` excludes the final two closing
 *   brackets
 */
function scanIncludeDirectives(source: string): IncludeDirectiveMatch[] {
  const found: IncludeDirectiveMatch[] = [];
  const openerRe = new RegExp(INCLUDE_OPEN_PATTERN.source, INCLUDE_OPEN_PATTERN.flags);

  for (let hit = openerRe.exec(source); hit !== null; hit = openerRe.exec(source)) {
    const start = hit.index;
    const contentStart = start + hit[0].length;
    const end = findIncludeClose(source, start, contentStart);

    if (end === -1) {
      openerRe.lastIndex = start + 2;
    } else {
      found.push({ start, end, inner: source.slice(contentStart, end - 2) });
      openerRe.lastIndex = end;
    }
  }

  return found;
}
320
+
/**
 * Split the inner text of an `[[include ...]]` directive into the page to
 * include and the variables to pass to it.
 *
 * The inner text looks like `page-name key1=value1 | key2=value2`:
 * assignments may follow the page name in the first segment, separated from
 * it by whitespace, and/or appear in further `|`-separated segments. The
 * page name may carry a cross-site prefix (`:site-name:page-name`).
 *
 * The page name ends at the first whitespace character of any kind. Line
 * feeds are turned into spaces up front, but a tab or the `\r` of a CRLF
 * line ending can also follow the name and must not become part of it.
 *
 * @param inner - The text between `[[include` and `]]`
 * @returns The parsed page location and variable map
 */
function parseIncludeDirective(inner: string): { location: PageRef; variables: VariableMap } {
  // Directives may span several lines; treat line feeds as plain separators.
  const segments = inner.replace(/\n/g, " ").split("|");
  const head = (segments[0] ?? "").trim();

  // First segment: "page-name key=value" → target="page-name", rest="key=value".
  const varSegments: string[] = [];
  const separator = head.search(/\s/);
  const target = separator === -1 ? head : head.slice(0, separator);
  if (separator !== -1) {
    const rest = head.slice(separator + 1).trim();
    if (rest) {
      varSegments.push(rest);
    }
  }

  // Remaining segments: one assignment per `|`-separated piece.
  for (const rawSegment of segments.slice(1)) {
    const segment = rawSegment.trim();
    if (segment) {
      varSegments.push(segment);
    }
  }

  // Build the variable map, honouring Wikidot's default-value idiom.
  //
  // A template supplies a default for a forwarded variable by repeating the
  // key: `key={$key} | key=default`. After the outer include has substituted
  // `{$key}`, the pair reads as one of:
  //   - `key=value | key=default`   (caller supplied a value)
  //   - `key= | key=default`        (caller passed an empty value)
  //   - `key={$key} | key=default`  (caller omitted it; placeholder remains)
  // The first *concrete* value for a key wins. An empty string or a
  // still-unresolved `{$...}` placeholder is not concrete, so a later
  // default can replace it. A key seen only with empty/placeholder values
  // keeps the first of those.
  const unresolvedPlaceholder = /^\{\$[^}]*\}$/;
  const variables: VariableMap = {};
  const hasConcrete = new Set<string>();
  for (const segment of varSegments) {
    const eqIndex = segment.indexOf("=");
    if (eqIndex === -1) continue;
    const key = segment.slice(0, eqIndex).trim();
    if (!key) continue;
    const value = segment.slice(eqIndex + 1).trim();

    const isConcrete = value !== "" && !unresolvedPlaceholder.test(value);
    if (isConcrete) {
      if (!hasConcrete.has(key)) {
        variables[key] = value;
        hasConcrete.add(key);
      }
      // A later concrete value for the same key is only a default; skip it.
    } else if (!Object.prototype.hasOwnProperty.call(variables, key)) {
      // Own-property check (not `in`), so keys such as `toString` or
      // `constructor` are not mistaken for already-present via the prototype.
      variables[key] = value;
    }
  }

  // `:site:page` addresses another site; anything else, including a leading
  // `:` with no second colon, is a page on the current site.
  let location: PageRef = { site: null, page: target };
  if (target.startsWith(":")) {
    const secondColon = target.indexOf(":", 1);
    if (secondColon !== -1) {
      location = { site: target.slice(1, secondColon), page: target.slice(secondColon + 1) };
    }
  }

  return { location, variables };
}
425
+
/**
 * Produce the text that takes the place of one include directive.
 *
 * The directive's `inner` content is parsed into a page location and
 * variables, the page is fetched, and the variables are substituted into it.
 * When the fetcher reports the page as missing (`null`), an error block
 * naming the page is returned instead.
 *
 * @param inner - Directive content as reported by the directive scanner
 * @param fetcher - Loads the source of the referenced page
 * @returns Replacement wikitext for the directive
 */
function replaceOneInclude(inner: string, fetcher: IncludeFetcher): string {
  const parsed = parseIncludeDirective(inner);
  const pageSource = fetcher(parsed.location);
  return pageSource === null
    ? `[[div class="error-block"]]\nPage to be included "${parsed.location.page}" cannot be found!\n[[/div]]`
    : substituteVariables(pageSource, parsed.variables);
}
438
+
/**
 * Expand `[[include]]` directives pass by pass.
 *
 * A pass locates every directive in the current text and splices in its
 * replacement. Includes that arrive inside fetched content are not followed
 * immediately; they are picked up by the next pass.
 *
 * The loop ends when a pass finds no directives, when a pass leaves the text
 * unchanged, or after `maxIterations` passes.
 *
 * @param source - Starting wikitext
 * @param fetcher - Loads the source of a referenced page
 * @param maxIterations - Maximum number of passes
 * @returns The text after the last pass
 */
function expandIterative(source: string, fetcher: IncludeFetcher, maxIterations: number): string {
  let text = source;

  for (let pass = 0; pass < maxIterations; pass += 1) {
    const directives = scanIncludeDirectives(text);
    if (directives.length === 0) {
      return text;
    }

    // Rebuild the text: untouched stretch, replacement, untouched stretch, ...
    let rebuilt = "";
    let cursor = 0;
    for (const directive of directives) {
      rebuilt += text.slice(cursor, directive.start);
      rebuilt += replaceOneInclude(directive.inner, fetcher);
      cursor = directive.end;
    }
    rebuilt += text.slice(cursor);

    if (rebuilt === text) {
      return text;
    }
    text = rebuilt;
  }

  return text;
}
469
+
/**
 * Promise-based, pass-by-pass expansion of `[[include]]` directives.
 *
 * A pass locates every directive in the current text and splices in its
 * replacement. Pages are awaited one after another, in source order, so a
 * caching fetcher sees each page requested before the next one starts. A
 * page reported as missing (`null`) is replaced by an error block naming it.
 *
 * The loop ends when a pass finds no directives, when a pass leaves the text
 * unchanged, or after `maxIterations` passes.
 *
 * @param source - Starting wikitext
 * @param fetcher - Asynchronously loads the source of a referenced page
 * @param maxIterations - Maximum number of passes
 * @returns A promise of the text after the last pass
 */
async function expandIterativeAsync(
  source: string,
  fetcher: AsyncIncludeFetcher,
  maxIterations: number,
): Promise<string> {
  let text = source;

  for (let pass = 0; pass < maxIterations; pass += 1) {
    const directives = scanIncludeDirectives(text);
    if (directives.length === 0) {
      return text;
    }

    let rebuilt = "";
    let cursor = 0;
    for (const directive of directives) {
      rebuilt += text.slice(cursor, directive.start);

      const parsed = parseIncludeDirective(directive.inner);
      const pageSource = await fetcher(parsed.location);
      rebuilt +=
        pageSource === null
          ? `[[div class="error-block"]]\nPage to be included "${parsed.location.page}" cannot be found!\n[[/div]]`
          : substituteVariables(pageSource, parsed.variables);

      cursor = directive.end;
    }
    rebuilt += text.slice(cursor);

    if (rebuilt === text) {
      return text;
    }
    text = rebuilt;
  }

  return text;
}
510
+
/**
 * Turn a PageRef into a string key for the per-call fetch cache.
 *
 * Page names are lowercased so lookups are case-insensitive. A missing site
 * (`null`) and an empty site name are treated alike.
 *
 * The site and page are encoded as a pair rather than joined with `:`,
 * because `:` is also the category separator inside page names: joined
 * naively, the local page `component:box` and the page `box` on site
 * `component` would share the key `component:box`, and one include would be
 * served the other's cached content.
 *
 * @param location - The page reference to normalize
 * @returns An opaque key, equal for two references exactly when their site
 *   (null/empty treated alike) and lowercased page name are equal
 */
function normalizePageKey(location: PageRef): string {
  const site = location.site ?? "";
  const page = location.page.toLowerCase();
  return JSON.stringify([site, page]);
}
525
+
/**
 * Fill in the `{$key}` placeholders of an included page.
 *
 * For each entry of `variables` (taken from the include directive, e.g.
 * `[[include page | key=value]]`), every occurrence of `{$key}` in `content`
 * is replaced by the entry's value. Entries are applied one after another,
 * in the map's enumeration order. Placeholders with no matching entry are
 * left as they are.
 *
 * Values are inserted verbatim. The replacement is supplied through a
 * function because a replacement *string* would have `$$`, `$&`, `` $` ``
 * and `$'` interpreted as special patterns by `String.prototype.replace`,
 * corrupting values that happen to contain a dollar sign.
 *
 * @param content - The fetched page content containing `{$key}` placeholders
 * @param variables - Key-value pairs from the include directive
 * @returns Content with all matching placeholders substituted
 */
function substituteVariables(content: string, variables: VariableMap): string {
  let result = content;
  for (const [key, value] of Object.entries(variables)) {
    // The key is escaped so regex metacharacters in it match literally.
    const placeholder = new RegExp(`\\{\\$${escapeRegExp(key)}\\}`, "g");
    result = result.replace(placeholder, () => value);
  }
  return result;
}
546
+
/**
 * Make a string safe to embed in a `new RegExp()` pattern by putting a
 * backslash in front of every character that has a special meaning in
 * regular-expression syntax.
 *
 * @param str - The string to escape
 * @returns The string with each regex special character backslash-escaped
 */
function escapeRegExp(str: string): string {
  const specials = ".*+?^${}()|[]\\";
  let escaped = "";
  for (const ch of str) {
    escaped += specials.includes(ch) ? `\\${ch}` : ch;
  }
  return escaped;
}