@wdprlib/parser 3.1.2 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +295 -118
- package/dist/index.js +272 -95
- package/package.json +5 -3
- package/src/index.ts +163 -0
- package/src/lexer/index.ts +20 -0
- package/src/lexer/lexer.ts +687 -0
- package/src/lexer/tokens.ts +141 -0
- package/src/parser/constants.ts +173 -0
- package/src/parser/depth.ts +251 -0
- package/src/parser/index.ts +18 -0
- package/src/parser/parse.ts +315 -0
- package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
- package/src/parser/postprocess/index.ts +15 -0
- package/src/parser/postprocess/spanStrip.ts +697 -0
- package/src/parser/preprocess/expr.ts +265 -0
- package/src/parser/preprocess/index.ts +38 -0
- package/src/parser/preprocess/typography.ts +67 -0
- package/src/parser/preprocess/utils.ts +250 -0
- package/src/parser/preprocess/whitespace.ts +111 -0
- package/src/parser/rules/block/align.ts +282 -0
- package/src/parser/rules/block/bibliography.ts +359 -0
- package/src/parser/rules/block/block-list.ts +689 -0
- package/src/parser/rules/block/blockquote.ts +238 -0
- package/src/parser/rules/block/center.ts +87 -0
- package/src/parser/rules/block/clear-float.ts +75 -0
- package/src/parser/rules/block/code.ts +187 -0
- package/src/parser/rules/block/collapsible.ts +337 -0
- package/src/parser/rules/block/comment.ts +73 -0
- package/src/parser/rules/block/content-separator.ts +79 -0
- package/src/parser/rules/block/definition-list.ts +270 -0
- package/src/parser/rules/block/div.ts +400 -0
- package/src/parser/rules/block/embed-block.ts +153 -0
- package/src/parser/rules/block/footnoteblock.ts +200 -0
- package/src/parser/rules/block/heading.ts +142 -0
- package/src/parser/rules/block/horizontal-rule.ts +61 -0
- package/src/parser/rules/block/html.ts +222 -0
- package/src/parser/rules/block/iframe.ts +239 -0
- package/src/parser/rules/block/iftags.ts +150 -0
- package/src/parser/rules/block/include.ts +179 -0
- package/src/parser/rules/block/index.ts +127 -0
- package/src/parser/rules/block/list.ts +244 -0
- package/src/parser/rules/block/math.ts +183 -0
- package/src/parser/rules/block/module/backlinks/index.ts +31 -0
- package/src/parser/rules/block/module/backlinks/types.ts +21 -0
- package/src/parser/rules/block/module/categories/index.ts +34 -0
- package/src/parser/rules/block/module/categories/types.ts +21 -0
- package/src/parser/rules/block/module/css/index.ts +37 -0
- package/src/parser/rules/block/module/iftags/condition.ts +109 -0
- package/src/parser/rules/block/module/iftags/index.ts +26 -0
- package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
- package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
- package/src/parser/rules/block/module/iftags/types.ts +63 -0
- package/src/parser/rules/block/module/include/index.ts +20 -0
- package/src/parser/rules/block/module/include/resolve.ts +556 -0
- package/src/parser/rules/block/module/index.ts +122 -0
- package/src/parser/rules/block/module/join/index.ts +34 -0
- package/src/parser/rules/block/module/join/types.ts +23 -0
- package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
- package/src/parser/rules/block/module/listpages/extract.ts +410 -0
- package/src/parser/rules/block/module/listpages/index.ts +83 -0
- package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
- package/src/parser/rules/block/module/listpages/parser.ts +106 -0
- package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
- package/src/parser/rules/block/module/listpages/types.ts +513 -0
- package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
- package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
- package/src/parser/rules/block/module/listusers/extract.ts +45 -0
- package/src/parser/rules/block/module/listusers/index.ts +36 -0
- package/src/parser/rules/block/module/listusers/parser.ts +54 -0
- package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
- package/src/parser/rules/block/module/listusers/types.ts +93 -0
- package/src/parser/rules/block/module/mapping.ts +61 -0
- package/src/parser/rules/block/module/page-tree/index.ts +38 -0
- package/src/parser/rules/block/module/page-tree/types.ts +29 -0
- package/src/parser/rules/block/module/rate/index.ts +28 -0
- package/src/parser/rules/block/module/rate/types.ts +19 -0
- package/src/parser/rules/block/module/resolve.ts +411 -0
- package/src/parser/rules/block/module/types-common.ts +59 -0
- package/src/parser/rules/block/module/types.ts +61 -0
- package/src/parser/rules/block/module/utils.ts +43 -0
- package/src/parser/rules/block/module/walk.ts +380 -0
- package/src/parser/rules/block/module.ts +164 -0
- package/src/parser/rules/block/orphan-li.ts +177 -0
- package/src/parser/rules/block/paragraph.ts +157 -0
- package/src/parser/rules/block/table-block.ts +726 -0
- package/src/parser/rules/block/table.ts +441 -0
- package/src/parser/rules/block/tabview.ts +331 -0
- package/src/parser/rules/block/toc.ts +129 -0
- package/src/parser/rules/block/utils.ts +615 -0
- package/src/parser/rules/index.ts +49 -0
- package/src/parser/rules/inline/anchor-name.ts +154 -0
- package/src/parser/rules/inline/anchor.ts +327 -0
- package/src/parser/rules/inline/bibcite.ts +153 -0
- package/src/parser/rules/inline/bold.ts +86 -0
- package/src/parser/rules/inline/color.ts +140 -0
- package/src/parser/rules/inline/comment.ts +90 -0
- package/src/parser/rules/inline/equation-ref.ts +115 -0
- package/src/parser/rules/inline/expr.ts +526 -0
- package/src/parser/rules/inline/footnote.ts +223 -0
- package/src/parser/rules/inline/guillemet.ts +64 -0
- package/src/parser/rules/inline/html.ts +132 -0
- package/src/parser/rules/inline/image.ts +328 -0
- package/src/parser/rules/inline/index.ts +150 -0
- package/src/parser/rules/inline/italic.ts +74 -0
- package/src/parser/rules/inline/line-break.ts +326 -0
- package/src/parser/rules/inline/link-anchor.ts +147 -0
- package/src/parser/rules/inline/link-single.ts +164 -0
- package/src/parser/rules/inline/link-star.ts +134 -0
- package/src/parser/rules/inline/link-triple.ts +267 -0
- package/src/parser/rules/inline/math-inline.ts +126 -0
- package/src/parser/rules/inline/monospace.ts +78 -0
- package/src/parser/rules/inline/raw.ts +262 -0
- package/src/parser/rules/inline/size.ts +244 -0
- package/src/parser/rules/inline/span.ts +424 -0
- package/src/parser/rules/inline/strikethrough.ts +115 -0
- package/src/parser/rules/inline/subscript.ts +84 -0
- package/src/parser/rules/inline/superscript.ts +84 -0
- package/src/parser/rules/inline/text.ts +84 -0
- package/src/parser/rules/inline/underline.ts +127 -0
- package/src/parser/rules/inline/user.ts +147 -0
- package/src/parser/rules/inline/utils.ts +344 -0
- package/src/parser/rules/types.ts +252 -0
- package/src/parser/rules/utils.ts +155 -0
- package/src/parser/toc.ts +130 -0
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Type definitions for the IfTags conditional rendering module.
|
|
4
|
+
*
|
|
5
|
+
* `[[iftags]]` is a Wikidot block that conditionally renders its content
|
|
6
|
+
* based on the current page's tags. The condition syntax supports required
|
|
7
|
+
* tags (`+tag`), forbidden tags (`-tag`), and optional tags (bare `tag`).
|
|
8
|
+
*
|
|
9
|
+
* @module
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
/**
 * Structured form of the condition written inside `[[iftags +tag -tag ...]]`.
 *
 * Every token of the condition string lands in one of three buckets, and
 * each bucket has to pass on its own for the condition to hold:
 * - `required`  — `+tag` tokens; the page must carry every one of them (AND)
 * - `forbidden` — `-tag` tokens; the page must carry none of them (AND)
 * - `optional`  — bare `tag` tokens; the page must carry at least one (OR)
 *
 * @example
 * `[[iftags +fruit -admin component template]]` becomes:
 * ```
 * { required: ["fruit"], forbidden: ["admin"], optional: ["component", "template"] }
 * ```
 */
export interface TagCondition {
  /** `+tag` entries: all of them have to be present on the page. */
  required: string[];

  /** `-tag` entries: none of them may be present on the page. */
  forbidden: string[];

  /** Bare `tag` entries: a single match is enough (OR logic). */
  optional: string[];

  /**
   * Set to `true` if the condition held a lone `+` (the prefix without a
   * tag name). Wikidot reads a lone `+` as "require an unnamed tag", which
   * no page can satisfy, so a condition made only of `+` evaluates to
   * `false` (Hide Always).
   */
  hasEmptyRequired?: boolean;

  /**
   * Set to `true` if the condition held a lone `-` (the prefix without a
   * tag name). Wikidot reads a lone `-` as "forbid nothing", which always
   * holds, so a condition made only of `-` evaluates to `true`
   * (Show Always).
   */
  hasEmptyForbidden?: boolean;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Supplies the tags of the page currently being rendered.
 *
 * Invoked during the resolve phase whenever an `[[iftags]]` condition has
 * to be evaluated; the host application implements it on top of whatever
 * store holds the current page's tag list.
 *
 * @returns The tag names attached to the current page
 */
export type IfTagsResolver = () => string[];
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Include directive resolution for Wikidot's `[[include page]]` syntax.
|
|
4
|
+
*
|
|
5
|
+
* The include system performs text-level macro expansion before parsing. Each
|
|
6
|
+
* `[[include page | var=value]]` directive is replaced with the fetched page's
|
|
7
|
+
* content (after variable substitution), allowing block structures like `[[div]]`
|
|
8
|
+
* to span across include boundaries.
|
|
9
|
+
*
|
|
10
|
+
* Supports:
|
|
11
|
+
* - Same-site includes: `[[include page-name]]`
|
|
12
|
+
* - Cross-site includes: `[[include :site-name:page-name]]`
|
|
13
|
+
* - Variable substitution: `[[include page | key=value]]` replaces `{$key}` in the included content
|
|
14
|
+
* - Iterative expansion with configurable iteration limit (Wikidot-compatible)
|
|
15
|
+
*
|
|
16
|
+
* @module
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
export { resolveIncludes, resolveIncludesAsync } from "./resolve";
|
|
20
|
+
export type { IncludeFetcher, AsyncIncludeFetcher, ResolveIncludesOptions } from "./resolve";
|
|
@@ -0,0 +1,556 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Text-level expansion of `[[include]]` directives.
|
|
4
|
+
*
|
|
5
|
+
* Unlike most Wikidot constructs that are handled during AST parsing, include
|
|
6
|
+
* directives are resolved as a text-level macro expansion BEFORE the main parse.
|
|
7
|
+
* This is necessary because included content may contain partial block structures
|
|
8
|
+
* (e.g., an opening `[[div]]` tag in one include and its closing `[[/div]]` in
|
|
9
|
+
* another) that must be visible to the parser as a single continuous text.
|
|
10
|
+
*
|
|
11
|
+
* The resolution process follows Wikidot's iterative (do-while) approach:
|
|
12
|
+
* 1. Scan the entire source text for `[[include page | var=val]]` patterns
|
|
13
|
+
* 2. Replace ALL matches in one pass (each fetched, variable-substituted)
|
|
14
|
+
* 3. Compare the result with the previous source
|
|
15
|
+
* 4. Repeat until no changes occur or `maxIterations` is reached
|
|
16
|
+
*
|
|
17
|
+
* This differs from a DFS recursive approach: each iteration expands one
|
|
18
|
+
* "layer" of includes across the whole source, rather than drilling into
|
|
19
|
+
* each include immediately. This allows patterns like inc-loop (where the
|
|
20
|
+
* same page is included with different variables across iterations) to work.
|
|
21
|
+
*
|
|
22
|
+
* @module
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import type { PageRef, VariableMap, WikitextSettings } from "@wdprlib/ast";
|
|
26
|
+
|
|
27
|
+
/**
 * Synchronous lookup used to load the wikitext of an included page.
 * Yields the page source, or `null` when no such page exists.
 *
 * @security Page references passed to the fetcher originate from user
 * input. Validate and sanitize them before they reach a database query
 * or the file system.
 */
export type IncludeFetcher = (pageRef: PageRef) => string | null;
|
|
36
|
+
|
|
37
|
+
/**
 * Promise-based lookup used to load the wikitext of an included page.
 * Resolves to the page source, or to `null` when no such page exists.
 *
 * @security Page references passed to the fetcher originate from user
 * input. Validate and sanitize them before they reach a database query
 * or the file system.
 */
export type AsyncIncludeFetcher = (pageRef: PageRef) => Promise<string | null>;
|
|
46
|
+
|
|
47
|
+
/**
 * Tuning knobs accepted by `resolveIncludes` and `resolveIncludesAsync`.
 */
export interface ResolveIncludesOptions {
  /**
   * Upper bound on the number of expansion passes (default: 10).
   *
   * One pass swaps every `[[include]]` directive found in the current
   * text for its fetched content. Passes stop as soon as a pass leaves
   * the text unchanged, or once this bound is hit.
   */
  maxIterations?: number;
  /** Wikitext settings; when `enablePageSyntax` is false no include is expanded. */
  settings?: WikitextSettings;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Replace every `[[include]]` directive in `source` with the included text.
 *
 * Expansion is iterative, mirroring Wikidot: a pass substitutes all
 * directives currently present with their fetched, variable-substituted
 * content, and passes repeat until the text stops changing or
 * `maxIterations` (default 10) is reached.
 *
 * Fetch results are memoized per normalized page key for the duration of
 * the call, and a fetcher that throws is treated as "page not found".
 * When `options.settings` is given with a falsy `enablePageSyntax`, the
 * source is returned untouched.
 *
 * @example
 * ```ts
 * const expanded = resolveIncludes(source, fetcher);
 * const ast = parse(expanded);
 * ```
 */
export function resolveIncludes(
  source: string,
  fetcher: IncludeFetcher,
  options?: ResolveIncludesOptions,
): string {
  const settings = options?.settings;
  if (settings && !settings.enablePageSyntax) return source;

  const iterationLimit = options?.maxIterations ?? 10;
  const fetched = new Map<string, string | null>();

  const memoizedFetch: IncludeFetcher = (pageRef) => {
    const cacheKey = normalizePageKey(pageRef);
    if (fetched.has(cacheKey)) {
      // `null` is a legitimate cached value (page known to be missing).
      return fetched.get(cacheKey) as string | null;
    }

    let content: string | null = null;
    try {
      content = fetcher(pageRef);
    } catch {
      // Best effort: a failing fetch is reported as a missing page.
    }
    fetched.set(cacheKey, content);
    return content;
  };

  return expandIterative(source, memoizedFetch, iterationLimit);
}
|
|
106
|
+
|
|
107
|
+
/**
 * Promise-based counterpart of {@link resolveIncludes}.
 *
 * Replaces every `[[include]]` directive using a fetcher that returns a
 * promise, so page content can come from asynchronous sources such as a
 * database. Results are memoized per normalized page key for the duration
 * of the call, and a rejected/throwing fetch is treated as "page not
 * found". When `options.settings` is given with a falsy
 * `enablePageSyntax`, the source is returned untouched.
 *
 * @example
 * ```ts
 * const expanded = await resolveIncludesAsync(source, async (ref) => {
 *   return await db.getPageContent(ref.page);
 * });
 * const ast = parse(expanded);
 * ```
 */
export async function resolveIncludesAsync(
  source: string,
  fetcher: AsyncIncludeFetcher,
  options?: ResolveIncludesOptions,
): Promise<string> {
  const settings = options?.settings;
  if (settings && !settings.enablePageSyntax) return source;

  const iterationLimit = options?.maxIterations ?? 10;
  const fetched = new Map<string, string | null>();

  const memoizedFetch: AsyncIncludeFetcher = async (pageRef) => {
    const cacheKey = normalizePageKey(pageRef);
    if (fetched.has(cacheKey)) {
      // `null` is a legitimate cached value (page known to be missing).
      return fetched.get(cacheKey) as string | null;
    }

    let content: string | null = null;
    try {
      content = await fetcher(pageRef);
    } catch {
      // Best effort: a failing fetch is reported as a missing page.
    }
    fetched.set(cacheKey, content);
    return content;
  };

  return expandIterativeAsync(source, memoizedFetch, iterationLimit);
}
|
|
150
|
+
|
|
151
|
+
/**
 * Recognizes the `[[include` opener, case-insensitively, at a line start.
 *
 * With the `m` flag `^` anchors at every line boundary, which encodes the
 * Wikidot rule that an `[[include]]` directive has to begin its line. The
 * single trailing `\s` is the separator between the directive name and
 * whatever follows. Only the opener is matched here: where the directive
 * ends is decided by {@link scanIncludeDirectives}, which balances nested
 * `[[ ... ]]` so markup inside a parameter value does not cut the
 * directive short at the first `]]`.
 *
 * The regex is global (stateful `lastIndex`); copy it before iterating.
 */
const INCLUDE_OPEN_PATTERN = /^\[\[include\s/gim;
|
|
162
|
+
|
|
163
|
+
/** One `[[include ...]]` directive found in the source, with its bracket-balanced span. */
interface IncludeDirectiveMatch {
  /** Offset of the directive's opening `[[`. */
  start: number;
  /** Offset of the first character after the closing `]]`. */
  end: number;
  /** Everything between the `[[include ` opener and the closing `]]`. */
  inner: string;
}
|
|
172
|
+
|
|
173
|
+
/**
 * Tells whether the text collected so far between `[[include ` and a
 * candidate closing `]]` already contains an attribute section.
 *
 * That is the case when the text holds a `|` (pipe-delimited
 * `|key=value`), or when the first whitespace-delimited token — the page
 * name — is followed by further non-whitespace content (space-separated
 * `tmpl key=value`). A lone page name, with or without surrounding
 * whitespace, has no attributes; callers rely on this so that a stray `]`
 * right after the closing `]]` is not swallowed into the page reference.
 */
function hasAttributes(innerSoFar: string): boolean {
  // optional indent, page-name token, separator run, then at least one
  // non-blank character of a following parameter
  const pageNameThenMore = /^\s*\S+\s+\S/;
  return innerSoFar.includes("|") || pageNameThenMore.test(innerSoFar);
}
|
|
190
|
+
|
|
191
|
+
/**
 * Checks whether `pos` is effectively at the end of its line: only
 * spaces, tabs or carriage returns may sit between `pos` and the next
 * `\n` (or the end of `source`).
 */
function isRestOfLineBlank(source: string, pos: number): boolean {
  let cursor = pos;
  while (cursor < source.length) {
    switch (source[cursor]) {
      case "\n":
        // Hit the line break without meeting any visible character.
        return true;
      case " ":
      case "\t":
      case "\r":
        cursor += 1;
        break;
      default:
        return false;
    }
  }
  // Ran off the end of the text having seen nothing but blanks.
  return true;
}
|
|
203
|
+
|
|
204
|
+
/**
 * Locate every `[[include ...]]` directive in `source` and work out where
 * each one really ends.
 *
 * Parameter values may themselves contain `[[ ... ]]` markup (for example
 * a `[[span]]` run) or a stray `]]`, so the first `]]` after the opener is
 * not necessarily the terminator. Scanning from the opener, `[[` raises
 * and `]]` lowers a block depth; a `]]` that brings the depth to zero or
 * below ends the directive only when it is also positioned like a real
 * terminator:
 *
 * - it starts on the opener's own line (single-line directive; anything
 *   after it on that line stays outside the directive), or
 * - nothing but whitespace follows it on its line (the standalone `]]`
 *   closing a multi-line directive).
 *
 * Any other `]]` on a continuation line is passed over, which keeps
 * captions such as `[[span]]...[[/span]]` intact.
 *
 * When the text before the candidate `]]` already has an attribute section
 * (see `hasAttributes`), every `]` immediately following the `]]` is
 * absorbed as well, so the LAST two brackets of the run act as the closer
 * and the earlier ones belong to the final attribute value
 * (`[[include foo |k=--]]]` keeps `--]`). Without attributes the run is
 * not absorbed, so `[[include my-page]]]` resolves `my-page` and leaves
 * the extra `]` outside.
 *
 * `[[[ ... ]]]` triple-bracket links are tracked on their own depth.
 * While that depth is non-zero only `[[[` and `]]]` are interpreted;
 * everything else, including plain `[[` and `]]`, is skipped as link
 * text. This lets a link sit directly against the closer
 * (`...|cap=[[[link]]]]]`).
 * NOTE(review): as a consequence an unterminated `[[[` hides every later
 * `]]` from the scan until some `]]]` brings the link depth back to zero;
 * an earlier comment stated it "does not change where the block `]]`
 * closes" — confirm which behaviour is intended.
 *
 * An opener for which no terminator is found is not reported; scanning
 * resumes just after its `[[`.
 */
function scanIncludeDirectives(source: string): IncludeDirectiveMatch[] {
  const found: IncludeDirectiveMatch[] = [];
  // Work on a private copy so the shared global regex's `lastIndex` is never touched.
  const openerRe = new RegExp(INCLUDE_OPEN_PATTERN.source, INCLUDE_OPEN_PATTERN.flags);

  for (let hit = openerRe.exec(source); hit !== null; hit = openerRe.exec(source)) {
    const start = hit.index;
    const contentStart = start + hit[0].length;
    const openerLineEnd = source.indexOf("\n", start);

    let blockDepth = 0;
    let linkDepth = 0;
    let closeEnd = -1;
    let cursor = start;

    while (cursor < source.length) {
      if (source.startsWith("[[[", cursor)) {
        // Triple-bracket link opener: tracked separately from block depth.
        linkDepth += 1;
        cursor += 3;
        continue;
      }

      if (linkDepth > 0) {
        // Inside a link only its `]]]` matters; the rest is literal text.
        if (source.startsWith("]]]", cursor)) {
          linkDepth -= 1;
          cursor += 3;
        } else {
          cursor += 1;
        }
        continue;
      }

      if (source.startsWith("[[", cursor)) {
        blockDepth += 1;
        cursor += 2;
        continue;
      }

      if (!source.startsWith("]]", cursor)) {
        cursor += 1;
        continue;
      }

      // A `]]` outside any link: candidate terminator.
      const closeStart = cursor;
      blockDepth -= 1;
      cursor += 2;
      if (blockDepth > 0) continue;

      // Greedy tail: with an attribute section present, trailing `]`
      // characters belong to the last value, so the closer shifts right.
      // The attribute test looks for `|` or for content after the page
      // name rather than for `=`, since `=` may appear in a page name.
      if (hasAttributes(source.slice(contentStart, closeStart))) {
        while (cursor < source.length && source[cursor] === "]") {
          cursor += 1;
        }
      }

      const onOpenerLine = openerLineEnd === -1 || closeStart < openerLineEnd;
      if (onOpenerLine || isRestOfLineBlank(source, cursor)) {
        closeEnd = cursor;
        break;
      }
    }

    if (closeEnd === -1) {
      // Unterminated opener: skip past its `[[` so a later well-formed
      // directive can still be picked up.
      openerRe.lastIndex = start + 2;
      continue;
    }

    found.push({ start, end: closeEnd, inner: source.slice(contentStart, closeEnd - 2) });
    openerRe.lastIndex = closeEnd;
  }

  return found;
}
|
|
320
|
+
|
|
321
|
+
/**
 * Split the inner text of an `[[include ...]]` directive into the page to
 * fetch and the variables to substitute into it.
 *
 * Accepted shape: `page-name key1=value1 | key2=value2`. The page name is
 * the first whitespace-delimited token of the first `|`-segment; whatever
 * follows it in that segment is treated as ONE `key=value` assignment, and
 * every further `|`-segment is one more assignment. Segments without `=`
 * or with an empty key are ignored. A page name of the form
 * `:site-name:page-name` yields a cross-site reference.
 *
 * The page name is separated from its parameters by any whitespace
 * (space, tab, CR, ...), matching how the directive opener and the
 * attribute detection treat whitespace; line breaks (LF or CRLF) anywhere
 * in the directive are folded into a single space first.
 *
 * @param inner - The text between `[[include` and `]]`
 * @returns The parsed page location and the variable map
 */
function parseIncludeDirective(inner: string): { location: PageRef; variables: VariableMap } {
  // Line breaks are layout only. Fold CRLF as a unit so no stray `\r`
  // survives inside a page name or a value.
  const normalized = inner.replace(/\r?\n/g, " ");

  const [head = "", ...pipeSegments] = normalized.split("|");
  const firstSegment = head.trim();

  // "page-name key=value" -> target "page-name", rest "key=value".
  // Split on the first whitespace of ANY kind, not just a literal space,
  // so `page<TAB>key=value` does not become part of the page name.
  const separator = firstSegment.search(/\s/);
  const target = separator === -1 ? firstSegment : firstSegment.slice(0, separator);

  const varSegments: string[] = [];
  if (separator !== -1) {
    const rest = firstSegment.slice(separator + 1).trim();
    if (rest) varSegments.push(rest);
  }
  for (const raw of pipeSegments) {
    const segment = raw.trim();
    if (segment) varSegments.push(segment);
  }

  // Build the variable map, honouring Wikidot's default-value idiom.
  //
  // A template gives a forwarded variable a default by repeating the key:
  // `key={$key} | key=default`. After the outer include has substituted
  // `{$key}` this reads as one of
  //   - `key=value | key=default`   (caller supplied a value)
  //   - `key= | key=default`        (caller passed an empty value)
  //   - `key={$key} | key=default`  (caller omitted it)
  // so the FIRST concrete value for a key wins. An empty string or a
  // still-unresolved `{$...}` placeholder is not concrete: it is kept only
  // if it is the first thing seen for the key and no concrete value ever
  // shows up, which preserves pass-through of genuinely unset variables.
  const unresolvedPlaceholder = /^\{\$[^}]*\}$/;
  const variables: VariableMap = {};
  // Explicit sets rather than `key in variables`, so names such as
  // `toString` / `constructor` are never mistaken for existing entries.
  const seenKeys = new Set<string>();
  const concreteKeys = new Set<string>();

  for (const segment of varSegments) {
    const eqIndex = segment.indexOf("=");
    if (eqIndex === -1) continue;
    const key = segment.slice(0, eqIndex).trim();
    if (!key) continue;
    const value = segment.slice(eqIndex + 1).trim();

    const isConcrete = value !== "" && !unresolvedPlaceholder.test(value);
    if (isConcrete) {
      // A later concrete value for the same key is only a default.
      if (concreteKeys.has(key)) continue;
      concreteKeys.add(key);
    } else if (seenKeys.has(key)) {
      // Never let an empty/placeholder value replace anything already stored.
      continue;
    }
    seenKeys.add(key);
    variables[key] = value;
  }

  // `:site:page` addresses another site; a leading colon without a second
  // one is kept verbatim as a same-site page name.
  let location: PageRef = { site: null, page: target };
  if (target.startsWith(":")) {
    const colonIndex = target.indexOf(":", 1);
    if (colonIndex !== -1) {
      location = { site: target.slice(1, colonIndex), page: target.slice(colonIndex + 1) };
    }
  }

  return { location, variables };
}
|
|
425
|
+
|
|
426
|
+
/**
 * Produce the text that takes the place of one include directive.
 *
 * The directive's `inner` text is parsed, the referenced page is fetched,
 * and the directive's variables are substituted into the fetched content.
 * When the fetcher reports the page as missing (`null`), an error block
 * naming the page is returned instead.
 */
function replaceOneInclude(inner: string, fetcher: IncludeFetcher): string {
  const parsed = parseIncludeDirective(inner);
  const fetched = fetcher(parsed.location);
  return fetched === null
    ? `[[div class="error-block"]]\nPage to be included "${parsed.location.page}" cannot be found!\n[[/div]]`
    : substituteVariables(fetched, parsed.variables);
}
|
|
438
|
+
|
|
439
|
+
/**
 * Expand `[[include]]` directives pass by pass.
 *
 * A pass locates all directives in the current text and swaps each for
 * its replacement (fetched content with variables substituted). Included
 * content is not descended into right away; directives it brings in are
 * picked up by the following pass.
 *
 * The loop ends when a pass finds no directive, when a pass leaves the
 * text unchanged, or after `maxIterations` passes.
 */
function expandIterative(source: string, fetcher: IncludeFetcher, maxIterations: number): string {
  let text = source;
  let pass = 0;

  while (pass < maxIterations) {
    const directives = scanIncludeDirectives(text);
    if (directives.length === 0) return text;

    // Stitch together: untouched text before each directive, then its replacement.
    const pieces: string[] = [];
    let cursor = 0;
    for (const directive of directives) {
      pieces.push(text.slice(cursor, directive.start));
      pieces.push(replaceOneInclude(directive.inner, fetcher));
      cursor = directive.end;
    }
    pieces.push(text.slice(cursor));

    const expanded = pieces.join("");
    if (expanded === text) return text;

    text = expanded;
    pass += 1;
  }

  return text;
}
|
|
469
|
+
|
|
470
|
+
/**
 * Asynchronous counterpart of the iterative `[[include]]` expansion.
 *
 * Each pass scans the current text with `scanIncludeDirectives` and walks
 * the directives in the order returned. For every directive the page is
 * requested from `fetcher` and awaited before the next directive is
 * handled, so fetches within a pass run one at a time rather than in
 * parallel. A `null` result is rendered as an `error-block` div naming the
 * page; any other result goes through `substituteVariables`.
 *
 * The loop ends when a scan finds no directives, when a pass leaves the
 * text unchanged, or after `maxIterations` passes.
 *
 * @param source - Text that may contain include directives
 * @param fetcher - Asynchronous callback used to obtain included page content
 * @param maxIterations - Upper bound on the number of passes
 * @returns A promise for the text after the final pass
 */
async function expandIterativeAsync(
  source: string,
  fetcher: AsyncIncludeFetcher,
  maxIterations: number,
): Promise<string> {
  let text = source;

  for (let pass = 0; pass < maxIterations; pass += 1) {
    const found = scanIncludeDirectives(text);
    if (found.length === 0) {
      return text;
    }

    let expanded = "";
    let cursor = 0;
    for (const directive of found) {
      const parsed = parseIncludeDirective(directive.inner);
      // Awaited inside the loop on purpose: one fetch at a time, in order.
      const fetched = await fetcher(parsed.location);
      const replacement =
        fetched === null
          ? `[[div class="error-block"]]\nPage to be included "${parsed.location.page}" cannot be found!\n[[/div]]`
          : substituteVariables(fetched, parsed.variables);

      expanded += text.slice(cursor, directive.start) + replacement;
      cursor = directive.end;
    }
    expanded += text.slice(cursor);

    // A pass that changes nothing means further passes would not either.
    if (expanded === text) {
      return text;
    }
    text = expanded;
  }

  return text;
}
|
|
510
|
+
|
|
511
|
+
/**
 * Build the string key used to identify a page reference in cache lookups.
 *
 * The page name is lowercased so that lookups are case-insensitive. When
 * the reference carries a non-empty site, the site is prepended unchanged
 * (it is not lowercased) and separated from the page name by a colon.
 *
 * @param location - The page reference to turn into a key
 * @returns `"page-name"` for same-site references, `"site:page-name"` otherwise
 */
function normalizePageKey(location: PageRef): string {
  const pageKey = location.page.toLowerCase();

  // A null or empty site means the reference is local: no prefix.
  if (!location.site) {
    return pageKey;
  }
  return `${location.site}:${pageKey}`;
}
|
|
525
|
+
|
|
526
|
+
/**
 * Substitute variables in included page content.
 *
 * Replaces every `{$key}` placeholder with the corresponding value from the
 * variables map provided in the include directive
 * (e.g., `[[include page | key=value]]`).
 *
 * Values are inserted verbatim: `$` sequences in a value (`$&`, `$$`, `$1`,
 * ...) are NOT interpreted as `String.prototype.replace` replacement
 * patterns. Variables are applied one after another in `Object.entries`
 * order, so text inserted for one variable is visible to the variables
 * processed after it. Placeholders with no matching key are left as-is.
 *
 * @param content - The fetched page content containing `{$key}` placeholders
 * @param variables - Key-value pairs from the include directive
 * @returns Content with all matching variables substituted
 */
function substituteVariables(content: string, variables: VariableMap): string {
  const entries = Object.entries(variables);
  if (entries.length === 0) return content;

  let result = content;
  for (const [key, value] of entries) {
    const placeholder = new RegExp(`\\{\\$${escapeRegExp(key)}\\}`, "g");
    // A function replacer is used so the value is taken literally; passing
    // the value as a replacement string would expand `$&`, `$$`, `$1`, etc.
    result = result.replace(placeholder, () => value);
  }
  return result;
}

/**
 * Escape special RegExp characters in a string so it can be safely used
 * in a `new RegExp()` constructor.
 *
 * @param str - The string to escape
 * @returns The escaped string with all regex special characters prefixed with backslash
 */
function escapeRegExp(str: string): string {
  return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
|