@wdprlib/parser 3.1.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/dist/index.cjs +312 -121
  2. package/dist/index.js +289 -98
  3. package/package.json +5 -3
  4. package/src/index.ts +163 -0
  5. package/src/lexer/index.ts +20 -0
  6. package/src/lexer/lexer.ts +687 -0
  7. package/src/lexer/tokens.ts +141 -0
  8. package/src/parser/constants.ts +173 -0
  9. package/src/parser/depth.ts +251 -0
  10. package/src/parser/index.ts +18 -0
  11. package/src/parser/parse.ts +315 -0
  12. package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
  13. package/src/parser/postprocess/index.ts +15 -0
  14. package/src/parser/postprocess/spanStrip.ts +697 -0
  15. package/src/parser/preprocess/expr.ts +265 -0
  16. package/src/parser/preprocess/index.ts +38 -0
  17. package/src/parser/preprocess/typography.ts +67 -0
  18. package/src/parser/preprocess/utils.ts +250 -0
  19. package/src/parser/preprocess/whitespace.ts +111 -0
  20. package/src/parser/rules/block/align.ts +282 -0
  21. package/src/parser/rules/block/bibliography.ts +359 -0
  22. package/src/parser/rules/block/block-list.ts +689 -0
  23. package/src/parser/rules/block/blockquote.ts +238 -0
  24. package/src/parser/rules/block/center.ts +87 -0
  25. package/src/parser/rules/block/clear-float.ts +75 -0
  26. package/src/parser/rules/block/code.ts +187 -0
  27. package/src/parser/rules/block/collapsible.ts +337 -0
  28. package/src/parser/rules/block/comment.ts +73 -0
  29. package/src/parser/rules/block/content-separator.ts +79 -0
  30. package/src/parser/rules/block/definition-list.ts +270 -0
  31. package/src/parser/rules/block/div.ts +400 -0
  32. package/src/parser/rules/block/embed-block.ts +153 -0
  33. package/src/parser/rules/block/footnoteblock.ts +200 -0
  34. package/src/parser/rules/block/heading.ts +142 -0
  35. package/src/parser/rules/block/horizontal-rule.ts +61 -0
  36. package/src/parser/rules/block/html.ts +222 -0
  37. package/src/parser/rules/block/iframe.ts +239 -0
  38. package/src/parser/rules/block/iftags.ts +150 -0
  39. package/src/parser/rules/block/include.ts +179 -0
  40. package/src/parser/rules/block/index.ts +127 -0
  41. package/src/parser/rules/block/list.ts +244 -0
  42. package/src/parser/rules/block/math.ts +183 -0
  43. package/src/parser/rules/block/module/backlinks/index.ts +31 -0
  44. package/src/parser/rules/block/module/backlinks/types.ts +21 -0
  45. package/src/parser/rules/block/module/categories/index.ts +34 -0
  46. package/src/parser/rules/block/module/categories/types.ts +21 -0
  47. package/src/parser/rules/block/module/css/index.ts +37 -0
  48. package/src/parser/rules/block/module/iftags/condition.ts +109 -0
  49. package/src/parser/rules/block/module/iftags/index.ts +26 -0
  50. package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
  51. package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
  52. package/src/parser/rules/block/module/iftags/types.ts +63 -0
  53. package/src/parser/rules/block/module/include/index.ts +20 -0
  54. package/src/parser/rules/block/module/include/resolve.ts +556 -0
  55. package/src/parser/rules/block/module/index.ts +122 -0
  56. package/src/parser/rules/block/module/join/index.ts +34 -0
  57. package/src/parser/rules/block/module/join/types.ts +23 -0
  58. package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
  59. package/src/parser/rules/block/module/listpages/extract.ts +410 -0
  60. package/src/parser/rules/block/module/listpages/index.ts +83 -0
  61. package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
  62. package/src/parser/rules/block/module/listpages/parser.ts +106 -0
  63. package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
  64. package/src/parser/rules/block/module/listpages/types.ts +513 -0
  65. package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
  66. package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
  67. package/src/parser/rules/block/module/listusers/extract.ts +45 -0
  68. package/src/parser/rules/block/module/listusers/index.ts +36 -0
  69. package/src/parser/rules/block/module/listusers/parser.ts +54 -0
  70. package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
  71. package/src/parser/rules/block/module/listusers/types.ts +93 -0
  72. package/src/parser/rules/block/module/mapping.ts +61 -0
  73. package/src/parser/rules/block/module/page-tree/index.ts +38 -0
  74. package/src/parser/rules/block/module/page-tree/types.ts +29 -0
  75. package/src/parser/rules/block/module/rate/index.ts +28 -0
  76. package/src/parser/rules/block/module/rate/types.ts +19 -0
  77. package/src/parser/rules/block/module/resolve.ts +411 -0
  78. package/src/parser/rules/block/module/types-common.ts +59 -0
  79. package/src/parser/rules/block/module/types.ts +61 -0
  80. package/src/parser/rules/block/module/utils.ts +43 -0
  81. package/src/parser/rules/block/module/walk.ts +380 -0
  82. package/src/parser/rules/block/module.ts +164 -0
  83. package/src/parser/rules/block/orphan-li.ts +177 -0
  84. package/src/parser/rules/block/paragraph.ts +157 -0
  85. package/src/parser/rules/block/table-block.ts +726 -0
  86. package/src/parser/rules/block/table.ts +441 -0
  87. package/src/parser/rules/block/tabview.ts +331 -0
  88. package/src/parser/rules/block/toc.ts +129 -0
  89. package/src/parser/rules/block/utils.ts +615 -0
  90. package/src/parser/rules/index.ts +49 -0
  91. package/src/parser/rules/inline/anchor-name.ts +154 -0
  92. package/src/parser/rules/inline/anchor.ts +327 -0
  93. package/src/parser/rules/inline/bibcite.ts +153 -0
  94. package/src/parser/rules/inline/bold.ts +86 -0
  95. package/src/parser/rules/inline/color.ts +140 -0
  96. package/src/parser/rules/inline/comment.ts +90 -0
  97. package/src/parser/rules/inline/equation-ref.ts +115 -0
  98. package/src/parser/rules/inline/expr.ts +526 -0
  99. package/src/parser/rules/inline/footnote.ts +223 -0
  100. package/src/parser/rules/inline/guillemet.ts +64 -0
  101. package/src/parser/rules/inline/html.ts +132 -0
  102. package/src/parser/rules/inline/image.ts +328 -0
  103. package/src/parser/rules/inline/index.ts +150 -0
  104. package/src/parser/rules/inline/italic.ts +74 -0
  105. package/src/parser/rules/inline/line-break.ts +326 -0
  106. package/src/parser/rules/inline/link-anchor.ts +147 -0
  107. package/src/parser/rules/inline/link-single.ts +164 -0
  108. package/src/parser/rules/inline/link-star.ts +134 -0
  109. package/src/parser/rules/inline/link-triple.ts +267 -0
  110. package/src/parser/rules/inline/math-inline.ts +126 -0
  111. package/src/parser/rules/inline/monospace.ts +78 -0
  112. package/src/parser/rules/inline/raw.ts +262 -0
  113. package/src/parser/rules/inline/size.ts +244 -0
  114. package/src/parser/rules/inline/span.ts +424 -0
  115. package/src/parser/rules/inline/strikethrough.ts +115 -0
  116. package/src/parser/rules/inline/subscript.ts +84 -0
  117. package/src/parser/rules/inline/superscript.ts +84 -0
  118. package/src/parser/rules/inline/text.ts +84 -0
  119. package/src/parser/rules/inline/underline.ts +127 -0
  120. package/src/parser/rules/inline/user.ts +147 -0
  121. package/src/parser/rules/inline/utils.ts +344 -0
  122. package/src/parser/rules/types.ts +252 -0
  123. package/src/parser/rules/utils.ts +155 -0
  124. package/src/parser/toc.ts +130 -0
@@ -0,0 +1,252 @@
1
+ import type { Token, TokenType } from "../../lexer";
2
+ import type { Version, WikitextSettings, Diagnostic } from "@wdprlib/ast";
3
+ import type { Element, CodeBlockData, TocEntry } from "@wdprlib/ast";
4
+
5
/**
 * State that belongs to one parsing scope and is handed down by spreading
 * the parent scope and overriding individual fields.
 *
 * All members are `readonly`, which stops a rule from altering its
 * parent's scope through a shared reference. To change a value, replace
 * the whole object — `ctx.scope = { ...ctx.scope, X: ... }` — or, as is
 * more usual, build a child context that carries the overridden scope.
 *
 * Rationale: speculative parsing must be safe to roll back. When a block
 * rule fails, whatever scope it accumulated is thrown away together with
 * the failed context. State that is mutated in place would outlive that
 * rollback, so the per-scope fields are collected here and nested
 * mutation is ruled out at the type level.
 */
export interface ScopeContext {
  /**
   * Predicate reporting that the current block should close. The
   * paragraph parser consults it to know when to stop gathering inline
   * content.
   */
  readonly blockCloseCondition?: (ctx: ParseContext) => boolean;
  /**
   * Block names that are ignored by paragraph-boundary detection. If a
   * BLOCK_OPEN/BLOCK_END_OPEN for one of these names shows up at line
   * start, the inline parser does NOT end the paragraph there.
   * `[[collapsible]]` relies on this so that a nested `[[collapsible]]`
   * does not split paragraphs.
   */
  readonly excludedBlockNames?: ReadonlySet<string>;
  /**
   * Remaining allowance for nested div opens.
   * At 0 the div rule fails, so the innermost surplus opens end up as text.
   * `undefined` stands for "not yet calculated" (top level or a non-div
   * context).
   */
  readonly divClosesBudget?: number;
  /**
   * Flag the footnote-block rule uses to reject a repeated
   * `[[footnoteblock]]`.
   *
   * **Visibility follows each spread copy of `ParseContext`; it is not
   * document-wide.** `parseBlocksUntil` builds a new
   * `{ ...ctx, pos, ... }` on every iteration, so the flag is not carried
   * between sibling rules within a body, between sibling bodies, or back
   * up to the top-level parser.
   *
   * Observable behaviour at present:
   * - Top level, two `[[footnoteblock]]`: the second is rejected, because
   *   the top-level dispatch passes the parser's own `ctx` to rules and
   *   the next top-level iteration therefore sees the change.
   * - Same body, or nested bodies, two `[[footnoteblock]]`: both are
   *   accepted today, although Wikidot's "first-only" rule says the
   *   duplicate should be rejected.
   *
   * Closing that gap needs either a post-parse AST dedup pass (like the
   * auto-append walk) or shared state with proper rollback for
   * speculative parses. That work is tracked separately; the flag keeps
   * its original primitive semantics on purpose so the top-level
   * duplicate-rejection test fixtures do not regress.
   *
   * `Parser.parse` deliberately does not consult this flag for its
   * auto-append decision; it walks the finished AST instead — see
   * `containsFootnoteBlock`.
   */
  readonly footnoteBlockParsed: boolean;
}
68
+
69
/**
 * The context object every rule receives.
 *
 * Members fall into four lifecycle groups:
 * - Static configuration (`tokens`, `version`, `trackPositions`,
 *   `settings`, and the rule arrays): fixed by the constructor.
 * - `pos`: the per-scope cursor. It stays at the top level for
 *   convenience, since every rule spread overrides it.
 * - Accumulators (`footnotes`, `tocEntries`, …, `diagnostics`): shared
 *   by reference, i.e. the same array identity survives each spread.
 * - `scope`: explicitly grouped per-scope state; see {@link ScopeContext}.
 */
export interface ParseContext {
  /** Token stream being parsed. */
  tokens: Token[];
  /** Cursor into `tokens`. */
  pos: number;
  version: Version;
  trackPositions: boolean;
  settings: WikitextSettings;

  // --- Collections for SyntaxTree output ---
  footnotes: Element[][];
  tocEntries: TocEntry[];
  codeBlocks: CodeBlockData[];
  htmlBlocks: string[];
  /** Bibliography citation labels gathered while parsing. */
  bibcites: string[];

  // --- Rules (injected to avoid a circular dependency) ---
  blockRules: BlockRule[];
  blockFallbackRule: BlockRule;
  inlineRules: InlineRule[];

  /** Diagnostics gathered while parsing. */
  diagnostics: Diagnostic[];
  /** Per-scope state (readonly fields, immutable-replace semantics). */
  scope: ScopeContext;
}
103
+
104
/**
 * Outcome of a single rule attempt.
 *
 * - Success carries `elements` (an array, so a rule can yield none, one,
 *   or several nodes) together with a `consumed` count
 *   (presumably the number of tokens consumed — confirm against callers).
 * - Failure carries nothing beyond `success: false`.
 *
 * During migration, `T` can be either an internal AST node or `Element`.
 */
export type RuleResult<T> =
  | { success: true; elements: T[]; consumed: number }
  | { success: false };
111
+
112
/**
 * Contract implemented by every block-level rule.
 */
export interface BlockRule {
  /** Name of the rule, used for debugging. */
  name: string;
  /** Token types at which this rule may begin. */
  startTokens: TokenType[];
  /** True when the rule only applies at the start of a line. */
  requiresLineStart: boolean;
  /** Attempt to parse this block from the given context. */
  parse(ctx: ParseContext): RuleResult<Element>;
  /**
   * Optional probe: do the tokens at `pos` match this rule's start
   * pattern? The inline parser uses it to decide how to behave just
   * before a block boundary (for example, whether to emit a trailing
   * line-break).
   */
  isStartPattern?(ctx: ParseContext, pos: number): boolean;
  /**
   * If true, a single newline in front of this block is turned into a
   * line-break. Wikidot's Divalign expands content inline, so `\n` before
   * nested blocks becomes `<br />`; other blocks (Code, Div, etc.)
   * suppress this.
   */
  preservesPrecedingLineBreak?: boolean;
}
137
+
138
/**
 * Contract implemented by every inline-level rule.
 */
export interface InlineRule {
  /** Name of the rule, used for debugging. */
  name: string;
  /** Token types at which this rule may begin. */
  startTokens: TokenType[];
  /** Attempt to parse this inline element from the given context. */
  parse(ctx: ParseContext): RuleResult<Element>;
}
149
+
150
/**
 * Return the token under the cursor.
 *
 * @param ctx - Parse context supplying `tokens` and `pos`
 * @returns `ctx.tokens[ctx.pos]`, or a synthetic EOF token when that slot
 *   is empty
 */
export function currentToken(ctx: ParseContext): Token {
  const atCursor = ctx.tokens[ctx.pos];
  return atCursor ?? eofToken();
}
156
+
157
/**
 * Look at a token relative to the cursor without moving it.
 *
 * @param ctx - Parse context supplying `tokens` and `pos`
 * @param n - Offset from the cursor (defaults to 1, i.e. the next token)
 * @returns The token at `ctx.pos + n`, or a synthetic EOF token when that
 *   slot is empty
 */
export function peekToken(ctx: ParseContext, n = 1): Token {
  const targetIndex = ctx.pos + n;
  const ahead = ctx.tokens[targetIndex];
  return ahead ?? eofToken();
}
163
+
164
/**
 * Test whether the token under the cursor has the given type.
 *
 * @param ctx - Parse context supplying `tokens` and `pos`
 * @param type - Token type to compare against
 * @returns True when the current token's type equals `type`
 */
export function checkToken(ctx: ParseContext, type: TokenType): boolean {
  const { type: actualType } = currentToken(ctx);
  return actualType === type;
}
170
+
171
/**
 * Test whether the cursor has reached the end of input.
 *
 * @param ctx - Parse context supplying `tokens` and `pos`
 * @returns True when `pos` is past the last token or the current token
 *   is an EOF token
 */
export function isAtEnd(ctx: ParseContext): boolean {
  // Running off the array counts as the end even without an explicit EOF.
  if (ctx.pos >= ctx.tokens.length) {
    return true;
  }
  return currentToken(ctx).type === "EOF";
}
177
+
178
/**
 * Build a synthetic EOF token.
 *
 * Every call returns a new object: empty value, `lineStart` false, and a
 * position whose start and end are separate all-zero points.
 */
function eofToken(): Token {
  // A factory rather than a shared constant, so start/end never alias.
  const zeroPoint = () => ({ line: 0, column: 0, offset: 0 });
  return {
    type: "EOF",
    value: "",
    position: { start: zeroPoint(), end: zeroPoint() },
    lineStart: false,
  };
}
189
+
190
/**
 * Scan forward from the cursor for a closing marker on the current line.
 *
 * The scan begins at `ctx.pos` itself and gives up at the first NEWLINE
 * or EOF token, at a missing slot, or at the end of the token array.
 *
 * @param ctx - Parse context supplying `tokens` and `pos`
 * @param markerType - Token type of the closing marker
 * @param markerValue - When given, the marker token's value must equal it
 * @returns True if a matching marker is found before the line ends
 */
export function hasClosingMarkerBeforeNewline(
  ctx: ParseContext,
  markerType: TokenType,
  markerValue?: string,
): boolean {
  for (let index = ctx.pos; index < ctx.tokens.length; index++) {
    const candidate = ctx.tokens[index];
    // Line terminators are checked first, so they never count as a marker.
    if (!candidate || candidate.type === "NEWLINE" || candidate.type === "EOF") {
      return false;
    }
    if (
      candidate.type === markerType &&
      (markerValue === undefined || candidate.value === markerValue)
    ) {
      return true;
    }
  }
  return false;
}
214
+
215
/**
 * Scan forward from the cursor for a closing marker within the current
 * paragraph, so inline formatting may span several lines.
 *
 * The scan begins at `ctx.pos` and gives up at EOF, at a missing slot, or
 * at a paragraph break. A paragraph break is a NEWLINE that is followed,
 * after any run of WHITESPACE tokens, by another NEWLINE, by EOF, or by
 * the end of the token array.
 *
 * @param ctx - Parse context supplying `tokens` and `pos`
 * @param markerType - Token type of the closing marker
 * @param markerValue - When given, the marker token's value must equal it
 * @returns True if a matching marker is found before the paragraph ends
 */
export function hasClosingMarkerBeforeParagraphBreak(
  ctx: ParseContext,
  markerType: TokenType,
  markerValue?: string,
): boolean {
  for (let index = ctx.pos; index < ctx.tokens.length; index++) {
    const candidate = ctx.tokens[index];
    if (!candidate || candidate.type === "EOF") {
      return false;
    }

    if (candidate.type === "NEWLINE") {
      // Skip whitespace-only content to find what really follows the newline.
      let nextIndex = index + 1;
      while (ctx.tokens[nextIndex]?.type === "WHITESPACE") {
        nextIndex++;
      }
      const following = ctx.tokens[nextIndex];
      if (!following || following.type === "NEWLINE" || following.type === "EOF") {
        return false; // Paragraph break - stop
      }
    }

    if (
      candidate.type === markerType &&
      (markerValue === undefined || candidate.value === markerValue)
    ) {
      return true;
    }
  }
  return false;
}
@@ -0,0 +1,155 @@
1
+ /**
2
+ * Common utilities shared between block and inline rules
3
+ */
4
+
5
+ import type { ParseContext } from "./types";
6
+
7
+ // =============================================================================
8
+ // Attribute Safety
9
+ // =============================================================================
10
+
11
// Allow-list of attribute names (lowercase) that pass the filter unchanged.
// Event handler attributes (on*) are not listed here and are additionally
// blocked outright by the filter below.
const SAFE_ATTRIBUTES = new Set([
  "accept",
  "align",
  "alt",
  "autocapitalize",
  "autoplay",
  "background",
  "bgcolor",
  "border",
  "buffered",
  "checked",
  "cite",
  "class",
  "cols",
  "colspan",
  "contenteditable",
  "controls",
  "coords",
  "datetime",
  "decoding",
  "default",
  "dir",
  "dirname",
  "disabled",
  "download",
  "draggable",
  "for",
  "form",
  "headers",
  "height",
  "hidden",
  "high",
  "href",
  "hreflang",
  "id",
  "inputmode",
  "ismap",
  "itemprop",
  "kind",
  "label",
  "lang",
  "list",
  "loop",
  "low",
  "max",
  "maxlength",
  "min",
  "minlength",
  "multiple",
  "muted",
  "name",
  "optimum",
  "pattern",
  "placeholder",
  "poster",
  "preload",
  "readonly",
  "required",
  "reversed",
  "role",
  "rows",
  "rowspan",
  "scope",
  "selected",
  "shape",
  "size",
  "sizes",
  "span",
  "spellcheck",
  "src",
  "srclang",
  "srcset",
  "start",
  "step",
  "style",
  "tabindex",
  "target",
  "title",
  "translate",
  "type",
  "usemap",
  "value",
  "width",
  "wrap",
]);

/**
 * Return a copy of `attrs` containing only attributes considered safe.
 *
 * Name matching is case-insensitive, but the original key spelling is
 * kept in the result:
 * - names starting with `on` (event handlers) are dropped;
 * - `aria-*` and `data-*` names are kept as-is;
 * - any other name is kept only if it is in the allow-list;
 * - an `id` value gets a `u-` prefix unless it already starts with `u-`.
 *
 * @param attrs - Attribute name/value pairs to filter
 * @returns A new object; `attrs` itself is not modified
 */
export function filterUnsafeAttributes(attrs: Record<string, string>): Record<string, string> {
  const kept: Record<string, string> = {};

  for (const [name, rawValue] of Object.entries(attrs)) {
    const normalized = name.toLowerCase();

    const isEventHandler = normalized.startsWith("on");
    if (isEventHandler) continue;

    const isOpenFamily = normalized.startsWith("aria-") || normalized.startsWith("data-");
    if (!isOpenFamily && !SAFE_ATTRIBUTES.has(normalized)) continue;

    // Wikidot prefixes user-set IDs with "u-"
    const needsIdPrefix = normalized === "id" && !rawValue.startsWith("u-");
    kept[name] = needsIdPrefix ? `u-${rawValue}` : rawValue;
  }

  return kept;
}
120
+
121
+ // =============================================================================
122
+ // Block Name Parsing
123
+ // =============================================================================
124
+
125
/**
 * Read a block name (the part after `[[` or `[[/`) starting at `startPos`.
 *
 * The name must start immediately: Wikidot does NOT allow whitespace
 * between `[[` and the block name, so `[[ code ]]` is plain text rather
 * than a code block. Only a TEXT or IDENTIFIER token is accepted as the
 * name, and its value is lowercased.
 *
 * An underscore suffix such as `div_` may be tokenized as
 * `[IDENTIFIER "div"] [UNDERSCORE "_"]`; when an UNDERSCORE token follows
 * directly, it is appended to the name and counted as consumed.
 *
 * @param ctx - Parse context supplying `tokens`
 * @param startPos - Index of the token expected to hold the name
 * @returns The lowercased name and the number of tokens it spans (1 or 2),
 *   or `null` when no name token is present at `startPos`
 */
export function parseBlockName(
  ctx: ParseContext,
  startPos: number,
): { name: string; consumed: number } | null {
  const nameToken = ctx.tokens[startPos];
  if (!nameToken || !(nameToken.type === "TEXT" || nameToken.type === "IDENTIFIER")) {
    return null;
  }

  const baseName = nameToken.value.toLowerCase();
  const hasUnderscoreSuffix = ctx.tokens[startPos + 1]?.type === "UNDERSCORE";

  return hasUnderscoreSuffix
    ? { name: `${baseName}_`, consumed: 2 }
    : { name: baseName, consumed: 1 };
}
@@ -0,0 +1,130 @@
1
+ /**
2
+ *
3
+ * Table of Contents (TOC) generation for Wikidot markup.
4
+ *
5
+ * Converts a flat array of `TocEntry` items (collected from heading elements
6
+ * during parsing) into nested bullet-list `Element` nodes suitable for rendering
7
+ * as `[[toc]]`. Uses the depth module to transform flat heading levels (h1-h6)
8
+ * into a properly nested list hierarchy.
9
+ *
10
+ * Each TOC entry becomes an anchor link (`#toc0`, `#toc1`, ...) pointing to the
11
+ * corresponding heading in the rendered page, matching Wikidot's original
12
+ * anchor naming scheme.
13
+ *
14
+ * @module
15
+ */
16
+
17
+ import type { Element, TocEntry, ListItem } from "@wdprlib/ast";
18
+ import { processDepths, type DepthList, type DepthItem } from "./depth";
19
+
20
/**
 * Hands out consecutive integers for TOC anchor IDs.
 *
 * Wikidot numbers heading anchors `#toc0`, `#toc1`, ... in document
 * order. One instance is meant to be shared by all TOC trees built in a
 * pass so that the numbers it produces never repeat across them.
 */
class TocIndexer {
  private counter = 0;

  /**
   * Hand out the next number in the sequence.
   * @returns The value held before this call advanced the counter
   *   (0 on the first call)
   */
  next(): number {
    const issued = this.counter;
    this.counter += 1;
    return issued;
  }
}
38
+
39
/**
 * Wrap depth-processed TOC items in a bullet `list` Element.
 *
 * Every entry of `items` is converted, in order, by `buildTocListItem`.
 *
 * @param indexer - Shared counter used for the sequential `#tocN` anchors
 * @param items - Depth-processed list whose leaf values are heading texts
 * @returns A `list` Element of type "bullet" with empty attributes
 */
function buildTocList(indexer: TocIndexer, items: DepthList<null, string>): Element {
  const data = {
    type: "bullet",
    attributes: {},
    items: items.map((entry): ListItem => buildTocListItem(indexer, entry)),
  } as const;

  return { element: "list", data };
}
61
+
62
/**
 * Convert one depth item into a TOC `ListItem`.
 *
 * - A leaf (`kind` other than "list") becomes an "elements" item holding
 *   a single `table-of-contents` link whose target is `#tocN`, with `N`
 *   taken from `indexer`.
 * - A nested list becomes a "sub-list" bullet list whose children are
 *   converted recursively, in order.
 *
 * @param indexer - Shared counter used for the sequential `#tocN` anchors
 * @param item - Depth item: either a leaf heading or a nested list
 * @returns The `ListItem` to place in the enclosing TOC list
 */
function buildTocListItem(indexer: TocIndexer, item: DepthItem<null, string>): ListItem {
  if (item.kind !== "list") {
    // Leaf heading: draw exactly one index so anchors follow traversal order.
    const tocLink: Element = {
      element: "link",
      data: {
        type: "table-of-contents",
        link: `#toc${indexer.next()}`,
        extra: null,
        label: { text: item.value },
        target: null,
      },
    };

    return {
      "item-type": "elements",
      attributes: {},
      elements: [tocLink],
    };
  }

  const nestedItems = item.children.map((child) => buildTocListItem(indexer, child));
  return {
    "item-type": "sub-list",
    element: "list",
    data: {
      type: "bullet",
      attributes: {},
      items: nestedItems,
    },
  };
}
104
+
105
/**
 * Turn a flat `TocEntry[]` into nested bullet-list Elements.
 *
 * Heading levels are 1-based (h1=1, h2=2, ...) and are shifted to a
 * 0-based depth before being handed to `processDepths`. A single indexer
 * is shared by all resulting trees, so `#tocN` anchors keep counting
 * from one tree to the next.
 *
 * @param entries - Flat list of TOC entries with `level` and `text`
 * @returns One list Element per tree (usually one, but there can be
 *   several if levels reset); an empty array for empty input
 */
export function buildTableOfContents(entries: TocEntry[]): Element[] {
  if (entries.length === 0) return [];

  // List types are not differentiated for the TOC, hence the null `ltype`.
  const depthItems = entries.map(({ level, text }) => ({
    depth: level - 1,
    ltype: null as null,
    value: text,
  }));

  const indexer = new TocIndexer();
  return processDepths<null, string>(null, depthItems).map((tree) =>
    buildTocList(indexer, tree.list),
  );
}