@wdprlib/parser 3.1.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124):
  1. package/dist/index.cjs +312 -121
  2. package/dist/index.js +289 -98
  3. package/package.json +5 -3
  4. package/src/index.ts +163 -0
  5. package/src/lexer/index.ts +20 -0
  6. package/src/lexer/lexer.ts +687 -0
  7. package/src/lexer/tokens.ts +141 -0
  8. package/src/parser/constants.ts +173 -0
  9. package/src/parser/depth.ts +251 -0
  10. package/src/parser/index.ts +18 -0
  11. package/src/parser/parse.ts +315 -0
  12. package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
  13. package/src/parser/postprocess/index.ts +15 -0
  14. package/src/parser/postprocess/spanStrip.ts +697 -0
  15. package/src/parser/preprocess/expr.ts +265 -0
  16. package/src/parser/preprocess/index.ts +38 -0
  17. package/src/parser/preprocess/typography.ts +67 -0
  18. package/src/parser/preprocess/utils.ts +250 -0
  19. package/src/parser/preprocess/whitespace.ts +111 -0
  20. package/src/parser/rules/block/align.ts +282 -0
  21. package/src/parser/rules/block/bibliography.ts +359 -0
  22. package/src/parser/rules/block/block-list.ts +689 -0
  23. package/src/parser/rules/block/blockquote.ts +238 -0
  24. package/src/parser/rules/block/center.ts +87 -0
  25. package/src/parser/rules/block/clear-float.ts +75 -0
  26. package/src/parser/rules/block/code.ts +187 -0
  27. package/src/parser/rules/block/collapsible.ts +337 -0
  28. package/src/parser/rules/block/comment.ts +73 -0
  29. package/src/parser/rules/block/content-separator.ts +79 -0
  30. package/src/parser/rules/block/definition-list.ts +270 -0
  31. package/src/parser/rules/block/div.ts +400 -0
  32. package/src/parser/rules/block/embed-block.ts +153 -0
  33. package/src/parser/rules/block/footnoteblock.ts +200 -0
  34. package/src/parser/rules/block/heading.ts +142 -0
  35. package/src/parser/rules/block/horizontal-rule.ts +61 -0
  36. package/src/parser/rules/block/html.ts +222 -0
  37. package/src/parser/rules/block/iframe.ts +239 -0
  38. package/src/parser/rules/block/iftags.ts +150 -0
  39. package/src/parser/rules/block/include.ts +179 -0
  40. package/src/parser/rules/block/index.ts +127 -0
  41. package/src/parser/rules/block/list.ts +244 -0
  42. package/src/parser/rules/block/math.ts +183 -0
  43. package/src/parser/rules/block/module/backlinks/index.ts +31 -0
  44. package/src/parser/rules/block/module/backlinks/types.ts +21 -0
  45. package/src/parser/rules/block/module/categories/index.ts +34 -0
  46. package/src/parser/rules/block/module/categories/types.ts +21 -0
  47. package/src/parser/rules/block/module/css/index.ts +37 -0
  48. package/src/parser/rules/block/module/iftags/condition.ts +109 -0
  49. package/src/parser/rules/block/module/iftags/index.ts +26 -0
  50. package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
  51. package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
  52. package/src/parser/rules/block/module/iftags/types.ts +63 -0
  53. package/src/parser/rules/block/module/include/index.ts +20 -0
  54. package/src/parser/rules/block/module/include/resolve.ts +556 -0
  55. package/src/parser/rules/block/module/index.ts +122 -0
  56. package/src/parser/rules/block/module/join/index.ts +34 -0
  57. package/src/parser/rules/block/module/join/types.ts +23 -0
  58. package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
  59. package/src/parser/rules/block/module/listpages/extract.ts +410 -0
  60. package/src/parser/rules/block/module/listpages/index.ts +83 -0
  61. package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
  62. package/src/parser/rules/block/module/listpages/parser.ts +106 -0
  63. package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
  64. package/src/parser/rules/block/module/listpages/types.ts +513 -0
  65. package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
  66. package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
  67. package/src/parser/rules/block/module/listusers/extract.ts +45 -0
  68. package/src/parser/rules/block/module/listusers/index.ts +36 -0
  69. package/src/parser/rules/block/module/listusers/parser.ts +54 -0
  70. package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
  71. package/src/parser/rules/block/module/listusers/types.ts +93 -0
  72. package/src/parser/rules/block/module/mapping.ts +61 -0
  73. package/src/parser/rules/block/module/page-tree/index.ts +38 -0
  74. package/src/parser/rules/block/module/page-tree/types.ts +29 -0
  75. package/src/parser/rules/block/module/rate/index.ts +28 -0
  76. package/src/parser/rules/block/module/rate/types.ts +19 -0
  77. package/src/parser/rules/block/module/resolve.ts +411 -0
  78. package/src/parser/rules/block/module/types-common.ts +59 -0
  79. package/src/parser/rules/block/module/types.ts +61 -0
  80. package/src/parser/rules/block/module/utils.ts +43 -0
  81. package/src/parser/rules/block/module/walk.ts +380 -0
  82. package/src/parser/rules/block/module.ts +164 -0
  83. package/src/parser/rules/block/orphan-li.ts +177 -0
  84. package/src/parser/rules/block/paragraph.ts +157 -0
  85. package/src/parser/rules/block/table-block.ts +726 -0
  86. package/src/parser/rules/block/table.ts +441 -0
  87. package/src/parser/rules/block/tabview.ts +331 -0
  88. package/src/parser/rules/block/toc.ts +129 -0
  89. package/src/parser/rules/block/utils.ts +615 -0
  90. package/src/parser/rules/index.ts +49 -0
  91. package/src/parser/rules/inline/anchor-name.ts +154 -0
  92. package/src/parser/rules/inline/anchor.ts +327 -0
  93. package/src/parser/rules/inline/bibcite.ts +153 -0
  94. package/src/parser/rules/inline/bold.ts +86 -0
  95. package/src/parser/rules/inline/color.ts +140 -0
  96. package/src/parser/rules/inline/comment.ts +90 -0
  97. package/src/parser/rules/inline/equation-ref.ts +115 -0
  98. package/src/parser/rules/inline/expr.ts +526 -0
  99. package/src/parser/rules/inline/footnote.ts +223 -0
  100. package/src/parser/rules/inline/guillemet.ts +64 -0
  101. package/src/parser/rules/inline/html.ts +132 -0
  102. package/src/parser/rules/inline/image.ts +328 -0
  103. package/src/parser/rules/inline/index.ts +150 -0
  104. package/src/parser/rules/inline/italic.ts +74 -0
  105. package/src/parser/rules/inline/line-break.ts +326 -0
  106. package/src/parser/rules/inline/link-anchor.ts +147 -0
  107. package/src/parser/rules/inline/link-single.ts +164 -0
  108. package/src/parser/rules/inline/link-star.ts +134 -0
  109. package/src/parser/rules/inline/link-triple.ts +267 -0
  110. package/src/parser/rules/inline/math-inline.ts +126 -0
  111. package/src/parser/rules/inline/monospace.ts +78 -0
  112. package/src/parser/rules/inline/raw.ts +262 -0
  113. package/src/parser/rules/inline/size.ts +244 -0
  114. package/src/parser/rules/inline/span.ts +424 -0
  115. package/src/parser/rules/inline/strikethrough.ts +115 -0
  116. package/src/parser/rules/inline/subscript.ts +84 -0
  117. package/src/parser/rules/inline/superscript.ts +84 -0
  118. package/src/parser/rules/inline/text.ts +84 -0
  119. package/src/parser/rules/inline/underline.ts +127 -0
  120. package/src/parser/rules/inline/user.ts +147 -0
  121. package/src/parser/rules/inline/utils.ts +344 -0
  122. package/src/parser/rules/types.ts +252 -0
  123. package/src/parser/rules/utils.ts +155 -0
  124. package/src/parser/toc.ts +130 -0
@@ -0,0 +1,141 @@
1
+ import type { Position } from "@wdprlib/ast";
2
+
3
+ /**
4
+ * String-literal union naming every kind of token the Wikidot lexer can produce.
5
+ *
6
+ * Each member corresponds to a fixed character sequence (or class of
7
+ * sequences) in Wikidot markup. The trailing comment on each member shows
8
+ * the literal text that produces it and, where noted, a positional constraint.
9
+ *
10
+ * @group Lexer
11
+ */
12
+ export type TokenType =
13
+ // Special
14
+ | "EOF"
15
+ | "TEXT"
16
+ | "IDENTIFIER" // alphanumeric word
17
+ | "NEWLINE"
18
+ | "WHITESPACE"
19
+
20
+ // Block delimiters
21
+ | "BLOCK_OPEN" // [[
22
+ | "BLOCK_CLOSE" // ]]
23
+ | "BLOCK_END_OPEN" // [[/
24
+
25
+ // Inline formatting
26
+ | "BOLD_MARKER" // **
27
+ | "ITALIC_MARKER" // //
28
+ | "UNDERLINE_MARKER" // __
29
+ | "STRIKE_MARKER" // --
30
+ | "SUPER_MARKER" // ^^
31
+ | "SUB_MARKER" // ,,
32
+ | "MONO_MARKER" // {{
33
+ | "MONO_CLOSE" // }}
34
+
35
+ // Special syntax (line-start constructs and table cell markers)
36
+ | "HEADING_MARKER" // + (at line start)
37
+ | "HR_MARKER" // ---- (at line start)
38
+ | "LIST_BULLET" // * (at line start)
39
+ | "LIST_NUMBER" // # (at line start)
40
+ | "BLOCKQUOTE_MARKER" // > (at line start)
41
+ | "TABLE_MARKER" // || (at line start)
42
+ | "TABLE_HEADER" // ||~ (header cell)
43
+ | "TABLE_LEFT" // ||< (left align)
44
+ | "TABLE_CENTER" // ||= (center align)
45
+ | "TABLE_RIGHT" // ||> (right align)
46
+
47
+ // Code blocks
48
+ | "CODE_OPEN" // [[code]]
49
+ | "CODE_CLOSE" // [[/code]]
50
+
51
+ // Links
52
+ | "LINK_OPEN" // [[[
53
+ | "LINK_CLOSE" // ]]]
54
+ | "BRACKET_OPEN" // [
55
+ | "BRACKET_CLOSE" // ]
56
+ | "BRACKET_ANCHOR" // [#
57
+ | "BRACKET_STAR" // [*
58
+
59
+ // Special characters (single-character tokens, plus quoted strings)
60
+ | "PIPE" // |
61
+ | "EQUALS" // =
62
+ | "COLON" // :
63
+ | "SLASH" // /
64
+ | "STAR" // *
65
+ | "HASH" // #
66
+ | "AT" // @
67
+ | "AMPERSAND" // &
68
+ | "BACKSLASH" // \
69
+ | "QUOTED_STRING" // "..."
70
+
71
+ // Raw/Escape
72
+ | "RAW_OPEN" // @@ (same literal as RAW_CLOSE)
73
+ | "RAW_CLOSE" // @@ (same literal as RAW_OPEN)
74
+ | "RAW_BLOCK_OPEN" // @<
75
+ | "RAW_BLOCK_CLOSE" // >@
76
+
77
+ // Color
78
+ | "COLOR_MARKER" // ##
79
+
80
+ // Other
81
+ | "UNDERSCORE" // _ (single underscore, for line break)
82
+ | "BACKSLASH_BREAK" // U+E000 (preproc marker for \ at end of line)
83
+
84
+ // Comment
85
+ | "COMMENT_OPEN" // [!--
86
+ | "COMMENT_CLOSE" // --]
87
+
88
+ // Clear float
89
+ | "CLEAR_FLOAT" // ~~~ (NOTE(review): BLOCK_START_TOKENS in parser/constants.ts documents ~~~~ — confirm against the lexer)
90
+ | "CLEAR_FLOAT_LEFT" // ~~~< (NOTE(review): parser/constants.ts documents ~~~~< — confirm)
91
+ | "CLEAR_FLOAT_RIGHT" // ~~~> (NOTE(review): parser/constants.ts documents ~~~~> — confirm)
92
+
93
+ // Double angle (guillemet)
94
+ | "LEFT_DOUBLE_ANGLE" // <<
95
+ | "RIGHT_DOUBLE_ANGLE"; // >> (non-line-start)
96
+
97
+ /**
98
+ * One lexical token as produced by the `Lexer` and consumed by the parser.
99
+ *
100
+ * Every token records its category (`type`), the literal text it was
101
+ * built from (`value`), where it sits in the source (`position`), and a
102
+ * `lineStart` flag. The flag exists because several Wikidot constructs
103
+ * (headings, lists, blockquotes, horizontal rules) are only valid at the
104
+ * start of a line.
105
+ *
106
+ * @group Lexer
107
+ */
108
+ export interface Token {
109
+ /** The lexeme category (one member of {@link TokenType}) */
110
+ type: TokenType;
111
+ /** The literal source text that produced this token */
112
+ value: string;
113
+ /** Start/end location in the original source string */
114
+ position: Position;
115
+ /**
116
+ * `true` when this token is the first non-whitespace token on its line;
117
+ * block-level rules (headings, lists, blockquotes) check it before matching.
118
+ * NOTE(review): parser/constants.ts implies an indented `[[name]]` opener is not `lineStart` — confirm which holds.
119
+ */
120
+ lineStart: boolean;
121
+ }
122
+
123
/**
 * Build a {@link Token} from its four fields.
 *
 * The arguments are stored as given; nothing is validated or copied.
 *
 * @param type - The lexeme category
 * @param value - Literal source text the token was produced from
 * @param position - Source location range of the token
 * @param lineStart - Line-start flag for the token; `false` when omitted
 * @returns A freshly allocated token object holding the four values
 *
 * @group Lexer
 */
export function createToken(
  type: TokenType,
  value: string,
  position: Position,
  lineStart = false,
): Token {
  const token: Token = {
    type: type,
    value: value,
    position: position,
    lineStart: lineStart,
  };
  return token;
}
@@ -0,0 +1,173 @@
1
+ /**
2
+ *
3
+ * Parser constants that define structural boundaries in Wikidot markup.
4
+ *
5
+ * These constants are used by the paragraph rule to determine when a new
6
+ * block-level construct begins, which terminates the current paragraph.
7
+ * When any of these token types appear at the start of a line, the parser
8
+ * stops collecting inline content for the current paragraph and begins
9
+ * processing the new block element.
10
+ *
11
+ * @module
12
+ */
13
+
14
+ import type { TokenType } from "../lexer";
15
+
16
+ /**
17
+ * Token types that signal the start of a block-level construct in Wikidot markup.
18
+ *
19
+ * When the parser encounters any of these tokens at the beginning of a line while
20
+ * building a paragraph, it stops the paragraph and delegates to the appropriate
21
+ * block rule. The trailing comment on each entry names the Wikidot syntax it
22
+ * stands for (literals as documented on `TokenType` in the lexer).
23
+ */
24
+ export const BLOCK_START_TOKENS: TokenType[] = [
25
+ "BLOCKQUOTE_MARKER", // > (at line start)
26
+ "LIST_BULLET", // * (at line start)
27
+ "LIST_NUMBER", // # (at line start)
28
+ "HEADING_MARKER", // + (at line start)
29
+ "HR_MARKER", // ---- (at line start)
30
+ "TABLE_MARKER", // || (at line start)
31
+ "COLON", // Definition list
32
+ "BLOCK_OPEN", // [[footnoteblock]], [[div]], etc.
33
+ "BLOCK_END_OPEN", // [[/div]], [[/collapsible]], etc.
34
+ "EQUALS", // Center align (= text) or content separator (====)
35
+ "CLEAR_FLOAT", // ~~~~ (NOTE(review): the TokenType comment in lexer/tokens.ts shows ~~~ — confirm)
36
+ "CLEAR_FLOAT_LEFT", // ~~~~<
37
+ "CLEAR_FLOAT_RIGHT", // ~~~~>
38
+ ];
39
+
40
+ /**
41
+ * Set of block names recognized by the parser at `[[name]]` / `[[/name]]`.
42
+ *
43
+ * Used by inline-parser logic to distinguish real block boundaries from
44
+ * unknown names like `[[foo]]`, which Wikidot treats as inline text
45
+ * rather than as a paragraph-breaking block.
46
+ *
47
+ * Keep in sync with the set of block rules registered in
48
+ * `packages/parser/src/parser/rules/block/index.ts`. Align-style markers
49
+ * (`<`, `>`, `=`, `==`) are intentionally included because `[[<]]` etc.
50
+ * open `alignRule`. Names are stored as plain strings, so lookups are exact-match.
51
+ */
52
+ export const KNOWN_BLOCK_NAMES: ReadonlySet<string> = new Set<string>([
53
+ // structural containers
54
+ "collapsible",
55
+ "div",
56
+ "div_",
57
+ "code",
58
+ // list blocks
59
+ "ul",
60
+ "ol",
61
+ "li",
62
+ // table blocks
63
+ "table",
64
+ "row",
65
+ "cell",
66
+ "hcell",
67
+ // tabview / module (NOTE(review): `tab` is absent although the in-table names row/cell/hcell are listed — confirm intentional)
68
+ "tabview",
69
+ "tabs",
70
+ "module",
71
+ "module654",
72
+ // misc named blocks
73
+ "bibliography",
74
+ "footnoteblock",
75
+ "toc",
76
+ "iframe",
77
+ "math",
78
+ "html",
79
+ "iftags",
80
+ "include",
81
+ "f", // float TOC prefix: `[[f<toc]]`, `[[f>toc]]` (see toc rule)
82
+ // embed family
83
+ "embed",
84
+ "embedvideo",
85
+ "embedaudio",
86
+ // align markers
87
+ "<",
88
+ ">",
89
+ "=",
90
+ "==",
91
+ // inline-level constructs that use BLOCK_OPEN tokens; recognized here so
92
+ // that the paragraph parser keeps existing block-boundary behavior for
93
+ // `[[span]]`, `[[user ...]]`, `[[$ ... $]]`, etc. when they appear at
94
+ // the start of a line.
95
+ "span",
96
+ "span_",
97
+ "user",
98
+ "a",
99
+ "anchor",
100
+ "size",
101
+ "footnote",
102
+ "eref",
103
+ "$",
104
+ "image",
105
+ "gallery",
106
+ "file",
107
+ ]);
108
+
109
+ /**
110
+ * Block names whose rule sets `requiresLineStart: false`, i.e. they can
111
+ * legitimately start a block even when the `[[...]]` opener is preceded
112
+ * by leading whitespace on its line.
113
+ *
114
+ * Used by the inline parser to decide whether a `\n<indent>[[name]]`
115
+ * sequence ends the current paragraph. Without this list, the inline
116
+ * parser would either:
117
+ * - keep `lineStart` strict and miss legitimately-indented container
118
+ * blocks (Wikidot accepts e.g. `\n [[div_]]`); the inner block
119
+ * gets absorbed into the parent paragraph as literal text, or
120
+ * - drop the `lineStart` check entirely and prematurely break out of
121
+ * paragraphs for `\n [[toc]]` — a rule with `requiresLineStart: true`
122
+ * would refuse the indented token, leaving the paragraph split but
123
+ * the block unconsumed (literal `[[toc]]` text in a new paragraph).
124
+ *
125
+ * Each entry corresponds to a name handled by a block rule whose
126
+ * `requiresLineStart` is `false`. Keep this list in sync when adding or
127
+ * changing such rules; the inline-level constructs that happen to share
128
+ * `BLOCK_OPEN` (`[[span]]`, `[[image]]`, `[[user]]`, etc.) are
129
+ * intentionally excluded — they remain inline and should not split
130
+ * paragraphs based on indentation alone.
131
+ *
132
+ * Sources (block rule → handled names):
133
+ * - `bibliographyRule` → bibliography
134
+ * - `blockListRule` → ul, ol, li
135
+ * - `codeRule` → code
136
+ * - `collapsibleRule` → collapsible
137
+ * - `divRule` → div, div_
138
+ * - `embedBlockRule` → embed, embedvideo, embedaudio
139
+ * - `htmlRule` → html
140
+ * - `iframeRule` → iframe
141
+ * - `iftagsRule` → iftags
142
+ * - `mathRule` → math
143
+ * - `moduleRule` → module, module654
144
+ * - `orphanLiRule` → li (also under blockListRule)
145
+ * - `tableBlockRule` → table (row, cell, hcell are private to the in-table
146
+ * parser, never accepted by the top-level dispatcher)
147
+ * - `tabviewRule` → tabview, tabs (tab is private to the in-tabview parser)
148
+ *
149
+ * `includeRule` is omitted because `[[include ...]]` is expanded as a
150
+ * text-level macro by `resolveIncludes` before the parser sees it. Every name listed here is also a member of `KNOWN_BLOCK_NAMES`.
151
+ */
152
+ export const INDENT_ACCEPTING_BLOCK_NAMES: ReadonlySet<string> = new Set<string>([
153
+ "bibliography", // bibliographyRule
154
+ "ul", // blockListRule
155
+ "ol", // blockListRule
156
+ "li", // blockListRule and orphanLiRule
157
+ "code", // codeRule
158
+ "collapsible", // collapsibleRule
159
+ "div", // divRule
160
+ "div_", // divRule
161
+ "embed", // embedBlockRule
162
+ "embedvideo", // embedBlockRule
163
+ "embedaudio", // embedBlockRule
164
+ "html", // htmlRule
165
+ "iframe", // iframeRule
166
+ "iftags", // iftagsRule
167
+ "math", // mathRule
168
+ "module", // moduleRule
169
+ "module654", // moduleRule
170
+ "table", // tableBlockRule
171
+ "tabview", // tabviewRule
172
+ "tabs", // tabviewRule
173
+ ]);
@@ -0,0 +1,251 @@
1
+ /**
2
+ *
3
+ * Depth processing module for converting flat lists into nested tree structures.
4
+ *
5
+ * This is a TypeScript port of Wikidot's `depth.rs`. It handles the conversion
6
+ * of flat depth-annotated items (such as bullet/numbered list entries at various
7
+ * indentation levels) into properly nested tree structures. The algorithm uses an
8
+ * internal stack to track open nesting levels and collapses them as depth decreases.
9
+ *
10
+ * Used primarily by the list parser and the table-of-contents builder to transform
11
+ * flat sequences of items with depth annotations into hierarchical AST structures.
12
+ *
13
+ * @module
14
+ */
15
+
16
+ /**
17
+ * One node of a depth tree, discriminated by its `kind` field.
18
+ *
19
+ * A node is either a leaf (`kind: "item"`) carrying a `value`, or a nested list
20
+ * (`kind: "list"`) carrying its list type `ltype` and its `children`. Because
21
+ * `children` is itself a {@link DepthList}, nesting can be arbitrarily deep.
22
+ * @typeParam L - The list type discriminator (e.g., "bullet" vs "number" for lists,
23
+ * or `null` when list type distinction is not needed)
24
+ * @typeParam T - The type of leaf item values
25
+ */
26
+ export type DepthItem<L, T> =
27
+ | { kind: "item"; value: T }
28
+ | { kind: "list"; ltype: L; children: DepthList<L, T> };
29
+
30
+ /**
31
+ * An ordered array of {@link DepthItem} nodes that all sit at the same nesting level.
32
+ *
33
+ * @typeParam L - The list type discriminator
34
+ * @typeParam T - The type of leaf item values
35
+ */
36
+ export type DepthList<L, T> = DepthItem<L, T>[];
37
+
38
/**
 * Internal stack-based builder for constructing depth trees incrementally.
 *
 * The stack tracks open nesting levels. As items are added at increasing depths,
 * new levels are pushed. When depth decreases, levels are popped and collapsed
 * into their parent as nested list nodes. When the list type changes at the same
 * depth, the current list is finalized and a new one begins.
 *
 * Invariant: the stack always holds at least the root layer (index 0). Every
 * method preserves it, so `first` and `last` are always defined.
 *
 * @typeParam L - The list type discriminator (may legitimately be `null`)
 * @typeParam T - The type of leaf item values
 */
class DepthStack<L, T> {
  /** Completed top-level trees, in the order they were finished. */
  private finished: Array<{ ltype: L; list: DepthList<L, T> }> = [];
  /** Open nesting layers; index 0 is the root layer and is never removed. */
  private stack: Array<{ ltype: L; items: DepthItem<L, T>[] }>;

  /**
   * @param topLtype - The list type for the initial (top-level) nesting layer
   */
  constructor(topLtype: L) {
    this.stack = [{ ltype: topLtype, items: [] }];
  }

  /** Returns the topmost (deepest nesting) layer on the stack. */
  private get last(): { ltype: L; items: DepthItem<L, T>[] } {
    return this.stack[this.stack.length - 1]!;
  }

  /** Returns the bottommost (root) layer on the stack. */
  private get first(): { ltype: L; items: DepthItem<L, T>[] } {
    return this.stack[0]!;
  }

  /** Returns true if only the root layer remains on the stack. */
  private isSingle(): boolean {
    return this.stack.length === 1;
  }

  /**
   * Push a new, empty nesting level onto the stack.
   * @param ltype - The list type for the new level
   */
  increaseDepth(ltype: L): void {
    this.stack.push({ ltype, items: [] });
  }

  /**
   * Pop the topmost nesting level and collapse it into a list node on its parent.
   * @throws Error if only the root layer is left, since it has no parent to
   *   collapse into. The stack is left untouched in that case.
   */
  decreaseDepth(): void {
    // Check BEFORE popping: removing the root layer would leave nothing for
    // `push` to append to and would break the "root always exists" invariant.
    if (this.stack.length <= 1) {
      throw new Error("No depth to pop off!");
    }
    const popped = this.stack.pop()!;
    this.push({ kind: "list", ltype: popped.ltype, children: popped.items });
  }

  /**
   * Start a new list at the current depth with a different list type.
   *
   * When the list type changes (e.g., from bullet to numbered) at the same depth,
   * this method finalizes the current list and begins a new one. At the root layer
   * the entire tree is finalized; at deeper layers a pop/push cycle suffices.
   *
   * @param ltype - The list type for the new list
   */
  newList(ltype: L): void {
    if (this.isSingle()) {
      // Root layer: the pop/push trick doesn't work here. Emit everything
      // collected so far as a finished tree and restart the root with `ltype`.
      this.finishDepthList({ ltype });
    } else {
      // Deeper layer: close the current list and open a sibling list.
      this.decreaseDepth();
      this.increaseDepth(ltype);
    }
  }

  /** Append a depth item to the current (topmost) layer. */
  private push(item: DepthItem<L, T>): void {
    this.last.items.push(item);
  }

  /**
   * Add a leaf item to the current nesting level.
   * @param item - The value to wrap in a leaf node
   */
  pushItem(item: T): void {
    this.push({ kind: "item", value: item });
  }

  /** Returns the list type of the topmost nesting level. */
  lastType(): L {
    return this.last.ltype;
  }

  /**
   * Finalize the current tree by collapsing all open layers into a single
   * finished tree, then reset the root layer for continued processing.
   *
   * @param next - When given, the root layer restarts with `next.ltype`.
   *   When omitted (as {@link intoTrees} does), the root keeps its current type.
   *   The wrapper object is what distinguishes "no new type" from a list type
   *   whose value happens to be `null` or `undefined`.
   */
  private finishDepthList(next?: { ltype: L }): void {
    // Collapse every open layer down into the root layer.
    while (this.stack.length > 1) {
      this.decreaseDepth();
    }

    // Capture the finished root contents before resetting the layer.
    const first = this.first;
    const ltype = first.ltype;
    const list = first.items;

    if (next !== undefined) {
      first.ltype = next.ltype;
    }
    first.items = [];

    // Empty trees are not reported.
    if (list.length > 0) {
      this.finished.push({ ltype, list });
    }
  }

  /**
   * Finalize all remaining layers and return the completed trees.
   * @returns Array of finished trees, each with its list type and items
   */
  intoTrees(): Array<{ ltype: L; list: DepthList<L, T> }> {
    this.finishDepthList();
    return this.finished;
  }
}
178
+
179
/**
 * Turn a flat, depth-annotated sequence into one or more nested trees.
 *
 * Items are consumed in order while a `DepthStack` tracks the currently open
 * nesting levels. For each item the stack is first brought to the item's
 * depth (opening levels with the item's list type, or closing levels), then a
 * fresh list is started if the list type at that level differs from the
 * item's, and finally the item's value is appended. More than one tree is
 * returned when the list type changes at the root level (depth 0).
 *
 * @typeParam L - The list type discriminator
 * @typeParam T - The type of leaf item values
 * @param topLtype - The default list type for the root level
 * @param items - Flat sequence of depth-annotated items, where each item has:
 *   - `depth`: the 0-based nesting level
 *   - `ltype`: the list type for grouping (e.g., "bullet" vs "number")
 *   - `value`: the actual item content
 * @param ltypeEquals - Equality comparator for list types (defaults to `===`)
 * @returns Array of finished trees, each with an `ltype` and a `list` of nested items.
 */
export function processDepths<L, T>(
  topLtype: L,
  items: Array<{ depth: number; ltype: L; value: T }>,
  ltypeEquals: (a: L, b: L) => boolean = (a, b) => a === b,
): Array<{ ltype: L; list: DepthList<L, T> }> {
  const builder = new DepthStack<L, T>(topLtype);

  // Depth of the item handled in the previous iteration (root before the first).
  let prevDepth = 0;

  for (const entry of items) {
    const targetDepth = entry.depth;
    const listType = entry.ltype;
    const payload = entry.value;

    // At most one of the two loops below does any work:
    // going deeper opens levels, going shallower closes them.
    let level = prevDepth;
    while (level < targetDepth) {
      builder.increaseDepth(listType);
      level++;
    }

    level = targetDepth;
    while (level < prevDepth) {
      builder.decreaseDepth();
      level++;
    }

    // A type mismatch at the current level closes that list and starts a new
    // one of the item's type; later items keep appending to it until the
    // depth or the type changes again.
    const sameType = ltypeEquals(builder.lastType(), listType);
    if (!sameType) {
      builder.newList(listType);
    }

    builder.pushItem(payload);
    prevDepth = targetDepth;
  }

  return builder.intoTrees();
}
@@ -0,0 +1,18 @@
1
+ /**
2
+ *
3
+ * Main parser for Wikidot markup.
4
+ *
5
+ * The parser consumes a token stream from the lexer and produces an AST
6
+ * (Abstract Syntax Tree) conforming to the `@wdprlib/ast` package types.
7
+ * It applies block rules and inline rules in a recursive-descent fashion,
8
+ * followed by post-processing passes for paragraph merging and cleanup.
9
+ *
10
+ * The main entry points are:
11
+ * - `parse()` - convenience function that parses a string in one call
12
+ * - `Parser` class - for more control over parsing options
13
+ *
14
+ * @module
15
+ */
16
+
17
+ export type { ParserOptions } from "./parse";
18
+ export { Parser, parse } from "./parse";