@wdprlib/parser 3.1.1 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +312 -121
- package/dist/index.js +289 -98
- package/package.json +5 -3
- package/src/index.ts +163 -0
- package/src/lexer/index.ts +20 -0
- package/src/lexer/lexer.ts +687 -0
- package/src/lexer/tokens.ts +141 -0
- package/src/parser/constants.ts +173 -0
- package/src/parser/depth.ts +251 -0
- package/src/parser/index.ts +18 -0
- package/src/parser/parse.ts +315 -0
- package/src/parser/postprocess/divAdjacentParagraph.ts +76 -0
- package/src/parser/postprocess/index.ts +15 -0
- package/src/parser/postprocess/spanStrip.ts +697 -0
- package/src/parser/preprocess/expr.ts +265 -0
- package/src/parser/preprocess/index.ts +38 -0
- package/src/parser/preprocess/typography.ts +67 -0
- package/src/parser/preprocess/utils.ts +250 -0
- package/src/parser/preprocess/whitespace.ts +111 -0
- package/src/parser/rules/block/align.ts +282 -0
- package/src/parser/rules/block/bibliography.ts +359 -0
- package/src/parser/rules/block/block-list.ts +689 -0
- package/src/parser/rules/block/blockquote.ts +238 -0
- package/src/parser/rules/block/center.ts +87 -0
- package/src/parser/rules/block/clear-float.ts +75 -0
- package/src/parser/rules/block/code.ts +187 -0
- package/src/parser/rules/block/collapsible.ts +337 -0
- package/src/parser/rules/block/comment.ts +73 -0
- package/src/parser/rules/block/content-separator.ts +79 -0
- package/src/parser/rules/block/definition-list.ts +270 -0
- package/src/parser/rules/block/div.ts +400 -0
- package/src/parser/rules/block/embed-block.ts +153 -0
- package/src/parser/rules/block/footnoteblock.ts +200 -0
- package/src/parser/rules/block/heading.ts +142 -0
- package/src/parser/rules/block/horizontal-rule.ts +61 -0
- package/src/parser/rules/block/html.ts +222 -0
- package/src/parser/rules/block/iframe.ts +239 -0
- package/src/parser/rules/block/iftags.ts +150 -0
- package/src/parser/rules/block/include.ts +179 -0
- package/src/parser/rules/block/index.ts +127 -0
- package/src/parser/rules/block/list.ts +244 -0
- package/src/parser/rules/block/math.ts +183 -0
- package/src/parser/rules/block/module/backlinks/index.ts +31 -0
- package/src/parser/rules/block/module/backlinks/types.ts +21 -0
- package/src/parser/rules/block/module/categories/index.ts +34 -0
- package/src/parser/rules/block/module/categories/types.ts +21 -0
- package/src/parser/rules/block/module/css/index.ts +37 -0
- package/src/parser/rules/block/module/iftags/condition.ts +109 -0
- package/src/parser/rules/block/module/iftags/index.ts +26 -0
- package/src/parser/rules/block/module/iftags/preprocess.ts +140 -0
- package/src/parser/rules/block/module/iftags/resolve.ts +73 -0
- package/src/parser/rules/block/module/iftags/types.ts +63 -0
- package/src/parser/rules/block/module/include/index.ts +20 -0
- package/src/parser/rules/block/module/include/resolve.ts +556 -0
- package/src/parser/rules/block/module/index.ts +122 -0
- package/src/parser/rules/block/module/join/index.ts +34 -0
- package/src/parser/rules/block/module/join/types.ts +23 -0
- package/src/parser/rules/block/module/listpages/compiler.ts +453 -0
- package/src/parser/rules/block/module/listpages/extract.ts +410 -0
- package/src/parser/rules/block/module/listpages/index.ts +83 -0
- package/src/parser/rules/block/module/listpages/normalize.ts +390 -0
- package/src/parser/rules/block/module/listpages/parser.ts +106 -0
- package/src/parser/rules/block/module/listpages/resolve.ts +130 -0
- package/src/parser/rules/block/module/listpages/types.ts +513 -0
- package/src/parser/rules/block/module/listpages/url-resolver.ts +186 -0
- package/src/parser/rules/block/module/listusers/compiler.ts +77 -0
- package/src/parser/rules/block/module/listusers/extract.ts +45 -0
- package/src/parser/rules/block/module/listusers/index.ts +36 -0
- package/src/parser/rules/block/module/listusers/parser.ts +54 -0
- package/src/parser/rules/block/module/listusers/resolve.ts +58 -0
- package/src/parser/rules/block/module/listusers/types.ts +93 -0
- package/src/parser/rules/block/module/mapping.ts +61 -0
- package/src/parser/rules/block/module/page-tree/index.ts +38 -0
- package/src/parser/rules/block/module/page-tree/types.ts +29 -0
- package/src/parser/rules/block/module/rate/index.ts +28 -0
- package/src/parser/rules/block/module/rate/types.ts +19 -0
- package/src/parser/rules/block/module/resolve.ts +411 -0
- package/src/parser/rules/block/module/types-common.ts +59 -0
- package/src/parser/rules/block/module/types.ts +61 -0
- package/src/parser/rules/block/module/utils.ts +43 -0
- package/src/parser/rules/block/module/walk.ts +380 -0
- package/src/parser/rules/block/module.ts +164 -0
- package/src/parser/rules/block/orphan-li.ts +177 -0
- package/src/parser/rules/block/paragraph.ts +157 -0
- package/src/parser/rules/block/table-block.ts +726 -0
- package/src/parser/rules/block/table.ts +441 -0
- package/src/parser/rules/block/tabview.ts +331 -0
- package/src/parser/rules/block/toc.ts +129 -0
- package/src/parser/rules/block/utils.ts +615 -0
- package/src/parser/rules/index.ts +49 -0
- package/src/parser/rules/inline/anchor-name.ts +154 -0
- package/src/parser/rules/inline/anchor.ts +327 -0
- package/src/parser/rules/inline/bibcite.ts +153 -0
- package/src/parser/rules/inline/bold.ts +86 -0
- package/src/parser/rules/inline/color.ts +140 -0
- package/src/parser/rules/inline/comment.ts +90 -0
- package/src/parser/rules/inline/equation-ref.ts +115 -0
- package/src/parser/rules/inline/expr.ts +526 -0
- package/src/parser/rules/inline/footnote.ts +223 -0
- package/src/parser/rules/inline/guillemet.ts +64 -0
- package/src/parser/rules/inline/html.ts +132 -0
- package/src/parser/rules/inline/image.ts +328 -0
- package/src/parser/rules/inline/index.ts +150 -0
- package/src/parser/rules/inline/italic.ts +74 -0
- package/src/parser/rules/inline/line-break.ts +326 -0
- package/src/parser/rules/inline/link-anchor.ts +147 -0
- package/src/parser/rules/inline/link-single.ts +164 -0
- package/src/parser/rules/inline/link-star.ts +134 -0
- package/src/parser/rules/inline/link-triple.ts +267 -0
- package/src/parser/rules/inline/math-inline.ts +126 -0
- package/src/parser/rules/inline/monospace.ts +78 -0
- package/src/parser/rules/inline/raw.ts +262 -0
- package/src/parser/rules/inline/size.ts +244 -0
- package/src/parser/rules/inline/span.ts +424 -0
- package/src/parser/rules/inline/strikethrough.ts +115 -0
- package/src/parser/rules/inline/subscript.ts +84 -0
- package/src/parser/rules/inline/superscript.ts +84 -0
- package/src/parser/rules/inline/text.ts +84 -0
- package/src/parser/rules/inline/underline.ts +127 -0
- package/src/parser/rules/inline/user.ts +147 -0
- package/src/parser/rules/inline/utils.ts +344 -0
- package/src/parser/rules/types.ts +252 -0
- package/src/parser/rules/utils.ts +155 -0
- package/src/parser/toc.ts +130 -0
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import type { Position } from "@wdprlib/ast";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Every distinct lexeme the Wikidot lexer can produce.
|
|
5
|
+
*
|
|
6
|
+
* Each value corresponds to a fixed character sequence (or class of
|
|
7
|
+
* sequences) in Wikidot markup. The inline comments show the literal
|
|
8
|
+
* text that produces each token type.
|
|
9
|
+
*
|
|
10
|
+
* @group Lexer
|
|
11
|
+
*/
|
|
12
|
+
export type TokenType =
  // Special
  | "EOF"
  | "TEXT"
  | "IDENTIFIER" // alphanumeric word
  | "NEWLINE"
  | "WHITESPACE"

  // Block delimiters
  | "BLOCK_OPEN" // [[
  | "BLOCK_CLOSE" // ]]
  | "BLOCK_END_OPEN" // [[/

  // Inline formatting
  | "BOLD_MARKER" // **
  | "ITALIC_MARKER" // //
  | "UNDERLINE_MARKER" // __
  | "STRIKE_MARKER" // --
  | "SUPER_MARKER" // ^^
  | "SUB_MARKER" // ,,
  | "MONO_MARKER" // {{
  | "MONO_CLOSE" // }}

  // Special syntax (the markers below are annotated as line-start or cell-prefix forms)
  | "HEADING_MARKER" // + (at line start)
  | "HR_MARKER" // ---- (at line start)
  | "LIST_BULLET" // * (at line start)
  | "LIST_NUMBER" // # (at line start)
  | "BLOCKQUOTE_MARKER" // > (at line start)
  | "TABLE_MARKER" // || (at line start)
  | "TABLE_HEADER" // ||~ (header cell)
  | "TABLE_LEFT" // ||< (left align)
  | "TABLE_CENTER" // ||= (center align)
  | "TABLE_RIGHT" // ||> (right align)

  // Code blocks
  | "CODE_OPEN" // [[code]]
  | "CODE_CLOSE" // [[/code]]

  // Links
  | "LINK_OPEN" // [[[
  | "LINK_CLOSE" // ]]]
  | "BRACKET_OPEN" // [
  | "BRACKET_CLOSE" // ]
  | "BRACKET_ANCHOR" // [#
  | "BRACKET_STAR" // [*

  // Special characters
  | "PIPE" // |
  | "EQUALS" // =
  | "COLON" // :
  | "SLASH" // /
  | "STAR" // *
  | "HASH" // #
  | "AT" // @
  | "AMPERSAND" // &
  | "BACKSLASH" // \
  | "QUOTED_STRING" // "..."

  // Raw/Escape
  // RAW_OPEN and RAW_CLOSE are documented with the same literal (`@@`);
  // presumably the lexer picks one by context — TODO confirm in the lexer.
  | "RAW_OPEN" // @@
  | "RAW_CLOSE" // @@
  | "RAW_BLOCK_OPEN" // @<
  | "RAW_BLOCK_CLOSE" // >@

  // Color
  | "COLOR_MARKER" // ##

  // Other
  | "UNDERSCORE" // _ (single underscore, for line break)
  | "BACKSLASH_BREAK" // U+E000 (preprocessor marker for \ at end of line)

  // Comment
  | "COMMENT_OPEN" // [!--
  | "COMMENT_CLOSE" // --]

  // Clear float
  // NOTE(review): the comments here show three tildes, while the comments on
  // BLOCK_START_TOKENS in parser/constants.ts show four (`~~~~`, `~~~~<`,
  // `~~~~>`). Confirm against the lexer which spelling is actually matched.
  | "CLEAR_FLOAT" // ~~~
  | "CLEAR_FLOAT_LEFT" // ~~~<
  | "CLEAR_FLOAT_RIGHT" // ~~~>

  // Double angle (guillemet)
  | "LEFT_DOUBLE_ANGLE" // <<
  | "RIGHT_DOUBLE_ANGLE"; // >> (non-line-start)
|
|
96
|
+
|
|
97
|
+
/**
|
|
98
|
+
* A single lexical token produced by the `Lexer`.
|
|
99
|
+
*
|
|
100
|
+
* Tokens are the input to the parser stage. Each token carries its
|
|
101
|
+
* literal text (`value`), source location (`position`), and a flag
|
|
102
|
+
* indicating whether it appeared at the beginning of a line — which
|
|
103
|
+
* matters because several Wikidot constructs (headings, lists,
|
|
104
|
+
* blockquotes, horizontal rules) are only valid at line start.
|
|
105
|
+
*
|
|
106
|
+
* @group Lexer
|
|
107
|
+
*/
|
|
108
|
+
export interface Token {
  /** The lexeme category (one of the `TokenType` string literals) */
  type: TokenType;
  /** The literal source text that produced this token */
  value: string;
  /** Start/end location in the original source string */
  position: Position;
  /**
   * `true` when this token is the first non-whitespace token on its
   * line. Block-level rules (headings, lists, blockquotes) check this
   * flag before attempting to match.
   *
   * NOTE(review): the documentation of `INDENT_ACCEPTING_BLOCK_NAMES` in
   * parser/constants.ts describes an indented `[[name]]` opener as failing a
   * strict `lineStart` check, which suggests leading whitespace may clear
   * this flag. Confirm the exact rule against the lexer.
   */
  lineStart: boolean;
}
|
|
122
|
+
|
|
123
|
+
/**
|
|
124
|
+
* Construct a {@link Token} value.
|
|
125
|
+
*
|
|
126
|
+
* @param type - The lexeme category
|
|
127
|
+
* @param value - Literal source text
|
|
128
|
+
* @param position - Source location range
|
|
129
|
+
* @param lineStart - Whether the token starts a new line
|
|
130
|
+
* @returns A new token object
|
|
131
|
+
*
|
|
132
|
+
* @group Lexer
|
|
133
|
+
*/
|
|
134
|
+
export function createToken(
  type: TokenType,
  value: string,
  position: Position,
  lineStart = false,
): Token {
  // Plain object literal: no validation or copying is performed, and the
  // `position` object is stored by reference. `lineStart` falls back to
  // `false` when the caller omits it.
  const token: Token = { type, value, position, lineStart };
  return token;
}
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Parser constants that define structural boundaries in Wikidot markup.
|
|
4
|
+
*
|
|
5
|
+
* These constants are used by the paragraph rule to determine when a new
|
|
6
|
+
* block-level construct begins, which terminates the current paragraph.
|
|
7
|
+
* When any of these token types appear at the start of a line, the parser
|
|
8
|
+
* stops collecting inline content for the current paragraph and begins
|
|
9
|
+
* processing the new block element.
|
|
10
|
+
*
|
|
11
|
+
* @module
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import type { TokenType } from "../lexer";
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* Token types that signal the start of a block-level construct in Wikidot markup.
|
|
18
|
+
*
|
|
19
|
+
* When the parser encounters any of these tokens at the beginning of a line while
|
|
20
|
+
* building a paragraph, it stops the paragraph and delegates to the appropriate
|
|
21
|
+
* block rule. Each token maps to a specific Wikidot syntax element (documented
|
|
22
|
+
* inline with comments).
|
|
23
|
+
*/
|
|
24
|
+
export const BLOCK_START_TOKENS: TokenType[] = [
  "BLOCKQUOTE_MARKER", // >
  "LIST_BULLET", // *
  "LIST_NUMBER", // #
  "HEADING_MARKER", // +
  "HR_MARKER", // ----
  "TABLE_MARKER", // ||
  "COLON", // Definition list
  "BLOCK_OPEN", // [[footnoteblock]], [[div]], etc.
  "BLOCK_END_OPEN", // [[/div]], [[/collapsible]], etc.
  "EQUALS", // Center align (= text) or content separator (====)
  "CLEAR_FLOAT", // ~~~~
  "CLEAR_FLOAT_LEFT", // ~~~~<
  "CLEAR_FLOAT_RIGHT", // ~~~~>
];
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* Set of block names recognized by the parser at `[[name]]` / `[[/name]]`.
|
|
42
|
+
*
|
|
43
|
+
* Used by inline-parser logic to distinguish real block boundaries from
|
|
44
|
+
* unknown tokens like `[[foo]]`, which Wikidot treats as inline text
|
|
45
|
+
* rather than as a paragraph-breaking block.
|
|
46
|
+
*
|
|
47
|
+
* Keep in sync with the set of block rules registered in
|
|
48
|
+
* `packages/parser/src/parser/rules/block/index.ts`. Align-style markers
|
|
49
|
+
* (`<`, `>`, `=`, `==`) are intentionally included because `[[<]]` etc.
|
|
50
|
+
* open `alignRule`.
|
|
51
|
+
*/
|
|
52
|
+
export const KNOWN_BLOCK_NAMES: ReadonlySet<string> = new Set<string>([
  // structural containers
  "collapsible",
  "div",
  "div_",
  "code",
  // list blocks
  "ul",
  "ol",
  "li",
  // table blocks
  "table",
  "row",
  "cell",
  "hcell",
  // tabview / module
  "tabview",
  "tabs",
  "module",
  "module654",
  // misc named blocks
  "bibliography",
  "footnoteblock",
  "toc",
  "iframe",
  "math",
  "html",
  "iftags",
  "include",
  "f", // float TOC prefix: `[[f<toc]]`, `[[f>toc]]` (see toc rule)
  // embed family
  "embed",
  "embedvideo",
  "embedaudio",
  // align markers (`[[<]]`, `[[>]]`, `[[=]]`, `[[==]]`)
  "<",
  ">",
  "=",
  "==",
  // Inline-level constructs that also begin with a BLOCK_OPEN token. They are
  // listed here so that the paragraph parser keeps its existing
  // block-boundary behavior for `[[span]]`, `[[user ...]]`, `[[$ ... $]]`,
  // etc. when they appear at the start of a line.
  "span",
  "span_",
  "user",
  "a",
  "anchor",
  "size",
  "footnote",
  "eref",
  "$",
  "image",
  "gallery",
  "file",
]);
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* Block names whose rule sets `requiresLineStart: false`, i.e. they can
|
|
111
|
+
* legitimately start a block even when the `[[...]]` opener is preceded
|
|
112
|
+
* by leading whitespace on its line.
|
|
113
|
+
*
|
|
114
|
+
* Used by the inline parser to decide whether a `\n<indent>[[name]]`
|
|
115
|
+
* sequence ends the current paragraph. Without this list, the inline
|
|
116
|
+
* parser would either:
|
|
117
|
+
* - keep `lineStart` strict and miss legitimately-indented container
|
|
118
|
+
* blocks (Wikidot accepts e.g. `\n [[div_]]`); the inner block
|
|
119
|
+
* gets absorbed into the parent paragraph as literal text, or
|
|
120
|
+
* - drop the `lineStart` check entirely and prematurely break out of
|
|
121
|
+
* paragraphs for `\n [[toc]]` — a rule with `requiresLineStart: true`
|
|
122
|
+
* would refuse the indented token, leaving the paragraph split but
|
|
123
|
+
* the block unconsumed (literal `[[toc]]` text in a new paragraph).
|
|
124
|
+
*
|
|
125
|
+
* Each entry corresponds to a name handled by a block rule whose
|
|
126
|
+
* `requiresLineStart` is `false`. Keep this list in sync when adding or
|
|
127
|
+
* changing such rules; the inline-level constructs that happen to share
|
|
128
|
+
* `BLOCK_OPEN` (`[[span]]`, `[[image]]`, `[[user]]`, etc.) are
|
|
129
|
+
* intentionally excluded — they remain inline and should not split
|
|
130
|
+
* paragraphs based on indentation alone.
|
|
131
|
+
*
|
|
132
|
+
* Sources (block rule → handled names):
|
|
133
|
+
* - `bibliographyRule` → bibliography
|
|
134
|
+
* - `blockListRule` → ul, ol, li
|
|
135
|
+
* - `codeRule` → code
|
|
136
|
+
* - `collapsibleRule` → collapsible
|
|
137
|
+
* - `divRule` → div, div_
|
|
138
|
+
* - `embedBlockRule` → embed, embedvideo, embedaudio
|
|
139
|
+
* - `htmlRule` → html
|
|
140
|
+
* - `iframeRule` → iframe
|
|
141
|
+
* - `iftagsRule` → iftags
|
|
142
|
+
* - `mathRule` → math
|
|
143
|
+
* - `moduleRule` → module, module654
|
|
144
|
+
* - `orphanLiRule` → li (also under blockListRule)
|
|
145
|
+
* - `tableBlockRule` → table (row, cell, hcell are private to the in-table
|
|
146
|
+
* parser, never accepted by the top-level dispatcher)
|
|
147
|
+
* - `tabviewRule` → tabview, tabs (tab is private to the in-tabview parser)
|
|
148
|
+
*
|
|
149
|
+
* `includeRule` is omitted because `[[include ...]]` is expanded as a
|
|
150
|
+
* text-level macro by `resolveIncludes` before the parser sees it.
|
|
151
|
+
*/
|
|
152
|
+
export const INDENT_ACCEPTING_BLOCK_NAMES: ReadonlySet<string> = new Set<string>([
  "bibliography", // bibliographyRule
  "ul", // blockListRule
  "ol", // blockListRule
  "li", // blockListRule and orphanLiRule
  "code", // codeRule
  "collapsible", // collapsibleRule
  "div", // divRule
  "div_", // divRule
  "embed", // embedBlockRule
  "embedvideo", // embedBlockRule
  "embedaudio", // embedBlockRule
  "html", // htmlRule
  "iframe", // iframeRule
  "iftags", // iftagsRule
  "math", // mathRule
  "module", // moduleRule
  "module654", // moduleRule
  "table", // tableBlockRule
  "tabview", // tabviewRule
  "tabs", // tabviewRule
]);
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Depth processing module for converting flat lists into nested tree structures.
|
|
4
|
+
*
|
|
5
|
+
* This is a TypeScript port of Wikidot's `depth.rs`. It handles the conversion
|
|
6
|
+
* of flat depth-annotated items (such as bullet/numbered list entries at various
|
|
7
|
+
* indentation levels) into properly nested tree structures. The algorithm uses an
|
|
8
|
+
* internal stack to track open nesting levels and collapses them as depth decreases.
|
|
9
|
+
*
|
|
10
|
+
* Used primarily by the list parser and the table-of-contents builder to transform
|
|
11
|
+
* flat sequences of items with depth annotations into hierarchical AST structures.
|
|
12
|
+
*
|
|
13
|
+
* @module
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* Represents a single node in a depth tree.
|
|
18
|
+
*
|
|
19
|
+
* A node is either a leaf item containing a value, or a nested list containing
|
|
20
|
+
* children. This recursive type allows arbitrarily deep nesting.
|
|
21
|
+
*
|
|
22
|
+
* @typeParam L - The list type discriminator (e.g., "bullet" vs "number" for lists,
|
|
23
|
+
* or `null` when list type distinction is not needed)
|
|
24
|
+
* @typeParam T - The type of leaf item values
|
|
25
|
+
*/
|
|
26
|
+
export type DepthItem<L, T> =
  // Leaf node: wraps a single value of type T.
  | { kind: "item"; value: T }
  // Nested list node: `ltype` tags the sub-list, `children` holds its nodes.
  | { kind: "list"; ltype: L; children: DepthList<L, T> };

/**
 * An ordered collection of depth tree nodes at the same level.
 *
 * @typeParam L - The list type discriminator
 * @typeParam T - The type of leaf item values
 */
export type DepthList<L, T> = DepthItem<L, T>[];
|
|
37
|
+
|
|
38
|
+
/**
|
|
39
|
+
* Internal stack-based builder for constructing depth trees incrementally.
|
|
40
|
+
*
|
|
41
|
+
* The stack tracks open nesting levels. As items are added at increasing depths,
|
|
42
|
+
* new levels are pushed. When depth decreases, levels are popped and collapsed
|
|
43
|
+
* into their parent as nested list nodes. When the list type changes at the same
|
|
44
|
+
* depth, the current list is finalized and a new one begins.
|
|
45
|
+
*
|
|
46
|
+
* @typeParam L - The list type discriminator
|
|
47
|
+
* @typeParam T - The type of leaf item values
|
|
48
|
+
*/
|
|
49
|
+
class DepthStack<L, T> {
  /** Root-level trees that have already been finalized, in completion order. */
  private finished: Array<{ ltype: L; list: DepthList<L, T> }> = [];
  /** Open nesting layers: index 0 is the root, the last entry is the deepest. */
  private stack: Array<{ ltype: L; items: DepthItem<L, T>[] }>;

  /**
   * @param topLtype - The list type for the initial (top-level) nesting layer
   */
  constructor(topLtype: L) {
    this.stack = [{ ltype: topLtype, items: [] }];
  }

  /** Returns the topmost (deepest nesting) layer on the stack. */
  private get last(): { ltype: L; items: DepthItem<L, T>[] } {
    return this.stack[this.stack.length - 1]!;
  }

  /** Returns the bottommost (root) layer on the stack. */
  private get first(): { ltype: L; items: DepthItem<L, T>[] } {
    return this.stack[0]!;
  }

  /** Returns true if only the root layer remains on the stack. */
  private isSingle(): boolean {
    return this.stack.length === 1;
  }

  /**
   * Push a new, empty nesting level onto the stack.
   * @param ltype - The list type for the new level
   */
  increaseDepth(ltype: L): void {
    this.stack.push({ ltype, items: [] });
  }

  /**
   * Pop the topmost nesting level and append it, as a `"list"` node, to the
   * layer beneath it.
   *
   * The root layer has no parent to collapse into, so it is never popped.
   * The check happens before the pop so that an invalid call leaves the
   * stack intact and usable.
   *
   * @throws Error if only the root layer is open (nothing to pop)
   */
  decreaseDepth(): void {
    if (this.stack.length <= 1) {
      throw new Error("No depth to pop off!");
    }
    const popped = this.stack.pop()!;
    this.push({ kind: "list", ltype: popped.ltype, children: popped.items });
  }

  /**
   * Start a new list at the current depth with a different list type.
   *
   * At the root layer the whole tree built so far is moved to the finished
   * trees and the root is reset with the new type. At deeper layers the
   * current layer is closed into its parent and a fresh layer is opened.
   *
   * @param ltype - The list type for the new list
   */
  newList(ltype: L): void {
    if (this.isSingle()) {
      // This is the last layer, so the pop/push trick doesn't work.
      // Emit the tree built so far and restart the root with `ltype`.
      this.finishDepthList({ ltype });
    } else {
      // Close the current layer into its parent, then open a new one.
      this.decreaseDepth();
      this.increaseDepth(ltype);
    }
  }

  /** Append a depth item to the current (topmost) layer. */
  private push(item: DepthItem<L, T>): void {
    this.last.items.push(item);
  }

  /**
   * Add a leaf item to the current nesting level.
   * @param item - The value to wrap in a leaf node
   */
  pushItem(item: T): void {
    this.push({ kind: "item", value: item });
  }

  /** Returns the list type of the topmost nesting level. */
  lastType(): L {
    return this.last.ltype;
  }

  /**
   * Collapse every open layer into the root, move the root's items to the
   * finished trees (skipped when the root is empty), and reset the root so
   * processing can continue.
   *
   * @param next - `{ ltype }` giving the list type for the reset root, or
   *   `null` to keep the current root type. The type is wrapped in an object
   *   so that a list type which is itself `null` or `undefined` is still
   *   applied rather than mistaken for "keep the current type".
   */
  private finishDepthList(next: { ltype: L } | null): void {
    // Wrap all opened layers; the root layer always stays on the stack.
    while (this.stack.length > 1) {
      this.decreaseDepth();
    }

    const root = this.first;
    const ltype = root.ltype;
    const list = root.items;

    // Reset the root layer for whatever comes next.
    root.ltype = next === null ? ltype : next.ltype;
    root.items = [];

    // Only record the tree if it has elements.
    if (list.length > 0) {
      this.finished.push({ ltype, list });
    }
  }

  /**
   * Finalize all remaining layers and return the completed trees.
   * @returns Array of finished trees, each with its list type and items
   */
  intoTrees(): Array<{ ltype: L; list: DepthList<L, T> }> {
    // No further items will be added, so the type of the reset root is irrelevant.
    this.finishDepthList(null);
    return this.finished;
  }
}
|
|
178
|
+
|
|
179
|
+
/**
|
|
180
|
+
* Process a flat list of depth-annotated items into nested tree structures.
|
|
181
|
+
*
|
|
182
|
+
* This is the main entry point for the depth module. It takes a sequence of items,
|
|
183
|
+
* each annotated with a nesting depth and a list type, and produces one or more
|
|
184
|
+
* nested trees. Multiple trees are produced when the list type changes at the
|
|
185
|
+
* root level (depth 0).
|
|
186
|
+
*
|
|
187
|
+
* The algorithm iterates through items sequentially, using a stack to track
|
|
188
|
+
* open nesting levels. When depth increases, new levels are pushed; when depth
|
|
189
|
+
* decreases, levels are popped and collapsed into their parent. When the list
|
|
190
|
+
* type changes at the same depth, the current list is finalized and a new one begins.
|
|
191
|
+
*
|
|
192
|
+
* @typeParam L - The list type discriminator
|
|
193
|
+
* @typeParam T - The type of leaf item values
|
|
194
|
+
* @param topLtype - The default list type for the root level
|
|
195
|
+
* @param items - Flat sequence of depth-annotated items, where each item has:
|
|
196
|
+
* - `depth`: the 0-based nesting level
|
|
197
|
+
* - `ltype`: the list type for grouping (e.g., "bullet" vs "number")
|
|
198
|
+
* - `value`: the actual item content
|
|
199
|
+
* @param ltypeEquals - Equality comparator for list types (defaults to `===`)
|
|
200
|
+
* @returns Array of finished trees, each with an `ltype` and a `list` of nested items.
|
|
201
|
+
* Multiple trees are returned when the list type changes at depth 0.
|
|
202
|
+
*/
|
|
203
|
+
export function processDepths<L, T>(
  topLtype: L,
  items: Array<{ depth: number; ltype: L; value: T }>,
  ltypeEquals: (a: L, b: L) => boolean = (a, b) => a === b,
): Array<{ ltype: L; list: DepthList<L, T> }> {
  const builder = new DepthStack<L, T>(topLtype);

  // Depth of the item handled in the previous iteration (root before any item).
  let prevDepth = 0;

  for (const { depth, ltype, value } of items) {
    // Walk from the previous depth to this item's depth. At most one of the
    // two loops below does any work; neither runs when the depth is unchanged.

    // Going deeper: open one layer per level gained, tagged with this item's type.
    let level = prevDepth;
    while (level < depth) {
      builder.increaseDepth(ltype);
      level++;
    }

    // Coming back up: close one layer per level lost.
    level = depth;
    while (level < prevDepth) {
      builder.decreaseDepth();
      level++;
    }

    // A type mismatch at the current layer closes that list and starts a new
    // one with this item's type; later items keep appending to it until the
    // depth or the type changes again.
    const sameType = ltypeEquals(builder.lastType(), ltype);
    if (!sameType) {
      builder.newList(ltype);
    }

    builder.pushItem(value);
    prevDepth = depth;
  }

  return builder.intoTrees();
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
* Main parser for Wikidot markup.
|
|
4
|
+
*
|
|
5
|
+
* The parser consumes a token stream from the lexer and produces an AST
|
|
6
|
+
* (Abstract Syntax Tree) conforming to the `@wdprlib/ast` package types.
|
|
7
|
+
* It applies block rules and inline rules in a recursive-descent fashion,
|
|
8
|
+
* followed by post-processing passes for paragraph merging and cleanup.
|
|
9
|
+
*
|
|
10
|
+
* The main entry points are:
|
|
11
|
+
* - `parse()` - convenience function that parses a string in one call
|
|
12
|
+
* - `Parser` class - for more control over parsing options
|
|
13
|
+
*
|
|
14
|
+
* @module
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
export type { ParserOptions } from "./parse";
|
|
18
|
+
export { Parser, parse } from "./parse";
|