@xaendar/compiler 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +12 -0
- package/src/costants/chars.constants.ts +188 -0
- package/src/costants/tags/base-tags.constants.ts +106 -0
- package/src/costants/tags/not-alllowed-tags.constants.ts +13 -0
- package/src/costants/tags/not-allowed-chars.constants.ts +24 -0
- package/src/lexer/lexer.ts +91 -0
- package/src/lexer/models/current-char.type.ts +17 -0
- package/src/lexer/models/current-position.type.ts +13 -0
- package/src/lexer/models/lexer-cursor.model.ts +192 -0
- package/src/lexer/models/lexer-state.enum.ts +13 -0
- package/src/lexer/models/token-type.enum.ts +17 -0
- package/src/lexer/models/token.type.ts +62 -0
- package/src/lexer/models/transition-function/transition-function-context.type.ts +27 -0
- package/src/lexer/models/transition-function/transition-function-return-type.type.ts +48 -0
- package/src/lexer/models/transition-function/transition-function.type.ts +36 -0
- package/src/lexer/states/attribute.state.ts +47 -0
- package/src/lexer/states/event.state.ts +38 -0
- package/src/lexer/states/interpolation-expression.state.ts +68 -0
- package/src/lexer/states/interpolation-literal.state.ts +71 -0
- package/src/lexer/states/interpolation.state.ts +31 -0
- package/src/lexer/states/tag-body.state.ts +43 -0
- package/src/lexer/states/tag-close.state.ts +46 -0
- package/src/lexer/states/tag-open-end.state.ts +40 -0
- package/src/lexer/states/tag-open-name.state.ts +50 -0
- package/src/lexer/states/text.state.ts +61 -0
- package/src/parser/models/ast.type.ts +34 -0
- package/src/parser/models/current-token.type.ts +6 -0
- package/src/parser/models/node.enum.ts +5 -0
- package/src/parser/models/parser-cursor.model.ts +133 -0
- package/src/parser/parser.ts +225 -0
- package/src/public-api.ts +3 -0
- package/src/render-generator/render-generator.model.ts +73 -0
- package/src/utils/chars.utils.ts +70 -0
- package/src/utils/tags.utils.ts +36 -0
- package/tsconfig.json +3 -0
- package/vite.config.ts +4 -0
package/package.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@xaendar/compiler",
|
|
3
|
+
"version": "0.0.1",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"author": "Kaitenjo",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"description": "A library containing compiler engine",
|
|
8
|
+
"peerDependencies": {
|
|
9
|
+
"@xaendar/common": "1.0.0",
|
|
10
|
+
"vite": "^8.0.4"
|
|
11
|
+
}
|
|
12
|
+
}
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
// Character codes (UTF-16 code units) used by the lexer to classify input.
// Each constant is documented with the glyph it stands for.

/** End-of-input sentinel (`0`, the NUL code). */
export const EOF = 0;
/** `\b` — backspace */
export const BACK_SPACE = 8;
/** `\t` — horizontal tab */
export const TAB = 9;
/** `\n` — line feed */
export const LF = 10;
/** `\v` — vertical tab */
export const VERTICAL_TAB = 11;
/** `\f` — form feed */
export const FF = 12;
/** `\r` — carriage return */
export const CR = 13;
/** ` ` — space */
export const SPACE = 32;
/** `!` — exclamation mark */
export const EXCLAMATION = 33;
/** `"` — double quote */
export const DOUBLE_QUOTE = 34;
/** `#` — number sign */
export const HASHTAG = 35;
/** `$` — dollar sign */
export const DOLLAR = 36;
/** `%` — percent sign */
export const PERCENTAGE = 37;
/** `&` — ampersand */
export const AMPERSAND = 38;
/** `'` — single quote */
export const SINGLE_QUOTE = 39;
/** `(` — left parenthesis */
export const LPAREN = 40;
/** `)` — right parenthesis */
export const RPAREN = 41;
/** `*` — asterisk */
export const STAR = 42;
/** `+` — plus sign */
export const PLUS = 43;
/** `,` — comma */
export const COMMA = 44;
/** `-` — hyphen-minus */
export const MINUS = 45;
/** `.` — full stop */
export const PERIOD = 46;
/** `/` — slash */
export const SLASH = 47;
/** `0` — first decimal digit */
export const ZERO = 48;
/** `9` — last decimal digit */
export const NINE = 57;
/** `:` — colon */
export const COLON = 58;
/** `;` — semicolon */
export const SEMICOLON = 59;
/** `<` — less-than sign */
export const LESS_THAN = 60;
/** `=` — equals sign (identifier spelling kept as published) */
export const EQUAL_THEN = 61;
/** `>` — greater-than sign (identifier spelling kept as published) */
export const GREATER_THEN = 62;
/** `?` — question mark */
export const QUESTION = 63;
/** `@` — at sign */
export const AT_SIGN = 64;
/** `A` — first uppercase ASCII letter */
export const A = 65;
/** `Z` — last uppercase ASCII letter */
export const Z = 90;
/** `[` — left square bracket */
export const LEFT_BRACKET = 91;
/** `\` — backslash */
export const BACKSLASH = 92;
/** `]` — right square bracket */
export const RIGHT_BRACKET = 93;
/** `^` — caret */
export const CARET = 94;
/** `_` — underscore */
export const UNDERSCORE = 95;
/** `` ` `` — grave accent (backtick) */
export const GRAVE_ACCENT = 96;
/** `a` — first lowercase ASCII letter */
export const a = 97;
/** `z` — last lowercase ASCII letter */
export const z = 122;
/** `{` — left curly brace */
export const LEFT_BRACE = 123;
/** `|` — vertical bar */
export const VERTICAL_BAR = 124;
/** `}` — right curly brace */
export const RIGHT_BRACE = 125;
/** `~` — tilde */
export const TILDE = 126;
/** `NBSP` — non-breaking space (U+00A0) */
export const NBSP = 160;
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
/**
 * Standard HTML tag names recognised inside a Xaendar component template.
 *
 * These are native HTML elements, so they must not be used as custom element names.
 *
 * NOTE(review): the list is not the complete HTML element set (for example `html`,
 * `body`, `main` and `script` are absent) — confirm the omissions are intentional.
 */
export const HTML_TAGS = [
  // Sectioning and headings
  'section', 'article', 'aside', 'nav', 'header', 'footer', 'address',
  'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
  // Grouping content
  'p', 'hr', 'pre', 'blockquote', 'ol', 'ul', 'li', 'dl', 'dt', 'dd',
  'figure', 'figcaption', 'div',
  // Text-level semantics
  'a', 'span', 'br', 'wbr', 'b', 'strong', 'i', 'em', 'u', 's', 'mark', 'small',
  'sub', 'sup', 'abbr', 'cite', 'code', 'var', 'kbd', 'samp', 'time', 'data',
  'dfn', 'q', 'ruby', 'rt', 'rp', 'bdi', 'bdo',
  // Edits
  'ins', 'del',
  // Embedded content
  'img', 'iframe', 'embed', 'object', 'param', 'picture', 'source', 'track',
  'video', 'audio', 'map', 'area',
  // Tabular data
  'table', 'caption', 'colgroup', 'col', 'tbody', 'thead', 'tfoot', 'tr', 'td', 'th',
  // Forms
  'form', 'label', 'input', 'button', 'select', 'datalist', 'optgroup', 'option',
  'textarea', 'output', 'progress', 'meter', 'fieldset', 'legend',
  // Interactive elements, templating and scripting
  'details', 'summary', 'dialog', 'template', 'slot', 'canvas'
];
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
 * Hyphenated names that may not be used as custom element names.
 *
 * These are the names the HTML Standard excludes from the set of valid custom
 * element names because they are already taken by SVG and MathML elements.
 */
export const NOT_ALLOWED_TAGS = [
  'annotation-xml',
  'color-profile',
  'font-face',
  'font-face-src',
  'font-face-uri',
  'font-face-format',
  'font-face-name',
  'missing-glyph'
];
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
 * Characters that make a custom element tag name invalid.
 *
 * The Custom Elements specification does not permit any of them in a tag name,
 * so a name containing one of these characters is rejected.
 */
export const NOT_ALLOWED_CHARS_FOR_TAGS =
  // Space-separated so each forbidden character stays individually readable.
  '@ # $ % & * ! ? / \\ | \' " < > ='.split(' ');
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import { Dictionary, Stack } from '@xaendar/common';
|
|
2
|
+
import { EOF } from '../costants/chars.constants';
|
|
3
|
+
import { LexerCursor } from './models/lexer-cursor.model';
|
|
4
|
+
import { LexerState } from './models/lexer-state.enum';
|
|
5
|
+
import { Token } from './models/token.type';
|
|
6
|
+
import { LexerTransitionFunction } from './models/transition-function/transition-function.type';
|
|
7
|
+
import { consumeAttribute } from './states/attribute.state';
|
|
8
|
+
import { consumeEvent } from './states/event.state';
|
|
9
|
+
import { consumeInterpolationExpression } from './states/interpolation-expression.state';
|
|
10
|
+
import { consumeInterpolationliteral } from './states/interpolation-literal.state';
|
|
11
|
+
import { consumeInterpolation } from './states/interpolation.state';
|
|
12
|
+
import { consumeTagBody } from './states/tag-body.state';
|
|
13
|
+
import { consumeTagClose } from './states/tag-close.state';
|
|
14
|
+
import { consumeTagOpenEnd } from './states/tag-open-end.state';
|
|
15
|
+
import { consumeTagOpenName } from './states/tag-open-name.state';
|
|
16
|
+
import { consumeText } from './states/text.state';
|
|
17
|
+
|
|
18
|
+
/**
 * State-machine lexer that turns a template string into a flat list of tokens.
 *
 * The lexer owns a {@link LexerCursor} over the input and a table that maps every
 * {@link LexerState} to the transition function handling it. `tokenize()` repeatedly
 * runs the transition function of the current state, collects the tokens it returns
 * and moves to the state it indicates, until the cursor signals the end of the input.
 */
export class Lexer {

  /** Cursor used by the transition functions to read the input. */
  private readonly _cursor: LexerCursor;

  /** State whose transition function runs on the next iteration. */
  private _state = LexerState.START;

  /** States saved by transitions that requested `pushState`; exposed to transitions as `history`. */
  private readonly _stack = new Stack<LexerState>();

  /** Tokens collected so far, in emission order. */
  private readonly _tokens: Token[] = [];

  /** Set once the cursor reported EOF, so a repeated `tokenize()` call does not lex again. */
  private _finished = false;

  /** Transition function registered for each lexer state. */
  private readonly _states: Dictionary<LexerState, LexerTransitionFunction> = {
    [LexerState.START]: consumeText,
    [LexerState.TEXT]: consumeText,
    [LexerState.TAG_OPEN_NAME]: consumeTagOpenName,
    [LexerState.TAG_BODY]: consumeTagBody,
    [LexerState.TAG_OPEN_END]: consumeTagOpenEnd,
    [LexerState.TAG_CLOSE]: consumeTagClose,
    [LexerState.ATTRIBUTE]: consumeAttribute,
    [LexerState.EVENT]: consumeEvent,
    [LexerState.INTERPOLATION]: consumeInterpolation,
    [LexerState.INTERPOLATION_EXPRESSION]: consumeInterpolationExpression,
    [LexerState.INTERPOLATION_LITERAL]: consumeInterpolationliteral
  };

  /**
   * Creates a lexer for the given template content.
   *
   * @param input The full template text to tokenize.
   */
  constructor(public input: string) {
    this._cursor = new LexerCursor(this.input);
  }

  /**
   * Runs the state machine until the end of the input and returns the collected tokens.
   *
   * The cursor reports the end of the input by throwing an `Error` whose `cause` is
   * `EOF`; that error is the normal way out of the loop and is not propagated.
   * Once the end has been reached, later calls return the same token array without
   * lexing again.
   *
   * @returns The tokens emitted by the transition functions, in order.
   *
   * @throws Whatever a transition function throws, other than the EOF signal.
   * @throws Error when no transition function is registered for the current state.
   */
  public tokenize(): Token[] {
    while (!this._finished) {
      try {
        const transitionFunction = this._states[this._state];

        if (!transitionFunction) {
          throw new Error(`No transition function registered for lexer state "${this._state}"`);
        }

        const { state, tokens, popState, pushState } = transitionFunction(this._cursor, { history: this._stack.values });

        if (tokens?.length) {
          this._tokens.push(...tokens);
        }

        // The state being left is the one saved, so it is pushed before `_state` changes.
        if (pushState) {
          this._stack.push(this._state);
        }

        if (popState) {
          this._stack.pop();
        }

        this._state = state;
      } catch (err) {
        // Anything that is not the cursor's EOF signal is a real failure.
        if (!(err instanceof Error) || err.cause !== EOF) {
          throw err;
        }

        this._finished = true;
      }
    }

    return this._tokens;
  }
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
 * Snapshot of a single character of the string being parsed.
 */
export type CurrentChar = {
  /** Position of the character within the original string. */
  index: number;
  /** UTF-16 code unit of the character, as returned by `String.charCodeAt`. */
  code: number;
  /** The character itself, as a string. */
  value: string;
};
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
import { PositiveInteger, TupleOfLength } from '@xaendar/common';
|
|
2
|
+
import { CR, EOF, LF, SPACE } from '../../costants/chars.constants';
|
|
3
|
+
import { CurrentChar } from './current-char.type';
|
|
4
|
+
import { CursorPosition } from './current-position.type';
|
|
5
|
+
|
|
6
|
+
/**
 * Cursor abstraction used by the Lexer to navigate the input source.
 *
 * The LexerCursor is responsible for:
 * - Sequential character consumption
 * - Lookahead (peek) operations without state mutation
 * - Tracking logical position (row, column)
 * - Handling end-of-file conditions
 *
 * This class deliberately contains **no lexer logic**:
 * it does not know about tokens, states, or grammar rules.
 * Its sole responsibility is controlled navigation of the input stream.
 */
export class LexerCursor {
  /**
   * Representation of the current character.
   *
   * - `index`: absolute index within the input string
   * - `code`: UTF-16 code unit of the character (`String.charCodeAt`)
   * - `value`: actual character value
   *
   * An index of `-1` indicates that the cursor has not yet consumed
   * any character or has reached EOF.
   */
  private readonly _currentChar: CurrentChar = {
    code: 0,
    index: -1,
    value: ''
  };

  /**
   * Returns a read-only view of the current character.
   */
  public get currentChar(): Readonly<CurrentChar> {
    return this._currentChar;
  }

  /**
   * Logical position of the current character in the input.
   *
   * - `row`: zero-based line number
   * - `column`: zero-based column number
   */
  private readonly _position: CursorPosition = {
    row: 0,
    column: 0
  };

  /**
   * Returns a read-only view of the current cursor position.
   */
  public get position(): Readonly<CursorPosition> {
    return this._position;
  }

  /**
   * Creates a new cursor for the given input source.
   *
   * @param input Full source string to be tokenized.
   */
  constructor(public input: string) { }

  /**
   * Advances the cursor by the specified number of characters.
   *
   * Every consumed character is stepped over individually, so the row/column
   * position stays correct when more than one character is consumed at once.
   * A line ends on `LF` or on a `CR` that is not followed by `LF` (a `CR LF`
   * pair therefore counts as a single line break).
   *
   * When the move would go past the end of the input, the current character is
   * reset to the EOF state (`code` = `EOF`, `index` = `-1`, empty `value`), the
   * position is left untouched and an EOF error is thrown.
   *
   * @param chars Number of characters to consume (integer >= 1)
   *
   * @throws Error when `chars` is not an integer greater than or equal to 1
   * @throws Error with cause `EOF` when advancing past input length
   */
  public advance(chars = 1): void {
    if (!Number.isInteger(chars) || chars < 1) {
      throw new Error(`${chars} is not a valid value. Please enter a number equal or greater than 1`);
    }

    const current = this._currentChar;
    const targetIndex = current.index + chars;

    if (targetIndex >= this.input.length) {
      current.code = EOF;
      current.index = -1;
      current.value = '';
      this.throwEOFError();
    }

    for (let index = current.index + 1; index <= targetIndex; index++) {
      // The position is derived from the character being left, so update it first.
      this.trackPosition(index);

      current.index = index;
      current.value = this.input[index]!;
      current.code = this.input.charCodeAt(index);
    }
  }

  /**
   * Peeks ahead in the input stream without advancing the cursor.
   *
   * This method supports:
   * - Single-character lookahead
   * - Multi-character lookahead
   * - Optional offset from the current position: with `offset: k` the first
   *   `k` upcoming characters are skipped before reading
   *
   * Peeking never modifies the cursor state.
   *
   * @returns
   * - A single UTF-16 code unit when peeking one character
   * - An array of exactly `chars` UTF-16 code units when peeking multiple characters
   *
   * @throws Error with cause `EOF` if the peek exceeds input length
   */
  public peek(): number;
  public peek<OffSet extends number>(options?: { offset?: PositiveInteger<OffSet> }): number;
  public peek(chars: 1): number;
  public peek<OffSet extends number>(chars: 1, options?: { offset?: PositiveInteger<OffSet> }): number;
  public peek<ReadChars extends number>(chars: PositiveInteger<ReadChars>): TupleOfLength<ReadChars>;
  public peek<ReadChars extends number, OffSet extends number>(chars: PositiveInteger<ReadChars>, options?: { offset?: PositiveInteger<OffSet> }): TupleOfLength<ReadChars>;
  public peek(charsOrOptions?: number | { offset?: number }, options?: { offset?: number }): number | number[] {
    const chars = typeof charsOrOptions === 'number' ? charsOrOptions : 1;
    const offset = (typeof charsOrOptions === 'object' ? charsOrOptions : options)?.offset ?? 0;
    const start = this._currentChar.index + 1 + offset;

    return chars === 1 ? this.peekOneChar(start) : this.peekMany(start, chars);
  }

  /**
   * Skips all consecutive space characters from the current position.
   *
   * Only the plain space character is skipped. Because it relies on `peek()`,
   * it throws the EOF error when the spaces run up to the end of the input.
   */
  public skipSpaces(): void {
    while (this.peek() === SPACE) {
      this.advance();
    }
  }

  /**
   * Updates row/column for the move from the current character to `nextIndex`.
   * The very first move only enters the input, so it leaves the position at 0/0.
   */
  private trackPosition(nextIndex: number): void {
    const { code, index } = this._currentChar;

    if (index < 0) {
      return;
    }

    // A CR directly followed by LF is not a break on its own: the LF ends the line.
    const endsLine = code === LF || (code === CR && this.input.charCodeAt(nextIndex) !== LF);

    if (endsLine) {
      this._position.row++;
      this._position.column = 0;
    } else {
      this._position.column++;
    }
  }

  /**
   * Reads `count` consecutive code units starting at the absolute index `start`.
   */
  private peekMany(start: number, count: number): number[] {
    const peekedChars: number[] = [];

    for (let i = start; i < start + count; i++) {
      peekedChars.push(this.peekOneChar(i));
    }

    return peekedChars;
  }

  /**
   * Reads the code unit at the given absolute index.
   *
   * The input is read directly on every call: `charCodeAt` is already a constant-time
   * lookup, and reading directly keeps the result correct if `input` is reassigned.
   */
  private peekOneChar(index: number): number {
    if (index >= this.input.length) {
      this.throwEOFError();
    }

    return this.input.charCodeAt(index);
  }

  /**
   * Throws a standardized EOF error used by the lexer engine
   * to terminate tokenization.
   */
  private throwEOFError(): never {
    throw new Error('', { cause: EOF });
  }
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
 * States of the lexer's state machine.
 *
 * The lexer keeps one transition function per state and runs the one that
 * belongs to the state it is currently in.
 */
export enum LexerState {
  /** State the lexer starts in; handled by the same transition function as `TEXT`. */
  START = 'start',
  /** Handled by `consumeText`. */
  TEXT = 'text',
  /** Handled by `consumeTagOpenName`. */
  TAG_OPEN_NAME = 'tag-open-name',
  /** Handled by `consumeTagBody`. */
  TAG_BODY = 'tag-body',
  /** Handled by `consumeTagOpenEnd`. */
  TAG_OPEN_END = 'tag-open-end',
  /** Handled by `consumeTagClose`. */
  TAG_CLOSE = 'tag-close',
  /** Handled by `consumeAttribute`. */
  ATTRIBUTE = 'attribute',
  /** Handled by `consumeEvent`. */
  EVENT = 'event',
  /** Handled by `consumeInterpolation`. */
  INTERPOLATION = 'interpolation',
  /** Handled by `consumeInterpolationExpression`. */
  INTERPOLATION_EXPRESSION = 'interpolation-expression',
  /** Handled by `consumeInterpolationliteral`. */
  INTERPOLATION_LITERAL = 'interpolation-literal'
}
|