drift-parser 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/ast.json +72 -0
  2. package/dist/index.d.ts +2 -0
  3. package/dist/index.js +8 -0
  4. package/dist/src/ast/ast.d.ts +0 -0
  5. package/dist/src/ast/exports.d.ts +1 -0
  6. package/dist/src/ast/expr.d.ts +0 -0
  7. package/dist/src/ast/stmt.d.ts +0 -0
  8. package/dist/src/ast/type.d.ts +31 -0
  9. package/dist/src/lexer/exports.d.ts +2 -0
  10. package/dist/src/lexer/tokenizer.d.ts +36 -0
  11. package/dist/src/lexer/tokens.d.ts +174 -0
  12. package/dist/src/parser/exports.d.ts +1 -0
  13. package/dist/src/parser/expr.d.ts +5 -0
  14. package/dist/src/parser/lookup.d.ts +28 -0
  15. package/dist/src/parser/parser.d.ts +23 -0
  16. package/dist/src/parser/stmt.d.ts +3 -0
  17. package/dist/src/parser/type.d.ts +0 -0
  18. package/dist/src/utils/combineLocation.d.ts +2 -0
  19. package/dist/src/utils/genexpr.d.ts +16 -0
  20. package/dist/src/utils/mapAll.d.ts +0 -0
  21. package/dist/src/utils/registerParse.d.ts +7 -0
  22. package/index.d.ts +1 -0
  23. package/index.ts +2 -0
  24. package/package.json +22 -0
  25. package/scripts/build.js +50 -0
  26. package/src/ast/ast.ts +0 -0
  27. package/src/ast/exports.ts +3 -0
  28. package/src/ast/expr.ts +122 -0
  29. package/src/ast/stmt.ts +126 -0
  30. package/src/ast/type.ts +46 -0
  31. package/src/lexer/exports.ts +2 -0
  32. package/src/lexer/tokenizer.ts +395 -0
  33. package/src/lexer/tokens.ts +241 -0
  34. package/src/parser/exports.ts +1 -0
  35. package/src/parser/expr.ts +82 -0
  36. package/src/parser/lookup.ts +69 -0
  37. package/src/parser/parser.ts +166 -0
  38. package/src/parser/stmt.ts +151 -0
  39. package/src/parser/type.ts +89 -0
  40. package/src/utils/combineLocation.ts +7 -0
  41. package/src/utils/mapAll.ts +43 -0
  42. package/src/utils/registerParse.ts +117 -0
  43. package/tests/astTest.js +44 -0
  44. package/tests/printTest.mjs +7 -0
  45. package/tests/tokenize.js +92 -0
  46. package/tests/typenames.js +15 -0
  47. package/tsconfig.json +15 -0
@@ -0,0 +1,122 @@
1
+ // import { ASTNode } from "./ast";
2
+ // import { Token } from "../lexer/tokens";
3
+ // import { Parser } from "../parser/parser";
4
+ // import { parseExpr } from "../parser/expr";
5
+ // import { BP } from "../parser/lookup";
6
+ // import { makeExpressionClass } from "../utils/genexpr";
7
+
8
+ // export abstract class Expression extends ASTNode {
9
+ // toJSON() {
10
+ // return {
11
+ // type: this.constructor.name, // captures class name dynamically
12
+ // loc: this.loc,
13
+ // range: this.range,
14
+ // };
15
+ // }
16
+ // }
17
+
18
+ // export class SymbolExpression extends Expression {
19
+ // /** The name of the identifier. */
20
+ // public value: string;
21
+
22
+ // constructor(token: Token) {
23
+ // super(token.loc, token.range);
24
+ // this.value = token.value;
25
+ // }
26
+
27
+ // toJSON() {
28
+ // return {
29
+ // ...super.toJSON(),
30
+ // value: this.value,
31
+ // };
32
+ // }
33
+ // }
34
+
35
+ // export class StringExpression extends Expression {
36
+ // /** The value of the string with escaped quotes. */
37
+ // public value: string;
38
+
39
+ // constructor(token: Token) {
40
+ // super(token.loc, token.range);
41
+ // this.value = token.value;
42
+ // }
43
+
44
+ // toJSON() {
45
+ // return {
46
+ // ...super.toJSON(),
47
+ // value: this.value,
48
+ // };
49
+ // }
50
+ // }
51
+
52
+ // export class NumberExpression extends Expression {
53
+ // /** The value of the number as a string to not deal with bit issues. */
54
+ // public value: string;
55
+
56
+ // constructor(token: Token) {
57
+ // super(token.loc, token.range);
58
+ // this.value = token.value;
59
+ // }
60
+
61
+ // toJSON() {
62
+ // return {
63
+ // ...super.toJSON(),
64
+ // value: this.value,
65
+ // };
66
+ // }
67
+ // }
68
+
69
+ // // export class PrefixExpression extends Expression {
70
+ // // public prefix: string;
71
+ // // public expression: Expression;
72
+
73
+ // // constructor(prefixToken: Token, rightExpression: Expression) {
74
+ // // super(prefixToken.loc, prefixToken.range);
75
+ // // this.prefix = prefixToken.value;
76
+ // // this.expression = rightExpression;
77
+ // // }
78
+
79
+ // // toJSON() {
80
+ // // return {
81
+ // // ...super.toJSON(),
82
+ // // prefix: this.prefix,
83
+ // // expression: this.expression.toJSON()
84
+ // // }
85
+ // // }
86
+ // // }
87
+
88
+ // export const PrefixExpression = makeExpressionClass({
89
+ // className: "PrefixExpression",
90
+ // fields: ["prefix", "expression"],
91
+ // constructor: (prefixToken: Token, rightExpression: Expression) => ({
92
+ // loc: prefixToken.loc,
93
+ // range: prefixToken.range,
94
+ // prefix: prefixToken.value,
95
+ // expression: rightExpression,
96
+ // }),
97
+ // toJSON: ({ prefix, expression }) => ({
98
+ // prefix,
99
+ // expression: expression.toJSON(),
100
+ // }),
101
+ // });
102
+
103
+
104
+
105
+ // export const BinaryExpression = makeExpressionClass({
106
+ // className: "BinaryExpression",
107
+ // fields: ["left", "op", "right"],
108
+ // constructor: (prefixToken: Token, rightExpression: Expression) => ({
109
+ // loc: prefixToken.loc,
110
+ // range: prefixToken.range,
111
+ // prefix: prefixToken.value,
112
+ // expression: rightExpression,
113
+ // }),
114
+ // toJSON: ({ prefix, expression }) => ({
115
+ // prefix,
116
+ // expression: expression.toJSON(),
117
+ // }),
118
+ // });
119
+
120
+
121
+
122
+
@@ -0,0 +1,126 @@
1
+ // // ./ast/stmt.ts
2
+ // import { LocationInterface, Token, TokenType } from "../lexer/tokens";
3
+ // import { ASTNode } from "./ast";
4
+ // import { Expression } from "./expr";
5
+ // import { Type } from "./type";
6
+
7
+ // export abstract class Statement extends ASTNode {
8
+ // toJSON() {
9
+ // return {
10
+ // type: this.constructor.name,
11
+ // loc: this.loc,
12
+ // range: this.range,
13
+ // };
14
+ // }
15
+ // }
16
+
17
+ // export class BlockStatement extends Statement {
18
+ // public body: Statement[];
19
+
20
+ // constructor(body: Statement[], loc: LocationInterface, range: [number, number]) {
21
+ // super(loc, range);
22
+ // this.body = body;
23
+ // }
24
+
25
+ // toJSON() {
26
+ // return {
27
+ // ...super.toJSON(),
28
+ // body: this.body.map((s) => s.toJSON()),
29
+ // };
30
+ // }
31
+ // }
32
+
33
+ // export class ExpressionStatement extends Statement {
34
+ // public expression: Expression;
35
+
36
+ // constructor(expression: Expression, loc: LocationInterface, range: [number, number]) {
37
+ // super(loc, range);
38
+ // this.expression = expression;
39
+ // }
40
+
41
+ // toJSON() {
42
+ // return {
43
+ // ...super.toJSON(),
44
+ // expression: this.expression.toJSON(),
45
+ // };
46
+ // }
47
+ // }
48
+
49
+
50
+ // export class TerminatorStatement extends Statement {
51
+ // public kind: "semicolon" | "newline" | "eof";
52
+
53
+ // constructor(token: Token) {
54
+ // super(token.loc, token.range);
55
+
56
+ // if (token.type === TokenType.EOF) {
57
+ // this.kind = "eof";
58
+ // } else {
59
+ // this.kind = token.value === ";" ? "semicolon" : "newline";
60
+ // }
61
+ // }
62
+
63
+ // toJSON() {
64
+ // return {
65
+ // ...super.toJSON(),
66
+ // kind: this.kind,
67
+ // };
68
+ // }
69
+ // }
70
+
71
+ // export class UsingStatement extends Statement {
72
+ // public value: string;
73
+ // public as?: Expression;
74
+
75
+ // constructor(
76
+ // valueToken: Token,
77
+ // asExpression: Expression | undefined,
78
+ // _terminator: Token, // still required to build loc/range externally
79
+ // loc: LocationInterface,
80
+ // range: [number, number]
81
+ // ) {
82
+ // super(loc, range);
83
+ // this.value = valueToken.value;
84
+ // this.as = asExpression;
85
+ // }
86
+
87
+ // toJSON() {
88
+ // return {
89
+ // ...super.toJSON(),
90
+ // value: this.value,
91
+ // as: this.as ? this.as.toJSON() : null,
92
+ // };
93
+ // }
94
+ // }
95
+
96
+ // export class VariableDeclarationStatement extends Statement {
97
+ // public identifier: string;
98
+ // public varType?: Type;
99
+ // public initializer?: Expression;
100
+ // public isConstant: boolean;
101
+
102
+ // constructor(
103
+ // nameToken: Token,
104
+ // varType: Type | undefined,
105
+ // initializer: Expression | undefined,
106
+ // _terminator: Token,
107
+ // isConstant: boolean,
108
+ // loc: LocationInterface,
109
+ // range: [number, number]
110
+ // ) {
111
+ // super(loc, range);
112
+ // this.identifier = nameToken.value;
113
+ // this.varType = varType;
114
+ // this.initializer = initializer;
115
+ // this.isConstant = isConstant;
116
+ // }
117
+
118
+ // toJSON() {
119
+ // return {
120
+ // ...super.toJSON(),
121
+ // identifier: this.identifier,
122
+ // varType: this.varType ? this.varType.toJSON() : null,
123
+ // initializer: this.initializer ? this.initializer.toJSON() : null,
124
+ // };
125
+ // }
126
+ // }
@@ -0,0 +1,46 @@
1
+ import { LocationInterface } from "../lexer/tokens"
2
+
3
+ export enum NodeKind {
4
+ Statement,
5
+ Expression,
6
+ }
7
+
8
+ export interface Node {
9
+ kind: NodeKind;
10
+ type: string;
11
+ loc: LocationInterface;
12
+ range: [number, number];
13
+ body: Node[];
14
+ [key: string]: any;
15
+ }
16
+
17
+ export interface Stmt extends Node {
18
+ kind: NodeKind.Statement;
19
+ }
20
+
21
+ export interface Expr extends Node {
22
+ kind: NodeKind.Expression;
23
+ }
24
+
25
+ type NodeFieldsRequired = {
26
+ type: string;
27
+ loc: LocationInterface;
28
+ range: [number, number];
29
+ } & Record<string, any>;
30
+
31
+ // Expr factory
32
+ export function mkexpr(fields: NodeFieldsRequired & { body?: Node[] }): Expr {
33
+ return {
34
+ kind: NodeKind.Expression,
35
+ body: fields.body ?? [],
36
+ ...fields,
37
+ };
38
+ }
39
+
40
+ export function mkstmt(fields: NodeFieldsRequired & { body?: Node[] }): Stmt {
41
+ return {
42
+ kind: NodeKind.Statement,
43
+ body: fields.body ?? [],
44
+ ...fields,
45
+ };
46
+ }
@@ -0,0 +1,2 @@
1
+ export * from './tokenizer';
2
+ export * from './tokens';
@@ -0,0 +1,395 @@
1
+ import { LocationInterface, Token, TokenType, Position, ReservedTokens } from "./tokens";
2
+
3
+ type RegexHandler = (lex: Lexer, regex: RegExp) => void;
4
+
5
+ interface RegexPattern {
6
+ regex: RegExp;
7
+ handler: RegexHandler;
8
+ }
9
+
10
+ interface LexerInterface {
11
+ patterns: RegexPattern[];
12
+ Tokens: Token[];
13
+ source: string;
14
+ pos: number;
15
+ line: number;
16
+ col: number;
17
+ remainingSource(): string;
18
+ end_of_file(): boolean;
19
+ push(token: Token): void;
20
+ }
21
+
22
+ class Lexer implements LexerInterface {
23
+ patterns: RegexPattern[];
24
+ Tokens: Token[];
25
+ source: string;
26
+ pos: number;
27
+ line: number;
28
+ col: number;
29
+
30
+ constructor(source: string, patterns: RegexPattern[]) {
31
+ this.source = source;
32
+ this.patterns = patterns;
33
+ this.Tokens = [];
34
+ this.pos = 0;
35
+ this.line = 1;
36
+ this.col = 0;
37
+ }
38
+
39
+ remainingSource(): string {
40
+ return this.source.slice(this.pos);
41
+ }
42
+
43
+ end_of_file(): boolean {
44
+ return this.pos >= this.source.length;
45
+ }
46
+
47
+ push(token: Token): void {
48
+ this.Tokens.push(token);
49
+ }
50
+ }
51
+
52
+ function stringHandler(tokenType: TokenType): RegexHandler {
53
+ return (lex: Lexer, regex: RegExp) => {
54
+ const match = lex.remainingSource().match(regex);
55
+ if (match && match.index === 0) {
56
+ const text = match[0];
57
+ const start: Position = { line: lex.line, col: lex.col };
58
+
59
+ let line = lex.line;
60
+ let col = lex.col;
61
+ for (const ch of text) {
62
+ if (ch === '\n') {
63
+ line++;
64
+ col = 0;
65
+ } else {
66
+ col++;
67
+ }
68
+ }
69
+ const end: Position = { line, col };
70
+ const loc: LocationInterface = { start, end };
71
+ const range: [number, number] = [lex.pos, lex.pos + text.length];
72
+
73
+ lex.push(new Token(tokenType, text, loc, range));
74
+
75
+ lex.pos += text.length;
76
+ lex.line = line;
77
+ lex.col = col;
78
+ }
79
+ };
80
+ }
81
+
82
+ function numberHandler(tokenType: TokenType): RegexHandler {
83
+ return (lex: Lexer, regex: RegExp) => {
84
+ const match = lex.remainingSource().match(regex);
85
+ if (match && match.index === 0) {
86
+ const text = match[0];
87
+ const start: Position = { line: lex.line, col: lex.col };
88
+
89
+ let line = lex.line;
90
+ let col = lex.col;
91
+ for (const ch of text) {
92
+ if (ch === '\n') {
93
+ line++;
94
+ col = 0;
95
+ } else {
96
+ col++;
97
+ }
98
+ }
99
+ const end: Position = { line, col };
100
+ const loc: LocationInterface = { start, end };
101
+ const range: [number, number] = [lex.pos, lex.pos + text.length];
102
+
103
+ lex.push(new Token(tokenType, text, loc, range));
104
+
105
+ lex.pos += text.length;
106
+ lex.line = line;
107
+ lex.col = col;
108
+ }
109
+ };
110
+ }
111
+
112
+ function multiCommentHandler(tokenType: TokenType): RegexHandler {
113
+ return (lex: Lexer, regex: RegExp) => {
114
+ const match = lex.remainingSource().match(regex);
115
+ if (match && match.index === 0) {
116
+ const text = match[0];
117
+ const start: Position = { line: lex.line, col: lex.col };
118
+
119
+ let line = lex.line;
120
+ let col = lex.col;
121
+ for (const ch of text) {
122
+ if (ch === '\n') {
123
+ line++;
124
+ col = 0;
125
+ } else {
126
+ col++;
127
+ }
128
+ }
129
+ const end: Position = { line, col };
130
+ const loc: LocationInterface = { start, end };
131
+ const range: [number, number] = [lex.pos, lex.pos + text.length];
132
+
133
+ lex.push(new Token(tokenType, text, loc, range));
134
+
135
+ lex.pos += text.length;
136
+ lex.line = line;
137
+ lex.col = col;
138
+ }
139
+ };
140
+ }
141
+
142
+ function singleCommentHandler(tokenType: TokenType): RegexHandler {
143
+ return (lex: Lexer, regex: RegExp) => {
144
+ const remaining = lex.remainingSource();
145
+ // Match comment line without the newline character itself
146
+ const match = remaining.match(regex);
147
+ if (match && match.index === 0) {
148
+ const text = match[0];
149
+ const start: Position = { line: lex.line, col: lex.col };
150
+
151
+ // Count columns only, no newlines expected in single-line comments
152
+ let col = lex.col + text.length;
153
+ const end: Position = { line: lex.line, col };
154
+ const loc: LocationInterface = { start, end };
155
+ const range: [number, number] = [lex.pos, lex.pos + text.length];
156
+
157
+ lex.push(new Token(tokenType, text, loc, range));
158
+
159
+ lex.pos += text.length;
160
+ lex.col = col;
161
+
162
+ // **Do NOT consume the newline here. Leave it for the main lexer loop**
163
+ }
164
+ };
165
+ }
166
+
167
+
168
+ function whitespaceHandler(tokenType: TokenType): RegexHandler {
169
+ return (lex: Lexer, regex: RegExp) => {
170
+ const match = lex.remainingSource().match(regex);
171
+ if (match && match.index === 0) {
172
+ const text = match[0];
173
+
174
+ const start: Position = { line: lex.line, col: lex.col };
175
+
176
+ let line = lex.line;
177
+ let col = lex.col;
178
+ for (const ch of text) {
179
+ if (ch === '\n') {
180
+ line++;
181
+ col = 0;
182
+ } else {
183
+ col++;
184
+ }
185
+ }
186
+ const end: Position = { line, col };
187
+ const loc: LocationInterface = { start, end };
188
+ const range: [number, number] = [lex.pos, lex.pos + text.length];
189
+
190
+ lex.push(new Token(tokenType, text, loc, range));
191
+
192
+ lex.pos += text.length;
193
+ lex.line = line;
194
+ lex.col = col;
195
+ }
196
+ };
197
+ }
198
+
199
+ function defaultHandler(tokenType: TokenType, tokenValue: string): RegexHandler {
200
+ return (lex: Lexer, regex: RegExp) => {
201
+ const remaining = lex.remainingSource();
202
+ const match = remaining.match(regex);
203
+ if (match && match.index === 0) {
204
+ const text = match[0];
205
+
206
+ const start: Position = { line: lex.line, col: lex.col };
207
+
208
+ let line = lex.line;
209
+ let col = lex.col;
210
+ for (const ch of text) {
211
+ if (ch === '\n') {
212
+ line++;
213
+ col = 0;
214
+ } else {
215
+ col++;
216
+ }
217
+ }
218
+ const end: Position = { line, col };
219
+
220
+ const loc: LocationInterface = { start, end };
221
+ const range: [number, number] = [lex.pos, lex.pos + text.length];
222
+
223
+ lex.push(new Token(tokenType, tokenValue, loc, range));
224
+
225
+ lex.pos += text.length;
226
+ lex.line = line;
227
+ lex.col = col;
228
+ }
229
+ };
230
+ }
231
+
232
+ type ReservedMap = { [key: string]: TokenType };
233
+ function symbolHandler(reservedKeywords: ReservedMap): RegexHandler {
234
+ return (lex: Lexer, regex: RegExp) => {
235
+ const remaining = lex.remainingSource();
236
+ const match = remaining.match(regex);
237
+ if (match && match.index === 0) {
238
+ const text = match[0];
239
+ const start: Position = { line: lex.line, col: lex.col };
240
+
241
+ let line = lex.line;
242
+ let col = lex.col;
243
+ for (const ch of text) {
244
+ if (ch === '\n') {
245
+ line++;
246
+ col = 0;
247
+ } else {
248
+ col++;
249
+ }
250
+ }
251
+ const end: Position = { line, col };
252
+ const loc: LocationInterface = { start, end };
253
+ const range: [number, number] = [lex.pos, lex.pos + text.length];
254
+
255
+ // Use reserved token type if it exists, else default to IDENTIFIER
256
+ const tokenType = reservedKeywords[text] ?? TokenType.IDENTIFIER;
257
+
258
+ lex.push(new Token(tokenType, text, loc, range));
259
+
260
+ lex.pos += text.length;
261
+ lex.line = line;
262
+ lex.col = col;
263
+ }
264
+ };
265
+ }
266
+
267
+
268
+
269
+ function CreateLexer(source: string): Lexer {
270
+ const patterns: RegexPattern[] = [
271
+ // Whitespace and newline — emit, don't skip (for formatter support)
272
+ { regex: /^[ \t]+/, handler: whitespaceHandler(TokenType.WHITESPACE) },
273
+ { regex: /^(?:\r\n|\r|\n)/, handler: defaultHandler(TokenType.NEWLINE, "\n") },
274
+
275
+ // Comments — single-line first, then multi-line
276
+ { regex: /^\/\/[^\r\n]*/, handler: singleCommentHandler(TokenType.SINGLE_COMMENT) },
277
+ { regex: /^\/\*[\s\S]*?\*\//, handler: multiCommentHandler(TokenType.MULTI_COMMENT) },
278
+
279
+ // Strings — assuming stringHandler handles quotes and escapes
280
+ { regex: /^"(?:[^"\\]|\\.)*"/, handler: stringHandler(TokenType.STRING) },
281
+ { regex: /^'(?:[^'\\]|\\.)*'/, handler: stringHandler(TokenType.STRING) },
282
+
283
+ // Numbers — integers and decimals
284
+ { regex: /^\d+\.\d+/, handler: numberHandler(TokenType.NUMBER) }, // decimal numbers
285
+ { regex: /^\d+/, handler: numberHandler(TokenType.NUMBER) }, // integers
286
+
287
+ // Identifiers — variable/function names, keywords, etc.
288
+ { regex: /^[A-Za-z_][A-Za-z0-9_]*!?$/, handler: symbolHandler(ReservedTokens) },
289
+
290
+ // Grouping & brackets
291
+ { regex: /^\[/, handler: defaultHandler(TokenType.LEFT_BRACKET, "[") },
292
+ { regex: /^\]/, handler: defaultHandler(TokenType.RIGHT_BRACKET, "]") },
293
+ { regex: /^\{/, handler: defaultHandler(TokenType.LEFT_BRACE, "{") },
294
+ { regex: /^\}/, handler: defaultHandler(TokenType.RIGHT_BRACE, "}") },
295
+ { regex: /^\(/, handler: defaultHandler(TokenType.LEFT_PAREN, "(") },
296
+ { regex: /^\)/, handler: defaultHandler(TokenType.RIGHT_PAREN, ")") },
297
+
298
+ // Comparison operators — longer first
299
+ { regex: /^==/, handler: defaultHandler(TokenType.EQUALS, "==") },
300
+ { regex: /^!=/, handler: defaultHandler(TokenType.NOT_EQUALS, "!=") },
301
+ { regex: /^<=/, handler: defaultHandler(TokenType.LESS_EQUALS, "<=") },
302
+ { regex: /^</, handler: defaultHandler(TokenType.LESS, "<") },
303
+ { regex: /^>=/, handler: defaultHandler(TokenType.GREATER_EQUALS, ">=") },
304
+ { regex: /^>/, handler: defaultHandler(TokenType.GREATER, ">") },
305
+
306
+ // Assignment and logical operators
307
+ { regex: /^&&/, handler: defaultHandler(TokenType.AND, "&&") },
308
+ { regex: /^\|\|/, handler: defaultHandler(TokenType.OR, "||") },
309
+ { regex: /^=/, handler: defaultHandler(TokenType.ASSINGMENT_EQUALS, "=") },
310
+ { regex: /^!/, handler: defaultHandler(TokenType.NOT, "!") },
311
+
312
+ // Range and dot — precedence important
313
+ { regex: /^\.\./, handler: defaultHandler(TokenType.RANGE_OPERATOR, "..") },
314
+ { regex: /^\./, handler: defaultHandler(TokenType.DOT, ".") },
315
+
316
+ // Punctuation and symbols
317
+ { regex: /^;/, handler: defaultHandler(TokenType.SEMI_COLON, ";") },
318
+ { regex: /^:/, handler: defaultHandler(TokenType.COLON, ":") },
319
+ { regex: /^\?/, handler: defaultHandler(TokenType.QUESTION_OPERATOR, "?") },
320
+ { regex: /^,/, handler: defaultHandler(TokenType.COMMA, ",") },
321
+
322
+ // Increment/decrement & compound assign — longer first
323
+ { regex: /^\+\+/, handler: defaultHandler(TokenType.PLUS_PLUS, "++") },
324
+ { regex: /^--/, handler: defaultHandler(TokenType.MINUS_MINUS, "--") },
325
+ { regex: /^\+=/, handler: defaultHandler(TokenType.PLUS_EQUALS, "+=") },
326
+ { regex: /^-=/, handler: defaultHandler(TokenType.MINUS_EQUALS, "-=") },
327
+
328
+ // Arithmetic operators
329
+ { regex: /^\+/, handler: defaultHandler(TokenType.PLUS, "+") },
330
+ { regex: /^-/, handler: defaultHandler(TokenType.MINUS, "-") },
331
+ { regex: /^\//, handler: defaultHandler(TokenType.DIVIDE, "/") },
332
+ { regex: /^\*/, handler: defaultHandler(TokenType.MUL, "*") },
333
+ { regex: /^%/, handler: defaultHandler(TokenType.MODULO, "%") },
334
+ ];
335
+
336
+ return new Lexer(source, patterns);
337
+ }
338
+
339
+
340
+
341
+ function Tokenize(source: string): Token[] {
342
+ const lex = CreateLexer(source);
343
+
344
+ while (!lex.end_of_file()) {
345
+ let matched = false;
346
+
347
+ for (const pattern of lex.patterns) {
348
+ const match = lex.remainingSource().match(pattern.regex);
349
+
350
+ if (match && match.index === 0) {
351
+ pattern.handler(lex, pattern.regex);
352
+ matched = true;
353
+ break;
354
+ }
355
+ }
356
+
357
+ if (!matched) {
358
+ const context = lex.remainingSource().slice(0, 20);
359
+ throw new Error(
360
+ `Unrecognized token at line ${lex.line}, column ${lex.col}: '${context}'`
361
+ );
362
+ }
363
+ }
364
+
365
+ // Push EOF token at end of source
366
+ const eofLoc: LocationInterface = {
367
+ start: { line: lex.line, col: lex.col },
368
+ end: { line: lex.line, col: lex.col },
369
+ };
370
+
371
+ lex.push(new Token(TokenType.EOF, "EOF", eofLoc, [lex.pos, lex.pos]));
372
+
373
+ return lex.Tokens;
374
+ }
375
+
376
+
377
+ interface PrintOptions {
378
+ shortenWhitespace: boolean;
379
+ }
380
+
381
+ function Print(tokens: Token[], options: PrintOptions | undefined) {
382
+ const shorten = options?.shortenWhitespace ?? false;
383
+ if (shorten) {
384
+ for (let i = 0; i < tokens.length; i++) {
385
+ if (tokens[i].type == TokenType.WHITESPACE) {
386
+ tokens[i].value = ' ';
387
+ }
388
+ }
389
+ }
390
+
391
+ return tokens.slice(0,-1).map(t => t.value).join('');
392
+ }
393
+
394
+
395
+ export { Lexer, CreateLexer, Tokenize, Print };