drift-parser 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ast.json +72 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +8 -0
- package/dist/src/ast/ast.d.ts +0 -0
- package/dist/src/ast/exports.d.ts +1 -0
- package/dist/src/ast/expr.d.ts +0 -0
- package/dist/src/ast/stmt.d.ts +0 -0
- package/dist/src/ast/type.d.ts +31 -0
- package/dist/src/lexer/exports.d.ts +2 -0
- package/dist/src/lexer/tokenizer.d.ts +36 -0
- package/dist/src/lexer/tokens.d.ts +174 -0
- package/dist/src/parser/exports.d.ts +1 -0
- package/dist/src/parser/expr.d.ts +5 -0
- package/dist/src/parser/lookup.d.ts +28 -0
- package/dist/src/parser/parser.d.ts +23 -0
- package/dist/src/parser/stmt.d.ts +3 -0
- package/dist/src/parser/type.d.ts +0 -0
- package/dist/src/utils/combineLocation.d.ts +2 -0
- package/dist/src/utils/genexpr.d.ts +16 -0
- package/dist/src/utils/mapAll.d.ts +0 -0
- package/dist/src/utils/registerParse.d.ts +7 -0
- package/index.d.ts +1 -0
- package/index.ts +2 -0
- package/package.json +22 -0
- package/scripts/build.js +50 -0
- package/src/ast/ast.ts +0 -0
- package/src/ast/exports.ts +3 -0
- package/src/ast/expr.ts +122 -0
- package/src/ast/stmt.ts +126 -0
- package/src/ast/type.ts +46 -0
- package/src/lexer/exports.ts +2 -0
- package/src/lexer/tokenizer.ts +395 -0
- package/src/lexer/tokens.ts +241 -0
- package/src/parser/exports.ts +1 -0
- package/src/parser/expr.ts +82 -0
- package/src/parser/lookup.ts +69 -0
- package/src/parser/parser.ts +166 -0
- package/src/parser/stmt.ts +151 -0
- package/src/parser/type.ts +89 -0
- package/src/utils/combineLocation.ts +7 -0
- package/src/utils/mapAll.ts +43 -0
- package/src/utils/registerParse.ts +117 -0
- package/tests/astTest.js +44 -0
- package/tests/printTest.mjs +7 -0
- package/tests/tokenize.js +92 -0
- package/tests/typenames.js +15 -0
- package/tsconfig.json +15 -0
package/src/ast/expr.ts
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
// import { ASTNode } from "./ast";
|
|
2
|
+
// import { Token } from "../lexer/tokens";
|
|
3
|
+
// import { Parser } from "../parser/parser";
|
|
4
|
+
// import { parseExpr } from "../parser/expr";
|
|
5
|
+
// import { BP } from "../parser/lookup";
|
|
6
|
+
// import { makeExpressionClass } from "../utils/genexpr";
|
|
7
|
+
|
|
8
|
+
// export abstract class Expression extends ASTNode {
|
|
9
|
+
// toJSON() {
|
|
10
|
+
// return {
|
|
11
|
+
// type: this.constructor.name, // captures class name dynamically
|
|
12
|
+
// loc: this.loc,
|
|
13
|
+
// range: this.range,
|
|
14
|
+
// };
|
|
15
|
+
// }
|
|
16
|
+
// }
|
|
17
|
+
|
|
18
|
+
// export class SymbolExpression extends Expression {
|
|
19
|
+
// /** The name of the identifier. */
|
|
20
|
+
// public value: string;
|
|
21
|
+
|
|
22
|
+
// constructor(token: Token) {
|
|
23
|
+
// super(token.loc, token.range);
|
|
24
|
+
// this.value = token.value;
|
|
25
|
+
// }
|
|
26
|
+
|
|
27
|
+
// toJSON() {
|
|
28
|
+
// return {
|
|
29
|
+
// ...super.toJSON(),
|
|
30
|
+
// value: this.value,
|
|
31
|
+
// };
|
|
32
|
+
// }
|
|
33
|
+
// }
|
|
34
|
+
|
|
35
|
+
// export class StringExpression extends Expression {
|
|
36
|
+
// /** The value of the string with escaped quotes. */
|
|
37
|
+
// public value: string;
|
|
38
|
+
|
|
39
|
+
// constructor(token: Token) {
|
|
40
|
+
// super(token.loc, token.range);
|
|
41
|
+
// this.value = token.value;
|
|
42
|
+
// }
|
|
43
|
+
|
|
44
|
+
// toJSON() {
|
|
45
|
+
// return {
|
|
46
|
+
// ...super.toJSON(),
|
|
47
|
+
// value: this.value,
|
|
48
|
+
// };
|
|
49
|
+
// }
|
|
50
|
+
// }
|
|
51
|
+
|
|
52
|
+
// export class NumberExpression extends Expression {
|
|
53
|
+
// /** The value of the number as a string to not deal with bit issues. */
|
|
54
|
+
// public value: string;
|
|
55
|
+
|
|
56
|
+
// constructor(token: Token) {
|
|
57
|
+
// super(token.loc, token.range);
|
|
58
|
+
// this.value = token.value;
|
|
59
|
+
// }
|
|
60
|
+
|
|
61
|
+
// toJSON() {
|
|
62
|
+
// return {
|
|
63
|
+
// ...super.toJSON(),
|
|
64
|
+
// value: this.value,
|
|
65
|
+
// };
|
|
66
|
+
// }
|
|
67
|
+
// }
|
|
68
|
+
|
|
69
|
+
// // export class PrefixExpression extends Expression {
|
|
70
|
+
// // public prefix: string;
|
|
71
|
+
// // public expression: Expression;
|
|
72
|
+
|
|
73
|
+
// // constructor(prefixToken: Token, rightExpression: Expression) {
|
|
74
|
+
// // super(prefixToken.loc, prefixToken.range);
|
|
75
|
+
// // this.prefix = prefixToken.value;
|
|
76
|
+
// // this.expression = rightExpression;
|
|
77
|
+
// // }
|
|
78
|
+
|
|
79
|
+
// // toJSON() {
|
|
80
|
+
// // return {
|
|
81
|
+
// // ...super.toJSON(),
|
|
82
|
+
// // prefix: this.prefix,
|
|
83
|
+
// // expression: this.expression.toJSON()
|
|
84
|
+
// // }
|
|
85
|
+
// // }
|
|
86
|
+
// // }
|
|
87
|
+
|
|
88
|
+
// export const PrefixExpression = makeExpressionClass({
|
|
89
|
+
// className: "PrefixExpression",
|
|
90
|
+
// fields: ["prefix", "expression"],
|
|
91
|
+
// constructor: (prefixToken: Token, rightExpression: Expression) => ({
|
|
92
|
+
// loc: prefixToken.loc,
|
|
93
|
+
// range: prefixToken.range,
|
|
94
|
+
// prefix: prefixToken.value,
|
|
95
|
+
// expression: rightExpression,
|
|
96
|
+
// }),
|
|
97
|
+
// toJSON: ({ prefix, expression }) => ({
|
|
98
|
+
// prefix,
|
|
99
|
+
// expression: expression.toJSON(),
|
|
100
|
+
// }),
|
|
101
|
+
// });
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
// export const BinaryExpression = makeExpressionClass({
|
|
106
|
+
// className: "PrefixExpression",
|
|
107
|
+
// fields: ["left", "op", "right"],
|
|
108
|
+
// constructor: (prefixToken: Token, rightExpression: Expression) => ({
|
|
109
|
+
// loc: prefixToken.loc,
|
|
110
|
+
// range: prefixToken.range,
|
|
111
|
+
// prefix: prefixToken.value,
|
|
112
|
+
// expression: rightExpression,
|
|
113
|
+
// }),
|
|
114
|
+
// toJSON: ({ prefix, expression }) => ({
|
|
115
|
+
// prefix,
|
|
116
|
+
// expression: expression.toJSON(),
|
|
117
|
+
// }),
|
|
118
|
+
// });
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
|
package/src/ast/stmt.ts
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
// // ./ast/stmt.ts
|
|
2
|
+
// import { LocationInterface, Token, TokenType } from "../lexer/tokens";
|
|
3
|
+
// import { ASTNode } from "./ast";
|
|
4
|
+
// import { Expression } from "./expr";
|
|
5
|
+
// import { Type } from "./type";
|
|
6
|
+
|
|
7
|
+
// export abstract class Statement extends ASTNode {
|
|
8
|
+
// toJSON() {
|
|
9
|
+
// return {
|
|
10
|
+
// type: this.constructor.name,
|
|
11
|
+
// loc: this.loc,
|
|
12
|
+
// range: this.range,
|
|
13
|
+
// };
|
|
14
|
+
// }
|
|
15
|
+
// }
|
|
16
|
+
|
|
17
|
+
// export class BlockStatement extends Statement {
|
|
18
|
+
// public body: Statement[];
|
|
19
|
+
|
|
20
|
+
// constructor(body: Statement[], loc: LocationInterface, range: [number, number]) {
|
|
21
|
+
// super(loc, range);
|
|
22
|
+
// this.body = body;
|
|
23
|
+
// }
|
|
24
|
+
|
|
25
|
+
// toJSON() {
|
|
26
|
+
// return {
|
|
27
|
+
// ...super.toJSON(),
|
|
28
|
+
// body: this.body.map((s) => s.toJSON()),
|
|
29
|
+
// };
|
|
30
|
+
// }
|
|
31
|
+
// }
|
|
32
|
+
|
|
33
|
+
// export class ExpressionStatement extends Statement {
|
|
34
|
+
// public expression: Expression;
|
|
35
|
+
|
|
36
|
+
// constructor(expression: Expression, loc: LocationInterface, range: [number, number]) {
|
|
37
|
+
// super(loc, range);
|
|
38
|
+
// this.expression = expression;
|
|
39
|
+
// }
|
|
40
|
+
|
|
41
|
+
// toJSON() {
|
|
42
|
+
// return {
|
|
43
|
+
// ...super.toJSON(),
|
|
44
|
+
// expression: this.expression.toJSON(),
|
|
45
|
+
// };
|
|
46
|
+
// }
|
|
47
|
+
// }
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
// export class TerminatorStatement extends Statement {
|
|
51
|
+
// public kind: "semicolon" | "newline" | "eof";
|
|
52
|
+
|
|
53
|
+
// constructor(token: Token) {
|
|
54
|
+
// super(token.loc, token.range);
|
|
55
|
+
|
|
56
|
+
// if (token.type === TokenType.EOF) {
|
|
57
|
+
// this.kind = "eof";
|
|
58
|
+
// } else {
|
|
59
|
+
// this.kind = token.value === ";" ? "semicolon" : "newline";
|
|
60
|
+
// }
|
|
61
|
+
// }
|
|
62
|
+
|
|
63
|
+
// toJSON() {
|
|
64
|
+
// return {
|
|
65
|
+
// ...super.toJSON(),
|
|
66
|
+
// kind: this.kind,
|
|
67
|
+
// };
|
|
68
|
+
// }
|
|
69
|
+
// }
|
|
70
|
+
|
|
71
|
+
// export class UsingStatement extends Statement {
|
|
72
|
+
// public value: string;
|
|
73
|
+
// public as?: Expression;
|
|
74
|
+
|
|
75
|
+
// constructor(
|
|
76
|
+
// valueToken: Token,
|
|
77
|
+
// asExpression: Expression | undefined,
|
|
78
|
+
// _terminator: Token, // still required to build loc/range externally
|
|
79
|
+
// loc: LocationInterface,
|
|
80
|
+
// range: [number, number]
|
|
81
|
+
// ) {
|
|
82
|
+
// super(loc, range);
|
|
83
|
+
// this.value = valueToken.value;
|
|
84
|
+
// this.as = asExpression;
|
|
85
|
+
// }
|
|
86
|
+
|
|
87
|
+
// toJSON() {
|
|
88
|
+
// return {
|
|
89
|
+
// ...super.toJSON(),
|
|
90
|
+
// value: this.value,
|
|
91
|
+
// as: this.as ? this.as.toJSON() : null,
|
|
92
|
+
// };
|
|
93
|
+
// }
|
|
94
|
+
// }
|
|
95
|
+
|
|
96
|
+
// export class VariableDeclarationStatement extends Statement {
|
|
97
|
+
// public identifier: string;
|
|
98
|
+
// public varType?: Type;
|
|
99
|
+
// public initializer?: Expression;
|
|
100
|
+
// public isConstant: boolean;
|
|
101
|
+
|
|
102
|
+
// constructor(
|
|
103
|
+
// nameToken: Token,
|
|
104
|
+
// varType: Type | undefined,
|
|
105
|
+
// initializer: Expression | undefined,
|
|
106
|
+
// _terminator: Token,
|
|
107
|
+
// isConstant: boolean,
|
|
108
|
+
// loc: LocationInterface,
|
|
109
|
+
// range: [number, number]
|
|
110
|
+
// ) {
|
|
111
|
+
// super(loc, range);
|
|
112
|
+
// this.identifier = nameToken.value;
|
|
113
|
+
// this.varType = varType;
|
|
114
|
+
// this.initializer = initializer;
|
|
115
|
+
// this.isConstant = isConstant;
|
|
116
|
+
// }
|
|
117
|
+
|
|
118
|
+
// toJSON() {
|
|
119
|
+
// return {
|
|
120
|
+
// ...super.toJSON(),
|
|
121
|
+
// identifier: this.identifier,
|
|
122
|
+
// varType: this.varType ? this.varType.toJSON() : null,
|
|
123
|
+
// initializer: this.initializer ? this.initializer.toJSON() : null,
|
|
124
|
+
// };
|
|
125
|
+
// }
|
|
126
|
+
// }
|
package/src/ast/type.ts
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { LocationInterface } from "../lexer/tokens"
|
|
2
|
+
|
|
3
|
+
/** Discriminates the two families of AST nodes produced by the factories below. */
export enum NodeKind {
    Statement,
    Expression,
}

/**
 * Common shape of every AST node.
 *
 * Besides the fixed fields, a node may carry arbitrary extra properties
 * (hence the index signature); their meaning depends on `type`.
 */
export interface Node {
    /** Whether the node is a statement or an expression. */
    kind: NodeKind;
    /** Free-form node type name supplied by the caller of the factory. */
    type: string;
    /** Source location of the node. */
    loc: LocationInterface;
    /** Pair of numeric offsets delimiting the node. */
    range: [number, number];
    /** Child nodes; the factories default this to an empty array. */
    body: Node[];
    [key: string]: any;
}

/** A node whose `kind` is fixed to {@link NodeKind.Statement}. */
export interface Stmt extends Node {
    kind: NodeKind.Statement;
}

/** A node whose `kind` is fixed to {@link NodeKind.Expression}. */
export interface Expr extends Node {
    kind: NodeKind.Expression;
}

/** Fields every factory call must provide; any extra properties are passed through. */
type NodeFieldsRequired = {
    type: string;
    loc: LocationInterface;
    range: [number, number];
} & Record<string, any>;

/**
 * Creates an expression node.
 *
 * `kind` is always `NodeKind.Expression` and `body` is always an array: a
 * `kind` supplied in `fields` is ignored and a missing or `undefined` `body`
 * becomes `[]`. All other properties of `fields` are copied onto the node.
 *
 * @param fields Required node fields plus optional `body` and extra properties.
 * @returns The new expression node, with `kind` and `body` as its first keys.
 */
export function mkexpr(fields: NodeFieldsRequired & { body?: Node[] }): Expr {
    // Pull `kind`/`body` out so the spread below cannot overwrite the values
    // that the return type promises.
    const { kind: _ignoredKind, body, ...rest } = fields;
    return {
        kind: NodeKind.Expression,
        body: body ?? [],
        ...rest,
    };
}

/**
 * Creates a statement node.
 *
 * `kind` is always `NodeKind.Statement` and `body` is always an array: a
 * `kind` supplied in `fields` is ignored and a missing or `undefined` `body`
 * becomes `[]`. All other properties of `fields` are copied onto the node.
 *
 * @param fields Required node fields plus optional `body` and extra properties.
 * @returns The new statement node, with `kind` and `body` as its first keys.
 */
export function mkstmt(fields: NodeFieldsRequired & { body?: Node[] }): Stmt {
    // Same guard as in mkexpr: caller-supplied `kind`/`body: undefined` must not win.
    const { kind: _ignoredKind, body, ...rest } = fields;
    return {
        kind: NodeKind.Statement,
        body: body ?? [],
        ...rest,
    };
}
|
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
import { LocationInterface, Token, TokenType, Position, ReservedTokens } from "./tokens";
|
|
2
|
+
|
|
3
|
+
/** Callback run for a pattern; it receives the lexer and the pattern's own regex. */
type RegexHandler = (lex: Lexer, regex: RegExp) => void;

/** A regex paired with the handler that consumes its match. */
interface RegexPattern {
    regex: RegExp;
    handler: RegexHandler;
}

/** State and operations the tokenizer handlers rely on. */
interface LexerInterface {
    patterns: RegexPattern[];
    Tokens: Token[];
    source: string;
    pos: number;
    line: number;
    col: number;
    remainingSource(): string;
    end_of_file(): boolean;
    push(token: Token): void;
}

/**
 * Mutable lexing state: the source text, a cursor into it, the current
 * line/column and the tokens collected so far.
 */
class Lexer implements LexerInterface {
    /** Tokens collected so far, in the order they were pushed. */
    Tokens: Token[] = [];
    /** Offset of the cursor into `source`. */
    pos = 0;
    /** Current line; starts at 1. */
    line = 1;
    /** Current column; starts at 0. */
    col = 0;

    /**
     * @param source Full text to tokenize.
     * @param patterns Patterns stored on the lexer for the tokenizing loop.
     */
    constructor(
        public source: string,
        public patterns: RegexPattern[],
    ) {}

    /** Returns the part of the source from the cursor to the end. */
    remainingSource(): string {
        const { source, pos } = this;
        return source.slice(pos);
    }

    /** Reports whether the cursor is at or past the end of the source. */
    end_of_file(): boolean {
        const { source, pos } = this;
        return pos >= source.length;
    }

    /** Appends a token to `Tokens`. */
    push(token: Token): void {
        this.Tokens.push(token);
    }
}
|
|
51
|
+
|
|
52
|
+
/**
 * Builds a handler that emits one token of `tokenType` for the text matched
 * at the cursor and advances the lexer past it.
 *
 * The token value is the matched text verbatim. Matched text may span several
 * lines; `\n`, `\r\n` and a lone `\r` each count as one line break, so the
 * lexer's line/column stay correct for every line-ending style.
 *
 * @param tokenType Type given to the emitted token.
 * @returns A handler that does nothing when the regex does not match at the cursor.
 */
function stringHandler(tokenType: TokenType): RegexHandler {
    return (lex: Lexer, regex: RegExp) => {
        const match = lex.remainingSource().match(regex);
        if (!match || match.index !== 0) {
            return;
        }

        const text = match[0];
        const start: Position = { line: lex.line, col: lex.col };

        // One segment per line of the matched text; "\r\n" is listed first so
        // a CRLF pair is counted as a single break.
        const segments = text.split(/\r\n|\r|\n/);
        const tail = segments[segments.length - 1] ?? "";
        const line = lex.line + segments.length - 1;
        // Columns restart after a line break; count code points, not UTF-16 units.
        const col = (segments.length > 1 ? 0 : lex.col) + Array.from(tail).length;

        const end: Position = { line, col };
        const loc: LocationInterface = { start, end };
        const range: [number, number] = [lex.pos, lex.pos + text.length];

        lex.push(new Token(tokenType, text, loc, range));

        lex.pos += text.length;
        lex.line = line;
        lex.col = col;
    };
}
|
|
81
|
+
|
|
82
|
+
/**
 * Builds a handler that emits one token of `tokenType` for the text matched
 * at the cursor and moves the lexer's position, line and column past it.
 *
 * @param tokenType Type given to the emitted token.
 * @returns A handler that does nothing when the regex does not match at the cursor.
 */
function numberHandler(tokenType: TokenType): RegexHandler {
    return (lex: Lexer, regex: RegExp) => {
        const found = lex.remainingSource().match(regex);
        if (!found || found.index !== 0) {
            return;
        }

        const lexeme = found[0];
        const begin = lex.pos;

        // Every "\n" starts a new line; the column is the number of code
        // points that follow the last one (or that extend the current line).
        const pieces = lexeme.split("\n");
        const lastPiece = pieces[pieces.length - 1] ?? "";
        const endLine = lex.line + (pieces.length - 1);
        const endCol = (pieces.length > 1 ? 0 : lex.col) + Array.from(lastPiece).length;

        const loc: LocationInterface = {
            start: { line: lex.line, col: lex.col },
            end: { line: endLine, col: endCol },
        };

        lex.push(new Token(tokenType, lexeme, loc, [begin, begin + lexeme.length]));

        lex.pos = begin + lexeme.length;
        lex.line = endLine;
        lex.col = endCol;
    };
}
|
|
111
|
+
|
|
112
|
+
/**
 * Builds a handler that emits one token of `tokenType` for the (possibly
 * multi-line) text matched at the cursor and advances the lexer past it.
 *
 * The token value is the matched text verbatim. `\n`, `\r\n` and a lone `\r`
 * each count as one line break when updating the lexer's line and column.
 *
 * @param tokenType Type given to the emitted token.
 * @returns A handler that does nothing when the regex does not match at the cursor.
 */
function multiCommentHandler(tokenType: TokenType): RegexHandler {
    return (lex: Lexer, regex: RegExp) => {
        const match = lex.remainingSource().match(regex);
        if (!match || match.index !== 0) {
            return;
        }

        const text = match[0];
        const start: Position = { line: lex.line, col: lex.col };

        // One segment per line of the comment; "\r\n" is listed first so a
        // CRLF pair is counted as a single break.
        const segments = text.split(/\r\n|\r|\n/);
        const tail = segments[segments.length - 1] ?? "";
        const line = lex.line + segments.length - 1;
        // Columns restart after a line break; count code points, not UTF-16 units.
        const col = (segments.length > 1 ? 0 : lex.col) + Array.from(tail).length;

        const end: Position = { line, col };
        const loc: LocationInterface = { start, end };
        const range: [number, number] = [lex.pos, lex.pos + text.length];

        lex.push(new Token(tokenType, text, loc, range));

        lex.pos += text.length;
        lex.line = line;
        lex.col = col;
    };
}
|
|
141
|
+
|
|
142
|
+
/**
 * Builds a handler that emits one token of `tokenType` for the text matched
 * at the cursor, treating the match as a single line.
 *
 * The line number is left unchanged and the column advances by the length of
 * the matched text. Nothing beyond the match is consumed, so a line break
 * that follows the comment is left for the next pattern.
 *
 * @param tokenType Type given to the emitted token.
 * @returns A handler that does nothing when the regex does not match at the cursor.
 */
function singleCommentHandler(tokenType: TokenType): RegexHandler {
    return (lex: Lexer, regex: RegExp) => {
        const found = lex.remainingSource().match(regex);
        if (!found || found.index !== 0) {
            return;
        }

        const comment = found[0];
        const width = comment.length;
        const endCol = lex.col + width;

        const loc: LocationInterface = {
            start: { line: lex.line, col: lex.col },
            end: { line: lex.line, col: endCol },
        };

        lex.push(new Token(tokenType, comment, loc, [lex.pos, lex.pos + width]));

        // Only the position and column move; the line stays the same.
        lex.pos += width;
        lex.col = endCol;
    };
}
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
/**
 * Builds a handler that emits one token of `tokenType` for the text matched
 * at the cursor (whitespace is kept as a token rather than skipped) and moves
 * the lexer's position, line and column past it.
 *
 * @param tokenType Type given to the emitted token.
 * @returns A handler that does nothing when the regex does not match at the cursor.
 */
function whitespaceHandler(tokenType: TokenType): RegexHandler {
    return (lex: Lexer, regex: RegExp) => {
        const found = lex.remainingSource().match(regex);
        if (!found || found.index !== 0) {
            return;
        }

        const lexeme = found[0];
        const begin = lex.pos;

        // Every "\n" starts a new line; the column is the number of code
        // points that follow the last one (or that extend the current line).
        const pieces = lexeme.split("\n");
        const lastPiece = pieces[pieces.length - 1] ?? "";
        const endLine = lex.line + (pieces.length - 1);
        const endCol = (pieces.length > 1 ? 0 : lex.col) + Array.from(lastPiece).length;

        const loc: LocationInterface = {
            start: { line: lex.line, col: lex.col },
            end: { line: endLine, col: endCol },
        };

        lex.push(new Token(tokenType, lexeme, loc, [begin, begin + lexeme.length]));

        lex.pos = begin + lexeme.length;
        lex.line = endLine;
        lex.col = endCol;
    };
}
|
|
198
|
+
|
|
199
|
+
/**
 * Builds a handler for fixed tokens (operators, punctuation, newlines).
 *
 * The emitted token carries `tokenValue` rather than the matched text, so
 * different spellings of the same token are normalised, while `range` and the
 * cursor still advance by the length of the text actually matched.
 *
 * `\n`, `\r\n` and a lone `\r` each count as one line break when updating the
 * lexer's line and column, so every newline spelling advances the line.
 *
 * @param tokenType Type given to the emitted token.
 * @param tokenValue Value stored on the emitted token.
 * @returns A handler that does nothing when the regex does not match at the cursor.
 */
function defaultHandler(tokenType: TokenType, tokenValue: string): RegexHandler {
    return (lex: Lexer, regex: RegExp) => {
        const match = lex.remainingSource().match(regex);
        if (!match || match.index !== 0) {
            return;
        }

        const text = match[0];
        const start: Position = { line: lex.line, col: lex.col };

        // One segment per line of the matched text; "\r\n" is listed first so
        // a CRLF pair is counted as a single break.
        const segments = text.split(/\r\n|\r|\n/);
        const tail = segments[segments.length - 1] ?? "";
        const line = lex.line + segments.length - 1;
        // Columns restart after a line break; count code points, not UTF-16 units.
        const col = (segments.length > 1 ? 0 : lex.col) + Array.from(tail).length;

        const end: Position = { line, col };
        const loc: LocationInterface = { start, end };
        const range: [number, number] = [lex.pos, lex.pos + text.length];

        lex.push(new Token(tokenType, tokenValue, loc, range));

        lex.pos += text.length;
        lex.line = line;
        lex.col = col;
    };
}
|
|
231
|
+
|
|
232
|
+
/** Maps reserved words to the token type they should be emitted as. */
type ReservedMap = { [key: string]: TokenType };

/**
 * Builds a handler for identifiers and keywords.
 *
 * The matched text is looked up in `reservedKeywords`; a hit yields that
 * token type, anything else becomes `TokenType.IDENTIFIER`. Only the map's
 * own entries are consulted, so identifiers that happen to share a name with
 * an inherited object member (`constructor`, `toString`, ...) are still
 * plain identifiers.
 *
 * @param reservedKeywords Reserved words and their token types.
 * @returns A handler that does nothing when the regex does not match at the cursor.
 */
function symbolHandler(reservedKeywords: ReservedMap): RegexHandler {
    return (lex: Lexer, regex: RegExp) => {
        const match = lex.remainingSource().match(regex);
        if (!match || match.index !== 0) {
            return;
        }

        const text = match[0];
        const start: Position = { line: lex.line, col: lex.col };

        let line = lex.line;
        let col = lex.col;
        for (const ch of text) {
            if (ch === "\n") {
                line++;
                col = 0;
            } else {
                col++;
            }
        }
        const end: Position = { line, col };
        const loc: LocationInterface = { start, end };
        const range: [number, number] = [lex.pos, lex.pos + text.length];

        // Guard with an own-property check: a bare index lookup would also
        // find members inherited from Object.prototype.
        const reserved = Object.prototype.hasOwnProperty.call(reservedKeywords, text)
            ? reservedKeywords[text]
            : undefined;
        const tokenType = reserved ?? TokenType.IDENTIFIER;

        lex.push(new Token(tokenType, text, loc, range));

        lex.pos += text.length;
        lex.line = line;
        lex.col = col;
    };
}
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
/**
 * Creates a lexer for `source` with the language's pattern table.
 *
 * Patterns are listed in priority order; where one token is a prefix of
 * another (`==` / `=`, `..` / `.`, `++` / `+`, ...) the longer one comes
 * first. Every regex is anchored with `^` so it only matches at the start of
 * the text it is applied to.
 *
 * @param source Text to tokenize.
 * @returns A lexer positioned at the start of `source`.
 */
function CreateLexer(source: string): Lexer {
    const patterns: RegexPattern[] = [
        // Whitespace and newline — emit, don't skip (for formatter support)
        { regex: /^[ \t]+/, handler: whitespaceHandler(TokenType.WHITESPACE) },
        { regex: /^(?:\r\n|\r|\n)/, handler: defaultHandler(TokenType.NEWLINE, "\n") },

        // Comments — single-line first, then multi-line
        { regex: /^\/\/[^\r\n]*/, handler: singleCommentHandler(TokenType.SINGLE_COMMENT) },
        { regex: /^\/\*[\s\S]*?\*\//, handler: multiCommentHandler(TokenType.MULTI_COMMENT) },

        // Strings — double- or single-quoted, with backslash escapes
        { regex: /^"(?:[^"\\]|\\.)*"/, handler: stringHandler(TokenType.STRING) },
        { regex: /^'(?:[^'\\]|\\.)*'/, handler: stringHandler(TokenType.STRING) },

        // Numbers — decimals before integers so "1.5" is one token
        { regex: /^\d+\.\d+/, handler: numberHandler(TokenType.NUMBER) },
        { regex: /^\d+/, handler: numberHandler(TokenType.NUMBER) },

        // Identifiers and keywords, optionally ending in "!".
        // No trailing "$": the identifier must be recognised wherever it
        // appears, not only when it is the last thing in the source.
        // NOTE(review): the optional "!" means `a!=b` lexes as `a!` `=` `b`;
        // confirm whether that is intended.
        { regex: /^[A-Za-z_][A-Za-z0-9_]*!?/, handler: symbolHandler(ReservedTokens) },

        // Grouping & brackets
        { regex: /^\[/, handler: defaultHandler(TokenType.LEFT_BRACKET, "[") },
        { regex: /^\]/, handler: defaultHandler(TokenType.RIGHT_BRACKET, "]") },
        { regex: /^\{/, handler: defaultHandler(TokenType.LEFT_BRACE, "{") },
        { regex: /^\}/, handler: defaultHandler(TokenType.RIGHT_BRACE, "}") },
        { regex: /^\(/, handler: defaultHandler(TokenType.LEFT_PAREN, "(") },
        { regex: /^\)/, handler: defaultHandler(TokenType.RIGHT_PAREN, ")") },

        // Comparison operators — longer first
        { regex: /^==/, handler: defaultHandler(TokenType.EQUALS, "==") },
        { regex: /^!=/, handler: defaultHandler(TokenType.NOT_EQUALS, "!=") },
        { regex: /^<=/, handler: defaultHandler(TokenType.LESS_EQUALS, "<=") },
        { regex: /^</, handler: defaultHandler(TokenType.LESS, "<") },
        { regex: /^>=/, handler: defaultHandler(TokenType.GREATER_EQUALS, ">=") },
        { regex: /^>/, handler: defaultHandler(TokenType.GREATER, ">") },

        // Assignment and logical operators
        { regex: /^&&/, handler: defaultHandler(TokenType.AND, "&&") },
        { regex: /^\|\|/, handler: defaultHandler(TokenType.OR, "||") },
        { regex: /^=/, handler: defaultHandler(TokenType.ASSINGMENT_EQUALS, "=") },
        { regex: /^!/, handler: defaultHandler(TokenType.NOT, "!") },

        // Range and dot — precedence important
        { regex: /^\.\./, handler: defaultHandler(TokenType.RANGE_OPERATOR, "..") },
        { regex: /^\./, handler: defaultHandler(TokenType.DOT, ".") },

        // Punctuation and symbols
        { regex: /^;/, handler: defaultHandler(TokenType.SEMI_COLON, ";") },
        { regex: /^:/, handler: defaultHandler(TokenType.COLON, ":") },
        { regex: /^\?/, handler: defaultHandler(TokenType.QUESTION_OPERATOR, "?") },
        { regex: /^,/, handler: defaultHandler(TokenType.COMMA, ",") },

        // Increment/decrement & compound assign — longer first
        { regex: /^\+\+/, handler: defaultHandler(TokenType.PLUS_PLUS, "++") },
        { regex: /^--/, handler: defaultHandler(TokenType.MINUS_MINUS, "--") },
        { regex: /^\+=/, handler: defaultHandler(TokenType.PLUS_EQUALS, "+=") },
        { regex: /^-=/, handler: defaultHandler(TokenType.MINUS_EQUALS, "-=") },

        // Arithmetic operators
        { regex: /^\+/, handler: defaultHandler(TokenType.PLUS, "+") },
        { regex: /^-/, handler: defaultHandler(TokenType.MINUS, "-") },
        { regex: /^\//, handler: defaultHandler(TokenType.DIVIDE, "/") },
        { regex: /^\*/, handler: defaultHandler(TokenType.MUL, "*") },
        { regex: /^%/, handler: defaultHandler(TokenType.MODULO, "%") },
    ];

    return new Lexer(source, patterns);
}
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
/**
 * Converts `source` into a list of tokens.
 *
 * At each cursor position the lexer's patterns are tried in order and the
 * first one that matches at the cursor has its handler run. A final EOF
 * token (value `"EOF"`, zero-width range at the end position) is always
 * appended.
 *
 * @param source Text to tokenize.
 * @returns All tokens in source order, ending with the EOF token.
 * @throws Error when no pattern matches at the cursor; the message contains
 *         the line, the column and up to 20 characters of the offending text.
 */
function Tokenize(source: string): Token[] {
    const lex = CreateLexer(source);

    while (!lex.end_of_file()) {
        // Slice once per token: the remaining text is the same for every
        // pattern tried at this position.
        const remaining = lex.remainingSource();

        const pattern = lex.patterns.find((candidate) => {
            const match = remaining.match(candidate.regex);
            return match !== null && match.index === 0;
        });

        if (pattern === undefined) {
            const context = remaining.slice(0, 20);
            throw new Error(
                `Unrecognized token at line ${lex.line}, column ${lex.col}: '${context}'`
            );
        }

        pattern.handler(lex, pattern.regex);
    }

    // Push EOF token at end of source
    const eofLoc: LocationInterface = {
        start: { line: lex.line, col: lex.col },
        end: { line: lex.line, col: lex.col },
    };

    lex.push(new Token(TokenType.EOF, "EOF", eofLoc, [lex.pos, lex.pos]));

    return lex.Tokens;
}
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
/** Options accepted by {@link Print}. */
interface PrintOptions {
    /** When true, every whitespace token is printed as a single space. */
    shortenWhitespace: boolean;
}

/**
 * Joins token values back into text.
 *
 * A trailing EOF token is left out of the output. The input tokens are never
 * modified: whitespace shortening only affects the returned string.
 *
 * @param tokens Tokens to print, typically the result of `Tokenize`.
 * @param options Optional settings; omitted or `undefined` means no shortening.
 * @returns The concatenated token values.
 */
function Print(tokens: Token[], options?: PrintOptions): string {
    const shorten = options?.shortenWhitespace ?? false;

    // Drop the last token only when it really is the EOF marker, so a list
    // without one does not lose a real token.
    const last = tokens[tokens.length - 1];
    const printable = last !== undefined && last.type === TokenType.EOF ? tokens.slice(0, -1) : tokens;

    return printable
        .map((token) => (shorten && token.type === TokenType.WHITESPACE ? " " : token.value))
        .join("");
}
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
export { Lexer, CreateLexer, Tokenize, Print };
|