drift-parser 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ast.json +72 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +8 -0
- package/dist/src/ast/ast.d.ts +0 -0
- package/dist/src/ast/exports.d.ts +1 -0
- package/dist/src/ast/expr.d.ts +0 -0
- package/dist/src/ast/stmt.d.ts +0 -0
- package/dist/src/ast/type.d.ts +31 -0
- package/dist/src/lexer/exports.d.ts +2 -0
- package/dist/src/lexer/tokenizer.d.ts +36 -0
- package/dist/src/lexer/tokens.d.ts +174 -0
- package/dist/src/parser/exports.d.ts +1 -0
- package/dist/src/parser/expr.d.ts +5 -0
- package/dist/src/parser/lookup.d.ts +28 -0
- package/dist/src/parser/parser.d.ts +23 -0
- package/dist/src/parser/stmt.d.ts +3 -0
- package/dist/src/parser/type.d.ts +0 -0
- package/dist/src/utils/combineLocation.d.ts +2 -0
- package/dist/src/utils/genexpr.d.ts +16 -0
- package/dist/src/utils/mapAll.d.ts +0 -0
- package/dist/src/utils/registerParse.d.ts +7 -0
- package/index.d.ts +1 -0
- package/index.ts +2 -0
- package/package.json +22 -0
- package/scripts/build.js +50 -0
- package/src/ast/ast.ts +0 -0
- package/src/ast/exports.ts +3 -0
- package/src/ast/expr.ts +122 -0
- package/src/ast/stmt.ts +126 -0
- package/src/ast/type.ts +46 -0
- package/src/lexer/exports.ts +2 -0
- package/src/lexer/tokenizer.ts +395 -0
- package/src/lexer/tokens.ts +241 -0
- package/src/parser/exports.ts +1 -0
- package/src/parser/expr.ts +82 -0
- package/src/parser/lookup.ts +69 -0
- package/src/parser/parser.ts +166 -0
- package/src/parser/stmt.ts +151 -0
- package/src/parser/type.ts +89 -0
- package/src/utils/combineLocation.ts +7 -0
- package/src/utils/mapAll.ts +43 -0
- package/src/utils/registerParse.ts +117 -0
- package/tests/astTest.js +44 -0
- package/tests/printTest.mjs +7 -0
- package/tests/tokenize.js +92 -0
- package/tests/typenames.js +15 -0
- package/tsconfig.json +15 -0
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
// import { LocationInterface, TokenType } from "../lexer/tokens";
|
|
2
|
+
// // import { Type, SymbolType, ArrayType } from "../ast/exports";
|
|
3
|
+
// import { Parser } from "./parser";
|
|
4
|
+
|
|
5
|
+
// export enum TypeBP {
|
|
6
|
+
// Default = 0,
|
|
7
|
+
|
|
8
|
+
// }
|
|
9
|
+
|
|
10
|
+
// export type TypeNudHandler = (p: Parser) => Type;
|
|
11
|
+
// export type TypeLedHandler = (p: Parser, left: Type, bp: TypeBP) => Type;
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
// const _type_bp_lu: Partial<Record<TokenType, TypeBP>> = {};
|
|
15
|
+
// const _type_nud_lu: Partial<Record<TokenType, TypeNudHandler>> = {};
|
|
16
|
+
// const _type_led_lu: Partial<Record<TokenType, TypeLedHandler>> = {};
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
// export function type_led(kind: TokenType, bp: TypeBP, ledFn: TypeLedHandler): void {
|
|
20
|
+
// _type_bp_lu[kind] = bp;
|
|
21
|
+
// _type_led_lu[kind] = ledFn;
|
|
22
|
+
// }
|
|
23
|
+
|
|
24
|
+
// export function type_nud(kind: TokenType, nudFn: TypeNudHandler): void {
|
|
25
|
+
// _type_nud_lu[kind] = nudFn;
|
|
26
|
+
// }
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
// export function createTokenTypeLookups(): void {
|
|
30
|
+
// type_nud(TokenType.IDENTIFIER, parse_symbol_type);
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
// }
|
|
34
|
+
|
|
35
|
+
// createTokenTypeLookups()
|
|
36
|
+
|
|
37
|
+
// function parse_symbol_type(p: Parser): Type {
|
|
38
|
+
// const token = p.expect(TokenType.IDENTIFIER);
|
|
39
|
+
// return new SymbolType(token.value, token.loc, token.range);
|
|
40
|
+
// }
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
// export function parse_type(p: Parser, bp: TypeBP): Type {
|
|
44
|
+
// const tokenKind = p.currentToken().type;
|
|
45
|
+
// const nudFn = _type_nud_lu[tokenKind];
|
|
46
|
+
|
|
47
|
+
// if (!nudFn) {
|
|
48
|
+
// throw new Error(`No nud handler for type token: ${TokenType[tokenKind]}`);
|
|
49
|
+
// }
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
// let left = nudFn(p);
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
// while (p.currentToken().type === TokenType.LEFT_BRACKET) {
|
|
56
|
+
// const leftBracket = p.expect(TokenType.LEFT_BRACKET);
|
|
57
|
+
// p.expect(TokenType.RIGHT_BRACKET);
|
|
58
|
+
// const rightBracket = p.previousToken();
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
// const loc: LocationInterface = {
|
|
62
|
+
// start: left.loc.start,
|
|
63
|
+
// end: rightBracket.loc.end,
|
|
64
|
+
// };
|
|
65
|
+
|
|
66
|
+
// const range: [number, number] = [left.range[0], rightBracket.range[1]];
|
|
67
|
+
|
|
68
|
+
// left = new ArrayType(left, loc, range);
|
|
69
|
+
// }
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
// while ((_type_bp_lu[p.currentToken().type] ?? TypeBP.Default) > bp) {
|
|
73
|
+
// const tokenKind = p.currentToken().type;
|
|
74
|
+
// const ledFn = _type_led_lu[tokenKind];
|
|
75
|
+
// if (!ledFn) {
|
|
76
|
+
// throw new Error(`No led handler for type token: ${TokenType[tokenKind]}`);
|
|
77
|
+
// }
|
|
78
|
+
// left = ledFn(p, left, _type_bp_lu[tokenKind]!);
|
|
79
|
+
// }
|
|
80
|
+
|
|
81
|
+
// return left;
|
|
82
|
+
// }
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
// export {
|
|
86
|
+
// _type_bp_lu as type_bp_lu,
|
|
87
|
+
// _type_nud_lu as type_nud_lu,
|
|
88
|
+
// _type_led_lu as type_led_lu,
|
|
89
|
+
// };
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
// import { ASTNode } from '../ast/ast';
|
|
2
|
+
// import {
|
|
3
|
+
// Statement,
|
|
4
|
+
// BlockStatement,
|
|
5
|
+
// ExpressionStatement,
|
|
6
|
+
// TerminatorStatement
|
|
7
|
+
// } from '../ast/stmt';
|
|
8
|
+
// import { Expression } from '../ast/expr';
|
|
9
|
+
|
|
10
|
+
// type Mapper = (node: ASTNode) => ASTNode;
|
|
11
|
+
|
|
12
|
+
// export function mapAll(node: ASTNode, mapper: Mapper): ASTNode {
|
|
13
|
+
// // Apply mapper to current node first
|
|
14
|
+
// const newNode = mapper(node);
|
|
15
|
+
|
|
16
|
+
// // Recursively map children depending on node type
|
|
17
|
+
// if (newNode instanceof BlockStatement) {
|
|
18
|
+
// return new BlockStatement(
|
|
19
|
+
// newNode.body.map(stmt => mapAll(stmt, mapper) as Statement),
|
|
20
|
+
// newNode.loc,
|
|
21
|
+
// newNode.range
|
|
22
|
+
// );
|
|
23
|
+
// } else if (newNode instanceof ExpressionStatement) {
|
|
24
|
+
// return new ExpressionStatement(
|
|
25
|
+
// mapAll(newNode.expression, mapper) as Expression,
|
|
26
|
+
// newNode.loc,
|
|
27
|
+
// newNode.range
|
|
28
|
+
// );
|
|
29
|
+
// } else if (newNode instanceof TerminatorStatement) {
|
|
30
|
+
// // No children, return as is
|
|
31
|
+
// return newNode;
|
|
32
|
+
// }
|
|
33
|
+
|
|
34
|
+
// // Add other node types here if needed
|
|
35
|
+
|
|
36
|
+
// // Default: no children, return node as is
|
|
37
|
+
// return newNode;
|
|
38
|
+
// }
|
|
39
|
+
|
|
40
|
+
// export function _toJSON(n: ASTNode) {
|
|
41
|
+
// // Just call toJSON directly on root node
|
|
42
|
+
// return n.toJSON();
|
|
43
|
+
// }
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
// import { Statement } from "../ast/stmt";
|
|
2
|
+
import { Stmt } from "../ast/type";
|
|
3
|
+
import { Token, TokenType, TypeName } from "../lexer/tokens";
|
|
4
|
+
import { Parser } from "../parser/parser";
|
|
5
|
+
|
|
6
|
+
// export function stmt_registerParse({
|
|
7
|
+
// consume,
|
|
8
|
+
// handler
|
|
9
|
+
// }: {
|
|
10
|
+
// consume: string[];
|
|
11
|
+
// handler: (...tokens: Token[]) => Statement[];
|
|
12
|
+
// }) {
|
|
13
|
+
// return function parse(p: Parser): Statement[] {
|
|
14
|
+
// const args: Token[] = [];
|
|
15
|
+
|
|
16
|
+
// for (const entry of consume) {
|
|
17
|
+
// if (entry === "_") {
|
|
18
|
+
// p.advance(); // discard
|
|
19
|
+
// continue;
|
|
20
|
+
// }
|
|
21
|
+
|
|
22
|
+
// const typeNames = entry.split("|");
|
|
23
|
+
// const expectedTypes = typeNames.map(name => {
|
|
24
|
+
// const upper = name.toUpperCase();
|
|
25
|
+
// if (!(upper in TokenType)) {
|
|
26
|
+
// throw new Error(`Unknown token type: '${name}'`);
|
|
27
|
+
// }
|
|
28
|
+
// return TokenType[upper as keyof typeof TokenType];
|
|
29
|
+
// });
|
|
30
|
+
|
|
31
|
+
// const token = p.currentToken();
|
|
32
|
+
// if (!token.IsA(...expectedTypes)) {
|
|
33
|
+
// throw new Error(`Expected ${typeNames.join(" or ")}, but got ${TypeName(token.type)}`);
|
|
34
|
+
// }
|
|
35
|
+
|
|
36
|
+
// args.push(token);
|
|
37
|
+
// p.advance();
|
|
38
|
+
// }
|
|
39
|
+
|
|
40
|
+
// return handler(...args);
|
|
41
|
+
// };
|
|
42
|
+
// }
|
|
43
|
+
|
|
44
|
+
export function stmt_registerParse({
|
|
45
|
+
consume,
|
|
46
|
+
handler
|
|
47
|
+
}: {
|
|
48
|
+
consume: (string | string[])[]; // allow optional groups as nested arrays
|
|
49
|
+
handler: (...tokens: Token[]) => Stmt[];
|
|
50
|
+
}) {
|
|
51
|
+
return function parse(p: Parser): Stmt[] {
|
|
52
|
+
const args: Token[] = [];
|
|
53
|
+
|
|
54
|
+
for (const entry of consume) {
|
|
55
|
+
if (entry === "_") {
|
|
56
|
+
p.advance(); // discard
|
|
57
|
+
continue;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
if (Array.isArray(entry)) {
|
|
61
|
+
// Optional group: try to parse it, else reset
|
|
62
|
+
const checkpoint = p.mark();
|
|
63
|
+
|
|
64
|
+
try {
|
|
65
|
+
for (const subEntry of entry) {
|
|
66
|
+
if (subEntry === "_") {
|
|
67
|
+
p.advance(); // discard
|
|
68
|
+
continue;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
const types = subEntry.split("|").map(name => {
|
|
72
|
+
const upper = name.toUpperCase();
|
|
73
|
+
if (!(upper in TokenType)) {
|
|
74
|
+
throw new Error(`Unknown token type: '${name}'`);
|
|
75
|
+
}
|
|
76
|
+
return TokenType[upper as keyof typeof TokenType];
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
const token = p.currentToken();
|
|
80
|
+
if (!token.IsA(...types)) {
|
|
81
|
+
throw new Error();
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
args.push(token);
|
|
85
|
+
p.advance();
|
|
86
|
+
}
|
|
87
|
+
} catch {
|
|
88
|
+
// rollback if any token didn't match
|
|
89
|
+
p.reset(checkpoint);
|
|
90
|
+
continue; // skip optional group
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
continue;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// single token
|
|
97
|
+
const typeNames = entry.split("|");
|
|
98
|
+
const expectedTypes = typeNames.map(name => {
|
|
99
|
+
const upper = name.toUpperCase();
|
|
100
|
+
if (!(upper in TokenType)) {
|
|
101
|
+
throw new Error(`Unknown token type: '${name}'`);
|
|
102
|
+
}
|
|
103
|
+
return TokenType[upper as keyof typeof TokenType];
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
const token = p.currentToken();
|
|
107
|
+
if (!token.IsA(...expectedTypes)) {
|
|
108
|
+
throw new Error(`Expected ${typeNames.join(" or ")}, but got ${TokenType[token.type]}`);
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
args.push(token);
|
|
112
|
+
p.advance();
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
return handler(...args);
|
|
116
|
+
};
|
|
117
|
+
}
|
package/tests/astTest.js
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
// Smoke test: lex and parse a minimal program, then dump the resulting AST.
// Relies on the compiled bundle in ../dist — run `npm run build` first.
const drift = require('../dist/index.js');

// Simplest possible program: a single numeric expression statement.
const program = `5;`;

console.log(JSON.stringify(program))
// Tokenize the input string using drift.Lexer.Tokenize
const tokens = drift.Lexer.Tokenize(program);

// Parse tokens into AST
const ast = drift.Parser.parse(tokens);

// Pretty print the AST with full depth and colors
console.dir(ast, { depth: null, colors: true });
|
|
14
|
+
|
|
15
|
+
// function mapAll(node, mapper) {
|
|
16
|
+
// // Apply mapper to current node first
|
|
17
|
+
// const newNode = mapper(node);
|
|
18
|
+
|
|
19
|
+
// // Recursively map children depending on node type
|
|
20
|
+
// if (newNode.constructor.name === 'BlockStatement') {
|
|
21
|
+
// return new BlockStatement(
|
|
22
|
+
// newNode.body.map(stmt => mapAll(stmt, mapper)),
|
|
23
|
+
// newNode.loc,
|
|
24
|
+
// newNode.range
|
|
25
|
+
// );
|
|
26
|
+
// } else if (newNode.constructor.name === 'ExpressionStatement') {
|
|
27
|
+
// return new ExpressionStatement(
|
|
28
|
+
// mapAll(newNode.expression, mapper),
|
|
29
|
+
// newNode.loc,
|
|
30
|
+
// newNode.range
|
|
31
|
+
// );
|
|
32
|
+
// } else if (newNode.constructor.name === 'TerminatorStatement') {
|
|
33
|
+
// // No children, return as is
|
|
34
|
+
// return newNode;
|
|
35
|
+
// }
|
|
36
|
+
|
|
37
|
+
// // Add other node types here if needed
|
|
38
|
+
|
|
39
|
+
// // Default: no children, return node as is
|
|
40
|
+
// return newNode;
|
|
41
|
+
// }
|
|
42
|
+
|
|
43
|
+
// const op = mapAll(ast, m => m.toJSON())
|
|
44
|
+
// require('fs').writeFileSync('./ast.json', JSON.stringify(op, null, 2))
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
// Manual lexer experiment: tokenize a snippet, optionally patch a token in
// place (see updateTokenAtIndex below), and re-print the token stream.
// Relies on the compiled bundle in ../dist.
const drift = require('../dist/index.js');

const code = 'let b = 5;'
const tokens = drift.Lexer.Tokenize(code)
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
// Replaces the value of tokenArray[index] in place and repairs the stream's
// positions: the edited token gets a fresh range and loc.end, and every later
// token has its range shifted by the length difference. Column shifting for
// later tokens is an approximation — only tokens whose line equals the edited
// token's (new) end line get their columns moved; tokens on later lines only
// have their line numbers shifted by the number of newlines introduced.
// Throws on an out-of-bounds index. Mutates tokenArray.
function updateTokenAtIndex(tokenArray, index, newValue) {
    if (index < 0 || index >= tokenArray.length) {
        throw new Error("Index out of bounds");
    }

    const target = tokenArray[index];

    // Anchor points that survive the edit.
    const rangeStart = target.range[0];
    const firstLine = target.loc.start.line;
    const firstColumn = target.loc.start.column;

    // Size delta must be computed before the range is rewritten.
    const delta = newValue.length - (target.range[1] - target.range[0]);
    const pieces = newValue.split('\n');
    const addedLines = pieces.length - 1;

    // Rewrite the edited token in place.
    target.value = newValue;
    target.range = [rangeStart, rangeStart + newValue.length];
    target.loc.end = addedLines === 0
        ? { line: firstLine, column: firstColumn + newValue.length }
        : { line: firstLine + addedLines, column: pieces[pieces.length - 1].length };

    // All subsequent comparisons are against the UPDATED end line.
    const boundaryLine = target.loc.end.line;

    // Shift every token that follows the edited one.
    for (const follower of tokenArray.slice(index + 1)) {
        follower.range = [follower.range[0] + delta, follower.range[1] + delta];

        // loc.start: same line as the edit's end → shift column; a later
        // line → shift the line number by the newlines introduced.
        if (follower.loc.start.line === boundaryLine) {
            follower.loc.start = {
                line: follower.loc.start.line,
                column: follower.loc.start.column + delta,
            };
        } else if (follower.loc.start.line > boundaryLine) {
            follower.loc.start = {
                line: follower.loc.start.line + addedLines,
                column: follower.loc.start.column,
            };
        }

        // loc.end: identical rules.
        if (follower.loc.end.line === boundaryLine) {
            follower.loc.end = {
                line: follower.loc.end.line,
                column: follower.loc.end.column + delta,
            };
        } else if (follower.loc.end.line > boundaryLine) {
            follower.loc.end = {
                line: follower.loc.end.line + addedLines,
                column: follower.loc.end.column,
            };
        }
    }
}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
// updateTokenAtIndex(tokens, 0, 'latest!value')
|
|
86
|
+
|
|
87
|
+
// const cj = tokens.slice(0,-1).map(t => t.value).join('')
|
|
88
|
+
// console.log(tokens, JSON.stringify(cj).slice(1, -1));
|
|
89
|
+
|
|
90
|
+
// Render the (possibly modified) token stream back to source text.
console.log(drift.Lexer.Print(tokens, {

}))
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2020",
|
|
4
|
+
"module": "ESNext",
|
|
5
|
+
"declaration": true,
|
|
6
|
+
"emitDeclarationOnly": true,
|
|
7
|
+
"outDir": "dist",
|
|
8
|
+
"strict": true,
|
|
9
|
+
"moduleResolution": "node",
|
|
10
|
+
"esModuleInterop": true,
|
|
11
|
+
"skipLibCheck": true,
|
|
12
|
+
"forceConsistentCasingInFileNames": true
|
|
13
|
+
},
|
|
14
|
+
"include": ["index.ts", "src/**/*"]
|
|
15
|
+
}
|