@ascent-lang/dev 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/errors/elaborate.d.ts +24 -0
- package/dist/errors/elaborate.d.ts.map +1 -0
- package/dist/errors/elaborate.js +53 -0
- package/dist/errors/elaborate.js.map +1 -0
- package/dist/errors/index.d.ts.map +1 -1
- package/dist/errors/index.js +356 -30
- package/dist/errors/index.js.map +1 -1
- package/dist/errors/render.d.ts +3 -0
- package/dist/errors/render.d.ts.map +1 -0
- package/dist/errors/render.js +43 -0
- package/dist/errors/render.js.map +1 -0
- package/dist/errors/types.d.ts +29 -0
- package/dist/errors/types.d.ts.map +1 -1
- package/dist/index.js +21 -28
- package/dist/index.js.map +1 -1
- package/dist/interpreter.d.ts.map +1 -1
- package/dist/interpreter.js +28 -5
- package/dist/interpreter.js.map +1 -1
- package/dist/lexer/index.d.ts.map +1 -1
- package/dist/lexer/index.js +4 -3
- package/dist/lexer/index.js.map +1 -1
- package/dist/lexer/keywords.d.ts.map +1 -1
- package/dist/lexer/keywords.js +3 -0
- package/dist/lexer/keywords.js.map +1 -1
- package/dist/lexer/token.d.ts +7 -1
- package/dist/lexer/token.d.ts.map +1 -1
- package/dist/lib.d.ts +3 -3
- package/dist/lib.d.ts.map +1 -1
- package/dist/lib.js +11 -6
- package/dist/lib.js.map +1 -1
- package/dist/parser/ast.d.ts +8 -4
- package/dist/parser/ast.d.ts.map +1 -1
- package/dist/parser/expr.d.ts.map +1 -1
- package/dist/parser/expr.js +34 -19
- package/dist/parser/expr.js.map +1 -1
- package/dist/parser/index.d.ts +3 -5
- package/dist/parser/index.d.ts.map +1 -1
- package/dist/parser/index.js +26 -33
- package/dist/parser/index.js.map +1 -1
- package/dist/parser/printer.d.ts +1 -0
- package/dist/parser/printer.d.ts.map +1 -1
- package/dist/parser/printer.js +21 -0
- package/dist/parser/printer.js.map +1 -1
- package/dist/parser/stmt.d.ts.map +1 -1
- package/dist/parser/stmt.js +5 -3
- package/dist/parser/stmt.js.map +1 -1
- package/dist/parser/token-stream.d.ts +4 -4
- package/dist/parser/token-stream.d.ts.map +1 -1
- package/dist/parser/token-stream.js +21 -9
- package/dist/parser/token-stream.js.map +1 -1
- package/dist/parser/type-expr.d.ts +1 -1
- package/dist/parser/type-expr.d.ts.map +1 -1
- package/dist/parser/type-expr.js +11 -4
- package/dist/parser/type-expr.js.map +1 -1
- package/dist/parser/typechecker.d.ts +2 -2
- package/dist/parser/typechecker.d.ts.map +1 -1
- package/dist/parser/typechecker.js +109 -67
- package/dist/parser/typechecker.js.map +1 -1
- package/dist/types/types.d.ts +4 -0
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/types.js +27 -15
- package/dist/types/types.js.map +1 -1
- package/package.json +1 -1
- package/src/errors/elaborate.ts +88 -0
- package/src/errors/index.ts +356 -30
- package/src/errors/lexical.yml +48 -13
- package/src/errors/name.yml +45 -9
- package/src/errors/render.ts +59 -0
- package/src/errors/syntactic.yml +128 -49
- package/src/errors/typechecker.yml +147 -61
- package/src/errors/types.ts +55 -0
- package/src/index.ts +20 -30
- package/src/interpreter.ts +24 -6
- package/src/lexer/index.ts +4 -3
- package/src/lexer/keywords.ts +3 -0
- package/src/lexer/token.ts +18 -0
- package/src/lib.ts +12 -7
- package/src/parser/ast.ts +7 -6
- package/src/parser/expr.ts +34 -19
- package/src/parser/index.ts +32 -32
- package/src/parser/printer.ts +22 -0
- package/src/parser/stmt.ts +5 -3
- package/src/parser/token-stream.ts +20 -8
- package/src/parser/type-expr.ts +10 -4
- package/src/parser/typechecker.ts +142 -54
- package/src/types/types.ts +36 -16
package/src/interpreter.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { BinaryOp } from './parser/ast.js';
|
|
2
2
|
import type { TypedExpr, TypedBlock, TypedStatement, TypedProgram } from './parser/typed-ast.js';
|
|
3
|
-
import type
|
|
3
|
+
import { INT_TYPE, subtype, type AscentType } from './types/types.js';
|
|
4
4
|
|
|
5
5
|
export type RuntimeValue = (
|
|
6
6
|
| { type: 'Int'; value: bigint }
|
|
@@ -54,11 +54,11 @@ export class Environment {
|
|
|
54
54
|
}
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
-
// Coerce a runtime value to match a target type
|
|
58
|
-
//
|
|
57
|
+
// Coerce a runtime value to match a target type, per the witness `subtype`
|
|
58
|
+
// produces — currently only Int <: Float, so only an Int value ever moves.
|
|
59
59
|
// All other type conversions are explicit (methods like toFloat/toInt).
|
|
60
60
|
const coerce = (v: RuntimeValue, targetType: AscentType): RuntimeValue => {
|
|
61
|
-
if (
|
|
61
|
+
if (v.type === 'Int' && subtype(INT_TYPE, targetType) === 'intToFloat') {
|
|
62
62
|
return { type: 'Float', value: Number(v.value) };
|
|
63
63
|
}
|
|
64
64
|
return v;
|
|
@@ -77,7 +77,8 @@ export const evaluateExpr = (expr: TypedExpr, env: Environment): RuntimeValue =>
|
|
|
77
77
|
}
|
|
78
78
|
}
|
|
79
79
|
case 'slot': {
|
|
80
|
-
//
|
|
80
|
+
// Name-binding errors (N0001–N0003) are caught at type-check time; this
|
|
81
|
+
// is an internal guard.
|
|
81
82
|
const value = env.get(expr.name);
|
|
82
83
|
if (value === undefined) throw new Error(`internal: unbound slot '${expr.name}'`);
|
|
83
84
|
return value;
|
|
@@ -119,12 +120,29 @@ export const evaluateExpr = (expr: TypedExpr, env: Environment): RuntimeValue =>
|
|
|
119
120
|
}
|
|
120
121
|
case 'unary': {
|
|
121
122
|
const operand = evaluateExpr(expr.operand, env);
|
|
123
|
+
if (expr.op === 'not') {
|
|
124
|
+
if (operand.type !== 'Bool') throw new Error(`internal: 'not' on ${operand.type}`);
|
|
125
|
+
return { type: 'Bool', value: !operand.value };
|
|
126
|
+
}
|
|
122
127
|
if (operand.type === 'Int') return { type: 'Int', value: -operand.value };
|
|
123
128
|
if (operand.type === 'Float') return { type: 'Float', value: -operand.value };
|
|
124
129
|
throw new Error(`internal: unary '-' on ${operand.type}`);
|
|
125
130
|
}
|
|
126
|
-
case 'binary':
|
|
131
|
+
case 'binary': {
|
|
132
|
+
// 'and'/'or' short-circuit: the left operand alone can decide the
|
|
133
|
+
// result ('False and e' / 'True or e'), so 'e' is only evaluated
|
|
134
|
+
// when it's still needed — the same laziness every mainstream
|
|
135
|
+
// language gives its logical operators.
|
|
136
|
+
if (expr.op === 'and' || expr.op === 'or') {
|
|
137
|
+
const left = evaluateExpr(expr.left, env);
|
|
138
|
+
if (left.type !== 'Bool') throw new Error(`internal: '${expr.op}' on non-Bool`);
|
|
139
|
+
if (expr.op === 'and' ? !left.value : left.value) return left;
|
|
140
|
+
const right = evaluateExpr(expr.right, env);
|
|
141
|
+
if (right.type !== 'Bool') throw new Error(`internal: '${expr.op}' on non-Bool`);
|
|
142
|
+
return right;
|
|
143
|
+
}
|
|
127
144
|
return evaluateBinary(expr.op, evaluateExpr(expr.left, env), evaluateExpr(expr.right, env));
|
|
145
|
+
}
|
|
128
146
|
case 'block': {
|
|
129
147
|
return evaluateBlock(expr, env);
|
|
130
148
|
}
|
package/src/lexer/index.ts
CHANGED
|
@@ -114,13 +114,14 @@ export class Lexer {
|
|
|
114
114
|
return this.readWord();
|
|
115
115
|
}
|
|
116
116
|
|
|
117
|
-
// A leading-dot float like .5
|
|
118
|
-
//
|
|
117
|
+
// A leading-dot float like .5 is a number missing its integer part, so
|
|
118
|
+
// L0004 (its own error, with a certain '0.5' fix) is more helpful than
|
|
119
|
+
// L0001 ("unexpected character") or L0002 (a number run into letters).
|
|
119
120
|
if (ch === '.' && isDigit(this.c.peek(1))) {
|
|
120
121
|
const start = this.c.mark();
|
|
121
122
|
this.c.advance(); // '.'
|
|
122
123
|
this.consumeWhile(isDigit);
|
|
123
|
-
return this.error('
|
|
124
|
+
return this.error('L0004', this.c.spanFrom(start));
|
|
124
125
|
}
|
|
125
126
|
|
|
126
127
|
const start = this.c.mark();
|
package/src/lexer/keywords.ts
CHANGED
package/src/lexer/token.ts
CHANGED
|
@@ -12,6 +12,9 @@ export type TokenKind =
|
|
|
12
12
|
| 'SLASH' // '/', always real division — yields a Float
|
|
13
13
|
| 'KW_DIV' // the keyword div — Int-only floor division
|
|
14
14
|
| 'KW_MOD' // the keyword mod — Int-only floored modulo
|
|
15
|
+
| 'KW_AND' // the keyword and — Bool-only logical and, short-circuits
|
|
16
|
+
| 'KW_OR' // the keyword or — Bool-only logical or, short-circuits
|
|
17
|
+
| 'KW_NOT' // the keyword not — Bool-only prefix negation
|
|
15
18
|
| 'KW_FIX' // the keyword fix — declares a fixed slot
|
|
16
19
|
| 'KW_MUT' // the keyword mut — declares a mutable slot
|
|
17
20
|
| 'KW_IF' // the keyword if — starts a conditional expression
|
|
@@ -50,9 +53,24 @@ export interface Span {
|
|
|
50
53
|
end: Position; // exclusive — points one past the last character
|
|
51
54
|
}
|
|
52
55
|
|
|
56
|
+
// A supporting span a stage attaches to a marker — e.g. the earlier
|
|
57
|
+
// declaration a "can't reassign" error refers back to. `key` names the span's
|
|
58
|
+
// role; the matching label (prose) lives in the error's .yml row, keyed the
|
|
59
|
+
// same way, so no stage holds a user-facing sentence.
|
|
60
|
+
export interface RelatedMarker {
|
|
61
|
+
key: string;
|
|
62
|
+
span: Span;
|
|
63
|
+
}
|
|
64
|
+
|
|
53
65
|
export interface Marker {
|
|
54
66
|
code: string;
|
|
55
67
|
span: Span;
|
|
68
|
+
related?: RelatedMarker[];
|
|
69
|
+
// Named values a stage knows but the source can't reconstruct — chiefly the
|
|
70
|
+
// type names in a type error ('Int', 'String'). Interpolated into the
|
|
71
|
+
// message/explanation as {key}. Kept as strings so the checker never holds a
|
|
72
|
+
// sentence, only the words that fill the blanks.
|
|
73
|
+
data?: Record<string, string>;
|
|
56
74
|
}
|
|
57
75
|
|
|
58
76
|
export interface Token {
|
package/src/lib.ts
CHANGED
|
@@ -1,20 +1,25 @@
|
|
|
1
1
|
// Public programmatic API for the Ascent language toolchain.
|
|
2
|
-
// The `ascent` CLI (see index.ts) is the primary entry point
|
|
3
|
-
//
|
|
2
|
+
// The `ascent` CLI (see index.ts) is the primary entry point. Embedders
|
|
3
|
+
// wanting source-to-typed-AST in one call should use parse():
|
|
4
|
+
//
|
|
5
|
+
// const { typedProgram, errorMarkers } = parse(src);
|
|
6
|
+
// const result = executeProgram(typedProgram!, new Environment());
|
|
7
|
+
//
|
|
8
|
+
// The individual stages are also re-exported for tools that need
|
|
9
|
+
// intermediate results (e.g. tokens, or the untyped AST):
|
|
4
10
|
//
|
|
5
11
|
// const { tokens, errorMarkers } = new Lexer(src).tokenize();
|
|
6
|
-
// const { program } =
|
|
12
|
+
// const { program } = parseTokens(tokens);
|
|
7
13
|
// const { typedProgram } = typecheck(program!);
|
|
8
|
-
// const result = executeProgram(typedProgram!, new Environment());
|
|
9
14
|
|
|
10
15
|
export { Lexer } from './lexer/index.js';
|
|
11
16
|
export type { LexResult } from './lexer/index.js';
|
|
12
17
|
|
|
13
|
-
export {
|
|
18
|
+
export { parse, parseTokens } from './parser/index.js';
|
|
14
19
|
export type { ParseResult } from './parser/index.js';
|
|
15
20
|
|
|
16
21
|
export { typecheck } from './parser/typechecker.js';
|
|
17
|
-
export type {
|
|
22
|
+
export type { TypedResult } from './parser/typechecker.js';
|
|
18
23
|
|
|
19
24
|
export {
|
|
20
25
|
Environment,
|
|
@@ -24,7 +29,7 @@ export {
|
|
|
24
29
|
} from './interpreter.js';
|
|
25
30
|
export type { RuntimeValue, AssignResult } from './interpreter.js';
|
|
26
31
|
|
|
27
|
-
export { formatExpr, formatStmt, formatValue } from './parser/printer.js';
|
|
32
|
+
export { formatExpr, formatStmt, formatValue, valueToString } from './parser/printer.js';
|
|
28
33
|
export { formatTypedStmt } from './parser/typed-printer.js';
|
|
29
34
|
|
|
30
35
|
export * from './types/types.js';
|
package/src/parser/ast.ts
CHANGED
|
@@ -2,9 +2,9 @@ import type { Span } from '../lexer/token.js';
|
|
|
2
2
|
|
|
3
3
|
// TypeExpr is the AST node for a type written in source code.
|
|
4
4
|
// It carries span information so the type checker can point at it in errors.
|
|
5
|
-
export type
|
|
6
|
-
|
|
7
|
-
|
|
5
|
+
export type TypeName = { kind: 'TypeName'; name: 'Int' | 'Float' | 'Bool' | 'String'; span: Span };
|
|
6
|
+
export type ListType = { kind: 'ListType'; elem: TypeExpr; span: Span };
|
|
7
|
+
export type TypeExpr = TypeName | ListType;
|
|
8
8
|
|
|
9
9
|
export type Literal = (
|
|
10
10
|
| { kind: 'literal'; valueType: 'Int'; value: bigint; span: Span }
|
|
@@ -15,10 +15,11 @@ export type Literal = (
|
|
|
15
15
|
| { kind: 'literal'; valueType: 'Done'; span: Span }
|
|
16
16
|
);
|
|
17
17
|
|
|
18
|
-
export type UnaryOp = '-';
|
|
18
|
+
export type UnaryOp = '-' | 'not';
|
|
19
19
|
export type ArithmeticOp = '+' | '-' | '*' | '/' | 'div' | 'mod';
|
|
20
20
|
export type ComparisonOp = '==' | '!=' | '<' | '<=' | '>' | '>=';
|
|
21
|
-
export type
|
|
21
|
+
export type BooleanOp = 'and' | 'or';
|
|
22
|
+
export type BinaryOp = ArithmeticOp | ComparisonOp | BooleanOp;
|
|
22
23
|
|
|
23
24
|
// A block is itself an expression — it yields the value of its last
|
|
24
25
|
// statement, or Done when empty (the '{}' unit value).
|
|
@@ -53,7 +54,7 @@ export type Expr = (
|
|
|
53
54
|
export type Statement = (
|
|
54
55
|
| { kind: 'fix'; name: string; typeAnnotation: TypeExpr | null; init: Expr; span: Span }
|
|
55
56
|
| { kind: 'mut'; name: string; typeAnnotation: TypeExpr | null; init: Expr; span: Span }
|
|
56
|
-
| { kind: 'assign'; name: string; value: Expr; span: Span }
|
|
57
|
+
| { kind: 'assign'; name: string; nameSpan: Span; value: Expr; span: Span }
|
|
57
58
|
| { kind: 'expr'; expr: Expr; span: Span }
|
|
58
59
|
| { kind: 'while'; cond: Expr; body: Block; span: Span }
|
|
59
60
|
);
|
package/src/parser/expr.ts
CHANGED
|
@@ -22,14 +22,20 @@ import { parseBlock, parseIf } from './stmt.js';
|
|
|
22
22
|
// This ladder is the single source of truth for what binds tighter
|
|
23
23
|
// than what: postfix (`.method()`, `[index]`) binds tightest, then
|
|
24
24
|
// unary '-', then '*'/'/'/'div'/'mod', then '+'/'-', then the
|
|
25
|
-
// comparisons,
|
|
25
|
+
// comparisons, then 'not', then 'and', then 'or', loosest —
|
|
26
|
+
// the word operators sit below the comparisons (§5 of design.md), so
|
|
27
|
+
// `a == b and c == d` groups as `(a == b) and (c == d)`, never
|
|
28
|
+
// `a == (b and c) == d`. Every table below is keyed off these numbers
|
|
26
29
|
// instead of inlining its own.
|
|
27
30
|
const BP = {
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
OR: 1,
|
|
32
|
+
AND: 2,
|
|
33
|
+
NOT: 3,
|
|
34
|
+
COMPARISON: 4,
|
|
35
|
+
ADDITIVE: 5,
|
|
36
|
+
MULTIPLICATIVE: 6,
|
|
37
|
+
UNARY: 7,
|
|
38
|
+
POSTFIX: 7,
|
|
33
39
|
} as const;
|
|
34
40
|
|
|
35
41
|
// Every binary operator this parser knows about has one row in this
|
|
@@ -40,7 +46,12 @@ const BP = {
|
|
|
40
46
|
// `(1 + 2) < (3 * 4)`. Comparisons are also marked `assoc: 'none'`:
|
|
41
47
|
// unlike '+' or '*', two of them can never sit side by side
|
|
42
48
|
// (`a < b < c` is rejected, not silently grouped one way or the other).
|
|
49
|
+
// 'or' sits one tier below 'and'. Within its own tier each one chains
|
|
50
|
+
// left-associatively, the way '+'/'-' do — so `a or b or c` groups as
|
|
51
|
+
// `(a or b) or c`.
|
|
43
52
|
const INFIX_OPS: Partial<Record<TokenKind, { op: BinaryOp; bp: number; assoc: 'left' | 'none' }>> = {
|
|
53
|
+
KW_OR: { op: 'or', bp: BP.OR, assoc: 'left' },
|
|
54
|
+
KW_AND: { op: 'and', bp: BP.AND, assoc: 'left' },
|
|
44
55
|
EQ_EQ: { op: '==', bp: BP.COMPARISON, assoc: 'none' },
|
|
45
56
|
BANG_EQ: { op: '!=', bp: BP.COMPARISON, assoc: 'none' },
|
|
46
57
|
LT: { op: '<', bp: BP.COMPARISON, assoc: 'none' },
|
|
@@ -66,12 +77,14 @@ const POSTFIX_OPS: Partial<Record<TokenKind, { bp: number }>> = {
|
|
|
66
77
|
LBRACKET: { bp: BP.POSTFIX },
|
|
67
78
|
};
|
|
68
79
|
|
|
69
|
-
// Prefix table —
|
|
70
|
-
//
|
|
71
|
-
//
|
|
72
|
-
// than
|
|
80
|
+
// Prefix table — the Pratt parser's other operator kind (a "nud" that
|
|
81
|
+
// still takes an operand, parsed in parseAtom below). Unary '-' binds
|
|
82
|
+
// tight, at the same tier as postfix; 'not' binds much looser — tighter
|
|
83
|
+
// than 'and'/'or' but looser than the comparisons — which is what makes
|
|
84
|
+
// `not a == b` parse as `not (a == b)` rather than `(not a) == b`.
|
|
73
85
|
const PREFIX_OPS: Partial<Record<TokenKind, { op: UnaryOp; bp: number }>> = {
|
|
74
86
|
MINUS: { op: '-', bp: BP.UNARY },
|
|
87
|
+
KW_NOT: { op: 'not', bp: BP.NOT },
|
|
75
88
|
};
|
|
76
89
|
|
|
77
90
|
export function parseExpr(ts: TokenStream, minBp = 0): Expr | null {
|
|
@@ -152,12 +165,14 @@ function parseMethodCall(ts: TokenStream, receiver: Expr): Expr | null {
|
|
|
152
165
|
ts.advance(); // consume method name
|
|
153
166
|
|
|
154
167
|
if (ts.peek().kind !== 'LPAREN') {
|
|
155
|
-
|
|
168
|
+
// A missing '(' here is not an unclosed group — the call's argument list
|
|
169
|
+
// never opened — so it's its own error, not S0001.
|
|
170
|
+
ts.report('S0014', ts.peek().span);
|
|
156
171
|
return null;
|
|
157
172
|
}
|
|
158
|
-
ts.advance(); // consume '('
|
|
173
|
+
const openParen = ts.advance(); // consume '('
|
|
159
174
|
|
|
160
|
-
const parsed = ts.parseSeparated(() => parseExpr(ts), 'COMMA', 'RPAREN', 'S0001');
|
|
175
|
+
const parsed = ts.parseSeparated(() => parseExpr(ts), 'COMMA', 'RPAREN', 'S0001', false, openParen.span);
|
|
161
176
|
if (parsed === null) return null;
|
|
162
177
|
|
|
163
178
|
return {
|
|
@@ -171,12 +186,12 @@ function parseMethodCall(ts: TokenStream, receiver: Expr): Expr | null {
|
|
|
171
186
|
|
|
172
187
|
// 'list[index]' — LBRACKET already confirmed on lookahead by the Pratt loop.
|
|
173
188
|
function parseIndex(ts: TokenStream, list: Expr): Expr | null {
|
|
174
|
-
ts.advance(); // consume '['
|
|
189
|
+
const openBracket = ts.advance(); // consume '['
|
|
175
190
|
|
|
176
191
|
const index = parseExpr(ts);
|
|
177
192
|
if (index === null) return null;
|
|
178
193
|
|
|
179
|
-
const rbracket = ts.expect('RBRACKET', 'S0013');
|
|
194
|
+
const rbracket = ts.expect('RBRACKET', 'S0013', [{ key: 'opener', span: openBracket.span }]);
|
|
180
195
|
if (rbracket === null) return null;
|
|
181
196
|
|
|
182
197
|
return {
|
|
@@ -255,7 +270,7 @@ function parseAtom(ts: TokenStream): Expr | null {
|
|
|
255
270
|
}
|
|
256
271
|
const closing = ts.peek();
|
|
257
272
|
if (closing.kind !== 'RPAREN') {
|
|
258
|
-
ts.report('S0001', closing.span);
|
|
273
|
+
ts.report('S0001', closing.span, [{ key: 'opener', span: tok.span }]);
|
|
259
274
|
return null;
|
|
260
275
|
}
|
|
261
276
|
ts.advance(); // consume ')'
|
|
@@ -291,8 +306,8 @@ function parseAtom(ts: TokenStream): Expr | null {
|
|
|
291
306
|
|
|
292
307
|
// 'name(arg, arg, …)' — callee token already consumed by parseAtom.
|
|
293
308
|
function parseCall(ts: TokenStream, callee: Token): Expr | null {
|
|
294
|
-
ts.advance(); // consume '('
|
|
295
|
-
const parsed = ts.parseSeparated(() => parseExpr(ts), 'COMMA', 'RPAREN', 'S0001');
|
|
309
|
+
const openParen = ts.advance(); // consume '('
|
|
310
|
+
const parsed = ts.parseSeparated(() => parseExpr(ts), 'COMMA', 'RPAREN', 'S0001', false, openParen.span);
|
|
296
311
|
if (parsed === null) return null;
|
|
297
312
|
|
|
298
313
|
return {
|
|
@@ -306,7 +321,7 @@ function parseCall(ts: TokenStream, callee: Token): Expr | null {
|
|
|
306
321
|
// '[' expr, expr, … ']' — list literal. Already peeked '[' in parseAtom.
|
|
307
322
|
function parseList(ts: TokenStream): Expr | null {
|
|
308
323
|
const openTok = ts.advance(); // consume '['
|
|
309
|
-
const parsed = ts.parseSeparated(() => parseExpr(ts), 'COMMA', 'RBRACKET', 'S0013');
|
|
324
|
+
const parsed = ts.parseSeparated(() => parseExpr(ts), 'COMMA', 'RBRACKET', 'S0013', false, openTok.span);
|
|
310
325
|
if (parsed === null) return null;
|
|
311
326
|
|
|
312
327
|
return { kind: 'list', elements: parsed.items, span: { start: openTok.span.start, end: parsed.close.span.end } };
|
package/src/parser/index.ts
CHANGED
|
@@ -1,50 +1,50 @@
|
|
|
1
1
|
import type { Token, Marker } from '../lexer/token.js';
|
|
2
|
-
import
|
|
2
|
+
import { Lexer } from '../lexer/index.js';
|
|
3
|
+
import type { Program } from './ast.js';
|
|
3
4
|
import { TokenStream } from './token-stream.js';
|
|
4
5
|
import { parseStmt } from './stmt.js';
|
|
5
|
-
import {
|
|
6
|
+
import { parseArgsSection } from './type-expr.js';
|
|
7
|
+
import { typecheck } from './typechecker.js';
|
|
8
|
+
import type { TypedResult } from './typechecker.js';
|
|
6
9
|
|
|
7
10
|
export interface ParseResult {
|
|
8
11
|
program: Program | null;
|
|
9
12
|
errorMarkers: Marker[];
|
|
10
13
|
}
|
|
11
14
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
function parseProgram(ts: TokenStream): Program | null {
|
|
18
|
-
let args: ArgDef[] = [];
|
|
19
|
-
if (ts.peek().kind === 'KW_ARGS') {
|
|
20
|
-
const result = parseArgs(ts);
|
|
21
|
-
if (result === null) return null;
|
|
22
|
-
args = result;
|
|
23
|
-
|
|
24
|
-
if (ts.expect('SEMICOLON', 'S0011') === null) return null;
|
|
15
|
+
export const parseTokens = (tokens: Token[]): ParseResult => {
|
|
16
|
+
const ts = new TokenStream(tokens);
|
|
17
|
+
const args = parseArgsSection(ts);
|
|
18
|
+
if (args === null) {
|
|
19
|
+
return { program: null, errorMarkers: ts.errors };
|
|
25
20
|
}
|
|
26
21
|
|
|
27
|
-
const parsed = ts.parseSeparated(
|
|
28
|
-
|
|
22
|
+
const parsed = ts.parseSeparated(
|
|
23
|
+
() => parseStmt(ts), 'SEMICOLON', 'EOF', 'S0011', true
|
|
24
|
+
);
|
|
29
25
|
|
|
30
|
-
|
|
31
|
-
}
|
|
26
|
+
if (parsed === null) {
|
|
27
|
+
return { program: null, errorMarkers: ts.errors };
|
|
28
|
+
}
|
|
32
29
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
// rest of the toolchain calls.
|
|
38
|
-
export class Parser {
|
|
39
|
-
private readonly tokens: Token[];
|
|
30
|
+
const program: Program = {
|
|
31
|
+
args,
|
|
32
|
+
stmts: parsed.items
|
|
33
|
+
};
|
|
40
34
|
|
|
41
|
-
|
|
42
|
-
|
|
35
|
+
return { program, errorMarkers: ts.errors };
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
export const parse = (src: string): TypedResult => {
|
|
39
|
+
const lexResult = new Lexer(src).tokenize();
|
|
40
|
+
if (lexResult.errorMarkers.length > 0) {
|
|
41
|
+
return { typedProgram: null, errorMarkers: lexResult.errorMarkers };
|
|
43
42
|
}
|
|
44
43
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
return { program, errorMarkers: ts.errors };
|
|
44
|
+
const parseResult = parseTokens(lexResult.tokens);
|
|
45
|
+
if (parseResult.program === null || parseResult.errorMarkers.length > 0) {
|
|
46
|
+
return { typedProgram: null, errorMarkers: parseResult.errorMarkers };
|
|
49
47
|
}
|
|
48
|
+
|
|
49
|
+
return typecheck(parseResult.program);
|
|
50
50
|
}
|
package/src/parser/printer.ts
CHANGED
|
@@ -144,3 +144,25 @@ export const formatValue = (value: RuntimeValue): string => {
|
|
|
144
144
|
return chalk.yellow('Done');
|
|
145
145
|
}
|
|
146
146
|
};
|
|
147
|
+
|
|
148
|
+
/**
 * Render a runtime value as plain text.
 *
 * Unlike `formatValue`, nothing here is wrapped in chalk styling; the result
 * is the bare textual form of the value:
 *
 * - `Int`    — the decimal digits of the bigint.
 * - `Float`  — JavaScript's own number-to-string form, with `.0` appended
 *              when that form is a bare (optionally signed) run of digits,
 *              so a whole-valued Float still reads as a Float.
 * - `Bool`   — `True` / `False`.
 * - `String` — the string's contents as-is (no quotes added, also when
 *              nested inside a list).
 * - `List`   — `[a, b, c]`, each element rendered recursively.
 * - `None` / `Done` — the literal words.
 *
 * @param value The runtime value to render.
 * @returns The plain-text rendering.
 */
export const valueToString = (value: RuntimeValue): string => {
  switch (value.type) {
    case 'Int':
      return String(value.value);
    case 'Float': {
      // Braces keep `floatStr` scoped to this clause instead of the whole switch.
      const floatStr = String(value.value);
      // Only a plain digit run needs the '.0' marker. Exponent forms ('1e+21',
      // '1e-7'), 'NaN' and '±Infinity' contain no '.', but tacking '.0' onto
      // them would yield nonsense such as '1e+21.0' or 'NaN.0'.
      return /^-?\d+$/.test(floatStr) ? `${floatStr}.0` : floatStr;
    }
    case 'Bool':
      return value.value ? 'True' : 'False';
    case 'String':
      return value.value;
    case 'List': {
      const items = value.elements.map(valueToString).join(', ');
      return `[${items}]`;
    }
    case 'None':
      return 'None';
    case 'Done':
      return 'Done';
  }
};
|
package/src/parser/stmt.ts
CHANGED
|
@@ -14,7 +14,7 @@ import { parseTypeExpr } from './type-expr.js';
|
|
|
14
14
|
// this consumes its own.
|
|
15
15
|
export function parseBlock(ts: TokenStream, openTok?: Token): Block | null {
|
|
16
16
|
openTok ??= ts.advance(); // consume '{' unless already consumed
|
|
17
|
-
const parsed = ts.parseSeparated(() => parseStmt(ts), 'SEMICOLON', 'RBRACE', 'S0005', true);
|
|
17
|
+
const parsed = ts.parseSeparated(() => parseStmt(ts), 'SEMICOLON', 'RBRACE', 'S0005', true, openTok.span);
|
|
18
18
|
if (parsed === null) return null;
|
|
19
19
|
|
|
20
20
|
return { kind: 'block', stmts: parsed.items, span: { start: openTok.span.start, end: parsed.close.span.end } };
|
|
@@ -24,14 +24,15 @@ export function parseBlock(ts: TokenStream, openTok?: Token): Block | null {
|
|
|
24
24
|
// The body braces already delimit the construct, but the test stays
|
|
25
25
|
// parenthesized to match the C-family/TS surface (§5).
|
|
26
26
|
function parseCond(ts: TokenStream): Expr | null {
|
|
27
|
-
|
|
27
|
+
const open = ts.expect('LPAREN', 'S0006');
|
|
28
|
+
if (open === null) return null;
|
|
28
29
|
|
|
29
30
|
const cond = parseExpr(ts);
|
|
30
31
|
if (cond === null) {
|
|
31
32
|
return null;
|
|
32
33
|
}
|
|
33
34
|
|
|
34
|
-
if (ts.expect('RPAREN', 'S0001') === null) return null;
|
|
35
|
+
if (ts.expect('RPAREN', 'S0001', [{ key: 'opener', span: open.span }]) === null) return null;
|
|
35
36
|
|
|
36
37
|
return cond;
|
|
37
38
|
}
|
|
@@ -149,6 +150,7 @@ function parseAssign(ts: TokenStream): Statement | null {
|
|
|
149
150
|
return {
|
|
150
151
|
kind: 'assign',
|
|
151
152
|
name: nameTok.value,
|
|
153
|
+
nameSpan: nameTok.span,
|
|
152
154
|
value,
|
|
153
155
|
span: { start: nameTok.span.start, end: value.span.end },
|
|
154
156
|
};
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { Token, TokenKind, Marker, Span } from '../lexer/token.js';
|
|
1
|
+
import type { Token, TokenKind, Marker, RelatedMarker, Span } from '../lexer/token.js';
|
|
2
2
|
|
|
3
3
|
// The token stream is everything the grammar productions in expr.ts,
|
|
4
4
|
// stmt.ts and type-expr.ts share but that isn't grammar itself: the
|
|
@@ -39,17 +39,20 @@ export class TokenStream {
|
|
|
39
39
|
// Record a diagnostic at a given span. The one place productions push
|
|
40
40
|
// to the error log when they need to report something expect() can't
|
|
41
41
|
// express (e.g. "this token was fine but the *next* thing is wrong").
|
|
42
|
-
public report(code: string, span: Span): void {
|
|
43
|
-
|
|
42
|
+
public report(code: string, span: Span, related?: RelatedMarker[]): void {
|
|
43
|
+
const marker: Marker = { code, span };
|
|
44
|
+
if (related !== undefined && related.length > 0) marker.related = related;
|
|
45
|
+
this.errors.push(marker);
|
|
44
46
|
}
|
|
45
47
|
|
|
46
48
|
// Consume-or-diagnose: the shape every "expect this exact token here"
|
|
47
49
|
// check in the grammar shares. Returns the consumed token, or records
|
|
48
|
-
// `code` at the offending token's span and returns null.
|
|
49
|
-
|
|
50
|
+
// `code` at the offending token's span and returns null. `related` carries
|
|
51
|
+
// any supporting spans (e.g. the '(' this missing ')' should have closed).
|
|
52
|
+
public expect(kind: TokenKind, code: string, related?: RelatedMarker[]): Token | null {
|
|
50
53
|
const tok = this.peek();
|
|
51
54
|
if (tok.kind !== kind) {
|
|
52
|
-
this.report(code, tok.span);
|
|
55
|
+
this.report(code, tok.span, related);
|
|
53
56
|
return null;
|
|
54
57
|
}
|
|
55
58
|
return this.advance();
|
|
@@ -87,12 +90,16 @@ export class TokenStream {
|
|
|
87
90
|
// malformed statement doesn't take the rest of the file's diagnostics
|
|
88
91
|
// down with it. The list can still come back null if synchronize()
|
|
89
92
|
// runs all the way to EOF without ever finding `close`.
|
|
93
|
+
// `openSpan`, when given, is the span of the opening delimiter (the '(', '{'
|
|
94
|
+
// or '['); it rides along on the close-token error so an unclosed group can
|
|
95
|
+
// point back at where it opened.
|
|
90
96
|
public parseSeparated<T>(
|
|
91
97
|
parseItem: () => T | null,
|
|
92
98
|
sep: TokenKind,
|
|
93
99
|
close: TokenKind,
|
|
94
100
|
closeCode: string,
|
|
95
101
|
recover = false,
|
|
102
|
+
openSpan: Span | null = null,
|
|
96
103
|
): { items: T[]; close: Token } | null {
|
|
97
104
|
const items: T[] = [];
|
|
98
105
|
if (this.peek().kind !== close) {
|
|
@@ -111,10 +118,15 @@ export class TokenStream {
|
|
|
111
118
|
items.push(item);
|
|
112
119
|
if (this.peek().kind !== sep) break;
|
|
113
120
|
this.advance(); // consume separator
|
|
114
|
-
|
|
121
|
+
// Break on the close OR on end-of-input: a trailing separator right
|
|
122
|
+
// before EOF means the group is simply unclosed, so fall straight to
|
|
123
|
+
// the close-token error below instead of trying to parse another item
|
|
124
|
+
// (which would spuriously demand an expression at end of file).
|
|
125
|
+
if (this.peek().kind === close || this.peek().kind === 'EOF') break;
|
|
115
126
|
}
|
|
116
127
|
}
|
|
117
|
-
const
|
|
128
|
+
const related: RelatedMarker[] = openSpan !== null ? [{ key: 'opener', span: openSpan }] : [];
|
|
129
|
+
const closeTok = this.expect(close, closeCode, related);
|
|
118
130
|
if (closeTok === null) return null;
|
|
119
131
|
return { items, close: closeTok };
|
|
120
132
|
}
|
package/src/parser/type-expr.ts
CHANGED
|
@@ -50,14 +50,20 @@ function parseArgDef(ts: TokenStream): ArgDef | null {
|
|
|
50
50
|
return { name: nameTok.value, type: typeTok.value as ArgType };
|
|
51
51
|
}
|
|
52
52
|
|
|
53
|
-
// 'args (name: Type, …)' — the program's typed input declaration
|
|
54
|
-
|
|
53
|
+
// 'args (name: Type, …) ;' — the program's typed input declaration, if
|
|
54
|
+
// present. Returns [] (not null) when there's no 'args' keyword at all;
|
|
55
|
+
// null is reserved for an actual parse error.
|
|
56
|
+
export function parseArgsSection(ts: TokenStream): ArgDef[] | null {
|
|
57
|
+
if (ts.peek().kind !== 'KW_ARGS') return [];
|
|
55
58
|
ts.advance(); // consume 'args'
|
|
56
59
|
|
|
57
|
-
|
|
60
|
+
const open = ts.expect('LPAREN', 'S0006');
|
|
61
|
+
if (open === null) return null;
|
|
58
62
|
|
|
59
|
-
const parsed = ts.parseSeparated(() => parseArgDef(ts), 'COMMA', 'RPAREN', 'S0001');
|
|
63
|
+
const parsed = ts.parseSeparated(() => parseArgDef(ts), 'COMMA', 'RPAREN', 'S0001', false, open.span);
|
|
60
64
|
if (parsed === null) return null;
|
|
61
65
|
|
|
66
|
+
if (ts.expect('SEMICOLON', 'S0011') === null) return null;
|
|
67
|
+
|
|
62
68
|
return parsed.items;
|
|
63
69
|
}
|