pineforge-codegen 0.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pineforge_codegen/__init__.py +53 -0
- pineforge_codegen/analyzer/__init__.py +60 -0
- pineforge_codegen/analyzer/base.py +1563 -0
- pineforge_codegen/analyzer/call_handlers.py +895 -0
- pineforge_codegen/analyzer/contracts.py +163 -0
- pineforge_codegen/analyzer/diagnostics.py +118 -0
- pineforge_codegen/analyzer/tables.py +204 -0
- pineforge_codegen/analyzer/types.py +250 -0
- pineforge_codegen/ast_nodes.py +293 -0
- pineforge_codegen/codegen/__init__.py +78 -0
- pineforge_codegen/codegen/base.py +1381 -0
- pineforge_codegen/codegen/emit_top.py +875 -0
- pineforge_codegen/codegen/helpers.py +163 -0
- pineforge_codegen/codegen/helpers_syminfo.py +134 -0
- pineforge_codegen/codegen/input.py +189 -0
- pineforge_codegen/codegen/security.py +1564 -0
- pineforge_codegen/codegen/ta.py +298 -0
- pineforge_codegen/codegen/tables.py +613 -0
- pineforge_codegen/codegen/types.py +573 -0
- pineforge_codegen/codegen/visit_call.py +1305 -0
- pineforge_codegen/codegen/visit_expr.py +701 -0
- pineforge_codegen/codegen/visit_stmt.py +729 -0
- pineforge_codegen/errors.py +98 -0
- pineforge_codegen/lexer.py +531 -0
- pineforge_codegen/parser.py +1198 -0
- pineforge_codegen/pragmas.py +117 -0
- pineforge_codegen/signatures.py +808 -0
- pineforge_codegen/support_checker.py +1111 -0
- pineforge_codegen/symbols.py +118 -0
- pineforge_codegen/tokens.py +406 -0
- pineforge_codegen/tv_input_choices.py +86 -0
- pineforge_codegen-0.6.5.dist-info/METADATA +462 -0
- pineforge_codegen-0.6.5.dist-info/RECORD +35 -0
- pineforge_codegen-0.6.5.dist-info/WHEEL +4 -0
- pineforge_codegen-0.6.5.dist-info/licenses/LICENSE +197 -0
|
@@ -0,0 +1,1198 @@
|
|
|
1
|
+
"""Recursive-descent parser for PineScript v6 tokens.
|
|
2
|
+
|
|
3
|
+
Rewritten parser (Tasks 5 & 6) that:
|
|
4
|
+
- Uses the new Lexer (pineforge_codegen.lexer) with TokenType enum and Token dataclass
|
|
5
|
+
- Produces AST nodes from pineforge_codegen.ast_nodes with ASTNode base class
|
|
6
|
+
- Sets SourceLocation (loc) on every node
|
|
7
|
+
- Handles all PineScript v6 constructs: expressions, declarations, control flow,
|
|
8
|
+
function definitions, strategy/indicator declarations
|
|
9
|
+
- Implements proper operator precedence climbing
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
|
|
16
|
+
from .lexer import Token, TokenType
|
|
17
|
+
from .errors import SourceLocation
|
|
18
|
+
from .ast_nodes import (
|
|
19
|
+
ASTNode,
|
|
20
|
+
Program, StrategyDecl, ImportStmt,
|
|
21
|
+
VarDecl, Assignment, TupleAssign,
|
|
22
|
+
IfStmt, ForStmt, ForInStmt, WhileStmt, SwitchStmt, BreakStmt, ContinueStmt,
|
|
23
|
+
FuncDef, ExprStmt,
|
|
24
|
+
BinOp, UnaryOp, Ternary, FuncCall, Subscript,
|
|
25
|
+
Identifier, MemberAccess, TypeAnnotation,
|
|
26
|
+
NumberLiteral, StringLiteral, BoolLiteral, NaLiteral, ColorLiteral,
|
|
27
|
+
TupleLiteral,
|
|
28
|
+
TypeField, TypeDecl, EnumDecl, MethodDef,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ParseError(Exception):
    """Signal that the token stream did not match what the parser expected."""
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# Token types of the primitive type keywords that can open a type annotation.
TYPE_KEYWORDS = {
    TokenType.TYPE_STRING,
    TokenType.TYPE_BOOL,
    TokenType.TYPE_FLOAT,
    TokenType.TYPE_INT,
}

# Reassignment / compound-assignment token types mapped to the operator text
# stored on Assignment nodes.
COMPOUND_ASSIGN_OPS = {
    TokenType.COLON_EQUALS: ":=",
    TokenType.PLUS_EQUALS: "+=",
    TokenType.MINUS_EQUALS: "-=",
    TokenType.STAR_EQUALS: "*=",
    TokenType.SLASH_EQUALS: "/=",
    TokenType.PERCENT_EQUALS: "%=",
}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class Parser:
|
|
54
|
+
def __init__(self, tokens: list[Token], *, source: str = "", filename: str = "<input>") -> None:
|
|
55
|
+
self.tokens = tokens
|
|
56
|
+
self.pos = 0
|
|
57
|
+
self._source = source
|
|
58
|
+
self._filename = filename
|
|
59
|
+
|
|
60
|
+
# ------------------------------------------------------------------
|
|
61
|
+
# Helpers
|
|
62
|
+
# ------------------------------------------------------------------
|
|
63
|
+
|
|
64
|
+
def _current(self) -> Token:
|
|
65
|
+
if self.pos < len(self.tokens):
|
|
66
|
+
return self.tokens[self.pos]
|
|
67
|
+
return Token(TokenType.EOF_TOKEN, "", 0, 0)
|
|
68
|
+
|
|
69
|
+
def _peek(self, offset: int = 1) -> Token:
|
|
70
|
+
idx = self.pos + offset
|
|
71
|
+
if idx < len(self.tokens):
|
|
72
|
+
return self.tokens[idx]
|
|
73
|
+
return Token(TokenType.EOF_TOKEN, "", 0, 0)
|
|
74
|
+
|
|
75
|
+
def _at_end(self) -> bool:
    """Tell whether the cursor sits on an EOF token (real or synthetic)."""
    current_type = self._current().type
    return current_type == TokenType.EOF_TOKEN
|
|
77
|
+
|
|
78
|
+
def _check(self, tt: TokenType) -> bool:
|
|
79
|
+
return self._current().type == tt
|
|
80
|
+
|
|
81
|
+
def _match(self, *types: TokenType) -> Token | None:
|
|
82
|
+
if self._current().type in types:
|
|
83
|
+
return self._advance()
|
|
84
|
+
return None
|
|
85
|
+
|
|
86
|
+
def _advance(self) -> Token:
|
|
87
|
+
tok = self._current()
|
|
88
|
+
self.pos += 1
|
|
89
|
+
return tok
|
|
90
|
+
|
|
91
|
+
def _consume(self, tt: TokenType, msg: str = "") -> Token:
|
|
92
|
+
if self._current().type == tt:
|
|
93
|
+
return self._advance()
|
|
94
|
+
cur = self._current()
|
|
95
|
+
raise ParseError(
|
|
96
|
+
f"Expected {tt.name} got {cur.type.name}({cur.value!r}) "
|
|
97
|
+
f"L{cur.line}:{cur.col}. {msg}"
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
def _skip_newlines(self) -> None:
    """Advance past every consecutive NEWLINE token at the cursor."""
    while self._current().type == TokenType.NEWLINE:
        self._advance()
|
|
103
|
+
|
|
104
|
+
def _skip_expr_continuation(self) -> None:
    """Skip NEWLINE tokens so an expression can continue on the next line.

    Used after a binary operator so the expression parser reaches the next
    operand. Only NEWLINE tokens are skipped; INDENT/DEDENT are left alone
    (per the original note, the lexer's line-continuation logic deals
    with those).
    """
    # Same token-skipping loop as _skip_newlines; the separate name documents
    # intent at the call site.
    self._skip_newlines()
|
|
113
|
+
|
|
114
|
+
def _loc(self, tok: Token) -> SourceLocation:
    """Create the SourceLocation for ``tok`` within this parser's file."""
    return SourceLocation(
        file=self._filename,
        line=tok.line,
        col=tok.col,
        end_col=tok.end_col,
    )
|
|
117
|
+
|
|
118
|
+
def _set_loc(self, node: ASTNode, tok: Token) -> ASTNode:
    """Stamp ``node.loc`` with the position of ``tok`` and return ``node``.

    Returning the node lets callers write ``return self._set_loc(node, tok)``.
    """
    location = self._loc(tok)
    node.loc = location
    return node
|
|
122
|
+
|
|
123
|
+
# ------------------------------------------------------------------
|
|
124
|
+
# Top-level
|
|
125
|
+
# ------------------------------------------------------------------
|
|
126
|
+
|
|
127
|
+
def parse(self) -> Program:
    """Parse the full program, returning a Program node.

    Parsing is best-effort: a statement that raises ParseError is dropped
    and the parser resynchronises at the next NEWLINE (see ``_recover``).
    Every such error is recorded, in source order, on ``self.errors`` so a
    caller can tell a clean parse from a partial one instead of receiving
    a silently truncated AST.

    Returns:
        The Program whose ``body`` holds every statement that parsed.
    """
    # Reset on every call so the list describes this parse only.
    self.errors = []
    prog = Program(version=self._extract_version())
    self._skip_newlines()

    while not self._at_end():
        try:
            stmt = self._parse_statement()
        except ParseError as exc:
            # Error recovery: remember the failure, skip to the next
            # newline and continue with the following statement.
            self.errors.append(exc)
            self._recover()
        else:
            if isinstance(stmt, list):
                # Comma-separated declarations come back as a list.
                prog.body.extend(stmt)
            elif stmt is not None:
                prog.body.append(stmt)
        self._skip_newlines()

    return prog
|
|
147
|
+
|
|
148
|
+
def _extract_version(self) -> int | None:
|
|
149
|
+
"""Extract version number from //@version=N annotation in source."""
|
|
150
|
+
if not self._source:
|
|
151
|
+
return None
|
|
152
|
+
m = re.search(r'//@version=(\d+)', self._source)
|
|
153
|
+
if m:
|
|
154
|
+
return int(m.group(1))
|
|
155
|
+
return None
|
|
156
|
+
|
|
157
|
+
def _recover(self) -> None:
    """Resynchronise after an error: drop the rest of the current line.

    Tokens are discarded up to and including the next NEWLINE; the cursor
    stops at EOF if no NEWLINE follows.
    """
    while not self._at_end():
        hit_newline = self._check(TokenType.NEWLINE)
        self._advance()
        if hit_newline:
            # The line terminator itself is consumed, then we stop.
            return
|
|
163
|
+
|
|
164
|
+
# ------------------------------------------------------------------
|
|
165
|
+
# Statement parsing
|
|
166
|
+
# ------------------------------------------------------------------
|
|
167
|
+
|
|
168
|
+
def _parse_statement(self):
    """Parse one statement starting at the current token.

    Chooses a specialised parse method from the current token and a short
    look-ahead, falling back to an expression / assignment statement.
    Returns whatever the chosen method returns: normally a single AST
    node, or a list of VarDecl nodes for ``a = 1, b = 2``.
    """
    cur = self._current()
    kind = cur.type

    # Keywords that map one-to-one onto a dedicated parse method. Methods
    # are looked up by name so only the selected one is resolved.
    keyword_parsers = {
        TokenType.IF: "_parse_if_stmt",
        TokenType.FOR: "_parse_for_stmt",
        TokenType.WHILE: "_parse_while_stmt",
        TokenType.SWITCH: "_parse_switch_stmt",
        TokenType.IMPORT: "_parse_import_stmt",
        TokenType.VAR: "_parse_var_keyword_decl",
        TokenType.VARIP: "_parse_var_keyword_decl",
        TokenType.METHOD: "_parse_method_def",
    }
    parser_name = keyword_parsers.get(kind)
    if parser_name is not None:
        return getattr(self, parser_name)()

    # break / continue are a single token each.
    if kind == TokenType.BREAK:
        tok = self._advance()
        return self._set_loc(BreakStmt(), tok)
    if kind == TokenType.CONTINUE:
        tok = self._advance()
        return self._set_loc(ContinueStmt(), tok)

    # Primitive-typed declaration: ``float x = ...`` / ``int x = ...``.
    if (kind in TYPE_KEYWORDS
            and self._peek().type == TokenType.IDENT
            and self._peek(2).type == TokenType.EQUALS):
        return self._parse_typed_decl()

    # ``[a, b] = expr`` is a tuple assignment; a bare ``[a, b]`` is an
    # expression statement (e.g. a tuple used as a return value).
    if kind == TokenType.LBRACKET:
        if self._is_tuple_assign():
            return self._parse_tuple_assign()
        return self._parse_expr_or_assign_stmt()

    # Anything that does not start with an identifier is an expression.
    if kind != TokenType.IDENT:
        return self._parse_expr_or_assign_stmt()

    # IDENT-prefixed typed declaration without ``var`` / ``varip``:
    # ``Sample s = ...``, ``array<Sample> arr = ...``. Without this check
    # the type name would become a stray expression statement and the
    # declaration would lose its type annotation.
    if self._is_ident_typed_var_decl():
        return self._parse_typed_decl()

    following = self._peek()

    # ``type Name`` / ``enum Name`` followed by a newline opens a block.
    if (cur.value in ("enum", "type")
            and following.type == TokenType.IDENT
            and self._peek(2).type == TokenType.NEWLINE):
        return self._parse_type_or_enum_decl()

    # strategy(...) / indicator(...) declaration.
    if cur.value in ("strategy", "indicator") and following.type == TokenType.LPAREN:
        return self._parse_strategy_decl()

    # Function definition: name(params) =>
    if self._is_func_def():
        return self._parse_func_def()

    # Variable declaration: IDENT = expr (but not IDENT = = ...).
    if following.type == TokenType.EQUALS and self._peek(2).type != TokenType.EQUALS:
        return self._parse_var_decl()

    # Reassignment / compound assignment: IDENT := += -= *= /= %=
    if following.type in COMPOUND_ASSIGN_OPS:
        return self._parse_assignment()

    # Member targets (``a.b := ...``) need the expression parsed first; the
    # assignment operator is detected afterwards.
    return self._parse_expr_or_assign_stmt()
|
|
253
|
+
|
|
254
|
+
def _parse_expr_or_assign_stmt(self):
    """Parse an expression and promote it to an Assignment if an operator follows.

    If the token after the expression is one of COMPOUND_ASSIGN_OPS, the
    expression becomes the assignment target and a second expression is
    parsed as the value; otherwise the result is an ExprStmt. Either node
    is located at the first token of the statement.
    """
    first = self._current()
    target = self._parse_expression()

    op = COMPOUND_ASSIGN_OPS.get(self._current().type)
    if op is None:
        return self._set_loc(ExprStmt(expr=target), first)

    self._advance()  # the assignment operator
    rhs = self._parse_expression()
    return self._set_loc(Assignment(target=target, op=op, value=rhs), first)
|
|
268
|
+
|
|
269
|
+
def _is_ident_typed_var_decl(self) -> bool:
    """Look ahead for ``IDENT [<...>] IDENT '='`` (identifier-typed declaration).

    True for shapes such as ``Sample s = expr`` or ``array<Sample> w = expr``
    at the cursor. False when the leading identifier is one of ``enum``,
    ``type``, ``strategy``, ``indicator``, ``na``, ``true``, ``false``,
    when the generic argument list is not closed on the same line, or when
    the token after the ``=`` is another ``=``. The cursor is never moved.
    """
    head = self._current()
    if head.type != TokenType.IDENT:
        return False
    if head.value in ("enum", "type", "strategy", "indicator", "na", "true", "false"):
        return False

    toks = self.tokens
    total = len(toks)
    i = self.pos + 1

    # Step over an optional generic argument list: IDENT < ... >
    if i < total and toks[i].type == TokenType.LT:
        nesting = 1
        i += 1
        while i < total and nesting > 0:
            kind = toks[i].type
            if kind == TokenType.LT:
                nesting += 1
            elif kind == TokenType.GT:
                nesting -= 1
            elif kind in (TokenType.NEWLINE, TokenType.EOF_TOKEN):
                return False
            i += 1

    # The variable name must come next ...
    if i >= total or toks[i].type != TokenType.IDENT:
        return False
    # ... followed by '=' ...
    if i + 1 >= total or toks[i + 1].type != TokenType.EQUALS:
        return False
    # ... that is not itself followed by another '='.
    return not (i + 2 < total and toks[i + 2].type == TokenType.EQUALS)
|
|
309
|
+
|
|
310
|
+
def _is_func_def(self) -> bool:
    """Look ahead for a function definition shape: ``name(args) =>``.

    Scans from the opening parenthesis to its matching close, then reports
    whether the next non-NEWLINE token is ``=>``. The cursor is not moved.
    """
    if self._current().type != TokenType.IDENT or self._peek().type != TokenType.LPAREN:
        return False

    toks = self.tokens
    total = len(toks)
    depth = 0
    for i in range(self.pos + 1, total):
        kind = toks[i].type
        if kind == TokenType.LPAREN:
            depth += 1
        elif kind == TokenType.RPAREN:
            depth -= 1
            if depth == 0:
                # Matching ')' found: skip newlines, then expect '=>'.
                j = i + 1
                while j < total and toks[j].type == TokenType.NEWLINE:
                    j += 1
                return j < total and toks[j].type == TokenType.FAT_ARROW
        elif kind in (TokenType.EOF_TOKEN, TokenType.NEWLINE) and depth == 0:
            return False
    return False
|
|
334
|
+
|
|
335
|
+
# -- Declarations --
|
|
336
|
+
|
|
337
|
+
def _parse_strategy_decl(self) -> StrategyDecl:
    """Parse a ``strategy(...)`` / ``indicator(...)`` declaration.

    The argument list is parsed like an ordinary call. Which keyword was
    used is recorded under ``annotations["decl_kind"]``.
    """
    keyword = self._advance()  # 'strategy' or 'indicator'
    self._consume(TokenType.LPAREN)
    positional, named = self._parse_call_args()
    self._consume(TokenType.RPAREN)

    decl = StrategyDecl(args=positional, kwargs=named)
    decl.annotations = {"decl_kind": keyword.value}
    return self._set_loc(decl, keyword)
|
|
346
|
+
|
|
347
|
+
def _parse_import_stmt(self) -> ImportStmt:
    """Parse ``import path/to/library/version [as alias]``.

    Every token up to the end of the line is consumed. The path is the
    concatenation of the token texts, excluding a trailing ``as <alias>``
    clause. Previously the clause was glued onto the path, so
    ``import a/b/1 as x`` produced the path ``a/b/1asx``.

    Returns:
        An ImportStmt located at the ``import`` keyword. When an alias is
        present it is stored under ``annotations["alias"]``.
    """
    start_tok = self._current()
    self._consume(TokenType.IMPORT)

    # Collect the rest of the line. str() keeps join() safe should a token
    # carry a non-string value.
    pieces: list[str] = []
    while not self._at_end() and not self._check(TokenType.NEWLINE):
        pieces.append(str(self._advance().value))

    # A trailing "as <alias>" is not part of the library path.
    alias = None
    if len(pieces) >= 3 and pieces[-2] == "as":
        alias = pieces[-1]
        pieces = pieces[:-2]

    node = ImportStmt(path="".join(pieces))
    if alias is not None:
        # NOTE(review): only ImportStmt(path=...) is visible from here, so
        # the alias travels in annotations (as StrategyDecl / MethodDef
        # metadata does) — confirm how downstream consumers want it.
        node.annotations = {"alias": alias}
    return self._set_loc(node, start_tok)
|
|
360
|
+
|
|
361
|
+
def _parse_var_decl(self) -> VarDecl | list:
    """Parse ``name = expr``, optionally repeated with commas.

    Returns a single VarDecl for one declaration, or a list of VarDecl
    nodes for the comma-separated form ``x = 1, y = 2, z = 3``. Each node
    is located at its own name token.
    """
    decls = []
    while True:
        first_tok = self._current()
        name_tok = self._consume(TokenType.IDENT)
        self._consume(TokenType.EQUALS)
        init = self._parse_expression()
        decls.append(self._set_loc(VarDecl(name=name_tok.value, value=init), first_tok))
        # A comma announces another ``name = expr`` on the same line.
        if not self._match(TokenType.COMMA):
            break
    return decls[0] if len(decls) == 1 else decls
|
|
383
|
+
|
|
384
|
+
def _parse_assignment(self) -> Assignment:
    """Parse ``name <op> expr`` where ``<op>`` is one of COMPOUND_ASSIGN_OPS.

    The target is wrapped in an Identifier node; both it and the
    Assignment are located at the name token.
    """
    name_tok = self._consume(TokenType.IDENT)
    op = COMPOUND_ASSIGN_OPS[self._advance().type]  # :=, +=, -=, ...
    rhs = self._parse_expression()

    target = self._set_loc(Identifier(name=name_tok.value), name_tok)
    return self._set_loc(Assignment(target=target, op=op, value=rhs), name_tok)
|
|
394
|
+
|
|
395
|
+
def _parse_type_hint_string(self) -> str:
    """Consume a type hint and return it as text.

    The first token supplies the base name. If a ``<`` follows, tokens are
    consumed up to the matching ``>`` (nested ``<...>`` included) and the
    result is ``base<args>`` with the argument tokens concatenated without
    spaces, e.g. ``array<float>`` or ``map<string,int>``.
    """
    base = self._advance().value
    if not self._check(TokenType.LT):
        return base

    self._advance()  # opening '<'
    inner: list[str] = []
    nesting = 0
    while not self._at_end():
        # Every branch below consumes exactly one token.
        tok = self._advance()
        if tok.type == TokenType.GT:
            if nesting == 0:
                break  # the '>' matching the opening '<'
            nesting -= 1
            inner.append(">")
        elif tok.type == TokenType.LT:
            nesting += 1
            inner.append("<")
        elif tok.type == TokenType.COMMA:
            inner.append(",")
        else:
            inner.append(str(tok.value))
    return f"{base}<{''.join(inner)}>"
|
|
425
|
+
|
|
426
|
+
def _parse_template_args(self) -> list[str]:
    """Consume ``<A, B, ...>`` at the cursor and return the arguments as text.

    Returns an empty list (consuming nothing) when the current token is
    not ``<``. Top-level commas separate arguments; nested ``<...>`` stay
    inside the argument they belong to, e.g. the list for
    ``new<string, array<float>>()`` is ``["string", "array<float>"]``.
    """
    if not self._check(TokenType.LT):
        return []
    self._advance()  # opening '<'

    collected: list[str] = []
    buf: list[str] = []
    nesting = 0
    while not self._at_end():
        tok = self._advance()
        kind = tok.type
        if kind == TokenType.GT and nesting == 0:
            # Closing '>': flush the final argument unless it is empty.
            last = "".join(buf).strip()
            if last:
                collected.append(last)
            break
        if kind == TokenType.COMMA and nesting == 0:
            collected.append("".join(buf).strip())
            buf = []
        elif kind == TokenType.LT:
            nesting += 1
            buf.append("<")
        elif kind == TokenType.GT:
            nesting -= 1
            buf.append(">")
        else:
            buf.append(str(tok.value))
    return collected
|
|
455
|
+
|
|
456
|
+
def _looks_like_call_template_args(self) -> bool:
    """Tell whether the ``<`` at the cursor opens generic args followed by ``(``.

    Scans to the ``>`` that balances the current ``<`` and returns True
    only if the very next token is ``(``. A NEWLINE or EOF before the
    brackets balance gives False. The cursor is not moved.
    """
    if not self._check(TokenType.LT):
        return False

    toks = self.tokens
    total = len(toks)
    nesting = 0
    for i in range(self.pos, total):
        kind = toks[i].type
        if kind == TokenType.LT:
            nesting += 1
        elif kind == TokenType.GT:
            nesting -= 1
            if nesting == 0:
                return i + 1 < total and toks[i + 1].type == TokenType.LPAREN
        elif nesting > 0 and kind in (TokenType.NEWLINE, TokenType.EOF_TOKEN):
            return False
    return False
|
|
474
|
+
|
|
475
|
+
def _parse_var_keyword_decl(self) -> VarDecl:
    """Parse ``var [type] name = expr`` or ``varip [type] name = expr``.

    A type hint is read when the keyword is followed by a primitive type
    keyword, or by an identifier (other than ``na``) that is itself
    followed by ``<`` or another identifier (``array<float> a``,
    ``table t``).
    """
    keyword = self._advance()  # 'var' or 'varip'

    head = self._current()
    has_type = head.type in TYPE_KEYWORDS or (
        head.type == TokenType.IDENT
        and self._peek().type in (TokenType.LT, TokenType.IDENT)
        and head.value not in ("na",)
    )
    type_hint = self._parse_type_hint_string() if has_type else None

    name_tok = self._consume(TokenType.IDENT)
    self._consume(TokenType.EQUALS)
    init = self._parse_expression()

    decl = VarDecl(
        name=name_tok.value,
        value=init,
        is_var=keyword.type == TokenType.VAR,
        is_varip=keyword.type == TokenType.VARIP,
        type_hint=type_hint,
    )
    return self._set_loc(decl, keyword)
|
|
499
|
+
|
|
500
|
+
def _parse_typed_decl(self) -> VarDecl:
    """Parse ``<type> name = expr``, e.g. ``float x = 1.0``.

    The type is read with ``_parse_type_hint_string`` and stored as the
    declaration's ``type_hint``. The node is located at the type token.
    """
    first = self._current()
    hint = self._parse_type_hint_string()
    name_tok = self._consume(TokenType.IDENT)
    self._consume(TokenType.EQUALS)
    init = self._parse_expression()
    decl = VarDecl(name=name_tok.value, value=init, type_hint=hint)
    return self._set_loc(decl, first)
|
|
509
|
+
|
|
510
|
+
def _is_tuple_assign(self) -> bool:
    """Look ahead to decide whether ``[a, b, ...]`` is followed by ``=``.

    Finds the ``]`` that balances the ``[`` at the cursor and returns True
    only when the next token is ``=`` and the one after that is not
    another ``=``. The cursor is not moved.
    """
    toks = self.tokens
    total = len(toks)
    depth = 0
    for i in range(self.pos, total):
        kind = toks[i].type
        if kind == TokenType.EOF_TOKEN:
            return False
        if kind == TokenType.LBRACKET:
            depth += 1
        elif kind == TokenType.RBRACKET:
            depth -= 1
            if depth == 0:
                eq = i + 1
                if eq >= total or toks[eq].type != TokenType.EQUALS:
                    return False
                # Reject '=' immediately followed by another '='.
                after = eq + 1
                return after >= total or toks[after].type != TokenType.EQUALS
    return False
|
|
533
|
+
|
|
534
|
+
def _parse_tuple_assign(self) -> TupleAssign:
    """Parse ``[a, b, c] = expr``.

    Each token between the brackets contributes its text as a target name
    (this is how a ``_`` discard placeholder is accepted); commas between
    names are optional separators.

    Raises:
        ParseError: if the closing ``]`` or the ``=`` is missing. The loop
            stops at EOF, so an unterminated ``[`` is reported through
            ``_consume`` instead of advancing past the end forever.
    """
    start_tok = self._current()
    self._consume(TokenType.LBRACKET)
    names = []
    while not self._check(TokenType.RBRACKET) and not self._at_end():
        # Identifier or not, the token's text becomes the name.
        names.append(self._advance().value)
        self._match(TokenType.COMMA)
    self._consume(TokenType.RBRACKET)
    self._consume(TokenType.EQUALS)
    value = self._parse_expression()
    node = TupleAssign(names=names, value=value)
    return self._set_loc(node, start_tok)
|
|
552
|
+
|
|
553
|
+
# -- Function definition --
|
|
554
|
+
|
|
555
|
+
def _parse_func_def(self) -> FuncDef:
    """Parse ``name(param1, param2) => expr_or_block``.

    Each parameter may carry an optional qualifier (``const``, ``input``,
    ``simple``, ``series``), an optional type (primitive keyword,
    user-defined type name, or generic such as ``array<float>``) and an
    optional ``= default``. Only the parameter name is kept; qualifier,
    type and default expression are parsed and discarded.

    Returns:
        A FuncDef located at the function name. ``is_single_expr`` is True
        when the body is one expression on the same line as ``=>``, in
        which case the body is that expression wrapped in an ExprStmt.

    Raises:
        ParseError: on a malformed parameter list, a missing ``=>``, or a
            block body without INDENT/DEDENT.
    """
    start_tok = self._current()
    name = self._consume(TokenType.IDENT).value
    self._consume(TokenType.LPAREN)

    qualifiers = ("const", "input", "simple", "series")
    params = []
    while not self._check(TokenType.RPAREN):
        # Optional qualifier: ``series float x`` is ONE parameter, not
        # "series" + "x". ``series`` is always treated as a qualifier; the
        # other words only when a type follows, so a parameter that is
        # merely *named* e.g. ``simple`` still parses.
        cur = self._current()
        if cur.type == TokenType.IDENT and cur.value in qualifiers:
            next_type = self._peek().type
            if (cur.value == "series"
                    or next_type in TYPE_KEYWORDS
                    or next_type == TokenType.IDENT):
                self._advance()

        # Optional type annotation.
        if self._current().type in TYPE_KEYWORDS:
            self._advance()  # int, float, bool, string
        elif self._check(TokenType.IDENT) and self._peek().type == TokenType.LT:
            self._parse_type_hint_string()  # generic type, e.g. array<float>

        param_name = self._consume(TokenType.IDENT).value
        if self._check(TokenType.IDENT):
            # The identifier just read was a type name; the real
            # parameter name follows it.
            param_name = self._consume(TokenType.IDENT).value

        # Default value: param = expr (parsed, then discarded).
        if self._check(TokenType.EQUALS):
            self._advance()
            self._parse_expression()

        params.append(param_name)
        self._match(TokenType.COMMA)
    self._consume(TokenType.RPAREN)
    self._skip_newlines()
    self._consume(TokenType.FAT_ARROW)

    # Single expression or indented block.
    if self._check(TokenType.NEWLINE):
        self._advance()
        self._consume(TokenType.INDENT)
        body = self._parse_block()
        self._consume(TokenType.DEDENT)
        node = FuncDef(name=name, params=params, body=body, is_single_expr=False)
    else:
        expr = self._parse_expression()
        node = FuncDef(name=name, params=params, body=[ExprStmt(expr=expr)], is_single_expr=True)

    return self._set_loc(node, start_tok)
|
|
605
|
+
|
|
606
|
+
def _parse_type_or_enum_decl(self):
    """Consume the leading ``type`` / ``enum`` word and parse the block that follows.

    ``enum`` goes to ``_parse_enum_decl``; anything else goes to
    ``_parse_type_decl``. The keyword token is passed along as the node's
    location.
    """
    keyword = self._advance()  # 'type' or 'enum'
    parse_body = self._parse_enum_decl if keyword.value == "enum" else self._parse_type_decl
    return parse_body(keyword)
|
|
613
|
+
|
|
614
|
+
def _parse_type_decl(self, start_tok):
    """Parse the remainder of a ``type Name`` block into a TypeDecl.

    Each field line inside the indented block has the shape
    ``<type> <name> [= default]``. ``start_tok`` (the ``type`` keyword)
    supplies the node's location. With no indented block the field list
    is empty.
    """
    name = self._consume(TokenType.IDENT).value
    self._skip_newlines()

    fields = []
    if self._check(TokenType.INDENT):
        self._advance()  # INDENT
        self._skip_newlines()
        while not (self._check(TokenType.DEDENT) or self._at_end()):
            field_type = self._parse_type_hint_string()
            field_name = self._consume(TokenType.IDENT).value

            # A default follows a single '=' (not '=' '=').
            has_default = (
                self._check(TokenType.EQUALS)
                and self._peek().type != TokenType.EQUALS
            )
            default = None
            if has_default:
                self._advance()  # '='
                default = self._parse_expression()

            fields.append(TypeField(type_name=field_type, name=field_name, default=default))
            self._skip_newlines()
        self._consume(TokenType.DEDENT)

    return self._set_loc(TypeDecl(name=name, fields=fields), start_tok)
|
|
636
|
+
|
|
637
|
+
def _parse_enum_decl(self, start_tok):
    """Parse the remainder of an ``enum Name`` block into an EnumDecl.

    Every identifier that starts an entry in the indented block becomes a
    member, in order; ``Member = expr`` additionally records the parsed
    expression in ``member_values``. Tokens that are not identifiers are
    skipped one at a time. ``start_tok`` supplies the node's location.
    """
    name = self._consume(TokenType.IDENT).value
    self._skip_newlines()

    members = []
    member_values: dict = {}
    if self._check(TokenType.INDENT):
        self._advance()  # INDENT
        self._skip_newlines()
        while not (self._check(TokenType.DEDENT) or self._at_end()):
            if not self._check(TokenType.IDENT):
                self._advance()  # skip unexpected tokens
            else:
                member = self._consume(TokenType.IDENT).value
                members.append(member)
                # Optional explicit value after a single '='.
                if (self._check(TokenType.EQUALS)
                        and self._peek().type != TokenType.EQUALS):
                    self._advance()  # '='
                    member_values[member] = self._parse_expression()
            self._skip_newlines()
        self._consume(TokenType.DEDENT)

    decl = EnumDecl(name=name, members=members, member_values=member_values)
    return self._set_loc(decl, start_tok)
|
|
660
|
+
|
|
661
|
+
def _parse_method_def(self):
    """Parse ``method name(Type self, params...) => body``.

    The first parameter is the receiver: its type becomes the MethodDef's
    ``type_name`` and its name the first entry of ``params``. The receiver
    type may be a user-defined type name, a primitive type keyword, or a
    generic such as ``array<float>``; it is read with
    ``_parse_type_hint_string``, as are the types of later parameters.

    Returns:
        A MethodDef located at the ``method`` keyword. Its ``annotations``
        hold ``param_type_hints`` and ``param_defaults``, two lists
        parallel to ``params`` (None where a parameter has no type or no
        default), so codegen can substitute defaults for omitted trailing
        arguments.

    Raises:
        ParseError: on a malformed parameter list, a missing ``=>``, or a
            block body without INDENT/DEDENT.
    """
    start_tok = self._advance()  # consume 'method'
    name = self._consume(TokenType.IDENT).value
    self._consume(TokenType.LPAREN)

    # Receiver: <type> <name>. Anything that cannot start a type is
    # reported through the usual _consume error.
    if not (self._check(TokenType.IDENT) or self._current().type in TYPE_KEYWORDS):
        self._consume(TokenType.IDENT, "method receiver type")
    type_name = self._parse_type_hint_string()
    params = [self._consume(TokenType.IDENT).value]  # 'self' or user's name
    param_type_hints = [type_name]
    # Per-parameter default expressions; the receiver never has one.
    param_defaults: list = [None]

    while self._match(TokenType.COMMA):
        # Optional type annotation: a primitive keyword, or an identifier
        # followed by the parameter name or by '<' (generic type) — the
        # same shapes _parse_var_keyword_decl accepts.
        param_type = None
        if self._current().type in TYPE_KEYWORDS:
            param_type = self._parse_type_hint_string()
        elif (self._current().type == TokenType.IDENT
                and self._peek().type in (TokenType.IDENT, TokenType.LT)):
            param_type = self._parse_type_hint_string()
        p = self._consume(TokenType.IDENT).value
        pdefault = None
        if self._check(TokenType.EQUALS):
            self._advance()
            pdefault = self._parse_expression()
        params.append(p)
        param_type_hints.append(param_type)
        param_defaults.append(pdefault)
    self._consume(TokenType.RPAREN)
    self._skip_newlines()
    self._consume(TokenType.FAT_ARROW)

    # Indented block body, or a single expression on the same line.
    if self._check(TokenType.NEWLINE):
        self._advance()
        self._consume(TokenType.INDENT)
        body = self._parse_block()
        self._consume(TokenType.DEDENT)
        node = MethodDef(name=name, type_name=type_name, params=params, body=body)
    else:
        expr = self._parse_expression()
        node = MethodDef(name=name, type_name=type_name, params=params,
                         body=[ExprStmt(expr=expr)], is_single_expr=True)
    node.annotations = {
        "param_type_hints": param_type_hints,
        "param_defaults": param_defaults,
    }
    return self._set_loc(node, start_tok)
|
|
709
|
+
|
|
710
|
+
# -- Control flow --
|
|
711
|
+
|
|
712
|
+
def _parse_if_stmt(self) -> IfStmt:
    """Parse an ``if`` statement, including an optional ``else`` tail.

    Shape handled::

        if <expr> NEWLINE INDENT <block> DEDENT
        [else (if ... | NEWLINE INDENT <block> DEDENT)]

    An ``else if`` is represented as a single nested ``IfStmt`` stored in
    ``else_body``. Without an ``else``, ``else_body`` is an empty list.
    """
    if_tok = self._current()
    self._consume(TokenType.IF)
    test = self._parse_expression()

    # Mandatory indented "then" block.
    self._consume(TokenType.NEWLINE)
    self._consume(TokenType.INDENT)
    then_body = self._parse_block()
    self._consume(TokenType.DEDENT)

    otherwise: list = []
    if self._check(TokenType.ELSE):
        self._advance()
        if not self._check(TokenType.IF):
            # Plain else: its own indented block.
            self._consume(TokenType.NEWLINE)
            self._consume(TokenType.INDENT)
            otherwise = self._parse_block()
            self._consume(TokenType.DEDENT)
        else:
            # else if -> recurse; the nested IfStmt becomes the else body.
            otherwise = [self._parse_if_stmt()]

    stmt = IfStmt(condition=test, body=then_body, else_body=otherwise)
    return self._set_loc(stmt, if_tok)
|
|
736
|
+
|
|
737
|
+
def _parse_for_stmt(self):
    """Parse one of the three ``for`` loop forms.

    * ``for [a, b] in iterable`` -> ``ForInStmt(vars=[...], ...)``
    * ``for x in iterable``      -> ``ForInStmt(var=..., ...)``
    * ``for i = start to end [by step]`` -> ``ForStmt(...)``

    Every form is followed by NEWLINE, INDENT, a block and DEDENT.
    """
    for_tok = self._current()
    self._consume(TokenType.FOR)

    def indented_body() -> list:
        # Shared tail of all three loop forms.
        self._consume(TokenType.NEWLINE)
        self._consume(TokenType.INDENT)
        stmts = self._parse_block()
        self._consume(TokenType.DEDENT)
        return stmts

    # Destructured for...in: for [a, b] in arr
    if self._check(TokenType.LBRACKET):
        self._advance()  # [
        names = []
        while not self._check(TokenType.RBRACKET):
            names.append(self._consume(TokenType.IDENT).value)
            self._match(TokenType.COMMA)  # separator is optional here
        self._consume(TokenType.RBRACKET)
        self._consume(TokenType.IN)
        source = self._parse_expression()
        loop = ForInStmt(vars=names, iterable=source, body=indented_body())
        return self._set_loc(loop, for_tok)

    loop_var = self._consume(TokenType.IDENT).value

    # Single-variable for...in: for x in arr
    if self._check(TokenType.IN):
        self._advance()  # consume 'in'
        source = self._parse_expression()
        loop = ForInStmt(var=loop_var, iterable=source, body=indented_body())
        return self._set_loc(loop, for_tok)

    # Counted loop: for var = start to end [by step]
    self._consume(TokenType.EQUALS)
    lower = self._parse_expression()
    self._consume(TokenType.TO)
    upper = self._parse_expression()
    stride = self._parse_expression() if self._match(TokenType.BY) else None
    loop = ForStmt(var=loop_var, start=lower, end=upper, step=stride,
                   body=indented_body())
    return self._set_loc(loop, for_tok)
|
|
785
|
+
|
|
786
|
+
def _parse_while_stmt(self) -> WhileStmt:
    """Parse ``while <expr>`` followed by an indented block."""
    while_tok = self._current()
    self._consume(TokenType.WHILE)
    test = self._parse_expression()

    # The loop body must be a NEWLINE + INDENT ... DEDENT block.
    self._consume(TokenType.NEWLINE)
    self._consume(TokenType.INDENT)
    loop_body = self._parse_block()
    self._consume(TokenType.DEDENT)

    return self._set_loc(WhileStmt(condition=test, body=loop_body), while_tok)
|
|
796
|
+
|
|
797
|
+
def _parse_switch_stmt(self) -> SwitchStmt:
    """Parse a ``switch`` with an optional subject expression.

    Each arm is ``<case_expr> => <body>``; an arm that starts directly
    with ``=>`` is the default arm. An arm body is either a single
    expression on the same line (wrapped in ``ExprStmt``) or an indented
    block. ``cases`` is a list of ``(case_expr, body)`` tuples in source
    order; if several default arms appear, the last one parsed wins.
    """
    switch_tok = self._current()
    self._consume(TokenType.SWITCH)

    # The subject expression is optional: `switch` may be followed
    # immediately by a newline.
    subject = None
    if not self._check(TokenType.NEWLINE):
        subject = self._parse_expression()

    self._consume(TokenType.NEWLINE)
    self._consume(TokenType.INDENT)

    def arm_body() -> list:
        # Body after '=>': inline expression or indented block.
        if not self._check(TokenType.NEWLINE):
            return [ExprStmt(expr=self._parse_expression())]
        self._advance()
        self._consume(TokenType.INDENT)
        stmts = self._parse_block()
        self._consume(TokenType.DEDENT)
        return stmts

    arms = []
    fallback = []
    self._skip_newlines()
    while not (self._check(TokenType.DEDENT) or self._at_end()):
        if self._check(TokenType.FAT_ARROW):
            # Default arm: => body
            self._advance()
            fallback = arm_body()
        else:
            # Regular arm: case_expr => body
            label = self._parse_expression()
            self._consume(TokenType.FAT_ARROW)
            arms.append((label, arm_body()))
        self._skip_newlines()

    self._consume(TokenType.DEDENT)
    stmt = SwitchStmt(expr=subject, cases=arms, default_body=fallback)
    return self._set_loc(stmt, switch_tok)
|
|
840
|
+
|
|
841
|
+
# -- Block parsing --
|
|
842
|
+
|
|
843
|
+
def _parse_block(self) -> list:
    """Parse statements until a DEDENT token or end of input.

    The terminating DEDENT is left for the caller to consume. A statement
    parser may return ``None`` (nothing to add), a single node, or a list
    of nodes that is spliced into the result. A ``ParseError`` raised by a
    statement is handled by calling ``_recover()`` and continuing.
    """
    collected: list = []
    self._skip_newlines()
    while not (self._check(TokenType.DEDENT) or self._at_end()):
        try:
            parsed = self._parse_statement()
        except ParseError:
            self._recover()
        else:
            if isinstance(parsed, list):
                collected.extend(parsed)
            elif parsed is not None:
                collected.append(parsed)
        self._skip_newlines()
    return collected
|
|
858
|
+
|
|
859
|
+
# ------------------------------------------------------------------
|
|
860
|
+
# Expression parsing (precedence climbing)
|
|
861
|
+
# ------------------------------------------------------------------
|
|
862
|
+
#
|
|
863
|
+
# Precedence (lowest to highest):
|
|
864
|
+
# 1. Ternary: ? :
|
|
865
|
+
# 2. Logical OR: or
|
|
866
|
+
# 3. Logical AND: and
|
|
867
|
+
# 4. Logical NOT: not (unary)
|
|
868
|
+
# 5. Comparison: == != > < >= <=
|
|
869
|
+
# 6. Addition: + -
|
|
870
|
+
# 7. Multiplication: * / %
|
|
871
|
+
# 8. Unary: - +
|
|
872
|
+
# 9. Postfix: [n] .member (args)
|
|
873
|
+
# 10. Primary: literals, identifiers, (expr)
|
|
874
|
+
|
|
875
|
+
# Tokens that can appear as member names (after dot)
|
|
876
|
+
_MEMBER_NAME_TOKENS = {
    # Ordinary identifiers.
    TokenType.IDENT,
    # Type keywords are also accepted as the name after a dot.
    TokenType.TYPE_INT,
    TokenType.TYPE_FLOAT,
    TokenType.TYPE_BOOL,
    TokenType.TYPE_STRING,
}
|
|
881
|
+
|
|
882
|
+
def _parse_expression(self):
    """Entry point for expression parsing.

    ``if`` and ``switch`` may appear in expression position (e.g. as the
    right-hand side of an assignment); anything else goes through the
    precedence chain starting at the ternary level.
    """
    keyword_forms = (
        (TokenType.IF, self._parse_if_expr),
        (TokenType.SWITCH, self._parse_switch_expr),
    )
    for keyword, handler in keyword_forms:
        if self._check(keyword):
            return handler()
    return self._parse_ternary()
|
|
889
|
+
|
|
890
|
+
def _parse_if_expr(self):
    """Parse ``if``/``else`` in expression position.

    Delegates to the statement parser, so the result is an ``IfStmt``
    node.
    """
    if_node = self._parse_if_stmt()
    return if_node
|
|
893
|
+
|
|
894
|
+
def _parse_switch_expr(self):
    """Parse ``switch`` in expression position.

    Delegates to the statement parser, so the result is a ``SwitchStmt``
    node.
    """
    switch_node = self._parse_switch_stmt()
    return switch_node
|
|
897
|
+
|
|
898
|
+
def _parse_ternary(self):
    """Parse ``cond ? a : b``; without a ``?`` return the operand as is.

    Newlines are skipped after ``?``, around ``:`` and before the false
    branch. Both branches are full expressions, so ternaries can nest on
    either side.
    """
    head_tok = self._current()
    test = self._parse_or()
    if not self._match(TokenType.QUESTION):
        return test

    self._skip_newlines()
    when_true = self._parse_expression()
    self._skip_newlines()
    self._consume(TokenType.COLON, "Expected ':' in ternary")
    self._skip_newlines()
    when_false = self._parse_expression()

    choice = Ternary(condition=test, true_val=when_true, false_val=when_false)
    return self._set_loc(choice, head_tok)
|
|
911
|
+
|
|
912
|
+
def _try_line_continuation(self, *op_types: TokenType) -> bool:
    """Detect a wrapped line of the form NEWLINE, INDENT, operator.

    On success the NEWLINE and INDENT tokens are consumed, the parser is
    left positioned on the operator token, and ``True`` is returned.
    Otherwise the token position is restored and ``False`` is returned.

    ``op_types`` lists the operator token types that count as a
    continuation for the caller.
    """
    rewind_to = self.pos
    if self._check(TokenType.NEWLINE):
        self._advance()
        if self._check(TokenType.INDENT):
            self._advance()
            found_operator = self._current().type in op_types
            if found_operator:
                return True
    # Not a continuation: undo any lookahead consumed above.
    self.pos = rewind_to
    return False
|
|
925
|
+
|
|
926
|
+
def _parse_or(self):
    """Parse a left-associative chain of ``or`` operations.

    Also accepts an ``or`` that starts a wrapped line (see
    ``_try_line_continuation``). Once such a continuation has been
    entered, NEWLINE tokens between operands are skipped, and one
    trailing DEDENT is consumed if present when the chain ends.
    """
    first_tok = self._current()
    acc = self._parse_and()
    wrapped = False
    while True:
        self._skip_newlines_in_continuation(wrapped)
        if not self._match(TokenType.OR):
            # No inline 'or'; the only other way to go on is to enter a
            # continuation, which is attempted at most once.
            if wrapped or not self._try_line_continuation(TokenType.OR):
                break
            wrapped = True
            self._advance()  # consume OR
        rhs = self._parse_and()
        acc = BinOp(left=acc, op="or", right=rhs)
        self._set_loc(acc, first_tok)
    if wrapped:
        self._match(TokenType.DEDENT)
    return acc
|
|
947
|
+
|
|
948
|
+
def _parse_and(self):
    """Parse a left-associative chain of ``and`` operations.

    Also accepts an ``and`` that starts a wrapped line (see
    ``_try_line_continuation``). Once such a continuation has been
    entered, NEWLINE tokens between operands are skipped, and one
    trailing DEDENT is consumed if present when the chain ends.
    """
    first_tok = self._current()
    acc = self._parse_not()
    wrapped = False
    while True:
        self._skip_newlines_in_continuation(wrapped)
        if not self._match(TokenType.AND):
            # No inline 'and'; the only other way to go on is to enter a
            # continuation, which is attempted at most once.
            if wrapped or not self._try_line_continuation(TokenType.AND):
                break
            wrapped = True
            self._advance()  # consume AND
        rhs = self._parse_not()
        acc = BinOp(left=acc, op="and", right=rhs)
        self._set_loc(acc, first_tok)
    if wrapped:
        self._match(TokenType.DEDENT)
    return acc
|
|
969
|
+
|
|
970
|
+
def _skip_newlines_in_continuation(self, in_continuation: bool) -> None:
    """Consume consecutive NEWLINE tokens, but only inside a continuation.

    When ``in_continuation`` is false this is a no-op.
    """
    if not in_continuation:
        return
    while self._check(TokenType.NEWLINE):
        self._advance()
|
|
975
|
+
|
|
976
|
+
def _parse_not(self):
    """Parse a prefix ``not`` (right-recursive), else a comparison."""
    if not self._check(TokenType.NOT):
        return self._parse_comparison()

    not_tok = self._current()
    self._advance()
    # Recurse so that `not not x` nests correctly.
    negated = UnaryOp(op="not", operand=self._parse_not())
    return self._set_loc(negated, not_tok)
|
|
984
|
+
|
|
985
|
+
def _parse_comparison(self):
    """Parse a left-associative chain of comparison operators.

    Operands are additive expressions; supported operators are
    ``== != > < >= <=``.
    """
    first_tok = self._current()
    acc = self._parse_addition()
    symbols = {
        TokenType.EQEQ: "==",
        TokenType.NOTEQ: "!=",
        TokenType.GT: ">",
        TokenType.LT: "<",
        TokenType.GE: ">=",
        TokenType.LE: "<=",
    }
    while self._current().type in symbols:
        op_tok = self._advance()
        rhs = self._parse_addition()
        acc = BinOp(left=acc, op=symbols[op_tok.type], right=rhs)
        self._set_loc(acc, first_tok)
    return acc
|
|
999
|
+
|
|
1000
|
+
def _parse_addition(self):
    """Parse a left-associative chain of binary ``+`` / ``-``."""
    first_tok = self._current()
    acc = self._parse_multiplication()
    additive = (TokenType.PLUS, TokenType.MINUS)
    while self._current().type in additive:
        op_tok = self._advance()
        if op_tok.type == TokenType.PLUS:
            symbol = "+"
        else:
            symbol = "-"
        rhs = self._parse_multiplication()
        acc = BinOp(left=acc, op=symbol, right=rhs)
        self._set_loc(acc, first_tok)
    return acc
|
|
1009
|
+
|
|
1010
|
+
def _parse_multiplication(self):
    """Parse a left-associative chain of ``*``, ``/`` and ``%``."""
    first_tok = self._current()
    acc = self._parse_unary()
    symbols = {
        TokenType.STAR: "*",
        TokenType.SLASH: "/",
        TokenType.PERCENT: "%",
    }
    while self._current().type in symbols:
        op_tok = self._advance()
        rhs = self._parse_unary()
        acc = BinOp(left=acc, op=symbols[op_tok.type], right=rhs)
        self._set_loc(acc, first_tok)
    return acc
|
|
1020
|
+
|
|
1021
|
+
def _parse_unary(self):
    """Parse a prefix ``-`` or ``+`` (right-recursive), else a postfix."""
    # MINUS is tested before PLUS.
    for sign_type, symbol in ((TokenType.MINUS, "-"), (TokenType.PLUS, "+")):
        if self._check(sign_type):
            sign_tok = self._current()
            self._advance()
            signed = UnaryOp(op=symbol, operand=self._parse_unary())
            return self._set_loc(signed, sign_tok)
    return self._parse_postfix()
|
|
1035
|
+
|
|
1036
|
+
def _parse_postfix(self):
    """Parse a primary followed by any number of postfix operations.

    Handled suffixes, repeated until none applies:

    * ``expr[index]``        -> ``Subscript``
    * ``expr.member``        -> ``MemberAccess``
    * ``expr.member(args)``  -> ``FuncCall`` with a ``MemberAccess`` callee
    * ``expr(args)``         -> ``FuncCall`` (only where
      ``_is_call_position`` allows it)

    Template arguments found after a member name are stored in the
    ``MemberAccess`` node's ``annotations["template_args"]``.
    """
    node = self._parse_primary()
    while True:
        # Subscript: expr[index]
        if self._check(TokenType.LBRACKET):
            bracket_tok = self._current()
            self._advance()
            idx = self._parse_expression()
            self._consume(TokenType.RBRACKET)
            node = Subscript(object=node, index=idx)
            self._set_loc(node, bracket_tok)
            continue

        # Member access: expr.member or expr.member(args)
        if self._check(TokenType.DOT):
            self._advance()
            name_tok = self._consume_member_name()

            generics = []
            if self._looks_like_call_template_args():
                generics = self._parse_template_args()

            access = MemberAccess(object=node, member=name_tok.value)
            self._set_loc(access, name_tok)
            if generics:
                access.annotations = {"template_args": generics}

            if self._check(TokenType.LPAREN):
                # The member access is the callee of a call.
                node = self._parse_call_with_callee(access)
            else:
                node = access
            continue

        # Direct call: expr(args), needed for identifiers followed by (
        if self._check(TokenType.LPAREN) and self._is_call_position(node):
            node = self._parse_call_with_callee(node)
            continue

        break
    return node
|
|
1076
|
+
|
|
1077
|
+
def _is_call_position(self, expr) -> bool:
    """Return whether a following ``(`` should start a call on ``expr``.

    Only ``Identifier`` and ``MemberAccess`` nodes are treated as callable.
    """
    callable_kinds = (Identifier, MemberAccess)
    return isinstance(expr, callable_kinds)
|
|
1080
|
+
|
|
1081
|
+
def _consume_member_name(self) -> Token:
    """Consume and return the token naming a member after a dot.

    Any token type in ``_MEMBER_NAME_TOKENS`` (identifiers and the type
    keywords listed there) is accepted. Anything else is passed to
    ``_consume`` with the message "Expected member name".
    """
    if self._current().type not in self._MEMBER_NAME_TOKENS:
        return self._consume(TokenType.IDENT, "Expected member name")
    return self._advance()
|
|
1086
|
+
|
|
1087
|
+
def _parse_call_with_callee(self, callee) -> FuncCall:
    """Parse ``(args, kwargs)`` following an already-parsed callee.

    The resulting ``FuncCall`` is located at the opening parenthesis.
    """
    paren_tok = self._current()
    self._consume(TokenType.LPAREN)
    positional, named = self._parse_call_args()
    self._consume(TokenType.RPAREN)
    call = FuncCall(callee=callee, args=positional, kwargs=named)
    return self._set_loc(call, paren_tok)
|
|
1095
|
+
|
|
1096
|
+
def _parse_call_args(self) -> tuple[list, dict]:
    """Parse call arguments up to, but not including, the closing ``)``.

    Returns ``(args, kwargs)``: positional expressions in source order and
    a mapping of keyword name to expression. A repeated keyword overwrites
    the earlier value. Commas between arguments are consumed when present
    but are not required.
    """
    positional: list = []
    named: dict = {}

    while not (self._check(TokenType.RPAREN) or self._at_end()):
        # Keyword argument: IDENT '=' value, where the token after '=' is
        # not another '='.
        is_keyword = (
            self._current().type == TokenType.IDENT
            and self._peek().type == TokenType.EQUALS
            and self._peek(2).type != TokenType.EQUALS
        )
        if is_keyword:
            name_tok = self._advance()
            self._advance()  # consume =
            value = self._parse_expression()
            named[name_tok.value] = value
        else:
            positional.append(self._parse_expression())

        self._match(TokenType.COMMA)

    return positional, named
|
|
1116
|
+
|
|
1117
|
+
# -- Primary expressions --
|
|
1118
|
+
|
|
1119
|
+
def _parse_primary(self):
    """Parse the highest-precedence expression forms.

    Handles, keyed on the current token type: ``[...]`` literals
    (``TupleLiteral``), parenthesized expressions (the inner node is
    returned unchanged), number / string / boolean / color literals,
    ``na`` (as a literal, or as a call when followed by ``(``), type
    keywords used as values, and identifiers.

    Raises:
        ParseError: if the current token cannot start a primary
            expression.
    """
    cur = self._current()
    kind = cur.type

    # Array/tuple literal: [expr, expr, ...]; all elements are kept.
    if kind == TokenType.LBRACKET:
        self._advance()
        items = []
        while not (self._check(TokenType.RBRACKET) or self._at_end()):
            items.append(self._parse_expression())
            self._match(TokenType.COMMA)
        self._consume(TokenType.RBRACKET)
        return self._set_loc(TupleLiteral(elements=items), cur)

    # Parenthesized expression
    if kind == TokenType.LPAREN:
        self._advance()
        inner = self._parse_expression()
        self._consume(TokenType.RPAREN)
        return inner

    # Number literal: a '.', 'e' or 'E' in the text makes it a float.
    if kind == TokenType.NUMBER:
        self._advance()
        text = cur.value
        looks_float = "." in text or "e" in text or "E" in text
        number = float(text) if looks_float else int(text)
        return self._set_loc(NumberLiteral(value=number), cur)

    # String literal
    if kind == TokenType.STRING:
        self._advance()
        return self._set_loc(StringLiteral(value=cur.value), cur)

    # Boolean literals
    if kind == TokenType.TRUE:
        self._advance()
        return self._set_loc(BoolLiteral(value=True), cur)
    if kind == TokenType.FALSE:
        self._advance()
        return self._set_loc(BoolLiteral(value=False), cur)

    # Color literal
    if kind == TokenType.COLOR:
        self._advance()
        return self._set_loc(ColorLiteral(value=cur.value), cur)

    # na literal (can also be used as a function: na(x))
    if kind == TokenType.NA:
        self._advance()
        if not self._check(TokenType.LPAREN):
            return self._set_loc(NaLiteral(), cur)
        na_callee = Identifier(name="na")
        self._set_loc(na_callee, cur)
        return self._parse_call_with_callee(na_callee)

    # Type keywords used as values (e.g., type=float in input kwargs)
    if kind in TYPE_KEYWORDS:
        self._advance()
        return self._set_loc(Identifier(name=cur.value), cur)

    # Identifier (a following call is handled by the postfix parser)
    if kind == TokenType.IDENT:
        self._advance()
        return self._set_loc(Identifier(name=cur.value), cur)

    raise ParseError(
        f"Unexpected token {cur.type.name}({cur.value!r}) at L{cur.line}:{cur.col}"
    )
|