pineforge-codegen 0.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pineforge_codegen/__init__.py +53 -0
- pineforge_codegen/analyzer/__init__.py +60 -0
- pineforge_codegen/analyzer/base.py +1563 -0
- pineforge_codegen/analyzer/call_handlers.py +895 -0
- pineforge_codegen/analyzer/contracts.py +163 -0
- pineforge_codegen/analyzer/diagnostics.py +118 -0
- pineforge_codegen/analyzer/tables.py +204 -0
- pineforge_codegen/analyzer/types.py +250 -0
- pineforge_codegen/ast_nodes.py +293 -0
- pineforge_codegen/codegen/__init__.py +78 -0
- pineforge_codegen/codegen/base.py +1381 -0
- pineforge_codegen/codegen/emit_top.py +875 -0
- pineforge_codegen/codegen/helpers.py +163 -0
- pineforge_codegen/codegen/helpers_syminfo.py +134 -0
- pineforge_codegen/codegen/input.py +189 -0
- pineforge_codegen/codegen/security.py +1564 -0
- pineforge_codegen/codegen/ta.py +298 -0
- pineforge_codegen/codegen/tables.py +613 -0
- pineforge_codegen/codegen/types.py +573 -0
- pineforge_codegen/codegen/visit_call.py +1305 -0
- pineforge_codegen/codegen/visit_expr.py +701 -0
- pineforge_codegen/codegen/visit_stmt.py +729 -0
- pineforge_codegen/errors.py +98 -0
- pineforge_codegen/lexer.py +531 -0
- pineforge_codegen/parser.py +1198 -0
- pineforge_codegen/pragmas.py +117 -0
- pineforge_codegen/signatures.py +808 -0
- pineforge_codegen/support_checker.py +1111 -0
- pineforge_codegen/symbols.py +118 -0
- pineforge_codegen/tokens.py +406 -0
- pineforge_codegen/tv_input_choices.py +86 -0
- pineforge_codegen-0.6.5.dist-info/METADATA +462 -0
- pineforge_codegen-0.6.5.dist-info/RECORD +35 -0
- pineforge_codegen-0.6.5.dist-info/WHEEL +4 -0
- pineforge_codegen-0.6.5.dist-info/licenses/LICENSE +197 -0
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from enum import Enum
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Level(Enum):
    """Severity attached to a diagnostic.

    The string value is what ``CompileError.format`` prints in front of the
    phase, e.g. ``error[ANALYZER]: ...``.
    """

    ERROR = "error"
    WARNING = "warning"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Phase(Enum):
    """Compiler stage that produced a diagnostic.

    The string value is rendered inside the brackets of the diagnostic
    header, e.g. ``error[LEXER]: ...``.
    """

    LEXER = "LEXER"
    PARSER = "PARSER"
    ANALYZER = "ANALYZER"
    CODEGEN = "CODEGEN"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class SourceLocation:
    """Position of a diagnostic: a column span on one line of one file."""

    # Name shown in rendered diagnostics (``file:line:col``).
    file: str
    # 1-based line number; ``CompileError.format`` reads ``lines[line - 1]``.
    line: int
    # 1-based column where the span starts.
    col: int
    # Column just past the span; ``end_col - col`` carets are drawn (minimum one).
    end_col: int
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class Diagnostic:
    """One error or warning reported by a compiler phase."""

    # Severity (error / warning).
    level: Level
    # Stage that raised the diagnostic.
    phase: Phase
    # Where the problem is. NOTE(review): CompileError.__init__ also accepts
    # None here even though the annotation does not say so.
    location: SourceLocation
    # Human-readable description of the problem.
    message: str
    # Optional suggestion, rendered as a trailing "= hint: ..." line.
    hint: str | None = None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class CompileError(Exception):
    """Raised when compilation produced one or more diagnostics.

    ``str(err)`` is a one-line, ``"; "``-separated summary in which every
    diagnostic that has a location is prefixed with ``file:line:col``, so a
    bare ``except CompileError as e: print(e)`` never loses the line number.
    The diagnostics themselves stay available on ``self.diagnostics`` and
    :meth:`format` renders them rustc-style with source context.
    """

    def __init__(self, diagnostics: list[Diagnostic]):
        """Store *diagnostics* and build the plain-text exception message."""
        self.diagnostics = diagnostics
        super().__init__("; ".join(self._summarize(d) for d in diagnostics))

    @staticmethod
    def _summarize(d: Diagnostic) -> str:
        """Return ``file:line:col: message``, or just the message without a location."""
        loc = d.location
        if loc is None:
            return d.message
        return f"{loc.file}:{loc.line}:{loc.col}: {d.message}"

    def format(self, source: str) -> str:
        """Format diagnostics with source context, rustc-style.

        Args:
            source: Full text of the compiled file; the offending line of each
                diagnostic is looked up in it by 1-based line number.

        Returns:
            One block per diagnostic, separated by a blank line. A diagnostic
            without a location is rendered as its header (plus hint) only.
        """
        lines = source.splitlines()
        return "\n\n".join(self._render(d, lines) for d in self.diagnostics)

    @staticmethod
    def _render(d: Diagnostic, lines: list[str]) -> str:
        """Render a single diagnostic against the already-split source lines."""
        # Header: error[ANALYZER]: message
        header = f"{d.level.value}[{d.phase.value}]: {d.message}"

        loc = d.location
        if loc is None:
            # __init__ accepts location-less diagnostics, so format must too:
            # there is no source line to show, only the header.
            gutter = " "
            block = header
        else:
            # Arrow line: --> file:line:col (rustc-style)
            arrow = f" --> {loc.file}:{loc.line}:{loc.col}"

            # Gutter is as wide as the line number so the pipes line up.
            gutter = " " * len(str(loc.line))
            separator = f" {gutter}|"

            # Source line (1-based); left empty when the line is out of range.
            source_line = lines[loc.line - 1] if 1 <= loc.line <= len(lines) else ""

            # Underline: pad up to the 1-based column, then one caret per
            # column of the span (always at least one).
            underline = " " * (loc.col - 1) + "^" * max(1, loc.end_col - loc.col)

            code_line = f" {loc.line} | {source_line}"
            point_line = f" {gutter}| {underline}"
            block = "\n".join([header, arrow, separator, code_line, point_line])

        # Optional hint
        if d.hint:
            block += f"\n {gutter}= hint: {d.hint}"
        return block
|
|
@@ -0,0 +1,531 @@
|
|
|
1
|
+
"""Lexer for PineScript v6 source code.
|
|
2
|
+
|
|
3
|
+
Improvements over tokens.py:
|
|
4
|
+
- Token carries end_col for span tracking
|
|
5
|
+
- Scientific notation support (1.5e-3)
|
|
6
|
+
- Leading-dot float support (.5 -> 0.5)
|
|
7
|
+
- PERCENT_EQUALS operator (%=)
|
|
8
|
+
- IMPORT and METHOD keywords
|
|
9
|
+
- Uses Diagnostic/CompileError for malformed tokens instead of silent skips
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from dataclasses import dataclass
|
|
15
|
+
from enum import Enum, auto
|
|
16
|
+
|
|
17
|
+
from pineforge_codegen.errors import (
|
|
18
|
+
CompileError,
|
|
19
|
+
Diagnostic,
|
|
20
|
+
Level,
|
|
21
|
+
Phase,
|
|
22
|
+
SourceLocation,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class TokenType(Enum):
    """Every kind of token the lexer can emit.

    Values come from ``auto()``, so they follow declaration order; append new
    members rather than inserting them if the numeric values matter to you.
    """

    # --- Literals -----------------------------------------------------
    NUMBER = auto()
    STRING = auto()
    IDENT = auto()

    # --- Layout / structure -------------------------------------------
    NEWLINE = auto()
    INDENT = auto()
    DEDENT = auto()
    EOF_TOKEN = auto()

    # --- Delimiters ---------------------------------------------------
    LPAREN = auto()
    RPAREN = auto()
    LBRACKET = auto()
    RBRACKET = auto()
    COMMA = auto()
    DOT = auto()

    # --- Assignment operators -----------------------------------------
    EQUALS = auto()          # =
    COLON_EQUALS = auto()    # :=
    PLUS_EQUALS = auto()     # +=
    MINUS_EQUALS = auto()    # -=
    STAR_EQUALS = auto()     # *=
    SLASH_EQUALS = auto()    # /=
    PERCENT_EQUALS = auto()  # %=

    # --- Arithmetic operators -----------------------------------------
    PLUS = auto()
    MINUS = auto()
    STAR = auto()
    SLASH = auto()
    PERCENT = auto()

    # --- Comparison operators -----------------------------------------
    EQEQ = auto()   # ==
    NOTEQ = auto()  # !=
    GT = auto()     # >
    LT = auto()     # <
    GE = auto()     # >=
    LE = auto()     # <=

    # --- Logical operators (spelled as keywords) ----------------------
    AND = auto()
    OR = auto()
    NOT = auto()

    # --- Ternary ------------------------------------------------------
    QUESTION = auto()
    COLON = auto()

    # --- Arrow --------------------------------------------------------
    FAT_ARROW = auto()  # =>

    # --- Keywords -----------------------------------------------------
    IF = auto()
    ELSE = auto()
    FOR = auto()
    WHILE = auto()
    SWITCH = auto()
    BREAK = auto()
    CONTINUE = auto()
    VAR = auto()
    VARIP = auto()
    TO = auto()
    BY = auto()
    TRUE = auto()
    FALSE = auto()
    NA = auto()
    IMPORT = auto()
    METHOD = auto()
    IN = auto()

    # --- Color literal (#rrggbb / #rrggbbaa) --------------------------
    COLOR = auto()

    # --- Type keywords ------------------------------------------------
    TYPE_INT = auto()
    TYPE_FLOAT = auto()
    TYPE_BOOL = auto()
    TYPE_STRING = auto()
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@dataclass
class Token:
    """A lexed token together with the source span it came from."""

    type: TokenType
    value: str
    line: int
    col: int
    # 0 means "not supplied": __post_init__ then derives it from the value.
    end_col: int = 0

    def __post_init__(self) -> None:
        """Fill in ``end_col`` as ``col + len(value)`` when it was left at 0."""
        if self.end_col != 0:
            return
        self.end_col = self.col + len(self.value)

    def __repr__(self) -> str:
        """Compact form: ``Token(KIND, 'text', L<line>:<col>-<end_col>)``."""
        span = f"L{self.line}:{self.col}-{self.end_col}"
        return f"Token({self.type.name}, {self.value!r}, {span})"
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# Reserved words and the token type each one lexes to. Any identifier that is
# not a key here is emitted as TokenType.IDENT.
KEYWORDS: dict[str, TokenType] = {
    # control flow
    "if": TokenType.IF,
    "else": TokenType.ELSE,
    "for": TokenType.FOR,
    "while": TokenType.WHILE,
    "switch": TokenType.SWITCH,
    "break": TokenType.BREAK,
    "continue": TokenType.CONTINUE,
    # declarations and loop ranges
    "var": TokenType.VAR,
    "varip": TokenType.VARIP,
    "to": TokenType.TO,
    "by": TokenType.BY,
    # logical operators
    "not": TokenType.NOT,
    "and": TokenType.AND,
    "or": TokenType.OR,
    # literal keywords
    "true": TokenType.TRUE,
    "false": TokenType.FALSE,
    "na": TokenType.NA,
    # misc
    "import": TokenType.IMPORT,
    "method": TokenType.METHOD,
    "in": TokenType.IN,
    # built-in type names
    "int": TokenType.TYPE_INT,
    "float": TokenType.TYPE_FLOAT,
    "bool": TokenType.TYPE_BOOL,
    "string": TokenType.TYPE_STRING,
}
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class Lexer:
|
|
157
|
+
"""Converts PineScript v6 source string into a list of tokens."""
|
|
158
|
+
|
|
159
|
+
# Token types that, when they are the last token on a line, mean the
# statement carries on: no NEWLINE is emitted for that line and the next
# line's INDENT/DEDENT is suppressed.
CONTINUATION_TOKENS = {
    # logical
    TokenType.AND,
    TokenType.OR,
    # arithmetic
    TokenType.PLUS,
    TokenType.MINUS,
    TokenType.STAR,
    TokenType.SLASH,
    TokenType.PERCENT,
    # comparison
    TokenType.GT,
    TokenType.LT,
    TokenType.GE,
    TokenType.LE,
    TokenType.EQEQ,
    TokenType.NOTEQ,
    # ternary
    TokenType.QUESTION,
    TokenType.COLON,
    # separators
    TokenType.COMMA,
    TokenType.DOT,
    # assignment
    TokenType.EQUALS,
    TokenType.COLON_EQUALS,
    TokenType.PLUS_EQUALS,
    TokenType.MINUS_EQUALS,
    TokenType.STAR_EQUALS,
    TokenType.SLASH_EQUALS,
    TokenType.PERCENT_EQUALS,
}
|
|
175
|
+
|
|
176
|
+
def __init__(self, source: str, filename: str = "<input>") -> None:
|
|
177
|
+
self.source = source
|
|
178
|
+
self.filename = filename
|
|
179
|
+
self.pos = 0
|
|
180
|
+
self.line = 1
|
|
181
|
+
self.col = 1
|
|
182
|
+
self.tokens: list[Token] = []
|
|
183
|
+
self.indent_stack: list[int] = [0]
|
|
184
|
+
self.paren_depth = 0 # Track () and [] nesting to suppress NEWLINE/INDENT/DEDENT
|
|
185
|
+
self._in_continuation = False # True when current line is a continuation
|
|
186
|
+
self._diagnostics: list[Diagnostic] = []
|
|
187
|
+
|
|
188
|
+
def _peek(self, offset: int = 0) -> str:
|
|
189
|
+
idx = self.pos + offset
|
|
190
|
+
return self.source[idx] if idx < len(self.source) else "\0"
|
|
191
|
+
|
|
192
|
+
def _advance(self) -> str:
|
|
193
|
+
ch = self.source[self.pos]
|
|
194
|
+
self.pos += 1
|
|
195
|
+
if ch == "\n":
|
|
196
|
+
self.line += 1
|
|
197
|
+
self.col = 1
|
|
198
|
+
else:
|
|
199
|
+
self.col += 1
|
|
200
|
+
return ch
|
|
201
|
+
|
|
202
|
+
def _at_end(self) -> bool:
|
|
203
|
+
return self.pos >= len(self.source)
|
|
204
|
+
|
|
205
|
+
def _emit(self, tt: TokenType, value: str, line: int, col: int, end_col: int | None = None) -> None:
    """Append a token to ``self.tokens``.

    When *end_col* is omitted the span is taken to be as long as *value*.
    """
    stop = col + len(value) if end_col is None else end_col
    self.tokens.append(Token(tt, value, line, col, stop))
|
|
209
|
+
|
|
210
|
+
def _emit_diagnostic(self, message: str, line: int, col: int, end_col: int, hint: str | None = None) -> None:
    """Record a lexer error; ``tokenize`` raises them all together at the end."""
    self._diagnostics.append(
        Diagnostic(
            level=Level.ERROR,
            phase=Phase.LEXER,
            location=SourceLocation(file=self.filename, line=line, col=col, end_col=end_col),
            message=message,
            hint=hint,
        )
    )
|
|
214
|
+
|
|
215
|
+
def _skip_line(self) -> None:
|
|
216
|
+
while not self._at_end() and self.source[self.pos] != "\n":
|
|
217
|
+
self._advance()
|
|
218
|
+
if not self._at_end():
|
|
219
|
+
self._advance()
|
|
220
|
+
|
|
221
|
+
def _skip_comment(self) -> None:
|
|
222
|
+
while not self._at_end() and self.source[self.pos] != "\n":
|
|
223
|
+
self._advance()
|
|
224
|
+
|
|
225
|
+
def tokenize(self) -> list[Token]:
    """Lex the whole source and return the token list.

    Any indentation still open at the end is closed with DEDENT tokens and
    the list always ends with EOF_TOKEN.

    Raises:
        CompileError: if any diagnostic was recorded while lexing. All of
            them are reported together, after the full input has been scanned.
    """
    while not self._at_end():
        self._tokenize_line()

    # One DEDENT per indentation level that is still open.
    for _ in range(len(self.indent_stack) - 1):
        self.indent_stack.pop()
        self._emit(TokenType.DEDENT, "", self.line, self.col)

    self._emit(TokenType.EOF_TOKEN, "", self.line, self.col)

    if not self._diagnostics:
        return self.tokens
    raise CompileError(self._diagnostics)
|
|
239
|
+
|
|
240
|
+
def _tokenize_line(self) -> None:
    """Lex one physical line starting at the cursor.

    Blank lines and comment-only lines (including ``//@`` annotation lines)
    are consumed without emitting anything and without touching the
    indentation or continuation state.

    Otherwise the line goes through three steps:

    1. Indentation. Unless the line sits inside open ``()``/``[]`` or
       continues the previous line, its leading whitespace is turned into an
       indent level (number of tabs if any tab is present, else spaces // 4)
       and INDENT/DEDENT tokens are emitted against ``indent_stack``.
    2. Tokens. Everything up to the newline or a ``//`` comment is lexed.
    3. Line end. If brackets are balanced and the line produced something,
       either the line ends in a continuation token (the next line is then a
       continuation and no NEWLINE is emitted) or a NEWLINE token is emitted.
    """
    if self._at_end():
        return

    line_start = self.pos

    # Look past the leading whitespace, without consuming it, to classify
    # the line.
    probe = self.pos
    while probe < len(self.source) and self.source[probe] in (" ", "\t"):
        probe += 1
    if probe >= len(self.source) or self.source[probe] == "\n":
        # Blank line: consume it together with its newline.
        self._advance_to(min(probe + 1, len(self.source)))
        return
    if self.source[probe: probe + 2] == "//":
        # Comment-only line; "//@" annotation lines start with "//" as well.
        self._skip_line()
        return

    inside_brackets = self.paren_depth > 0
    self._skip_whitespace_inline()

    # Step 1: indentation is structural only outside brackets and outside a
    # continuation; everywhere else it is cosmetic.
    if not inside_brackets and not self._in_continuation:
        leading = self.source[line_start: self.pos]
        if "\t" in leading:
            indent_level = leading.count("\t")
        else:
            indent_level = len(leading) // 4

        if indent_level > self.indent_stack[-1]:
            self.indent_stack.append(indent_level)
            self._emit(TokenType.INDENT, "", self.line, 1)
        else:
            while len(self.indent_stack) > 1 and self.indent_stack[-1] > indent_level:
                self.indent_stack.pop()
                self._emit(TokenType.DEDENT, "", self.line, 1)

    # Step 2: tokens on this line.
    emitted_something = False
    while not self._at_end() and self.source[self.pos] != "\n":
        self._skip_whitespace_inline()
        if self._at_end() or self.source[self.pos] == "\n":
            break
        if self.source[self.pos: self.pos + 2] == "//":
            self._skip_comment()
            break
        emitted_something = True
        self._read_token()

    # Remember the line the terminator is on *before* consuming it. Deriving
    # it afterwards as ``self.line - 1`` is off by one when the input ends
    # without a trailing newline, because nothing was consumed in that case.
    newline_line = self.line
    if not self._at_end() and self.source[self.pos] == "\n":
        self._advance()

    # Step 3: decide how the line ends. This also runs for the line that
    # closes a multi-line bracket group, so ``f(a,\n  b) +\n  c`` continues
    # onto the third line instead of being cut off after ``+``.
    if emitted_something and self.paren_depth == 0:
        last_token = self.tokens[-1] if self.tokens else None
        if last_token is not None and last_token.type in self.CONTINUATION_TOKENS:
            # Next line is a continuation: no NEWLINE.
            self._in_continuation = True
        else:
            self._in_continuation = False
            # Column is taken after the newline was consumed (unchanged
            # from the previous behaviour); only the line number is fixed.
            self._emit(TokenType.NEWLINE, "\\n", newline_line, self.col)
    else:
        self._in_continuation = False
|
|
331
|
+
|
|
332
|
+
def _advance_to(self, target: int) -> None:
|
|
333
|
+
while self.pos < target and self.pos < len(self.source):
|
|
334
|
+
self._advance()
|
|
335
|
+
|
|
336
|
+
def _skip_whitespace_inline(self) -> None:
|
|
337
|
+
while not self._at_end() and self.source[self.pos] in (" ", "\t"):
|
|
338
|
+
self._advance()
|
|
339
|
+
|
|
340
|
+
def _read_token(self) -> None:
    """Lex exactly one token (or report one bad character) at the cursor.

    The checks run in a fixed order: number, leading-dot number, string,
    color literal, identifier/keyword, two-character operator,
    single-character operator. Anything else is reported as an unexpected
    character and skipped. Opening and closing ``()``/``[]`` adjust
    ``paren_depth``, which never drops below zero.
    """
    src = self.source
    here = self.pos
    ch = src[here]
    start_line, start_col = self.line, self.col

    # Numbers that start with a digit.
    if ch.isdigit():
        self._read_number(start_line, start_col)
        return

    # Leading-dot float: .5, .123
    if ch == "." and here + 1 < len(src) and src[here + 1].isdigit():
        self._read_leading_dot_number(start_line, start_col)
        return

    # Strings, double- or single-quoted.
    if ch == '"':
        self._read_string(start_line, start_col)
        return
    if ch == "'":
        self._read_string_single(start_line, start_col)
        return

    # Color literals (#rrggbb or #rrggbbaa). Any run of hex digits is
    # accepted here; only a bare '#' is rejected.
    if ch == "#":
        self._advance()  # consume #
        hex_digits = []
        while not self._at_end() and self.source[self.pos] in "0123456789abcdefABCDEF":
            hex_digits.append(self._advance())
        if hex_digits:
            literal = "#" + "".join(hex_digits)
            self._emit(TokenType.COLOR, literal, start_line, start_col, start_col + len(literal))
        else:
            self._emit_diagnostic(
                "Invalid color literal: expected hex digits after '#'",
                start_line, start_col, start_col + 1,
                hint="Color literals must be #RRGGBB or #RRGGBBAA",
            )
        return

    # Identifiers / keywords.
    if ch.isalpha() or ch == "_":
        self._read_ident(start_line, start_col)
        return

    # Two-character operators must be tried before single-character ones.
    two_char_ops: dict[str, TokenType] = {
        ":=": TokenType.COLON_EQUALS,
        "==": TokenType.EQEQ,
        "!=": TokenType.NOTEQ,
        ">=": TokenType.GE,
        "<=": TokenType.LE,
        "=>": TokenType.FAT_ARROW,
        "+=": TokenType.PLUS_EQUALS,
        "-=": TokenType.MINUS_EQUALS,
        "*=": TokenType.STAR_EQUALS,
        "/=": TokenType.SLASH_EQUALS,
        "%=": TokenType.PERCENT_EQUALS,
    }
    # At the last character the slice is one character long and so cannot
    # match any two-character key.
    pair = src[here: here + 2]
    pair_type = two_char_ops.get(pair)
    if pair_type is not None:
        self._advance()
        self._advance()
        self._emit(pair_type, pair, start_line, start_col, start_col + 2)
        return

    # Single-character operators and delimiters.
    singles: dict[str, TokenType] = {
        "(": TokenType.LPAREN,
        ")": TokenType.RPAREN,
        "[": TokenType.LBRACKET,
        "]": TokenType.RBRACKET,
        ",": TokenType.COMMA,
        ".": TokenType.DOT,
        "=": TokenType.EQUALS,
        "+": TokenType.PLUS,
        "-": TokenType.MINUS,
        "*": TokenType.STAR,
        "/": TokenType.SLASH,
        "%": TokenType.PERCENT,
        ">": TokenType.GT,
        "<": TokenType.LT,
        "?": TokenType.QUESTION,
        ":": TokenType.COLON,
    }
    single_type = singles.get(ch)
    if single_type is not None:
        self._advance()
        if single_type in (TokenType.LPAREN, TokenType.LBRACKET):
            self.paren_depth += 1
        elif single_type in (TokenType.RPAREN, TokenType.RBRACKET):
            # A stray closer must not push the depth negative.
            self.paren_depth = max(0, self.paren_depth - 1)
        self._emit(single_type, ch, start_line, start_col, start_col + 1)
        return

    # Unknown character: report it, then step over it so lexing continues.
    self._emit_diagnostic(
        f"Unexpected character: {ch!r}",
        start_line, start_col, start_col + 1,
    )
    self._advance()
|
|
441
|
+
|
|
442
|
+
def _read_number(self, start_line: int, start_col: int) -> None:
    """Lex a number that starts with a digit.

    Accepts an integer part, an optional fractional part and an optional
    ``e``/``E`` exponent with optional sign. An exponent marker that is not
    followed by digits is reported as a diagnostic; the NUMBER token is
    emitted either way, with the text exactly as consumed.
    """
    chars: list[str] = []

    def take_digits() -> None:
        # Consume a (possibly empty) run of digits into ``chars``.
        while not self._at_end() and self.source[self.pos].isdigit():
            chars.append(self._advance())

    # Integer part
    take_digits()

    # Optional fractional part. The dot belongs to the number unless an
    # identifier character follows it: "0." is a float, but in "0.member"
    # the dot must be left for the member access.
    if not self._at_end() and self.source[self.pos] == ".":
        after_dot = self.source[self.pos + 1] if self.pos + 1 < len(self.source) else ""
        if after_dot.isdigit() or not (after_dot.isalpha() or after_dot == "_"):
            chars.append(self._advance())  # the '.'
            take_digits()

    # Optional exponent
    if not self._at_end() and self.source[self.pos] in ("e", "E"):
        chars.append(self._advance())  # 'e' / 'E'
        if not self._at_end() and self.source[self.pos] in ("+", "-"):
            chars.append(self._advance())  # sign
        before = len(chars)
        take_digits()
        if len(chars) == before:
            # Exponent marker without digits.
            self._emit_diagnostic(
                "Malformed scientific notation: expected digits after exponent",
                start_line, start_col, self.col,
                hint="Example: 1.5e-3 or 2E10",
            )

    text = "".join(chars)
    self._emit(TokenType.NUMBER, text, start_line, start_col, start_col + len(text))
|
|
478
|
+
|
|
479
|
+
def _read_leading_dot_number(self, start_line: int, start_col: int) -> None:
    """Read a leading-dot float like .5, normalising to '0.5'.

    The token *value* gets a ``0`` in front of the dot, but the token *span*
    covers the characters actually consumed from the source. The span must
    therefore not be derived from the normalised value, which is one
    character longer than the source text.

    An exponent marker that is not followed by digits is reported with the
    same diagnostic ``_read_number`` uses. The already consumed ``e`` (and
    sign) are left out of the value so it stays a well-formed number.
    """
    self._advance()  # consume '.'
    digits: list[str] = []
    while not self._at_end() and self.source[self.pos].isdigit():
        digits.append(self._advance())

    # Optional exponent
    if not self._at_end() and self.source[self.pos] in ("e", "E"):
        exponent: list[str] = [self._advance()]  # 'e' / 'E'
        if not self._at_end() and self.source[self.pos] in ("+", "-"):
            exponent.append(self._advance())
        if not self._at_end() and self.source[self.pos].isdigit():
            while not self._at_end() and self.source[self.pos].isdigit():
                exponent.append(self._advance())
            digits.extend(exponent)
        else:
            # The marker has been consumed; dropping it without a word would
            # silently change what the user wrote.
            self._emit_diagnostic(
                "Malformed scientific notation: expected digits after exponent",
                start_line, start_col, self.col,
                hint="Example: 1.5e-3 or 2E10",
            )

    value = "0." + "".join(digits)
    # Nothing consumed above can be a newline, so self.col is the column
    # just past the last consumed character on the starting line.
    self._emit(TokenType.NUMBER, value, start_line, start_col, self.col)
|
|
500
|
+
|
|
501
|
+
def _read_string(self, start_line: int, start_col: int) -> None:
    """Lex a double-quoted string starting at the cursor.

    A backslash makes the following character part of the string verbatim
    (so ``\\"`` does not end it); the backslash itself is dropped from the
    token value. If the input ends before a closing quote is found, a
    diagnostic is recorded at the opening quote. The STRING token is still
    emitted so lexing can finish and report every error at once.

    NOTE(review): escapes such as ``\\n`` are not translated (the value gets
    a plain ``n``) and a string may run across newlines; both are left as
    they were. Confirm against what the code generator expects.
    """
    self._advance()  # consume opening "
    chars: list[str] = []
    while not self._at_end() and self.source[self.pos] != '"':
        if self.source[self.pos] == "\\" and self.pos + 1 < len(self.source):
            self._advance()  # skip backslash
        chars.append(self._advance())

    if self._at_end():
        # Ran off the end of the input without seeing the closing quote.
        self._emit_diagnostic(
            "Unterminated string literal: missing closing '\"'",
            start_line, start_col, start_col + 1,
            hint="Add a closing double quote",
        )
    else:
        self._advance()  # consume closing "

    self._emit(TokenType.STRING, "".join(chars), start_line, start_col, self.col)
|
|
512
|
+
|
|
513
|
+
def _read_string_single(self, start_line: int, start_col: int) -> None:
    """Lex a single-quoted string starting at the cursor.

    A backslash makes the following character part of the string verbatim
    (so ``\\'`` does not end it); the backslash itself is dropped from the
    token value. If the input ends before a closing quote is found, a
    diagnostic is recorded at the opening quote. The STRING token is still
    emitted so lexing can finish and report every error at once.

    NOTE(review): escapes such as ``\\n`` are not translated (the value gets
    a plain ``n``) and a string may run across newlines; both are left as
    they were. Confirm against what the code generator expects.
    """
    self._advance()  # consume opening '
    chars: list[str] = []
    while not self._at_end() and self.source[self.pos] != "'":
        if self.source[self.pos] == "\\" and self.pos + 1 < len(self.source):
            self._advance()  # skip backslash
        chars.append(self._advance())

    if self._at_end():
        # Ran off the end of the input without seeing the closing quote.
        self._emit_diagnostic(
            "Unterminated string literal: missing closing \"'\"",
            start_line, start_col, start_col + 1,
            hint="Add a closing single quote",
        )
    else:
        self._advance()  # consume closing '

    self._emit(TokenType.STRING, "".join(chars), start_line, start_col, self.col)
|
|
524
|
+
|
|
525
|
+
def _read_ident(self, start_line: int, start_col: int) -> None:
    """Lex an identifier or keyword: a run of alphanumerics and underscores.

    Words found in ``KEYWORDS`` get their keyword token type; every other
    word is emitted as IDENT.
    """
    begin = self.pos
    while not self._at_end() and (self.source[self.pos].isalnum() or self.source[self.pos] == "_"):
        self._advance()
    # The consumed characters are contiguous, so slicing yields the word.
    word = self.source[begin: self.pos]
    self._emit(KEYWORDS.get(word, TokenType.IDENT), word, start_line, start_col, start_col + len(word))
|