pineforge-codegen 0.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. pineforge_codegen/__init__.py +53 -0
  2. pineforge_codegen/analyzer/__init__.py +60 -0
  3. pineforge_codegen/analyzer/base.py +1563 -0
  4. pineforge_codegen/analyzer/call_handlers.py +895 -0
  5. pineforge_codegen/analyzer/contracts.py +163 -0
  6. pineforge_codegen/analyzer/diagnostics.py +118 -0
  7. pineforge_codegen/analyzer/tables.py +204 -0
  8. pineforge_codegen/analyzer/types.py +250 -0
  9. pineforge_codegen/ast_nodes.py +293 -0
  10. pineforge_codegen/codegen/__init__.py +78 -0
  11. pineforge_codegen/codegen/base.py +1381 -0
  12. pineforge_codegen/codegen/emit_top.py +875 -0
  13. pineforge_codegen/codegen/helpers.py +163 -0
  14. pineforge_codegen/codegen/helpers_syminfo.py +134 -0
  15. pineforge_codegen/codegen/input.py +189 -0
  16. pineforge_codegen/codegen/security.py +1564 -0
  17. pineforge_codegen/codegen/ta.py +298 -0
  18. pineforge_codegen/codegen/tables.py +613 -0
  19. pineforge_codegen/codegen/types.py +573 -0
  20. pineforge_codegen/codegen/visit_call.py +1305 -0
  21. pineforge_codegen/codegen/visit_expr.py +701 -0
  22. pineforge_codegen/codegen/visit_stmt.py +729 -0
  23. pineforge_codegen/errors.py +98 -0
  24. pineforge_codegen/lexer.py +531 -0
  25. pineforge_codegen/parser.py +1198 -0
  26. pineforge_codegen/pragmas.py +117 -0
  27. pineforge_codegen/signatures.py +808 -0
  28. pineforge_codegen/support_checker.py +1111 -0
  29. pineforge_codegen/symbols.py +118 -0
  30. pineforge_codegen/tokens.py +406 -0
  31. pineforge_codegen/tv_input_choices.py +86 -0
  32. pineforge_codegen-0.6.5.dist-info/METADATA +462 -0
  33. pineforge_codegen-0.6.5.dist-info/RECORD +35 -0
  34. pineforge_codegen-0.6.5.dist-info/WHEEL +4 -0
  35. pineforge_codegen-0.6.5.dist-info/licenses/LICENSE +197 -0
@@ -0,0 +1,98 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from enum import Enum
5
+
6
+
7
class Level(Enum):
    """Severity attached to a :class:`Diagnostic`.

    The member value is the lowercase label that ``CompileError.format``
    prints at the start of each diagnostic header.
    """

    ERROR = "error"
    WARNING = "warning"
10
+
11
+
12
class Phase(Enum):
    """Stage that reported a :class:`Diagnostic`.

    The member value is the uppercase tag that ``CompileError.format``
    prints in square brackets after the severity.
    """

    LEXER = "LEXER"
    PARSER = "PARSER"
    ANALYZER = "ANALYZER"
    CODEGEN = "CODEGEN"
17
+
18
+
19
@dataclass
class SourceLocation:
    """Position of a diagnostic inside a source file (single-line span)."""

    # Name of the file, exactly as it should be shown to the user.
    file: str
    # Line number; ``CompileError.format`` treats it as 1-based.
    line: int
    # Column where the span starts; ``CompileError.format`` treats it as 1-based.
    col: int
    # Column where the span stops; the underline is ``end_col - col`` wide.
    end_col: int
25
+
26
+
27
@dataclass
class Diagnostic:
    """One error or warning, with where it happened and an optional hint."""

    # Severity of the problem.
    level: Level
    # Stage that reported the problem.
    phase: Phase
    # Where in the source the problem was found.
    location: SourceLocation
    # Human-readable description of the problem.
    message: str
    # Optional suggestion; rendered as an extra "= hint:" line when truthy.
    hint: str | None = None
34
+
35
+
36
class CompileError(Exception):
    """Exception that carries one or more :class:`Diagnostic` objects.

    ``str(err)`` is a plain one-line summary; :meth:`format` produces the
    multi-line, rustc-style rendering with source context.
    """

    def __init__(self, diagnostics: list[Diagnostic]):
        """Store *diagnostics* and build the plain-text exception message.

        Each diagnostic is prefixed with its ``file:line:col`` so the location
        is reachable from ``str(err)`` alone: a bare
        ``except CompileError as e: print(e)`` must not swallow the line
        number. Diagnostics without a location contribute only their message.
        """
        self.diagnostics = diagnostics
        messages = []
        for d in diagnostics:
            loc = d.location
            if loc is None:
                messages.append(d.message)
            else:
                messages.append(f"{loc.file}:{loc.line}:{loc.col}: {d.message}")
        super().__init__("; ".join(messages))

    def format(self, source: str) -> str:
        """Format diagnostics with source context, rustc-style.

        Args:
            source: Full text of the compiled file; used to quote the
                offending line of each diagnostic.

        Returns:
            One block per diagnostic, separated by a blank line. A block is
            the header (``level[PHASE]: message``), the ``-->`` location
            line, the quoted source line with a ``^`` underline beneath the
            span, and an optional ``= hint:`` line. A diagnostic whose
            location is ``None`` is rendered as header (plus hint) only.
        """
        lines = source.splitlines()
        parts: list[str] = []

        for d in self.diagnostics:
            loc = d.location
            header = f"{d.level.value}[{d.phase.value}]: {d.message}"

            if loc is None:
                # Same tolerance as __init__: no location, so no source context.
                gutter = ""
                block = header
            else:
                # Blank gutter as wide as the line number, so every "|"
                # sits in the same column as the one on the code line.
                gutter = " " * len(str(loc.line))

                # Quote the source line (1-based); empty if out of range.
                source_line = ""
                if 1 <= loc.line <= len(lines):
                    source_line = lines[loc.line - 1]

                # Spaces up to the 1-based start column, then at least one "^".
                underline = " " * (loc.col - 1) + "^" * max(1, loc.end_col - loc.col)

                block = "\n".join(
                    [
                        header,
                        f"  --> {loc.file}:{loc.line}:{loc.col}",
                        f"  {gutter}|",
                        f" {loc.line} | {source_line}",
                        f"  {gutter}| {underline}",
                    ]
                )

            if d.hint:
                block += f"\n  {gutter}= hint: {d.hint}"

            parts.append(block)

        return "\n\n".join(parts)
@@ -0,0 +1,531 @@
1
+ """Lexer for PineScript v6 source code.
2
+
3
+ Improvements over tokens.py:
4
+ - Token carries end_col for span tracking
5
+ - Scientific notation support (1.5e-3)
6
+ - Leading-dot float support (.5 -> 0.5)
7
+ - PERCENT_EQUALS operator (%=)
8
+ - IMPORT and METHOD keywords
9
+ - Uses Diagnostic/CompileError for malformed tokens instead of silent skips
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from dataclasses import dataclass
15
+ from enum import Enum, auto
16
+
17
+ from pineforge_codegen.errors import (
18
+ CompileError,
19
+ Diagnostic,
20
+ Level,
21
+ Phase,
22
+ SourceLocation,
23
+ )
24
+
25
+
26
+ class TokenType(Enum):
27
+ # Literals
28
+ NUMBER = auto()
29
+ STRING = auto()
30
+ IDENT = auto()
31
+
32
+ # Structure
33
+ NEWLINE = auto()
34
+ INDENT = auto()
35
+ DEDENT = auto()
36
+ EOF_TOKEN = auto()
37
+
38
+ # Delimiters
39
+ LPAREN = auto()
40
+ RPAREN = auto()
41
+ LBRACKET = auto()
42
+ RBRACKET = auto()
43
+ COMMA = auto()
44
+ DOT = auto()
45
+
46
+ # Assignment
47
+ EQUALS = auto()
48
+ COLON_EQUALS = auto()
49
+ PLUS_EQUALS = auto()
50
+ MINUS_EQUALS = auto()
51
+ STAR_EQUALS = auto()
52
+ SLASH_EQUALS = auto()
53
+ PERCENT_EQUALS = auto()
54
+
55
+ # Arithmetic
56
+ PLUS = auto()
57
+ MINUS = auto()
58
+ STAR = auto()
59
+ SLASH = auto()
60
+ PERCENT = auto()
61
+
62
+ # Comparison
63
+ EQEQ = auto() # ==
64
+ NOTEQ = auto() # !=
65
+ GT = auto() # >
66
+ LT = auto() # <
67
+ GE = auto() # >=
68
+ LE = auto() # <=
69
+
70
+ # Logical (keywords)
71
+ AND = auto()
72
+ OR = auto()
73
+ NOT = auto()
74
+
75
+ # Ternary
76
+ QUESTION = auto()
77
+ COLON = auto()
78
+
79
+ # Arrow
80
+ FAT_ARROW = auto() # =>
81
+
82
+ # Keywords
83
+ IF = auto()
84
+ ELSE = auto()
85
+ FOR = auto()
86
+ WHILE = auto()
87
+ SWITCH = auto()
88
+ BREAK = auto()
89
+ CONTINUE = auto()
90
+ VAR = auto()
91
+ VARIP = auto()
92
+ TO = auto()
93
+ BY = auto()
94
+ TRUE = auto()
95
+ FALSE = auto()
96
+ NA = auto()
97
+ IMPORT = auto()
98
+ METHOD = auto()
99
+ IN = auto()
100
+
101
+ # Color literal (#rrggbb / #rrggbbaa)
102
+ COLOR = auto()
103
+
104
+ # Type keywords
105
+ TYPE_INT = auto()
106
+ TYPE_FLOAT = auto()
107
+ TYPE_BOOL = auto()
108
+ TYPE_STRING = auto()
109
+
110
+
111
+ @dataclass
112
+ class Token:
113
+ type: TokenType
114
+ value: str
115
+ line: int
116
+ col: int
117
+ end_col: int = 0
118
+
119
+ def __post_init__(self) -> None:
120
+ # Default end_col to col + len(value) if not explicitly set
121
+ if self.end_col == 0:
122
+ self.end_col = self.col + len(self.value)
123
+
124
+ def __repr__(self) -> str:
125
+ return f"Token({self.type.name}, {self.value!r}, L{self.line}:{self.col}-{self.end_col})"
126
+
127
+
128
+ KEYWORDS: dict[str, TokenType] = {
129
+ "if": TokenType.IF,
130
+ "else": TokenType.ELSE,
131
+ "for": TokenType.FOR,
132
+ "while": TokenType.WHILE,
133
+ "switch": TokenType.SWITCH,
134
+ "break": TokenType.BREAK,
135
+ "continue": TokenType.CONTINUE,
136
+ "var": TokenType.VAR,
137
+ "varip": TokenType.VARIP,
138
+ "to": TokenType.TO,
139
+ "by": TokenType.BY,
140
+ "not": TokenType.NOT,
141
+ "and": TokenType.AND,
142
+ "or": TokenType.OR,
143
+ "true": TokenType.TRUE,
144
+ "false": TokenType.FALSE,
145
+ "na": TokenType.NA,
146
+ "import": TokenType.IMPORT,
147
+ "method": TokenType.METHOD,
148
+ "in": TokenType.IN,
149
+ "int": TokenType.TYPE_INT,
150
+ "float": TokenType.TYPE_FLOAT,
151
+ "bool": TokenType.TYPE_BOOL,
152
+ "string": TokenType.TYPE_STRING,
153
+ }
154
+
155
+
156
+ class Lexer:
157
+ """Converts PineScript v6 source string into a list of tokens."""
158
+
159
+ # Token types that indicate line continuation when they end a line.
160
+ # If a line ends with one of these, the next line is a continuation
161
+ # and its INDENT/DEDENT should be suppressed.
162
+ CONTINUATION_TOKENS = {
163
+ TokenType.AND, TokenType.OR,
164
+ TokenType.PLUS, TokenType.MINUS, TokenType.STAR, TokenType.SLASH,
165
+ TokenType.PERCENT,
166
+ TokenType.GT, TokenType.LT, TokenType.GE, TokenType.LE,
167
+ TokenType.EQEQ, TokenType.NOTEQ,
168
+ TokenType.QUESTION, TokenType.COLON,
169
+ TokenType.COMMA, TokenType.DOT,
170
+ TokenType.EQUALS, TokenType.COLON_EQUALS,
171
+ TokenType.PLUS_EQUALS, TokenType.MINUS_EQUALS,
172
+ TokenType.STAR_EQUALS, TokenType.SLASH_EQUALS,
173
+ TokenType.PERCENT_EQUALS,
174
+ }
175
+
176
+ def __init__(self, source: str, filename: str = "<input>") -> None:
177
+ self.source = source
178
+ self.filename = filename
179
+ self.pos = 0
180
+ self.line = 1
181
+ self.col = 1
182
+ self.tokens: list[Token] = []
183
+ self.indent_stack: list[int] = [0]
184
+ self.paren_depth = 0 # Track () and [] nesting to suppress NEWLINE/INDENT/DEDENT
185
+ self._in_continuation = False # True when current line is a continuation
186
+ self._diagnostics: list[Diagnostic] = []
187
+
188
+ def _peek(self, offset: int = 0) -> str:
189
+ idx = self.pos + offset
190
+ return self.source[idx] if idx < len(self.source) else "\0"
191
+
192
+ def _advance(self) -> str:
193
+ ch = self.source[self.pos]
194
+ self.pos += 1
195
+ if ch == "\n":
196
+ self.line += 1
197
+ self.col = 1
198
+ else:
199
+ self.col += 1
200
+ return ch
201
+
202
+ def _at_end(self) -> bool:
203
+ return self.pos >= len(self.source)
204
+
205
+ def _emit(self, tt: TokenType, value: str, line: int, col: int, end_col: int | None = None) -> None:
206
+ if end_col is None:
207
+ end_col = col + len(value)
208
+ self.tokens.append(Token(tt, value, line, col, end_col))
209
+
210
+ def _emit_diagnostic(self, message: str, line: int, col: int, end_col: int, hint: str | None = None) -> None:
211
+ loc = SourceLocation(file=self.filename, line=line, col=col, end_col=end_col)
212
+ diag = Diagnostic(level=Level.ERROR, phase=Phase.LEXER, location=loc, message=message, hint=hint)
213
+ self._diagnostics.append(diag)
214
+
215
+ def _skip_line(self) -> None:
216
+ while not self._at_end() and self.source[self.pos] != "\n":
217
+ self._advance()
218
+ if not self._at_end():
219
+ self._advance()
220
+
221
+ def _skip_comment(self) -> None:
222
+ while not self._at_end() and self.source[self.pos] != "\n":
223
+ self._advance()
224
+
225
+ def tokenize(self) -> list[Token]:
226
+ while not self._at_end():
227
+ self._tokenize_line()
228
+
229
+ while len(self.indent_stack) > 1:
230
+ self.indent_stack.pop()
231
+ self._emit(TokenType.DEDENT, "", self.line, self.col)
232
+
233
+ self._emit(TokenType.EOF_TOKEN, "", self.line, self.col)
234
+
235
+ if self._diagnostics:
236
+ raise CompileError(self._diagnostics)
237
+
238
+ return self.tokens
239
+
240
+ def _tokenize_line(self) -> None:
241
+ if self._at_end():
242
+ return
243
+
244
+ line_start = self.pos
245
+
246
+ # Peek at line content to detect blank/comment-only lines
247
+ temp = self.pos
248
+ while temp < len(self.source) and self.source[temp] in (" ", "\t"):
249
+ temp += 1
250
+ if temp >= len(self.source) or self.source[temp] == "\n":
251
+ self._advance_to(min(temp + 1, len(self.source)))
252
+ return
253
+ if self.source[temp: temp + 3] == "//@":
254
+ self._skip_line()
255
+ return
256
+ if self.source[temp: temp + 2] == "//":
257
+ self._skip_line()
258
+ return
259
+
260
+ # Inside parens/brackets: skip indentation handling, treat as continuation
261
+ if self.paren_depth > 0:
262
+ while not self._at_end() and self.source[self.pos] in (" ", "\t"):
263
+ self._advance()
264
+ emitted_in_parens = False
265
+ while not self._at_end() and self.source[self.pos] != "\n":
266
+ self._skip_whitespace_inline()
267
+ if self._at_end() or self.source[self.pos] == "\n":
268
+ break
269
+ if self.source[self.pos: self.pos + 2] == "//":
270
+ self._skip_comment()
271
+ break
272
+ emitted_in_parens = True
273
+ self._read_token()
274
+ if not self._at_end() and self.source[self.pos] == "\n":
275
+ self._advance()
276
+ # If parens closed on this line, emit NEWLINE so parser sees end of statement
277
+ if self.paren_depth == 0 and emitted_in_parens:
278
+ self._emit(TokenType.NEWLINE, "\\n", self.line - 1, self.col)
279
+ return
280
+
281
+ # Indentation handling
282
+ indent_level = 0
283
+ while not self._at_end() and self.source[self.pos] in (" ", "\t"):
284
+ ch = self._advance()
285
+ indent_level += 1
286
+
287
+ raw = self.source[line_start: self.pos]
288
+ if "\t" in raw:
289
+ indent_level = raw.count("\t")
290
+ else:
291
+ indent_level = len(raw) // 4
292
+
293
+ # If we're in a continuation (previous line ended with an operator),
294
+ # suppress INDENT/DEDENT — the indentation is cosmetic, not structural
295
+ if not self._in_continuation:
296
+ current_indent = self.indent_stack[-1]
297
+ if indent_level > current_indent:
298
+ self.indent_stack.append(indent_level)
299
+ self._emit(TokenType.INDENT, "", self.line, 1)
300
+ elif indent_level < current_indent:
301
+ while len(self.indent_stack) > 1 and self.indent_stack[-1] > indent_level:
302
+ self.indent_stack.pop()
303
+ self._emit(TokenType.DEDENT, "", self.line, 1)
304
+
305
+ # Tokens on this line
306
+ emitted_something = False
307
+ while not self._at_end() and self.source[self.pos] != "\n":
308
+ self._skip_whitespace_inline()
309
+ if self._at_end() or self.source[self.pos] == "\n":
310
+ break
311
+ if self.source[self.pos: self.pos + 2] == "//":
312
+ self._skip_comment()
313
+ break
314
+ emitted_something = True
315
+ self._read_token()
316
+
317
+ if not self._at_end() and self.source[self.pos] == "\n":
318
+ self._advance()
319
+
320
+ # Check if this line ends with a continuation token
321
+ if emitted_something and self.paren_depth == 0:
322
+ last_token = self.tokens[-1] if self.tokens else None
323
+ if last_token and last_token.type in self.CONTINUATION_TOKENS:
324
+ # Next line is a continuation — don't emit NEWLINE
325
+ self._in_continuation = True
326
+ else:
327
+ self._in_continuation = False
328
+ self._emit(TokenType.NEWLINE, "\\n", self.line - 1, self.col)
329
+ else:
330
+ self._in_continuation = False
331
+
332
+ def _advance_to(self, target: int) -> None:
333
+ while self.pos < target and self.pos < len(self.source):
334
+ self._advance()
335
+
336
+ def _skip_whitespace_inline(self) -> None:
337
+ while not self._at_end() and self.source[self.pos] in (" ", "\t"):
338
+ self._advance()
339
+
340
+ def _read_token(self) -> None:
341
+ ch = self.source[self.pos]
342
+ start_line = self.line
343
+ start_col = self.col
344
+
345
+ # Numbers: digit-starting
346
+ if ch.isdigit():
347
+ self._read_number(start_line, start_col)
348
+ return
349
+
350
+ # Leading-dot float: .5, .123
351
+ if ch == "." and self.pos + 1 < len(self.source) and self.source[self.pos + 1].isdigit():
352
+ self._read_leading_dot_number(start_line, start_col)
353
+ return
354
+
355
+ # Strings
356
+ if ch == '"':
357
+ self._read_string(start_line, start_col)
358
+ return
359
+ if ch == "'":
360
+ self._read_string_single(start_line, start_col)
361
+ return
362
+
363
+ # Color literals (#rrggbb or #rrggbbaa)
364
+ if ch == "#":
365
+ self._advance() # consume #
366
+ buf = []
367
+ while not self._at_end() and self.source[self.pos] in "0123456789abcdefABCDEF":
368
+ buf.append(self._advance())
369
+ if not buf:
370
+ self._emit_diagnostic(
371
+ "Invalid color literal: expected hex digits after '#'",
372
+ start_line, start_col, start_col + 1,
373
+ hint="Color literals must be #RRGGBB or #RRGGBBAA",
374
+ )
375
+ else:
376
+ value = "#" + "".join(buf)
377
+ self._emit(TokenType.COLOR, value, start_line, start_col, start_col + len(value))
378
+ return
379
+
380
+ # Identifiers / keywords
381
+ if ch.isalpha() or ch == "_":
382
+ self._read_ident(start_line, start_col)
383
+ return
384
+
385
+ # Two-character operators (check before single-char)
386
+ two = self.source[self.pos: self.pos + 2] if self.pos + 1 < len(self.source) else ""
387
+ two_char_ops: dict[str, TokenType] = {
388
+ ":=": TokenType.COLON_EQUALS,
389
+ "==": TokenType.EQEQ,
390
+ "!=": TokenType.NOTEQ,
391
+ ">=": TokenType.GE,
392
+ "<=": TokenType.LE,
393
+ "=>": TokenType.FAT_ARROW,
394
+ "+=": TokenType.PLUS_EQUALS,
395
+ "-=": TokenType.MINUS_EQUALS,
396
+ "*=": TokenType.STAR_EQUALS,
397
+ "/=": TokenType.SLASH_EQUALS,
398
+ "%=": TokenType.PERCENT_EQUALS,
399
+ }
400
+ if two in two_char_ops:
401
+ self._advance()
402
+ self._advance()
403
+ self._emit(two_char_ops[two], two, start_line, start_col, start_col + 2)
404
+ return
405
+
406
+ # Single-character operators
407
+ singles: dict[str, TokenType] = {
408
+ "(": TokenType.LPAREN,
409
+ ")": TokenType.RPAREN,
410
+ "[": TokenType.LBRACKET,
411
+ "]": TokenType.RBRACKET,
412
+ ",": TokenType.COMMA,
413
+ ".": TokenType.DOT,
414
+ "=": TokenType.EQUALS,
415
+ "+": TokenType.PLUS,
416
+ "-": TokenType.MINUS,
417
+ "*": TokenType.STAR,
418
+ "/": TokenType.SLASH,
419
+ "%": TokenType.PERCENT,
420
+ ">": TokenType.GT,
421
+ "<": TokenType.LT,
422
+ "?": TokenType.QUESTION,
423
+ ":": TokenType.COLON,
424
+ }
425
+ if ch in singles:
426
+ self._advance()
427
+ tt = singles[ch]
428
+ if tt in (TokenType.LPAREN, TokenType.LBRACKET):
429
+ self.paren_depth += 1
430
+ elif tt in (TokenType.RPAREN, TokenType.RBRACKET):
431
+ self.paren_depth = max(0, self.paren_depth - 1)
432
+ self._emit(tt, ch, start_line, start_col, start_col + 1)
433
+ return
434
+
435
+ # Unknown character — emit diagnostic instead of silently skipping
436
+ self._emit_diagnostic(
437
+ f"Unexpected character: {ch!r}",
438
+ start_line, start_col, start_col + 1,
439
+ )
440
+ self._advance()
441
+
442
+ def _read_number(self, start_line: int, start_col: int) -> None:
443
+ """Read an integer or float, including optional scientific notation (e/E)."""
444
+ buf: list[str] = []
445
+
446
+ # Integer part
447
+ while not self._at_end() and self.source[self.pos].isdigit():
448
+ buf.append(self._advance())
449
+
450
+ # Optional fractional part
451
+ if not self._at_end() and self.source[self.pos] == ".":
452
+ # Consume dot if followed by digit OR if followed by non-identifier char
453
+ # (e.g., 0. is a valid float, but 0.member should not consume the dot)
454
+ next_char = self.source[self.pos + 1] if self.pos + 1 < len(self.source) else ""
455
+ if next_char.isdigit() or (not next_char.isalpha() and next_char != "_"):
456
+ buf.append(self._advance()) # consume '.'
457
+ while not self._at_end() and self.source[self.pos].isdigit():
458
+ buf.append(self._advance())
459
+
460
+ # Optional exponent
461
+ if not self._at_end() and self.source[self.pos] in ("e", "E"):
462
+ buf.append(self._advance()) # consume 'e'/'E'
463
+ if not self._at_end() and self.source[self.pos] in ("+", "-"):
464
+ buf.append(self._advance()) # consume sign
465
+ if not self._at_end() and self.source[self.pos].isdigit():
466
+ while not self._at_end() and self.source[self.pos].isdigit():
467
+ buf.append(self._advance())
468
+ else:
469
+ # Malformed exponent — emit diagnostic
470
+ self._emit_diagnostic(
471
+ "Malformed scientific notation: expected digits after exponent",
472
+ start_line, start_col, self.col,
473
+ hint="Example: 1.5e-3 or 2E10",
474
+ )
475
+
476
+ value = "".join(buf)
477
+ self._emit(TokenType.NUMBER, value, start_line, start_col, start_col + len(value))
478
+
479
+ def _read_leading_dot_number(self, start_line: int, start_col: int) -> None:
480
+ """Read a leading-dot float like .5, normalising to '0.5'."""
481
+ self._advance() # consume '.'
482
+ buf: list[str] = []
483
+ while not self._at_end() and self.source[self.pos].isdigit():
484
+ buf.append(self._advance())
485
+
486
+ # Optional exponent
487
+ if not self._at_end() and self.source[self.pos] in ("e", "E"):
488
+ buf2: list[str] = [self._advance()] # 'e'/'E'
489
+ if not self._at_end() and self.source[self.pos] in ("+", "-"):
490
+ buf2.append(self._advance())
491
+ if not self._at_end() and self.source[self.pos].isdigit():
492
+ while not self._at_end() and self.source[self.pos].isdigit():
493
+ buf2.append(self._advance())
494
+ buf.extend(buf2)
495
+ # If malformed exponent after leading-dot, just ignore the e part
496
+
497
+ frac = "".join(buf)
498
+ value = "0." + frac
499
+ self._emit(TokenType.NUMBER, value, start_line, start_col, start_col + len(value))
500
+
501
+ def _read_string(self, start_line: int, start_col: int) -> None:
502
+ self._advance() # consume opening "
503
+ buf: list[str] = []
504
+ while not self._at_end() and self.source[self.pos] != '"':
505
+ if self.source[self.pos] == "\\" and self.pos + 1 < len(self.source):
506
+ self._advance() # skip backslash
507
+ buf.append(self._advance())
508
+ if not self._at_end():
509
+ self._advance() # consume closing "
510
+ value = "".join(buf)
511
+ self._emit(TokenType.STRING, value, start_line, start_col, self.col)
512
+
513
+ def _read_string_single(self, start_line: int, start_col: int) -> None:
514
+ self._advance() # consume opening '
515
+ buf: list[str] = []
516
+ while not self._at_end() and self.source[self.pos] != "'":
517
+ if self.source[self.pos] == "\\" and self.pos + 1 < len(self.source):
518
+ self._advance()
519
+ buf.append(self._advance())
520
+ if not self._at_end():
521
+ self._advance() # consume closing '
522
+ value = "".join(buf)
523
+ self._emit(TokenType.STRING, value, start_line, start_col, self.col)
524
+
525
+ def _read_ident(self, start_line: int, start_col: int) -> None:
526
+ buf: list[str] = []
527
+ while not self._at_end() and (self.source[self.pos].isalnum() or self.source[self.pos] == "_"):
528
+ buf.append(self._advance())
529
+ word = "".join(buf)
530
+ tt = KEYWORDS.get(word, TokenType.IDENT)
531
+ self._emit(tt, word, start_line, start_col, start_col + len(word))