zexus 1.6.8 → 1.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177)
  1. package/README.md +12 -5
  2. package/package.json +1 -1
  3. package/src/__init__.py +7 -0
  4. package/src/zexus/__init__.py +1 -1
  5. package/src/zexus/__pycache__/__init__.cpython-312.pyc +0 -0
  6. package/src/zexus/__pycache__/capability_system.cpython-312.pyc +0 -0
  7. package/src/zexus/__pycache__/debug_sanitizer.cpython-312.pyc +0 -0
  8. package/src/zexus/__pycache__/environment.cpython-312.pyc +0 -0
  9. package/src/zexus/__pycache__/error_reporter.cpython-312.pyc +0 -0
  10. package/src/zexus/__pycache__/input_validation.cpython-312.pyc +0 -0
  11. package/src/zexus/__pycache__/lexer.cpython-312.pyc +0 -0
  12. package/src/zexus/__pycache__/module_cache.cpython-312.pyc +0 -0
  13. package/src/zexus/__pycache__/module_manager.cpython-312.pyc +0 -0
  14. package/src/zexus/__pycache__/object.cpython-312.pyc +0 -0
  15. package/src/zexus/__pycache__/security.cpython-312.pyc +0 -0
  16. package/src/zexus/__pycache__/security_enforcement.cpython-312.pyc +0 -0
  17. package/src/zexus/__pycache__/syntax_validator.cpython-312.pyc +0 -0
  18. package/src/zexus/__pycache__/zexus_ast.cpython-312.pyc +0 -0
  19. package/src/zexus/__pycache__/zexus_token.cpython-312.pyc +0 -0
  20. package/src/zexus/access_control_system/__pycache__/__init__.cpython-312.pyc +0 -0
  21. package/src/zexus/access_control_system/__pycache__/access_control.cpython-312.pyc +0 -0
  22. package/src/zexus/advanced_types.py +17 -2
  23. package/src/zexus/blockchain/__init__.py +411 -0
  24. package/src/zexus/blockchain/accelerator.py +1160 -0
  25. package/src/zexus/blockchain/chain.py +660 -0
  26. package/src/zexus/blockchain/consensus.py +821 -0
  27. package/src/zexus/blockchain/contract_vm.py +1019 -0
  28. package/src/zexus/blockchain/crypto.py +79 -14
  29. package/src/zexus/blockchain/events.py +526 -0
  30. package/src/zexus/blockchain/loadtest.py +721 -0
  31. package/src/zexus/blockchain/monitoring.py +350 -0
  32. package/src/zexus/blockchain/mpt.py +716 -0
  33. package/src/zexus/blockchain/multichain.py +951 -0
  34. package/src/zexus/blockchain/multiprocess_executor.py +338 -0
  35. package/src/zexus/blockchain/network.py +886 -0
  36. package/src/zexus/blockchain/node.py +666 -0
  37. package/src/zexus/blockchain/rpc.py +1203 -0
  38. package/src/zexus/blockchain/rust_bridge.py +421 -0
  39. package/src/zexus/blockchain/storage.py +423 -0
  40. package/src/zexus/blockchain/tokens.py +750 -0
  41. package/src/zexus/blockchain/upgradeable.py +1004 -0
  42. package/src/zexus/blockchain/verification.py +1602 -0
  43. package/src/zexus/blockchain/wallet.py +621 -0
  44. package/src/zexus/capability_system.py +184 -9
  45. package/src/zexus/cli/__pycache__/main.cpython-312.pyc +0 -0
  46. package/src/zexus/cli/main.py +383 -34
  47. package/src/zexus/cli/zpm.py +1 -1
  48. package/src/zexus/compiler/__pycache__/bytecode.cpython-312.pyc +0 -0
  49. package/src/zexus/compiler/__pycache__/lexer.cpython-312.pyc +0 -0
  50. package/src/zexus/compiler/__pycache__/parser.cpython-312.pyc +0 -0
  51. package/src/zexus/compiler/__pycache__/semantic.cpython-312.pyc +0 -0
  52. package/src/zexus/compiler/__pycache__/zexus_ast.cpython-312.pyc +0 -0
  53. package/src/zexus/compiler/bytecode.py +124 -7
  54. package/src/zexus/compiler/compat_runtime.py +6 -2
  55. package/src/zexus/compiler/lexer.py +16 -5
  56. package/src/zexus/compiler/parser.py +108 -7
  57. package/src/zexus/compiler/semantic.py +18 -19
  58. package/src/zexus/compiler/zexus_ast.py +26 -1
  59. package/src/zexus/concurrency_system.py +79 -0
  60. package/src/zexus/config.py +54 -0
  61. package/src/zexus/crypto_bridge.py +244 -8
  62. package/src/zexus/dap/__init__.py +10 -0
  63. package/src/zexus/dap/__main__.py +4 -0
  64. package/src/zexus/dap/dap_server.py +391 -0
  65. package/src/zexus/dap/debug_engine.py +298 -0
  66. package/src/zexus/environment.py +112 -9
  67. package/src/zexus/evaluator/__pycache__/bytecode_compiler.cpython-312.pyc +0 -0
  68. package/src/zexus/evaluator/__pycache__/core.cpython-312.pyc +0 -0
  69. package/src/zexus/evaluator/__pycache__/expressions.cpython-312.pyc +0 -0
  70. package/src/zexus/evaluator/__pycache__/functions.cpython-312.pyc +0 -0
  71. package/src/zexus/evaluator/__pycache__/resource_limiter.cpython-312.pyc +0 -0
  72. package/src/zexus/evaluator/__pycache__/statements.cpython-312.pyc +0 -0
  73. package/src/zexus/evaluator/__pycache__/unified_execution.cpython-312.pyc +0 -0
  74. package/src/zexus/evaluator/__pycache__/utils.cpython-312.pyc +0 -0
  75. package/src/zexus/evaluator/bytecode_compiler.py +457 -37
  76. package/src/zexus/evaluator/core.py +644 -50
  77. package/src/zexus/evaluator/expressions.py +358 -62
  78. package/src/zexus/evaluator/functions.py +458 -20
  79. package/src/zexus/evaluator/resource_limiter.py +4 -4
  80. package/src/zexus/evaluator/statements.py +774 -122
  81. package/src/zexus/evaluator/unified_execution.py +573 -72
  82. package/src/zexus/evaluator/utils.py +14 -2
  83. package/src/zexus/evaluator_original.py +1 -1
  84. package/src/zexus/event_loop.py +186 -0
  85. package/src/zexus/lexer.py +742 -458
  86. package/src/zexus/lsp/__init__.py +1 -1
  87. package/src/zexus/lsp/definition_provider.py +163 -9
  88. package/src/zexus/lsp/server.py +22 -8
  89. package/src/zexus/lsp/symbol_provider.py +182 -9
  90. package/src/zexus/module_cache.py +239 -9
  91. package/src/zexus/module_manager.py +129 -1
  92. package/src/zexus/object.py +76 -6
  93. package/src/zexus/parser/__pycache__/parser.cpython-312.pyc +0 -0
  94. package/src/zexus/parser/__pycache__/strategy_context.cpython-312.pyc +0 -0
  95. package/src/zexus/parser/__pycache__/strategy_structural.cpython-312.pyc +0 -0
  96. package/src/zexus/parser/parser.py +1349 -408
  97. package/src/zexus/parser/strategy_context.py +755 -58
  98. package/src/zexus/parser/strategy_structural.py +121 -21
  99. package/src/zexus/persistence.py +15 -1
  100. package/src/zexus/renderer/__init__.py +61 -0
  101. package/src/zexus/renderer/__pycache__/__init__.cpython-312.pyc +0 -0
  102. package/src/zexus/renderer/__pycache__/backend.cpython-312.pyc +0 -0
  103. package/src/zexus/renderer/__pycache__/canvas.cpython-312.pyc +0 -0
  104. package/src/zexus/renderer/__pycache__/color_system.cpython-312.pyc +0 -0
  105. package/src/zexus/renderer/__pycache__/layout.cpython-312.pyc +0 -0
  106. package/src/zexus/renderer/__pycache__/main_renderer.cpython-312.pyc +0 -0
  107. package/src/zexus/renderer/__pycache__/painter.cpython-312.pyc +0 -0
  108. package/src/zexus/renderer/backend.py +261 -0
  109. package/src/zexus/renderer/canvas.py +78 -0
  110. package/src/zexus/renderer/color_system.py +201 -0
  111. package/src/zexus/renderer/graphics.py +31 -0
  112. package/src/zexus/renderer/layout.py +222 -0
  113. package/src/zexus/renderer/main_renderer.py +66 -0
  114. package/src/zexus/renderer/painter.py +30 -0
  115. package/src/zexus/renderer/tk_backend.py +208 -0
  116. package/src/zexus/renderer/web_backend.py +260 -0
  117. package/src/zexus/runtime/__init__.py +10 -2
  118. package/src/zexus/runtime/__pycache__/__init__.cpython-312.pyc +0 -0
  119. package/src/zexus/runtime/__pycache__/async_runtime.cpython-312.pyc +0 -0
  120. package/src/zexus/runtime/__pycache__/load_manager.cpython-312.pyc +0 -0
  121. package/src/zexus/runtime/file_flags.py +137 -0
  122. package/src/zexus/runtime/load_manager.py +368 -0
  123. package/src/zexus/safety/__pycache__/__init__.cpython-312.pyc +0 -0
  124. package/src/zexus/safety/__pycache__/memory_safety.cpython-312.pyc +0 -0
  125. package/src/zexus/security.py +424 -34
  126. package/src/zexus/stdlib/fs.py +23 -18
  127. package/src/zexus/stdlib/http.py +289 -186
  128. package/src/zexus/stdlib/sockets.py +207 -163
  129. package/src/zexus/stdlib/websockets.py +282 -0
  130. package/src/zexus/stdlib_integration.py +369 -2
  131. package/src/zexus/strategy_recovery.py +6 -3
  132. package/src/zexus/type_checker.py +423 -0
  133. package/src/zexus/virtual_filesystem.py +189 -2
  134. package/src/zexus/vm/__init__.py +113 -3
  135. package/src/zexus/vm/__pycache__/async_optimizer.cpython-312.pyc +0 -0
  136. package/src/zexus/vm/__pycache__/bytecode.cpython-312.pyc +0 -0
  137. package/src/zexus/vm/__pycache__/bytecode_converter.cpython-312.pyc +0 -0
  138. package/src/zexus/vm/__pycache__/cache.cpython-312.pyc +0 -0
  139. package/src/zexus/vm/__pycache__/compiler.cpython-312.pyc +0 -0
  140. package/src/zexus/vm/__pycache__/gas_metering.cpython-312.pyc +0 -0
  141. package/src/zexus/vm/__pycache__/jit.cpython-312.pyc +0 -0
  142. package/src/zexus/vm/__pycache__/parallel_vm.cpython-312.pyc +0 -0
  143. package/src/zexus/vm/__pycache__/vm.cpython-312.pyc +0 -0
  144. package/src/zexus/vm/async_optimizer.py +80 -6
  145. package/src/zexus/vm/binary_bytecode.py +659 -0
  146. package/src/zexus/vm/bytecode.py +59 -11
  147. package/src/zexus/vm/bytecode_converter.py +26 -12
  148. package/src/zexus/vm/cabi.c +1985 -0
  149. package/src/zexus/vm/cabi.cpython-312-x86_64-linux-gnu.so +0 -0
  150. package/src/zexus/vm/cabi.h +127 -0
  151. package/src/zexus/vm/cache.py +561 -17
  152. package/src/zexus/vm/compiler.py +818 -51
  153. package/src/zexus/vm/fastops.c +15743 -0
  154. package/src/zexus/vm/fastops.cpython-312-x86_64-linux-gnu.so +0 -0
  155. package/src/zexus/vm/fastops.pyx +288 -0
  156. package/src/zexus/vm/gas_metering.py +50 -9
  157. package/src/zexus/vm/jit.py +364 -20
  158. package/src/zexus/vm/native_jit_backend.py +1816 -0
  159. package/src/zexus/vm/native_runtime.cpp +1388 -0
  160. package/src/zexus/vm/native_runtime.cpython-312-x86_64-linux-gnu.so +0 -0
  161. package/src/zexus/vm/optimizer.py +161 -11
  162. package/src/zexus/vm/parallel_vm.py +140 -45
  163. package/src/zexus/vm/peephole_optimizer.py +82 -4
  164. package/src/zexus/vm/profiler.py +38 -18
  165. package/src/zexus/vm/register_allocator.py +16 -5
  166. package/src/zexus/vm/register_vm.py +8 -5
  167. package/src/zexus/vm/vm.py +3581 -531
  168. package/src/zexus/vm/wasm_compiler.py +658 -0
  169. package/src/zexus/zexus_ast.py +137 -11
  170. package/src/zexus/zexus_token.py +16 -5
  171. package/src/zexus/zpm/installer.py +55 -15
  172. package/src/zexus/zpm/package_manager.py +1 -1
  173. package/src/zexus/zpm/registry.py +257 -28
  174. package/src/zexus.egg-info/PKG-INFO +16 -6
  175. package/src/zexus.egg-info/SOURCES.txt +129 -17
  176. package/src/zexus.egg-info/entry_points.txt +1 -0
  177. package/src/zexus.egg-info/requires.txt +4 -0
@@ -2,6 +2,187 @@
2
2
  from .zexus_token import *
3
3
  from .error_reporter import get_error_reporter, SyntaxError as ZexusSyntaxError
4
4
 
5
+ _LITERAL_KEYWORDS = {
6
+ "true": TRUE,
7
+ "false": FALSE,
8
+ "null": NULL,
9
+ }
10
+
11
+ _STRICT_KEYWORDS = {
12
+ 'if', 'elif', 'else', 'while', 'for', 'each', 'in',
13
+ 'return', 'break', 'continue', 'throw', 'try', 'catch',
14
+ 'await', 'async', 'spawn', 'let', 'const', 'print',
15
+ 'use', 'find', 'load', 'export', 'import', 'debug', 'match', 'lambda',
16
+ 'case', 'default'
17
+ }
18
+
19
+ _CONTEXTS_ALLOWING_KEYWORD_IDENTS = {
20
+ LET, CONST, DOT, COMMA, LBRACKET, COLON, ASSIGN
21
+ }
22
+
23
+ _KEYWORDS = {
24
+ "let": LET,
25
+ "const": CONST,
26
+ "data": DATA,
27
+ "print": PRINT,
28
+ "if": IF,
29
+ "then": THEN,
30
+ "elif": ELIF,
31
+ "else": ELSE,
32
+ "true": TRUE,
33
+ "false": FALSE,
34
+ "null": NULL,
35
+ "return": RETURN,
36
+ "for": FOR,
37
+ "each": EACH,
38
+ "in": IN,
39
+ "action": ACTION,
40
+ "function": FUNCTION,
41
+ "while": WHILE,
42
+ "use": USE,
43
+ "find": FIND,
44
+ "load": LOAD,
45
+ "exactly": EXACTLY,
46
+ "embedded": EMBEDDED,
47
+ "export": EXPORT,
48
+ "lambda": LAMBDA,
49
+ "debug": DEBUG,
50
+ "try": TRY,
51
+ "catch": CATCH,
52
+ "finally": FINALLY,
53
+ "continue": CONTINUE,
54
+ "break": BREAK,
55
+ "throw": THROW,
56
+ "external": EXTERNAL,
57
+ "screen": SCREEN,
58
+ "component": COMPONENT,
59
+ "theme": THEME,
60
+ "color": COLOR,
61
+ "canvas": CANVAS,
62
+ "graphics": GRAPHICS,
63
+ "animation": ANIMATION,
64
+ "clock": CLOCK,
65
+ "async": ASYNC,
66
+ "await": AWAIT,
67
+ "channel": CHANNEL,
68
+ "send": SEND,
69
+ "receive": RECEIVE,
70
+ "atomic": ATOMIC,
71
+ "event": EVENT,
72
+ "emit": EMIT,
73
+ "enum": ENUM,
74
+ "protocol": PROTOCOL,
75
+ "import": IMPORT,
76
+ "public": PUBLIC,
77
+ "private": PRIVATE,
78
+ "sealed": SEALED,
79
+ "secure": SECURE,
80
+ "pure": PURE,
81
+ "view": VIEW,
82
+ "payable": PAYABLE,
83
+ "modifier": MODIFIER,
84
+ "entity": ENTITY,
85
+ "verify": VERIFY,
86
+ "contract": CONTRACT,
87
+ "protect": PROTECT,
88
+ "implements": IMPLEMENTS,
89
+ "this": THIS,
90
+ "as": AS,
91
+ "interface": INTERFACE,
92
+ "capability": CAPABILITY,
93
+ "grant": GRANT,
94
+ "revoke": REVOKE,
95
+ "module": MODULE,
96
+ "package": PACKAGE,
97
+ "using": USING,
98
+ "type_alias": TYPE_ALIAS,
99
+ "seal": SEAL,
100
+ "audit": AUDIT,
101
+ "restrict": RESTRICT,
102
+ "sandbox": SANDBOX,
103
+ "trail": TRAIL,
104
+ "middleware": MIDDLEWARE,
105
+ "auth": AUTH,
106
+ "throttle": THROTTLE,
107
+ "cache": CACHE,
108
+ "ledger": LEDGER,
109
+ "state": STATE,
110
+ "revert": REVERT,
111
+ "limit": LIMIT,
112
+ "persistent": PERSISTENT,
113
+ "storage": STORAGE,
114
+ "require": REQUIRE,
115
+ "and": AND,
116
+ "or": OR,
117
+ "native": NATIVE,
118
+ "gc": GC,
119
+ "inline": INLINE,
120
+ "buffer": BUFFER,
121
+ "simd": SIMD,
122
+ "defer": DEFER,
123
+ "pattern": PATTERN,
124
+ "match": MATCH,
125
+ "case": CASE,
126
+ "default": DEFAULT,
127
+ "enum": ENUM,
128
+ "stream": STREAM,
129
+ "watch": WATCH,
130
+ "log": LOG,
131
+ "inject": INJECT,
132
+ "validate": VALIDATE,
133
+ "sanitize": SANITIZE,
134
+ }
135
+
136
+ _FUNCTION_DECL_KEYWORDS = {"action", "function"}
137
+
138
+ _FUNCTION_STATEMENT_BOUNDARIES = {
139
+ None,
140
+ SEMICOLON,
141
+ LBRACE,
142
+ RBRACE,
143
+ RBRACKET,
144
+ INT,
145
+ STRING,
146
+ FLOAT,
147
+ RPAREN,
148
+ TRUE,
149
+ FALSE,
150
+ NULL,
151
+ RETURN,
152
+ ASSIGN,
153
+ ASYNC,
154
+ EXPORT,
155
+ PUBLIC,
156
+ PRIVATE,
157
+ SEALED,
158
+ INLINE,
159
+ SECURE,
160
+ PURE,
161
+ VIEW,
162
+ PAYABLE,
163
+ NATIVE,
164
+ }
165
+
166
+ _DATA_KEYWORD_CONTRACT_CONTEXTS = {
167
+ SEMICOLON,
168
+ LBRACE,
169
+ RBRACE,
170
+ RBRACKET,
171
+ STRING,
172
+ INT,
173
+ FLOAT,
174
+ TRUE,
175
+ FALSE,
176
+ NULL,
177
+ PRIVATE,
178
+ PUBLIC,
179
+ SEALED,
180
+ SECURE,
181
+ PURE,
182
+ VIEW,
183
+ PAYABLE,
184
+ }
185
+
5
186
  class Lexer:
6
187
  def __init__(self, source_code, filename="<stdin>"):
7
188
  self.input = source_code
@@ -18,6 +199,11 @@ class Lexer:
18
199
  self._next_paren_has_lambda = False
19
200
  # Track last token type to enable context-aware keyword handling
20
201
  self.last_token_type = None
202
+ # Track statement boundaries and nesting depth to disambiguate keywords vs identifiers
203
+ self.at_statement_boundary = True
204
+ self.paren_depth = 0
205
+ self.bracket_depth = 0
206
+ self.brace_depth = 0
21
207
 
22
208
  # Register source with error reporter
23
209
  self.error_reporter = get_error_reporter()
@@ -48,308 +234,414 @@ class Lexer:
48
234
  return self.input[self.read_position]
49
235
 
50
236
  def next_token(self):
51
- self.skip_whitespace()
237
+ # NOTE: This method must not recurse. Large files can contain thousands
238
+ # of consecutive comment/blank lines; using recursion here can hit
239
+ # Python's recursion limit and cause unpredictable failures.
240
+ while True:
241
+ self.skip_whitespace()
52
242
 
53
- # CRITICAL FIX: Skip single line comments (both # and // styles)
54
- if self.ch == '#' and self.peek_char() != '{':
55
- self.skip_comment()
56
- return self.next_token()
243
+ # Skip single line comments (both # and // styles)
244
+ if self.ch == '#' and self.peek_char() != '{':
245
+ self.skip_comment()
246
+ continue
57
247
 
58
- # NEW: Handle // style comments
59
- if self.ch == '/' and self.peek_char() == '/':
60
- self.skip_double_slash_comment()
61
- return self.next_token()
248
+ # Handle // style comments and /* */ block comments
249
+ if self.ch == '/' and self.peek_char() == '/':
250
+ self.skip_double_slash_comment()
251
+ continue
62
252
 
63
- tok = None
64
- current_line = self.line
65
- current_column = self.column
253
+ # Block comments: /* ... */
254
+ if self.ch == '/' and self.peek_char() == '*':
255
+ self.skip_block_comment()
256
+ continue
66
257
 
67
- if self.ch == '=':
68
- # Equality '=='
69
- if self.peek_char() == '=':
70
- ch = self.ch
71
- self.read_char()
72
- literal = ch + self.ch
73
- tok = Token(EQ, literal)
74
- tok.line = current_line
75
- tok.column = current_column
76
- # Arrow '=>' (treat as lambda shorthand)
77
- elif self.peek_char() == '>':
78
- ch = self.ch
79
- self.read_char()
80
- literal = ch + self.ch
81
- tok = Token(LAMBDA, literal)
258
+ tok = None
259
+ current_line = self.line
260
+ current_column = self.column
261
+
262
+ if self.ch == '=':
263
+ # Equality '=='
264
+ if self.peek_char() == '=':
265
+ ch = self.ch
266
+ self.read_char()
267
+ literal = ch + self.ch
268
+ tok = Token(EQ, literal)
269
+ tok.line = current_line
270
+ tok.column = current_column
271
+ # Arrow '=>' (treat as lambda shorthand)
272
+ elif self.peek_char() == '>':
273
+ ch = self.ch
274
+ self.read_char()
275
+ literal = ch + self.ch
276
+ tok = Token(LAMBDA, literal)
277
+ tok.line = current_line
278
+ tok.column = current_column
279
+ else:
280
+ tok = Token(ASSIGN, self.ch)
281
+ tok.line = current_line
282
+ tok.column = current_column
283
+ elif self.ch == '!':
284
+ if self.peek_char() == '=':
285
+ ch = self.ch
286
+ self.read_char()
287
+ literal = ch + self.ch
288
+ tok = Token(NOT_EQ, literal)
289
+ tok.line = current_line
290
+ tok.column = current_column
291
+ else:
292
+ tok = Token(BANG, self.ch)
293
+ tok.line = current_line
294
+ tok.column = current_column
295
+ elif self.ch == '&':
296
+ if self.peek_char() == '&':
297
+ ch = self.ch
298
+ self.read_char()
299
+ literal = ch + self.ch
300
+ tok = Token(AND, literal)
301
+ tok.line = current_line
302
+ tok.column = current_column
303
+ else:
304
+ # Single '&' is not supported - suggest using '&&'
305
+ error = self.error_reporter.report_error(
306
+ ZexusSyntaxError,
307
+ f"Unexpected character '{self.ch}'",
308
+ line=current_line,
309
+ column=current_column,
310
+ filename=self.filename,
311
+ suggestion="Did you mean '&&' for logical AND?"
312
+ )
313
+ raise error
314
+ elif self.ch == '|':
315
+ if self.peek_char() == '|':
316
+ ch = self.ch
317
+ self.read_char()
318
+ literal = ch + self.ch
319
+ tok = Token(OR, literal)
320
+ tok.line = current_line
321
+ tok.column = current_column
322
+ else:
323
+ # Single '|' is not supported - suggest using '||'
324
+ error = self.error_reporter.report_error(
325
+ ZexusSyntaxError,
326
+ f"Unexpected character '{self.ch}'",
327
+ line=current_line,
328
+ column=current_column,
329
+ filename=self.filename,
330
+ suggestion="Did you mean '||' for logical OR?"
331
+ )
332
+ raise error
333
+ elif self.ch == '<':
334
+ if self.peek_char() == '=':
335
+ ch = self.ch
336
+ self.read_char()
337
+ literal = ch + self.ch
338
+ tok = Token(LTE, literal)
339
+ tok.line = current_line
340
+ tok.column = current_column
341
+ elif self.peek_char() == '<':
342
+ ch = self.ch
343
+ self.read_char()
344
+ literal = ch + self.ch
345
+ tok = Token(IMPORT_OP, literal)
346
+ tok.line = current_line
347
+ tok.column = current_column
348
+ else:
349
+ tok = Token(LT, self.ch)
350
+ tok.line = current_line
351
+ tok.column = current_column
352
+ elif self.ch == '>':
353
+ if self.peek_char() == '=':
354
+ ch = self.ch
355
+ self.read_char()
356
+ literal = ch + self.ch
357
+ tok = Token(GTE, literal)
358
+ tok.line = current_line
359
+ tok.column = current_column
360
+ elif self.peek_char() == '>':
361
+ ch = self.ch
362
+ self.read_char()
363
+ literal = ch + self.ch
364
+ tok = Token(APPEND, literal)
365
+ tok.line = current_line
366
+ tok.column = current_column
367
+ else:
368
+ tok = Token(GT, self.ch)
369
+ tok.line = current_line
370
+ tok.column = current_column
371
+ elif self.ch == '?':
372
+ # Check for nullish coalescing '??'
373
+ if self.peek_char() == '?':
374
+ ch = self.ch
375
+ self.read_char()
376
+ literal = ch + self.ch
377
+ tok = Token(NULLISH, literal)
378
+ tok.line = current_line
379
+ tok.column = current_column
380
+ else:
381
+ tok = Token(QUESTION, self.ch)
382
+ tok.line = current_line
383
+ tok.column = current_column
384
+ elif self.ch == '"':
385
+ # Check for triple-quote multiline string
386
+ if self.peek_char() == '"' and self.read_position + 1 < len(self.input) and self.input[self.read_position + 1] == '"':
387
+ string_literal = self.read_multiline_string()
388
+ else:
389
+ string_literal = self.read_string()
390
+ # If read_string returned a list, it's an interpolated string
391
+ if isinstance(string_literal, list):
392
+ tok = Token(INTERP_STRING, string_literal)
393
+ else:
394
+ tok = Token(STRING, string_literal)
82
395
  tok.line = current_line
83
396
  tok.column = current_column
84
- else:
85
- tok = Token(ASSIGN, self.ch)
397
+ elif self.ch == "'":
398
+ # Single-quoted strings
399
+ if self.peek_char() == "'" and self.read_position + 1 < len(self.input) and self.input[self.read_position + 1] == "'":
400
+ string_literal = self.read_multiline_string(quote_char="'")
401
+ else:
402
+ string_literal = self.read_single_quoted_string()
403
+ tok = Token(STRING, string_literal)
86
404
  tok.line = current_line
87
405
  tok.column = current_column
88
- elif self.ch == '!':
89
- if self.peek_char() == '=':
90
- ch = self.ch
91
- self.read_char()
92
- literal = ch + self.ch
93
- tok = Token(NOT_EQ, literal)
406
+ elif self.ch == '[':
407
+ tok = Token(LBRACKET, self.ch)
94
408
  tok.line = current_line
95
409
  tok.column = current_column
96
- else:
97
- tok = Token(BANG, self.ch)
410
+ elif self.ch == ']':
411
+ tok = Token(RBRACKET, self.ch)
98
412
  tok.line = current_line
99
413
  tok.column = current_column
100
- elif self.ch == '&':
101
- if self.peek_char() == '&':
102
- ch = self.ch
103
- self.read_char()
104
- literal = ch + self.ch
105
- tok = Token(AND, literal)
414
+ elif self.ch == '@':
415
+ tok = Token(AT, self.ch)
106
416
  tok.line = current_line
107
417
  tok.column = current_column
108
- else:
109
- # Single '&' is not supported - suggest using '&&'
110
- error = self.error_reporter.report_error(
111
- ZexusSyntaxError,
112
- f"Unexpected character '{self.ch}'",
113
- line=current_line,
114
- column=current_column,
115
- filename=self.filename,
116
- suggestion="Did you mean '&&' for logical AND?"
117
- )
118
- raise error
119
- elif self.ch == '|':
120
- if self.peek_char() == '|':
121
- ch = self.ch
122
- self.read_char()
123
- literal = ch + self.ch
124
- tok = Token(OR, literal)
418
+ elif self.ch == '(':
419
+ # Quick char-level scan: detect if this '(' pairs with a ')' that
420
+ # is followed by '=>' (arrow). If so, set a hint flag so parser
421
+ # can treat the parentheses as a lambda-parameter list.
422
+ try:
423
+ src = self.input
424
+ i = self.position
425
+ depth = 0
426
+ found = False
427
+ scan_limit = len(src)
428
+ while i < scan_limit:
429
+ c = src[i]
430
+ if c == '(':
431
+ depth += 1
432
+ elif c == ')':
433
+ depth -= 1
434
+ if depth == 0:
435
+ # look ahead for '=>' skipping whitespace
436
+ j = i + 1
437
+ while j < len(src) and src[j].isspace():
438
+ j += 1
439
+ if j + 1 < len(src) and src[j] == '=' and src[j + 1] == '>':
440
+ found = True
441
+ break
442
+ i += 1
443
+ self._next_paren_has_lambda = found
444
+ except Exception:
445
+ self._next_paren_has_lambda = False
446
+
447
+ tok = Token(LPAREN, self.ch)
125
448
  tok.line = current_line
126
449
  tok.column = current_column
127
- else:
128
- # Single '|' is not supported - suggest using '||'
129
- error = self.error_reporter.report_error(
130
- ZexusSyntaxError,
131
- f"Unexpected character '{self.ch}'",
132
- line=current_line,
133
- column=current_column,
134
- filename=self.filename,
135
- suggestion="Did you mean '||' for logical OR?"
136
- )
137
- raise error
138
- elif self.ch == '<':
139
- if self.peek_char() == '=':
140
- ch = self.ch
141
- self.read_char()
142
- literal = ch + self.ch
143
- tok = Token(LTE, literal)
450
+ elif self.ch == ')':
451
+ tok = Token(RPAREN, self.ch)
144
452
  tok.line = current_line
145
453
  tok.column = current_column
146
- elif self.peek_char() == '<':
147
- ch = self.ch
148
- self.read_char()
149
- literal = ch + self.ch
150
- tok = Token(IMPORT_OP, literal)
454
+ elif self.ch == '{':
455
+ # Check if this might be start of embedded block
456
+ lookback = self.input[max(0, self.position-10):self.position]
457
+ if 'embedded' in lookback:
458
+ self.in_embedded_block = True
459
+ tok = Token(LBRACE, self.ch)
151
460
  tok.line = current_line
152
461
  tok.column = current_column
153
- else:
154
- tok = Token(LT, self.ch)
462
+ elif self.ch == '}':
463
+ if self.in_embedded_block:
464
+ self.in_embedded_block = False
465
+ tok = Token(RBRACE, self.ch)
155
466
  tok.line = current_line
156
467
  tok.column = current_column
157
- elif self.ch == '>':
158
- if self.peek_char() == '=':
159
- ch = self.ch
160
- self.read_char()
161
- literal = ch + self.ch
162
- tok = Token(GTE, literal)
468
+ elif self.ch == ',':
469
+ tok = Token(COMMA, self.ch)
163
470
  tok.line = current_line
164
471
  tok.column = current_column
165
- elif self.peek_char() == '>':
166
- ch = self.ch
167
- self.read_char()
168
- literal = ch + self.ch
169
- tok = Token(APPEND, literal)
472
+ elif self.ch == ';':
473
+ tok = Token(SEMICOLON, self.ch)
170
474
  tok.line = current_line
171
475
  tok.column = current_column
172
- else:
173
- tok = Token(GT, self.ch)
476
+ elif self.ch == ':':
477
+ tok = Token(COLON, self.ch)
174
478
  tok.line = current_line
175
479
  tok.column = current_column
176
- elif self.ch == '?':
177
- # Check for nullish coalescing '??'
178
- if self.peek_char() == '?':
179
- ch = self.ch
180
- self.read_char()
181
- literal = ch + self.ch
182
- tok = Token(NULLISH, literal)
480
+ elif self.ch == '+':
481
+ if self.peek_char() == '=':
482
+ ch = self.ch
483
+ self.read_char()
484
+ tok = Token(PLUS_ASSIGN, ch + self.ch)
485
+ tok.line = current_line
486
+ tok.column = current_column
487
+ else:
488
+ tok = Token(PLUS, self.ch)
489
+ tok.line = current_line
490
+ tok.column = current_column
491
+ elif self.ch == '-':
492
+ if self.peek_char() == '=':
493
+ ch = self.ch
494
+ self.read_char()
495
+ tok = Token(MINUS_ASSIGN, ch + self.ch)
496
+ tok.line = current_line
497
+ tok.column = current_column
498
+ else:
499
+ tok = Token(MINUS, self.ch)
500
+ tok.line = current_line
501
+ tok.column = current_column
502
+ elif self.ch == '*':
503
+ if self.peek_char() == '*':
504
+ ch = self.ch
505
+ self.read_char()
506
+ if self.peek_char() == '=':
507
+ self.read_char()
508
+ tok = Token(POWER_ASSIGN, '**=')
509
+ tok.line = current_line
510
+ tok.column = current_column
511
+ else:
512
+ tok = Token(POWER, ch + self.ch)
513
+ tok.line = current_line
514
+ tok.column = current_column
515
+ elif self.peek_char() == '=':
516
+ ch = self.ch
517
+ self.read_char()
518
+ tok = Token(STAR_ASSIGN, ch + self.ch)
519
+ tok.line = current_line
520
+ tok.column = current_column
521
+ else:
522
+ tok = Token(STAR, self.ch)
523
+ tok.line = current_line
524
+ tok.column = current_column
525
+ elif self.ch == '/':
526
+ if self.peek_char() == '=':
527
+ ch = self.ch
528
+ self.read_char()
529
+ tok = Token(SLASH_ASSIGN, ch + self.ch)
530
+ tok.line = current_line
531
+ tok.column = current_column
532
+ else:
533
+ tok = Token(SLASH, self.ch)
534
+ tok.line = current_line
535
+ tok.column = current_column
536
+ elif self.ch == '%':
537
+ if self.peek_char() == '=':
538
+ ch = self.ch
539
+ self.read_char()
540
+ tok = Token(MOD_ASSIGN, ch + self.ch)
541
+ tok.line = current_line
542
+ tok.column = current_column
543
+ else:
544
+ tok = Token(MOD, self.ch)
545
+ tok.line = current_line
546
+ tok.column = current_column
547
+ elif self.ch == '.':
548
+ tok = Token(DOT, self.ch)
183
549
  tok.line = current_line
184
550
  tok.column = current_column
185
- else:
186
- tok = Token(QUESTION, self.ch)
551
+ elif self.ch == "":
552
+ tok = Token(EOF, "")
187
553
  tok.line = current_line
188
554
  tok.column = current_column
189
- elif self.ch == '"':
190
- string_literal = self.read_string()
191
- tok = Token(STRING, string_literal)
192
- tok.line = current_line
193
- tok.column = current_column
194
- elif self.ch == '[':
195
- tok = Token(LBRACKET, self.ch)
196
- tok.line = current_line
197
- tok.column = current_column
198
- elif self.ch == ']':
199
- tok = Token(RBRACKET, self.ch)
200
- tok.line = current_line
201
- tok.column = current_column
202
- elif self.ch == '@':
203
- tok = Token(AT, self.ch)
204
- tok.line = current_line
205
- tok.column = current_column
206
- elif self.ch == '(':
207
- # Quick char-level scan: detect if this '(' pairs with a ')' that
208
- # is followed by '=>' (arrow). If so, set a hint flag so parser
209
- # can treat the parentheses as a lambda-parameter list.
210
- try:
211
- src = self.input
212
- i = self.position
213
- depth = 0
214
- found = False
215
- while i < len(src):
216
- c = src[i]
217
- if c == '(':
218
- depth += 1
219
- elif c == ')':
220
- depth -= 1
221
- if depth == 0:
222
- # look ahead for '=>' skipping whitespace
223
- j = i + 1
224
- while j < len(src) and src[j].isspace():
225
- j += 1
226
- if j + 1 < len(src) and src[j] == '=' and src[j + 1] == '>':
227
- found = True
228
- break
229
- i += 1
230
- self._next_paren_has_lambda = found
231
- except Exception:
232
- self._next_paren_has_lambda = False
233
-
234
- tok = Token(LPAREN, self.ch)
235
- tok.line = current_line
236
- tok.column = current_column
237
- elif self.ch == ')':
238
- tok = Token(RPAREN, self.ch)
239
- tok.line = current_line
240
- tok.column = current_column
241
- elif self.ch == '{':
242
- # Check if this might be start of embedded block
243
- lookback = self.input[max(0, self.position-10):self.position]
244
- if 'embedded' in lookback:
245
- self.in_embedded_block = True
246
- tok = Token(LBRACE, self.ch)
247
- tok.line = current_line
248
- tok.column = current_column
249
- elif self.ch == '}':
250
- if self.in_embedded_block:
251
- self.in_embedded_block = False
252
- tok = Token(RBRACE, self.ch)
253
- tok.line = current_line
254
- tok.column = current_column
255
- elif self.ch == ',':
256
- tok = Token(COMMA, self.ch)
257
- tok.line = current_line
258
- tok.column = current_column
259
- elif self.ch == ';':
260
- tok = Token(SEMICOLON, self.ch)
261
- tok.line = current_line
262
- tok.column = current_column
263
- elif self.ch == ':':
264
- tok = Token(COLON, self.ch)
265
- tok.line = current_line
266
- tok.column = current_column
267
- elif self.ch == '+':
268
- tok = Token(PLUS, self.ch)
269
- tok.line = current_line
270
- tok.column = current_column
271
- elif self.ch == '-':
272
- tok = Token(MINUS, self.ch)
273
- tok.line = current_line
274
- tok.column = current_column
275
- elif self.ch == '*':
276
- tok = Token(STAR, self.ch)
277
- tok.line = current_line
278
- tok.column = current_column
279
- elif self.ch == '/':
280
- # Check if this is division or comment
281
- if self.peek_char() == '/':
282
- # It's a // comment, handle above
283
- self.skip_double_slash_comment()
284
- return self.next_token()
285
555
  else:
286
- tok = Token(SLASH, self.ch)
287
- tok.line = current_line
288
- tok.column = current_column
289
- elif self.ch == '%':
290
- tok = Token(MOD, self.ch)
291
- tok.line = current_line
292
- tok.column = current_column
293
- elif self.ch == '.':
294
- tok = Token(DOT, self.ch)
295
- tok.line = current_line
296
- tok.column = current_column
297
- elif self.ch == "":
298
- tok = Token(EOF, "")
299
- tok.line = current_line
300
- tok.column = current_column
301
- else:
302
- if self.is_letter(self.ch):
303
- literal = self.read_identifier()
556
+ if self.is_letter(self.ch):
557
+ literal = self.read_identifier()
304
558
 
305
- if self.in_embedded_block:
306
- token_type = IDENT
307
- else:
308
- token_type = self.lookup_ident(literal)
559
+ if self.in_embedded_block:
560
+ token_type = IDENT
561
+ else:
562
+ token_type = self.lookup_ident(literal)
309
563
 
310
- tok = Token(token_type, literal)
311
- tok.line = current_line
312
- tok.column = current_column
313
- self.last_token_type = tok.type
314
- return tok
315
- elif self.is_digit(self.ch):
316
- num_literal = self.read_number()
317
- if '.' in num_literal:
318
- tok = Token(FLOAT, num_literal)
319
- else:
320
- tok = Token(INT, num_literal)
321
- tok.line = current_line
322
- tok.column = current_column
323
- self.last_token_type = tok.type
324
- return tok
325
- else:
326
- if self.ch in ['\n', '\r']:
327
- self.read_char()
328
- return self.next_token()
329
- # For embedded code, treat unknown printable chars as IDENT
330
- if self.ch.isprintable():
331
- literal = self.read_embedded_char()
332
- tok = Token(IDENT, literal)
564
+ tok = Token(token_type, literal)
333
565
  tok.line = current_line
334
566
  tok.column = current_column
335
- self.last_token_type = tok.type
567
+ self._finalize_token(tok)
336
568
  return tok
337
- # Unknown character - report helpful error
338
- char_desc = f"'{self.ch}'" if self.ch.isprintable() else f"'\\x{ord(self.ch):02x}'"
339
- error = self.error_reporter.report_error(
340
- ZexusSyntaxError,
341
- f"Unexpected character {char_desc}",
342
- line=current_line,
343
- column=current_column,
344
- filename=self.filename,
345
- suggestion="Remove or replace this character with valid Zexus syntax."
346
- )
347
- raise error
569
+ elif self.is_digit(self.ch):
570
+ num_literal = self.read_number()
571
+ if '.' in num_literal:
572
+ tok = Token(FLOAT, num_literal)
573
+ else:
574
+ tok = Token(INT, num_literal)
575
+ tok.line = current_line
576
+ tok.column = current_column
577
+ self._finalize_token(tok)
578
+ return tok
579
+ else:
580
+ if self.ch in ['\n', '\r']:
581
+ self.read_char()
582
+ continue
583
+ # For embedded code, treat unknown printable chars as IDENT
584
+ if self.ch.isprintable():
585
+ literal = self.read_embedded_char()
586
+ tok = Token(IDENT, literal)
587
+ tok.line = current_line
588
+ tok.column = current_column
589
+ self._finalize_token(tok)
590
+ return tok
591
+ # Unknown character - report helpful error
592
+ char_desc = f"'{self.ch}'" if self.ch.isprintable() else f"'\\x{ord(self.ch):02x}'"
593
+ error = self.error_reporter.report_error(
594
+ ZexusSyntaxError,
595
+ f"Unexpected character {char_desc}",
596
+ line=current_line,
597
+ column=current_column,
598
+ filename=self.filename,
599
+ suggestion="Remove or replace this character with valid Zexus syntax."
600
+ )
601
+ raise error
348
602
 
349
- self.read_char()
350
- # Track the token type for context-aware keyword handling
351
- self.last_token_type = tok.type
352
- return tok
603
+ self.read_char()
604
+ self._finalize_token(tok)
605
+ return tok
606
+
607
+ def _finalize_token(self, tok):
608
+ """Update lexer state after producing a token."""
609
+ if tok is None:
610
+ return
611
+
612
+ token_type = tok.type
613
+
614
+ # Maintain nesting depth for parentheses and brackets to help newline handling
615
+ if token_type == LPAREN:
616
+ self.paren_depth += 1
617
+ elif token_type == RPAREN:
618
+ if self.paren_depth > 0:
619
+ self.paren_depth -= 1
620
+ elif token_type == LBRACKET:
621
+ self.bracket_depth += 1
622
+ elif token_type == RBRACKET:
623
+ if self.bracket_depth > 0:
624
+ self.bracket_depth -= 1
625
+ elif token_type == LBRACE:
626
+ self.brace_depth += 1
627
+ elif token_type == RBRACE:
628
+ if self.brace_depth > 0:
629
+ self.brace_depth -= 1
630
+
631
+ # Update last token type for context-aware keyword handling
632
+ self.last_token_type = token_type
633
+
634
+ # Determine whether the next non-whitespace token is at a statement boundary
635
+ if token_type in {SEMICOLON, RBRACE, LBRACE, EOF}:
636
+ self.at_statement_boundary = True
637
+ elif token_type in {COMMA, DOT, ASSIGN, COLON, LPAREN, LBRACKET, AT}:
638
+ self.at_statement_boundary = False
639
+ elif token_type in {LET, CONST}:
640
+ # Declarations expect an identifier next
641
+ self.at_statement_boundary = False
642
+ else:
643
+ # Default: remain in the current statement
644
+ self.at_statement_boundary = False
353
645
 
354
646
  def read_embedded_char(self):
355
647
  """Read a single character as identifier for embedded code compatibility"""
@@ -379,6 +671,8 @@ class Lexer:
379
671
  start_line = self.line
380
672
  start_column = self.column
381
673
  result = []
674
+ has_interpolation = False
675
+ parts = [] # list of ("str", text) or ("expr", text)
382
676
  while True:
383
677
  self.read_char()
384
678
  if self.ch == "":
@@ -412,16 +706,170 @@ class Lexer:
412
706
  'r': '\r',
413
707
  '\\': '\\',
414
708
  '"': '"',
415
- "'": "'"
709
+ "'": "'",
710
+ '$': '$'
416
711
  }
417
712
  result.append(escape_map.get(self.ch, self.ch))
713
+ elif self.ch == '$' and self.peek_char() == '{':
714
+ # String interpolation: ${expr}
715
+ has_interpolation = True
716
+ # Save current string part
717
+ if result:
718
+ parts.append(("str", ''.join(result)))
719
+ result = []
720
+ else:
721
+ parts.append(("str", ""))
722
+ # Skip the '{'
723
+ self.read_char()
724
+ # Read expression until matching '}'
725
+ expr_chars = []
726
+ brace_depth = 1
727
+ while brace_depth > 0:
728
+ self.read_char()
729
+ if self.ch == "":
730
+ error = self.error_reporter.report_error(
731
+ ZexusSyntaxError,
732
+ "Unterminated interpolation expression in string",
733
+ line=start_line,
734
+ column=start_column,
735
+ filename=self.filename,
736
+ suggestion="Add a closing } to terminate the interpolation."
737
+ )
738
+ raise error
739
+ elif self.ch == '{':
740
+ brace_depth += 1
741
+ expr_chars.append(self.ch)
742
+ elif self.ch == '}':
743
+ brace_depth -= 1
744
+ if brace_depth > 0:
745
+ expr_chars.append(self.ch)
746
+ else:
747
+ expr_chars.append(self.ch)
748
+ parts.append(("expr", ''.join(expr_chars)))
418
749
  elif self.ch == '"':
419
750
  # End of string
420
751
  break
421
752
  else:
422
753
  result.append(self.ch)
754
+
755
+ if has_interpolation:
756
+ # Add trailing string part
757
+ parts.append(("str", ''.join(result)))
758
+ return parts # Return list of parts for interpolation
423
759
  return ''.join(result)
424
760
 
761
+ def read_single_quoted_string(self):
762
+ """Read a single-quoted string literal ('...')"""
763
+ start_line = self.line
764
+ start_column = self.column
765
+ result = []
766
+ while True:
767
+ self.read_char()
768
+ if self.ch == "":
769
+ error = self.error_reporter.report_error(
770
+ ZexusSyntaxError,
771
+ "Unterminated string literal",
772
+ line=start_line,
773
+ column=start_column,
774
+ filename=self.filename,
775
+ suggestion="Add a closing quote ' to terminate the string."
776
+ )
777
+ raise error
778
+ elif self.ch == '\\':
779
+ self.read_char()
780
+ if self.ch == '':
781
+ error = self.error_reporter.report_error(
782
+ ZexusSyntaxError,
783
+ "Incomplete escape sequence at end of file",
784
+ line=self.line,
785
+ column=self.column,
786
+ filename=self.filename,
787
+ suggestion="Remove the backslash or complete the escape sequence."
788
+ )
789
+ raise error
790
+ escape_map = {
791
+ 'n': '\n', 't': '\t', 'r': '\r',
792
+ '\\': '\\', "'": "'", '"': '"'
793
+ }
794
+ result.append(escape_map.get(self.ch, self.ch))
795
+ elif self.ch == "'":
796
+ break
797
+ else:
798
+ result.append(self.ch)
799
+ return ''.join(result)
800
+
801
+ def read_multiline_string(self, quote_char='"'):
802
+ """Read a triple-quoted multiline string (\"\"\"...\"\"\" or '''...''')"""
803
+ start_line = self.line
804
+ start_column = self.column
805
+ # Skip the three opening quotes
806
+ self.read_char() # skip 2nd quote
807
+ self.read_char() # skip 3rd quote
808
+ result = []
809
+ while True:
810
+ self.read_char()
811
+ if self.ch == "":
812
+ error = self.error_reporter.report_error(
813
+ ZexusSyntaxError,
814
+ "Unterminated multiline string literal",
815
+ line=start_line,
816
+ column=start_column,
817
+ filename=self.filename,
818
+ suggestion=f"Add closing {quote_char}{quote_char}{quote_char} to terminate the multiline string."
819
+ )
820
+ raise error
821
+ elif self.ch == '\\':
822
+ self.read_char()
823
+ if self.ch == '':
824
+ break
825
+ escape_map = {
826
+ 'n': '\n', 't': '\t', 'r': '\r',
827
+ '\\': '\\', quote_char: quote_char
828
+ }
829
+ result.append(escape_map.get(self.ch, self.ch))
830
+ elif self.ch == quote_char:
831
+ # Check for triple close
832
+ if self.peek_char() == quote_char and self.read_position + 1 < len(self.input) and self.input[self.read_position + 1] == quote_char:
833
+ self.read_char() # skip 2nd closing quote
834
+ self.read_char() # skip 3rd closing quote
835
+ break
836
+ else:
837
+ result.append(self.ch)
838
+ else:
839
+ result.append(self.ch)
840
+ return ''.join(result)
841
+
842
+ def skip_block_comment(self):
843
+ """Skip /* ... */ block comments (can be nested)"""
844
+ start_line = self.line
845
+ start_column = self.column
846
+ # Skip the opening /*
847
+ self.read_char() # skip *
848
+ self.read_char() # move past *
849
+ depth = 1
850
+ while depth > 0:
851
+ if self.ch == "":
852
+ error = self.error_reporter.report_error(
853
+ ZexusSyntaxError,
854
+ "Unterminated block comment",
855
+ line=start_line,
856
+ column=start_column,
857
+ filename=self.filename,
858
+ suggestion="Add closing */ to terminate the block comment."
859
+ )
860
+ raise error
861
+ elif self.ch == '/' and self.peek_char() == '*':
862
+ depth += 1
863
+ self.read_char()
864
+ self.read_char()
865
+ elif self.ch == '*' and self.peek_char() == '/':
866
+ depth -= 1
867
+ self.read_char()
868
+ self.read_char()
869
+ else:
870
+ self.read_char()
871
+ self.skip_whitespace()
872
+
425
873
  def read_identifier(self):
426
874
  start_position = self.position
427
875
  while self.is_letter(self.ch) or self.is_digit(self.ch):
@@ -448,200 +896,32 @@ class Lexer:
448
896
  return number_str
449
897
 
450
898
  def lookup_ident(self, ident):
451
- # Special case: Always treat true, false, and null as keywords
452
- if ident in ['true', 'false', 'null']:
453
- keywords = {
454
- "true": TRUE,
455
- "false": FALSE,
456
- "null": NULL,
457
- }
458
- return keywords[ident]
459
-
460
- # Define strict keywords that should NEVER be treated as identifiers
461
- # These are control flow, operators, and modifiers that must always be keywords
462
- strict_keywords = {
463
- 'if', 'elif', 'else', 'while', 'for', 'each', 'in',
464
- 'return', 'break', 'continue', 'throw', 'try', 'catch',
465
- 'await', 'async', 'spawn', 'let', 'const', 'print',
466
- 'use', 'export', 'import', 'debug', 'match'
467
- }
468
-
469
- # If this is a strict keyword, always treat as keyword
470
- if ident in strict_keywords:
471
- # Fall through to normal keyword lookup at the end
472
- pass
473
- else:
474
- # Context-aware keyword recognition: allow non-strict keywords as identifiers in certain contexts
475
- # These contexts are where variable names are expected:
476
- # - After LET, CONST (variable declarations)
477
- # - After DOT (property/method names)
478
- # - After COMMA (function parameters, after first param)
479
- # - After LBRACKET (map keys when used as identifiers)
480
- # - After ASSIGN (right-hand side can use keywords as identifiers: x = data)
481
- # - After COLON (map keys, type annotations)
482
- #
483
- # Note: LPAREN removed - it was causing keywords after '(' to become identifiers
484
- # even at the start of new statements. Instead, keywords as param names will
485
- # work after the first parameter (via COMMA).
486
- contexts_allowing_keywords_as_idents = {
487
- LET, CONST, DOT, COMMA, LBRACKET, COLON, ASSIGN
488
- }
489
-
490
- if self.last_token_type in contexts_allowing_keywords_as_idents:
491
- # In these contexts, treat non-strict keywords as identifiers
492
- return IDENT
493
-
494
- # Special case: ACTION and FUNCTION keywords should only be recognized
495
- # when they actually start a definition, not when used as variable names in expressions
496
- # Allow them as keywords at statement boundaries or after contract/data blocks
497
- if ident in ['action', 'function']:
498
- # These should be keywords at the start of a statement or after RETURN
499
- # Allow after: None, SEMICOLON, LBRACE, RBRACE, INT, STRING, RPAREN (end of previous statement)
500
- # Also allow after RETURN for function expressions: return function() {...}
501
- # Also allow after ASYNC for async functions: async function name() {...}
502
- # Also allow after EXPORT for exported functions: export function name() {...}
503
- statement_boundaries = {None, SEMICOLON, LBRACE, RBRACE, INT, STRING, FLOAT, RPAREN, TRUE, FALSE, NULL, RETURN, ASYNC, EXPORT}
504
- if self.last_token_type in statement_boundaries:
505
- # Treat as keyword
506
- pass # Fall through to keyword lookup
507
- else:
508
- # In expression context, treat as identifier
509
- return IDENT
510
-
511
- # Special case: DATA keyword should only be recognized in contract storage contexts
512
- # When used as a parameter name, variable name, or in expressions, treat as identifier
513
- if ident == 'data':
514
- # Allow as keyword only in contract contexts (after CONTRACT or in contract body)
515
- # In all other contexts (parameters, variables, expressions), treat as identifier
516
- # Safe contexts for DATA keyword: after statement boundaries and value literals in contracts
517
- # This includes: LBRACE (contract start), RBRACE (after Map {}), RBRACKET (after List []),
518
- # STRING, INT, FLOAT, TRUE, FALSE (after literal values), SEMICOLON
519
- contract_contexts = {SEMICOLON, LBRACE, RBRACE, RBRACKET, STRING, INT, FLOAT, TRUE, FALSE, NULL}
520
- if self.last_token_type in contract_contexts:
521
- # Might be a data declaration in contract, allow as keyword
522
- pass # Fall through to keyword lookup
523
- else:
524
- # In expression context, parameter list, or other contexts, treat as identifier
525
- return IDENT
526
-
527
- # keyword lookup mapping (string -> token constant)
528
- keywords = {
529
- "let": LET,
530
- "const": CONST, # NEW: Const keyword for immutable variables
531
- "data": DATA, # NEW: Data keyword for dataclass definitions
532
- "print": PRINT,
533
- "if": IF,
534
- "then": THEN, # NEW: Then keyword for if-then-else expressions
535
- "elif": ELIF, # NEW: Elif keyword for else-if conditionals
536
- "else": ELSE,
537
- "true": TRUE,
538
- "false": FALSE,
539
- "null": NULL,
540
- "return": RETURN,
541
- "for": FOR,
542
- "each": EACH,
543
- "in": IN,
544
- "action": ACTION,
545
- "function": FUNCTION,
546
- "while": WHILE,
547
- "use": USE,
548
- "exactly": EXACTLY,
549
- "embedded": EMBEDDED,
550
- "export": EXPORT,
551
- "lambda": LAMBDA,
552
- "debug": DEBUG, # DUAL-MODE: Works as both statement (debug x;) and function (debug(x))
553
- "try": TRY, # NEW: Try keyword
554
- "catch": CATCH, # NEW: Catch keyword
555
- "continue": CONTINUE, # NEW: Continue on error keyword
556
- "break": BREAK, # NEW: Break loop keyword
557
- "throw": THROW, # NEW: Throw error keyword
558
- "external": EXTERNAL, # NEW: External keyword
559
- # "from": FROM, # NOT a keyword - only recognized contextually in import statements
560
- "screen": SCREEN, # NEW: renderer keyword
561
- "component": COMPONENT, # NEW: renderer keyword
562
- "theme": THEME, # NEW: renderer keyword
563
- "canvas": CANVAS, # NEW (optional recognition)
564
- "graphics": GRAPHICS, # NEW (optional recognition)
565
- "animation": ANIMATION, # NEW (optional recognition)
566
- "clock": CLOCK, # NEW (optional recognition)
567
- "async": ASYNC,
568
- "await": AWAIT,
569
- "channel": CHANNEL, # NEW: Channel for concurrent communication
570
- "send": SEND, # NEW: Send to channel
571
- "receive": RECEIVE, # NEW: Receive from channel
572
- "atomic": ATOMIC, # NEW: Atomic operations
573
- "event": EVENT,
574
- "emit": EMIT,
575
- "enum": ENUM,
576
- "protocol": PROTOCOL,
577
- "import": IMPORT,
578
- # Modifiers
579
- "public": PUBLIC,
580
- "private": PRIVATE,
581
- "sealed": SEALED,
582
- "secure": SECURE,
583
- "pure": PURE,
584
- "view": VIEW,
585
- "payable": PAYABLE,
586
- "modifier": MODIFIER,
587
- # NEW: Entity, Verify, Contract, Protect
588
- "entity": ENTITY,
589
- "verify": VERIFY,
590
- "contract": CONTRACT,
591
- "protect": PROTECT,
592
- "implements": IMPLEMENTS,
593
- "this": THIS,
594
- "interface": INTERFACE,
595
- "capability": CAPABILITY, # NEW: Capability keyword for security
596
- "grant": GRANT, # NEW: Grant keyword for capability grants
597
- "revoke": REVOKE, # NEW: Revoke keyword for capability revocation
598
- "module": MODULE, # NEW: Module keyword for code organization
599
- "package": PACKAGE, # NEW: Package keyword for package definition
600
- "using": USING, # NEW: Using keyword for resource management
601
- "type_alias": TYPE_ALIAS, # NEW: Type alias keyword for type definitions
602
- "seal": SEAL, # NEW: Seal keyword for immutable objects
603
- "audit": AUDIT, # NEW: Audit keyword for compliance logging
604
- "restrict": RESTRICT, # NEW: Restrict keyword for field-level access control
605
- "sandbox": SANDBOX, # NEW: Sandbox keyword for isolated execution
606
- "trail": TRAIL, # NEW: Trail keyword for real-time logging
607
- # Advanced features
608
- "middleware": MIDDLEWARE,
609
- "auth": AUTH,
610
- "throttle": THROTTLE,
611
- "cache": CACHE,
612
- # Blockchain & Smart Contract keywords
613
- "ledger": LEDGER, # Immutable state ledger
614
- "state": STATE, # State management
615
- "revert": REVERT, # Revert transaction
616
- # NOTE: "tx" removed as keyword - users can use it as variable name
617
- # Only uppercase "TX" is reserved for transaction context
618
- "limit": LIMIT, # Gas/resource limit
619
- # NOTE: hash, signature, verify_sig, gas are BUILTINS, not keywords
620
- # NEW: Persistent storage keywords
621
- "persistent": PERSISTENT, # NEW: Persistent keyword
622
- "storage": STORAGE, # NEW: Storage keyword
623
- "require": REQUIRE, # Already defined in zexus_token.py
624
- # Logical operators as keywords (alternative to && and ||)
625
- "and": AND, # Logical AND (alternative to &&)
626
- "or": OR, # Logical OR (alternative to ||)
627
- # Performance optimization keywords
628
- "native": NATIVE, # Performance: call C/C++ code
629
- "gc": GC, # Performance: control garbage collection
630
- "inline": INLINE, # Performance: function inlining
631
- "buffer": BUFFER, # Performance: direct memory access
632
- "simd": SIMD, # Performance: vector operations
633
- "defer": DEFER, # Convenience: cleanup code execution
634
- "pattern": PATTERN, # Convenience: pattern matching
635
- "match": MATCH, # Match expression for pattern matching
636
- "enum": ENUM, # Advanced: type-safe enumerations
637
- "stream": STREAM, # Advanced: event streaming
638
- "watch": WATCH, # Advanced: reactive state management
639
- "log": LOG, # Output logging to file
640
- "inject": INJECT, # Advanced: dependency injection
641
- "validate": VALIDATE, # Data validation
642
- "sanitize": SANITIZE, # Data sanitization
643
- }
644
- return keywords.get(ident, IDENT)
899
+ # Always treat literal keywords as reserved regardless of context.
900
+ literal_token = _LITERAL_KEYWORDS.get(ident)
901
+ if literal_token is not None:
902
+ return literal_token
903
+
904
+ token = _KEYWORDS.get(ident)
905
+ if token is None:
906
+ return IDENT
907
+
908
+ if ident in _FUNCTION_DECL_KEYWORDS:
909
+ if self.last_token_type in _FUNCTION_STATEMENT_BOUNDARIES:
910
+ return token
911
+ return IDENT
912
+
913
+ if ident == "data":
914
+ if self.last_token_type in _DATA_KEYWORD_CONTRACT_CONTEXTS:
915
+ return token
916
+ return IDENT
917
+
918
+ if ident in _STRICT_KEYWORDS:
919
+ return token
920
+
921
+ if not self.at_statement_boundary and self.last_token_type in _CONTEXTS_ALLOWING_KEYWORD_IDENTS:
922
+ return IDENT
923
+
924
+ return token
645
925
 
646
926
  def is_letter(self, char):
647
927
  return 'a' <= char <= 'z' or 'A' <= char <= 'Z' or char == '_'
@@ -651,4 +931,8 @@ class Lexer:
651
931
 
652
932
  def skip_whitespace(self):
653
933
  while self.ch in [' ', '\t', '\n', '\r']:
934
+ if self.ch in ['\n', '\r']:
935
+ # Treat newline as potential statement boundary when not inside paren/bracket expressions
936
+ if self.paren_depth == 0 and self.bracket_depth == 0:
937
+ self.at_statement_boundary = True
654
938
  self.read_char()