zexus 1.7.1 → 1.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/package.json +1 -1
- package/src/__init__.py +7 -0
- package/src/zexus/__init__.py +1 -1
- package/src/zexus/__pycache__/__init__.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/capability_system.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/debug_sanitizer.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/environment.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/error_reporter.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/input_validation.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/lexer.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/module_cache.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/module_manager.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/object.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/security.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/security_enforcement.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/syntax_validator.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/zexus_ast.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/zexus_token.cpython-312.pyc +0 -0
- package/src/zexus/access_control_system/__pycache__/__init__.cpython-312.pyc +0 -0
- package/src/zexus/access_control_system/__pycache__/access_control.cpython-312.pyc +0 -0
- package/src/zexus/advanced_types.py +17 -2
- package/src/zexus/blockchain/__init__.py +411 -0
- package/src/zexus/blockchain/accelerator.py +1160 -0
- package/src/zexus/blockchain/chain.py +660 -0
- package/src/zexus/blockchain/consensus.py +821 -0
- package/src/zexus/blockchain/contract_vm.py +1019 -0
- package/src/zexus/blockchain/crypto.py +79 -14
- package/src/zexus/blockchain/events.py +526 -0
- package/src/zexus/blockchain/loadtest.py +721 -0
- package/src/zexus/blockchain/monitoring.py +350 -0
- package/src/zexus/blockchain/mpt.py +716 -0
- package/src/zexus/blockchain/multichain.py +951 -0
- package/src/zexus/blockchain/multiprocess_executor.py +338 -0
- package/src/zexus/blockchain/network.py +886 -0
- package/src/zexus/blockchain/node.py +666 -0
- package/src/zexus/blockchain/rpc.py +1203 -0
- package/src/zexus/blockchain/rust_bridge.py +421 -0
- package/src/zexus/blockchain/storage.py +423 -0
- package/src/zexus/blockchain/tokens.py +750 -0
- package/src/zexus/blockchain/upgradeable.py +1004 -0
- package/src/zexus/blockchain/verification.py +1602 -0
- package/src/zexus/blockchain/wallet.py +621 -0
- package/src/zexus/cli/__pycache__/main.cpython-312.pyc +0 -0
- package/src/zexus/cli/main.py +300 -20
- package/src/zexus/cli/zpm.py +1 -1
- package/src/zexus/compiler/__pycache__/bytecode.cpython-312.pyc +0 -0
- package/src/zexus/compiler/__pycache__/lexer.cpython-312.pyc +0 -0
- package/src/zexus/compiler/__pycache__/parser.cpython-312.pyc +0 -0
- package/src/zexus/compiler/__pycache__/semantic.cpython-312.pyc +0 -0
- package/src/zexus/compiler/__pycache__/zexus_ast.cpython-312.pyc +0 -0
- package/src/zexus/compiler/lexer.py +10 -5
- package/src/zexus/concurrency_system.py +79 -0
- package/src/zexus/config.py +54 -0
- package/src/zexus/crypto_bridge.py +244 -8
- package/src/zexus/dap/__init__.py +10 -0
- package/src/zexus/dap/__main__.py +4 -0
- package/src/zexus/dap/dap_server.py +391 -0
- package/src/zexus/dap/debug_engine.py +298 -0
- package/src/zexus/environment.py +10 -1
- package/src/zexus/evaluator/__pycache__/bytecode_compiler.cpython-312.pyc +0 -0
- package/src/zexus/evaluator/__pycache__/core.cpython-312.pyc +0 -0
- package/src/zexus/evaluator/__pycache__/expressions.cpython-312.pyc +0 -0
- package/src/zexus/evaluator/__pycache__/functions.cpython-312.pyc +0 -0
- package/src/zexus/evaluator/__pycache__/resource_limiter.cpython-312.pyc +0 -0
- package/src/zexus/evaluator/__pycache__/statements.cpython-312.pyc +0 -0
- package/src/zexus/evaluator/__pycache__/unified_execution.cpython-312.pyc +0 -0
- package/src/zexus/evaluator/__pycache__/utils.cpython-312.pyc +0 -0
- package/src/zexus/evaluator/bytecode_compiler.py +441 -37
- package/src/zexus/evaluator/core.py +560 -49
- package/src/zexus/evaluator/expressions.py +122 -49
- package/src/zexus/evaluator/functions.py +417 -16
- package/src/zexus/evaluator/statements.py +521 -118
- package/src/zexus/evaluator/unified_execution.py +573 -72
- package/src/zexus/evaluator/utils.py +14 -2
- package/src/zexus/event_loop.py +186 -0
- package/src/zexus/lexer.py +742 -486
- package/src/zexus/lsp/__init__.py +1 -1
- package/src/zexus/lsp/definition_provider.py +163 -9
- package/src/zexus/lsp/server.py +22 -8
- package/src/zexus/lsp/symbol_provider.py +182 -9
- package/src/zexus/module_cache.py +237 -9
- package/src/zexus/object.py +64 -6
- package/src/zexus/parser/__pycache__/parser.cpython-312.pyc +0 -0
- package/src/zexus/parser/__pycache__/strategy_context.cpython-312.pyc +0 -0
- package/src/zexus/parser/__pycache__/strategy_structural.cpython-312.pyc +0 -0
- package/src/zexus/parser/parser.py +786 -285
- package/src/zexus/parser/strategy_context.py +407 -66
- package/src/zexus/parser/strategy_structural.py +117 -19
- package/src/zexus/persistence.py +15 -1
- package/src/zexus/renderer/__init__.py +15 -0
- package/src/zexus/renderer/__pycache__/__init__.cpython-312.pyc +0 -0
- package/src/zexus/renderer/__pycache__/backend.cpython-312.pyc +0 -0
- package/src/zexus/renderer/__pycache__/canvas.cpython-312.pyc +0 -0
- package/src/zexus/renderer/__pycache__/color_system.cpython-312.pyc +0 -0
- package/src/zexus/renderer/__pycache__/layout.cpython-312.pyc +0 -0
- package/src/zexus/renderer/__pycache__/main_renderer.cpython-312.pyc +0 -0
- package/src/zexus/renderer/__pycache__/painter.cpython-312.pyc +0 -0
- package/src/zexus/renderer/tk_backend.py +208 -0
- package/src/zexus/renderer/web_backend.py +260 -0
- package/src/zexus/runtime/__pycache__/__init__.cpython-312.pyc +0 -0
- package/src/zexus/runtime/__pycache__/async_runtime.cpython-312.pyc +0 -0
- package/src/zexus/runtime/__pycache__/load_manager.cpython-312.pyc +0 -0
- package/src/zexus/runtime/file_flags.py +137 -0
- package/src/zexus/safety/__pycache__/__init__.cpython-312.pyc +0 -0
- package/src/zexus/safety/__pycache__/memory_safety.cpython-312.pyc +0 -0
- package/src/zexus/security.py +424 -34
- package/src/zexus/stdlib/fs.py +23 -18
- package/src/zexus/stdlib/http.py +289 -186
- package/src/zexus/stdlib/sockets.py +207 -163
- package/src/zexus/stdlib/websockets.py +282 -0
- package/src/zexus/stdlib_integration.py +369 -2
- package/src/zexus/strategy_recovery.py +6 -3
- package/src/zexus/type_checker.py +423 -0
- package/src/zexus/virtual_filesystem.py +189 -2
- package/src/zexus/vm/__init__.py +113 -3
- package/src/zexus/vm/__pycache__/async_optimizer.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/bytecode.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/bytecode_converter.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/cache.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/compiler.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/gas_metering.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/jit.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/parallel_vm.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/vm.cpython-312.pyc +0 -0
- package/src/zexus/vm/async_optimizer.py +14 -1
- package/src/zexus/vm/binary_bytecode.py +659 -0
- package/src/zexus/vm/bytecode.py +28 -1
- package/src/zexus/vm/bytecode_converter.py +26 -12
- package/src/zexus/vm/cabi.c +1985 -0
- package/src/zexus/vm/cabi.cpython-312-x86_64-linux-gnu.so +0 -0
- package/src/zexus/vm/cabi.h +127 -0
- package/src/zexus/vm/cache.py +557 -17
- package/src/zexus/vm/compiler.py +703 -5
- package/src/zexus/vm/fastops.c +15743 -0
- package/src/zexus/vm/fastops.cpython-312-x86_64-linux-gnu.so +0 -0
- package/src/zexus/vm/fastops.pyx +288 -0
- package/src/zexus/vm/gas_metering.py +50 -9
- package/src/zexus/vm/jit.py +83 -2
- package/src/zexus/vm/native_jit_backend.py +1816 -0
- package/src/zexus/vm/native_runtime.cpp +1388 -0
- package/src/zexus/vm/native_runtime.cpython-312-x86_64-linux-gnu.so +0 -0
- package/src/zexus/vm/optimizer.py +161 -11
- package/src/zexus/vm/parallel_vm.py +118 -42
- package/src/zexus/vm/peephole_optimizer.py +82 -4
- package/src/zexus/vm/profiler.py +38 -18
- package/src/zexus/vm/register_allocator.py +16 -5
- package/src/zexus/vm/register_vm.py +8 -5
- package/src/zexus/vm/vm.py +3411 -573
- package/src/zexus/vm/wasm_compiler.py +658 -0
- package/src/zexus/zexus_ast.py +63 -11
- package/src/zexus/zexus_token.py +13 -5
- package/src/zexus/zpm/installer.py +55 -15
- package/src/zexus/zpm/package_manager.py +1 -1
- package/src/zexus/zpm/registry.py +257 -28
- package/src/zexus.egg-info/PKG-INFO +7 -4
- package/src/zexus.egg-info/SOURCES.txt +116 -9
- package/src/zexus.egg-info/entry_points.txt +1 -0
- package/src/zexus.egg-info/requires.txt +4 -0
package/src/zexus/lexer.py
CHANGED
|
@@ -2,6 +2,187 @@
|
|
|
2
2
|
from .zexus_token import *
|
|
3
3
|
from .error_reporter import get_error_reporter, SyntaxError as ZexusSyntaxError
|
|
4
4
|
|
|
5
|
+
# Spellings of the literal keywords mapped to their token-type constants.
# NOTE(review): TRUE/FALSE/NULL presumably come from the star import of
# .zexus_token at the top of this module -- confirm.
_LITERAL_KEYWORDS = {"true": TRUE, "false": FALSE, "null": NULL}
|
|
10
|
+
|
|
11
|
+
# Keyword spellings in the "strict" group.
# NOTE(review): presumably these are the words that stay keywords unless the
# preceding token explicitly allows an identifier -- confirm against the
# lookup logic that consumes this set.
_STRICT_KEYWORDS = {
    # control flow
    "if", "elif", "else", "while", "for", "each", "in",
    "return", "break", "continue", "throw", "try", "catch",
    # concurrency, bindings and output
    "await", "async", "spawn", "let", "const", "print",
    # modules, debugging, matching and lambdas
    "use", "find", "load", "export", "import", "debug", "match", "lambda",
    "case", "default",
}
|
|
18
|
+
|
|
19
|
+
# Token types collected as "contexts allowing keyword identifiers".
# NOTE(review): presumably compared against the previous token type so that a
# keyword spelling following one of these can be lexed as a plain identifier --
# confirm against the code that reads this set.
_CONTEXTS_ALLOWING_KEYWORD_IDENTS = {
    LET,
    CONST,
    DOT,
    COMMA,
    LBRACKET,
    COLON,
    ASSIGN,
}
|
|
22
|
+
|
|
23
|
+
# Master table mapping every reserved spelling to its token-type constant.
# The token constants are expected to be provided by the module's
# `from .zexus_token import *` import.
#
# Each key appears exactly once: a repeated key in a dict display is silently
# collapsed by Python (the last value wins), which hides copy/paste mistakes.
# A redundant second "enum" entry was removed; the resulting mapping (keys,
# values and insertion order) is unchanged.
_KEYWORDS = {
    # Bindings, output and basic control flow
    "let": LET,
    "const": CONST,
    "data": DATA,
    "print": PRINT,
    "if": IF,
    "then": THEN,
    "elif": ELIF,
    "else": ELSE,
    "true": TRUE,
    "false": FALSE,
    "null": NULL,
    "return": RETURN,
    "for": FOR,
    "each": EACH,
    "in": IN,
    "action": ACTION,
    "function": FUNCTION,
    "while": WHILE,
    # Modules, embedding and lambdas
    "use": USE,
    "find": FIND,
    "load": LOAD,
    "exactly": EXACTLY,
    "embedded": EMBEDDED,
    "export": EXPORT,
    "lambda": LAMBDA,
    "debug": DEBUG,
    # Exceptions and loop control
    "try": TRY,
    "catch": CATCH,
    "finally": FINALLY,
    "continue": CONTINUE,
    "break": BREAK,
    "throw": THROW,
    "external": EXTERNAL,
    # UI / rendering
    "screen": SCREEN,
    "component": COMPONENT,
    "theme": THEME,
    "color": COLOR,
    "canvas": CANVAS,
    "graphics": GRAPHICS,
    "animation": ANIMATION,
    "clock": CLOCK,
    # Concurrency and events
    "async": ASYNC,
    "await": AWAIT,
    "channel": CHANNEL,
    "send": SEND,
    "receive": RECEIVE,
    "atomic": ATOMIC,
    "event": EVENT,
    "emit": EMIT,
    "enum": ENUM,
    "protocol": PROTOCOL,
    "import": IMPORT,
    # Visibility and function modifiers
    "public": PUBLIC,
    "private": PRIVATE,
    "sealed": SEALED,
    "secure": SECURE,
    "pure": PURE,
    "view": VIEW,
    "payable": PAYABLE,
    "modifier": MODIFIER,
    # Entities, contracts and capabilities
    "entity": ENTITY,
    "verify": VERIFY,
    "contract": CONTRACT,
    "protect": PROTECT,
    "implements": IMPLEMENTS,
    "this": THIS,
    "as": AS,
    "interface": INTERFACE,
    "capability": CAPABILITY,
    "grant": GRANT,
    "revoke": REVOKE,
    "module": MODULE,
    "package": PACKAGE,
    "using": USING,
    "type_alias": TYPE_ALIAS,
    # Security and middleware
    "seal": SEAL,
    "audit": AUDIT,
    "restrict": RESTRICT,
    "sandbox": SANDBOX,
    "trail": TRAIL,
    "middleware": MIDDLEWARE,
    "auth": AUTH,
    "throttle": THROTTLE,
    "cache": CACHE,
    # Ledger / state
    "ledger": LEDGER,
    "state": STATE,
    "revert": REVERT,
    "limit": LIMIT,
    "persistent": PERSISTENT,
    "storage": STORAGE,
    "require": REQUIRE,
    # Word-form logical operators
    "and": AND,
    "or": OR,
    # Low-level / performance
    "native": NATIVE,
    "gc": GC,
    "inline": INLINE,
    "buffer": BUFFER,
    "simd": SIMD,
    "defer": DEFER,
    # Pattern matching
    "pattern": PATTERN,
    "match": MATCH,
    "case": CASE,
    "default": DEFAULT,
    # Streams, observation and input handling
    "stream": STREAM,
    "watch": WATCH,
    "log": LOG,
    "inject": INJECT,
    "validate": VALIDATE,
    "sanitize": SANITIZE,
}
|
|
135
|
+
|
|
136
|
+
# The two spellings grouped as function-declaration keywords.
# NOTE(review): presumably consulted when deciding whether "action"/"function"
# starts a declaration or is used as an identifier -- confirm with the caller.
_FUNCTION_DECL_KEYWORDS = {
    "action",
    "function",
}
|
|
137
|
+
|
|
138
|
+
# Previous-token types (plus None) collected as function-statement boundaries.
# NOTE(review): None presumably stands for "no previous token yet", i.e. the
# start of input -- confirm against the code that reads last_token_type.
_FUNCTION_STATEMENT_BOUNDARIES = {
    None,
    # punctuation
    SEMICOLON, LBRACE, RBRACE, RBRACKET,
    # literal values and a closing parenthesis
    INT, STRING, FLOAT, RPAREN, TRUE, FALSE, NULL,
    # statement / expression lead-ins
    RETURN, ASSIGN,
    # modifiers
    ASYNC, EXPORT, PUBLIC, PRIVATE, SEALED, INLINE,
    SECURE, PURE, VIEW, PAYABLE, NATIVE,
}
|
|
165
|
+
|
|
166
|
+
# Token types collected as contexts for the "data" keyword inside contracts.
# NOTE(review): presumably the previous-token types after which "data" is
# lexed as the DATA keyword rather than an identifier -- confirm with the
# lookup code that uses this set.
_DATA_KEYWORD_CONTRACT_CONTEXTS = {
    # punctuation
    SEMICOLON, LBRACE, RBRACE, RBRACKET,
    # literal values
    STRING, INT, FLOAT, TRUE, FALSE, NULL,
    # visibility and function modifiers
    PRIVATE, PUBLIC, SEALED, SECURE, PURE, VIEW, PAYABLE,
}
|
|
185
|
+
|
|
5
186
|
class Lexer:
|
|
6
187
|
def __init__(self, source_code, filename="<stdin>"):
|
|
7
188
|
self.input = source_code
|
|
@@ -18,6 +199,11 @@ class Lexer:
|
|
|
18
199
|
self._next_paren_has_lambda = False
|
|
19
200
|
# Track last token type to enable context-aware keyword handling
|
|
20
201
|
self.last_token_type = None
|
|
202
|
+
# Track statement boundaries and nesting depth to disambiguate keywords vs identifiers
|
|
203
|
+
self.at_statement_boundary = True
|
|
204
|
+
self.paren_depth = 0
|
|
205
|
+
self.bracket_depth = 0
|
|
206
|
+
self.brace_depth = 0
|
|
21
207
|
|
|
22
208
|
# Register source with error reporter
|
|
23
209
|
self.error_reporter = get_error_reporter()
|
|
@@ -48,308 +234,414 @@ class Lexer:
|
|
|
48
234
|
return self.input[self.read_position]
|
|
49
235
|
|
|
50
236
|
def next_token(self):
|
|
51
|
-
|
|
237
|
+
# NOTE: This method must not recurse. Large files can contain thousands
|
|
238
|
+
# of consecutive comment/blank lines; using recursion here can hit
|
|
239
|
+
# Python's recursion limit and cause unpredictable failures.
|
|
240
|
+
while True:
|
|
241
|
+
self.skip_whitespace()
|
|
52
242
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
243
|
+
# Skip single line comments (both # and // styles)
|
|
244
|
+
if self.ch == '#' and self.peek_char() != '{':
|
|
245
|
+
self.skip_comment()
|
|
246
|
+
continue
|
|
57
247
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
248
|
+
# Handle // style comments and /* */ block comments
|
|
249
|
+
if self.ch == '/' and self.peek_char() == '/':
|
|
250
|
+
self.skip_double_slash_comment()
|
|
251
|
+
continue
|
|
62
252
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
253
|
+
# Block comments: /* ... */
|
|
254
|
+
if self.ch == '/' and self.peek_char() == '*':
|
|
255
|
+
self.skip_block_comment()
|
|
256
|
+
continue
|
|
66
257
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
258
|
+
tok = None
|
|
259
|
+
current_line = self.line
|
|
260
|
+
current_column = self.column
|
|
261
|
+
|
|
262
|
+
if self.ch == '=':
|
|
263
|
+
# Equality '=='
|
|
264
|
+
if self.peek_char() == '=':
|
|
265
|
+
ch = self.ch
|
|
266
|
+
self.read_char()
|
|
267
|
+
literal = ch + self.ch
|
|
268
|
+
tok = Token(EQ, literal)
|
|
269
|
+
tok.line = current_line
|
|
270
|
+
tok.column = current_column
|
|
271
|
+
# Arrow '=>' (treat as lambda shorthand)
|
|
272
|
+
elif self.peek_char() == '>':
|
|
273
|
+
ch = self.ch
|
|
274
|
+
self.read_char()
|
|
275
|
+
literal = ch + self.ch
|
|
276
|
+
tok = Token(LAMBDA, literal)
|
|
277
|
+
tok.line = current_line
|
|
278
|
+
tok.column = current_column
|
|
279
|
+
else:
|
|
280
|
+
tok = Token(ASSIGN, self.ch)
|
|
281
|
+
tok.line = current_line
|
|
282
|
+
tok.column = current_column
|
|
283
|
+
elif self.ch == '!':
|
|
284
|
+
if self.peek_char() == '=':
|
|
285
|
+
ch = self.ch
|
|
286
|
+
self.read_char()
|
|
287
|
+
literal = ch + self.ch
|
|
288
|
+
tok = Token(NOT_EQ, literal)
|
|
289
|
+
tok.line = current_line
|
|
290
|
+
tok.column = current_column
|
|
291
|
+
else:
|
|
292
|
+
tok = Token(BANG, self.ch)
|
|
293
|
+
tok.line = current_line
|
|
294
|
+
tok.column = current_column
|
|
295
|
+
elif self.ch == '&':
|
|
296
|
+
if self.peek_char() == '&':
|
|
297
|
+
ch = self.ch
|
|
298
|
+
self.read_char()
|
|
299
|
+
literal = ch + self.ch
|
|
300
|
+
tok = Token(AND, literal)
|
|
301
|
+
tok.line = current_line
|
|
302
|
+
tok.column = current_column
|
|
303
|
+
else:
|
|
304
|
+
# Single '&' is not supported - suggest using '&&'
|
|
305
|
+
error = self.error_reporter.report_error(
|
|
306
|
+
ZexusSyntaxError,
|
|
307
|
+
f"Unexpected character '{self.ch}'",
|
|
308
|
+
line=current_line,
|
|
309
|
+
column=current_column,
|
|
310
|
+
filename=self.filename,
|
|
311
|
+
suggestion="Did you mean '&&' for logical AND?"
|
|
312
|
+
)
|
|
313
|
+
raise error
|
|
314
|
+
elif self.ch == '|':
|
|
315
|
+
if self.peek_char() == '|':
|
|
316
|
+
ch = self.ch
|
|
317
|
+
self.read_char()
|
|
318
|
+
literal = ch + self.ch
|
|
319
|
+
tok = Token(OR, literal)
|
|
320
|
+
tok.line = current_line
|
|
321
|
+
tok.column = current_column
|
|
322
|
+
else:
|
|
323
|
+
# Single '|' is not supported - suggest using '||'
|
|
324
|
+
error = self.error_reporter.report_error(
|
|
325
|
+
ZexusSyntaxError,
|
|
326
|
+
f"Unexpected character '{self.ch}'",
|
|
327
|
+
line=current_line,
|
|
328
|
+
column=current_column,
|
|
329
|
+
filename=self.filename,
|
|
330
|
+
suggestion="Did you mean '||' for logical OR?"
|
|
331
|
+
)
|
|
332
|
+
raise error
|
|
333
|
+
elif self.ch == '<':
|
|
334
|
+
if self.peek_char() == '=':
|
|
335
|
+
ch = self.ch
|
|
336
|
+
self.read_char()
|
|
337
|
+
literal = ch + self.ch
|
|
338
|
+
tok = Token(LTE, literal)
|
|
339
|
+
tok.line = current_line
|
|
340
|
+
tok.column = current_column
|
|
341
|
+
elif self.peek_char() == '<':
|
|
342
|
+
ch = self.ch
|
|
343
|
+
self.read_char()
|
|
344
|
+
literal = ch + self.ch
|
|
345
|
+
tok = Token(IMPORT_OP, literal)
|
|
346
|
+
tok.line = current_line
|
|
347
|
+
tok.column = current_column
|
|
348
|
+
else:
|
|
349
|
+
tok = Token(LT, self.ch)
|
|
350
|
+
tok.line = current_line
|
|
351
|
+
tok.column = current_column
|
|
352
|
+
elif self.ch == '>':
|
|
353
|
+
if self.peek_char() == '=':
|
|
354
|
+
ch = self.ch
|
|
355
|
+
self.read_char()
|
|
356
|
+
literal = ch + self.ch
|
|
357
|
+
tok = Token(GTE, literal)
|
|
358
|
+
tok.line = current_line
|
|
359
|
+
tok.column = current_column
|
|
360
|
+
elif self.peek_char() == '>':
|
|
361
|
+
ch = self.ch
|
|
362
|
+
self.read_char()
|
|
363
|
+
literal = ch + self.ch
|
|
364
|
+
tok = Token(APPEND, literal)
|
|
365
|
+
tok.line = current_line
|
|
366
|
+
tok.column = current_column
|
|
367
|
+
else:
|
|
368
|
+
tok = Token(GT, self.ch)
|
|
369
|
+
tok.line = current_line
|
|
370
|
+
tok.column = current_column
|
|
371
|
+
elif self.ch == '?':
|
|
372
|
+
# Check for nullish coalescing '??'
|
|
373
|
+
if self.peek_char() == '?':
|
|
374
|
+
ch = self.ch
|
|
375
|
+
self.read_char()
|
|
376
|
+
literal = ch + self.ch
|
|
377
|
+
tok = Token(NULLISH, literal)
|
|
378
|
+
tok.line = current_line
|
|
379
|
+
tok.column = current_column
|
|
380
|
+
else:
|
|
381
|
+
tok = Token(QUESTION, self.ch)
|
|
382
|
+
tok.line = current_line
|
|
383
|
+
tok.column = current_column
|
|
384
|
+
elif self.ch == '"':
|
|
385
|
+
# Check for triple-quote multiline string
|
|
386
|
+
if self.peek_char() == '"' and self.read_position + 1 < len(self.input) and self.input[self.read_position + 1] == '"':
|
|
387
|
+
string_literal = self.read_multiline_string()
|
|
388
|
+
else:
|
|
389
|
+
string_literal = self.read_string()
|
|
390
|
+
# If read_string returned a list, it's an interpolated string
|
|
391
|
+
if isinstance(string_literal, list):
|
|
392
|
+
tok = Token(INTERP_STRING, string_literal)
|
|
393
|
+
else:
|
|
394
|
+
tok = Token(STRING, string_literal)
|
|
82
395
|
tok.line = current_line
|
|
83
396
|
tok.column = current_column
|
|
84
|
-
|
|
85
|
-
|
|
397
|
+
elif self.ch == "'":
|
|
398
|
+
# Single-quoted strings
|
|
399
|
+
if self.peek_char() == "'" and self.read_position + 1 < len(self.input) and self.input[self.read_position + 1] == "'":
|
|
400
|
+
string_literal = self.read_multiline_string(quote_char="'")
|
|
401
|
+
else:
|
|
402
|
+
string_literal = self.read_single_quoted_string()
|
|
403
|
+
tok = Token(STRING, string_literal)
|
|
86
404
|
tok.line = current_line
|
|
87
405
|
tok.column = current_column
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
ch = self.ch
|
|
91
|
-
self.read_char()
|
|
92
|
-
literal = ch + self.ch
|
|
93
|
-
tok = Token(NOT_EQ, literal)
|
|
406
|
+
elif self.ch == '[':
|
|
407
|
+
tok = Token(LBRACKET, self.ch)
|
|
94
408
|
tok.line = current_line
|
|
95
409
|
tok.column = current_column
|
|
96
|
-
|
|
97
|
-
tok = Token(
|
|
410
|
+
elif self.ch == ']':
|
|
411
|
+
tok = Token(RBRACKET, self.ch)
|
|
98
412
|
tok.line = current_line
|
|
99
413
|
tok.column = current_column
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
ch = self.ch
|
|
103
|
-
self.read_char()
|
|
104
|
-
literal = ch + self.ch
|
|
105
|
-
tok = Token(AND, literal)
|
|
414
|
+
elif self.ch == '@':
|
|
415
|
+
tok = Token(AT, self.ch)
|
|
106
416
|
tok.line = current_line
|
|
107
417
|
tok.column = current_column
|
|
108
|
-
|
|
109
|
-
#
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
418
|
+
elif self.ch == '(':
|
|
419
|
+
# Quick char-level scan: detect if this '(' pairs with a ')' that
|
|
420
|
+
# is followed by '=>' (arrow). If so, set a hint flag so parser
|
|
421
|
+
# can treat the parentheses as a lambda-parameter list.
|
|
422
|
+
try:
|
|
423
|
+
src = self.input
|
|
424
|
+
i = self.position
|
|
425
|
+
depth = 0
|
|
426
|
+
found = False
|
|
427
|
+
scan_limit = len(src)
|
|
428
|
+
while i < scan_limit:
|
|
429
|
+
c = src[i]
|
|
430
|
+
if c == '(':
|
|
431
|
+
depth += 1
|
|
432
|
+
elif c == ')':
|
|
433
|
+
depth -= 1
|
|
434
|
+
if depth == 0:
|
|
435
|
+
# look ahead for '=>' skipping whitespace
|
|
436
|
+
j = i + 1
|
|
437
|
+
while j < len(src) and src[j].isspace():
|
|
438
|
+
j += 1
|
|
439
|
+
if j + 1 < len(src) and src[j] == '=' and src[j + 1] == '>':
|
|
440
|
+
found = True
|
|
441
|
+
break
|
|
442
|
+
i += 1
|
|
443
|
+
self._next_paren_has_lambda = found
|
|
444
|
+
except Exception:
|
|
445
|
+
self._next_paren_has_lambda = False
|
|
446
|
+
|
|
447
|
+
tok = Token(LPAREN, self.ch)
|
|
125
448
|
tok.line = current_line
|
|
126
449
|
tok.column = current_column
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
error = self.error_reporter.report_error(
|
|
130
|
-
ZexusSyntaxError,
|
|
131
|
-
f"Unexpected character '{self.ch}'",
|
|
132
|
-
line=current_line,
|
|
133
|
-
column=current_column,
|
|
134
|
-
filename=self.filename,
|
|
135
|
-
suggestion="Did you mean '||' for logical OR?"
|
|
136
|
-
)
|
|
137
|
-
raise error
|
|
138
|
-
elif self.ch == '<':
|
|
139
|
-
if self.peek_char() == '=':
|
|
140
|
-
ch = self.ch
|
|
141
|
-
self.read_char()
|
|
142
|
-
literal = ch + self.ch
|
|
143
|
-
tok = Token(LTE, literal)
|
|
450
|
+
elif self.ch == ')':
|
|
451
|
+
tok = Token(RPAREN, self.ch)
|
|
144
452
|
tok.line = current_line
|
|
145
453
|
tok.column = current_column
|
|
146
|
-
elif self.
|
|
147
|
-
|
|
148
|
-
self.
|
|
149
|
-
|
|
150
|
-
|
|
454
|
+
elif self.ch == '{':
|
|
455
|
+
# Check if this might be start of embedded block
|
|
456
|
+
lookback = self.input[max(0, self.position-10):self.position]
|
|
457
|
+
if 'embedded' in lookback:
|
|
458
|
+
self.in_embedded_block = True
|
|
459
|
+
tok = Token(LBRACE, self.ch)
|
|
151
460
|
tok.line = current_line
|
|
152
461
|
tok.column = current_column
|
|
153
|
-
|
|
154
|
-
|
|
462
|
+
elif self.ch == '}':
|
|
463
|
+
if self.in_embedded_block:
|
|
464
|
+
self.in_embedded_block = False
|
|
465
|
+
tok = Token(RBRACE, self.ch)
|
|
155
466
|
tok.line = current_line
|
|
156
467
|
tok.column = current_column
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
ch = self.ch
|
|
160
|
-
self.read_char()
|
|
161
|
-
literal = ch + self.ch
|
|
162
|
-
tok = Token(GTE, literal)
|
|
468
|
+
elif self.ch == ',':
|
|
469
|
+
tok = Token(COMMA, self.ch)
|
|
163
470
|
tok.line = current_line
|
|
164
471
|
tok.column = current_column
|
|
165
|
-
elif self.
|
|
166
|
-
|
|
167
|
-
self.read_char()
|
|
168
|
-
literal = ch + self.ch
|
|
169
|
-
tok = Token(APPEND, literal)
|
|
472
|
+
elif self.ch == ';':
|
|
473
|
+
tok = Token(SEMICOLON, self.ch)
|
|
170
474
|
tok.line = current_line
|
|
171
475
|
tok.column = current_column
|
|
172
|
-
|
|
173
|
-
tok = Token(
|
|
476
|
+
elif self.ch == ':':
|
|
477
|
+
tok = Token(COLON, self.ch)
|
|
174
478
|
tok.line = current_line
|
|
175
479
|
tok.column = current_column
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
480
|
+
elif self.ch == '+':
|
|
481
|
+
if self.peek_char() == '=':
|
|
482
|
+
ch = self.ch
|
|
483
|
+
self.read_char()
|
|
484
|
+
tok = Token(PLUS_ASSIGN, ch + self.ch)
|
|
485
|
+
tok.line = current_line
|
|
486
|
+
tok.column = current_column
|
|
487
|
+
else:
|
|
488
|
+
tok = Token(PLUS, self.ch)
|
|
489
|
+
tok.line = current_line
|
|
490
|
+
tok.column = current_column
|
|
491
|
+
elif self.ch == '-':
|
|
492
|
+
if self.peek_char() == '=':
|
|
493
|
+
ch = self.ch
|
|
494
|
+
self.read_char()
|
|
495
|
+
tok = Token(MINUS_ASSIGN, ch + self.ch)
|
|
496
|
+
tok.line = current_line
|
|
497
|
+
tok.column = current_column
|
|
498
|
+
else:
|
|
499
|
+
tok = Token(MINUS, self.ch)
|
|
500
|
+
tok.line = current_line
|
|
501
|
+
tok.column = current_column
|
|
502
|
+
elif self.ch == '*':
|
|
503
|
+
if self.peek_char() == '*':
|
|
504
|
+
ch = self.ch
|
|
505
|
+
self.read_char()
|
|
506
|
+
if self.peek_char() == '=':
|
|
507
|
+
self.read_char()
|
|
508
|
+
tok = Token(POWER_ASSIGN, '**=')
|
|
509
|
+
tok.line = current_line
|
|
510
|
+
tok.column = current_column
|
|
511
|
+
else:
|
|
512
|
+
tok = Token(POWER, ch + self.ch)
|
|
513
|
+
tok.line = current_line
|
|
514
|
+
tok.column = current_column
|
|
515
|
+
elif self.peek_char() == '=':
|
|
516
|
+
ch = self.ch
|
|
517
|
+
self.read_char()
|
|
518
|
+
tok = Token(STAR_ASSIGN, ch + self.ch)
|
|
519
|
+
tok.line = current_line
|
|
520
|
+
tok.column = current_column
|
|
521
|
+
else:
|
|
522
|
+
tok = Token(STAR, self.ch)
|
|
523
|
+
tok.line = current_line
|
|
524
|
+
tok.column = current_column
|
|
525
|
+
elif self.ch == '/':
|
|
526
|
+
if self.peek_char() == '=':
|
|
527
|
+
ch = self.ch
|
|
528
|
+
self.read_char()
|
|
529
|
+
tok = Token(SLASH_ASSIGN, ch + self.ch)
|
|
530
|
+
tok.line = current_line
|
|
531
|
+
tok.column = current_column
|
|
532
|
+
else:
|
|
533
|
+
tok = Token(SLASH, self.ch)
|
|
534
|
+
tok.line = current_line
|
|
535
|
+
tok.column = current_column
|
|
536
|
+
elif self.ch == '%':
|
|
537
|
+
if self.peek_char() == '=':
|
|
538
|
+
ch = self.ch
|
|
539
|
+
self.read_char()
|
|
540
|
+
tok = Token(MOD_ASSIGN, ch + self.ch)
|
|
541
|
+
tok.line = current_line
|
|
542
|
+
tok.column = current_column
|
|
543
|
+
else:
|
|
544
|
+
tok = Token(MOD, self.ch)
|
|
545
|
+
tok.line = current_line
|
|
546
|
+
tok.column = current_column
|
|
547
|
+
elif self.ch == '.':
|
|
548
|
+
tok = Token(DOT, self.ch)
|
|
183
549
|
tok.line = current_line
|
|
184
550
|
tok.column = current_column
|
|
185
|
-
|
|
186
|
-
tok = Token(
|
|
551
|
+
elif self.ch == "":
|
|
552
|
+
tok = Token(EOF, "")
|
|
187
553
|
tok.line = current_line
|
|
188
554
|
tok.column = current_column
|
|
189
|
-
elif self.ch == '"':
|
|
190
|
-
string_literal = self.read_string()
|
|
191
|
-
tok = Token(STRING, string_literal)
|
|
192
|
-
tok.line = current_line
|
|
193
|
-
tok.column = current_column
|
|
194
|
-
elif self.ch == '[':
|
|
195
|
-
tok = Token(LBRACKET, self.ch)
|
|
196
|
-
tok.line = current_line
|
|
197
|
-
tok.column = current_column
|
|
198
|
-
elif self.ch == ']':
|
|
199
|
-
tok = Token(RBRACKET, self.ch)
|
|
200
|
-
tok.line = current_line
|
|
201
|
-
tok.column = current_column
|
|
202
|
-
elif self.ch == '@':
|
|
203
|
-
tok = Token(AT, self.ch)
|
|
204
|
-
tok.line = current_line
|
|
205
|
-
tok.column = current_column
|
|
206
|
-
elif self.ch == '(':
|
|
207
|
-
# Quick char-level scan: detect if this '(' pairs with a ')' that
|
|
208
|
-
# is followed by '=>' (arrow). If so, set a hint flag so parser
|
|
209
|
-
# can treat the parentheses as a lambda-parameter list.
|
|
210
|
-
try:
|
|
211
|
-
src = self.input
|
|
212
|
-
i = self.position
|
|
213
|
-
depth = 0
|
|
214
|
-
found = False
|
|
215
|
-
while i < len(src):
|
|
216
|
-
c = src[i]
|
|
217
|
-
if c == '(':
|
|
218
|
-
depth += 1
|
|
219
|
-
elif c == ')':
|
|
220
|
-
depth -= 1
|
|
221
|
-
if depth == 0:
|
|
222
|
-
# look ahead for '=>' skipping whitespace
|
|
223
|
-
j = i + 1
|
|
224
|
-
while j < len(src) and src[j].isspace():
|
|
225
|
-
j += 1
|
|
226
|
-
if j + 1 < len(src) and src[j] == '=' and src[j + 1] == '>':
|
|
227
|
-
found = True
|
|
228
|
-
break
|
|
229
|
-
i += 1
|
|
230
|
-
self._next_paren_has_lambda = found
|
|
231
|
-
except Exception:
|
|
232
|
-
self._next_paren_has_lambda = False
|
|
233
|
-
|
|
234
|
-
tok = Token(LPAREN, self.ch)
|
|
235
|
-
tok.line = current_line
|
|
236
|
-
tok.column = current_column
|
|
237
|
-
elif self.ch == ')':
|
|
238
|
-
tok = Token(RPAREN, self.ch)
|
|
239
|
-
tok.line = current_line
|
|
240
|
-
tok.column = current_column
|
|
241
|
-
elif self.ch == '{':
|
|
242
|
-
# Check if this might be start of embedded block
|
|
243
|
-
lookback = self.input[max(0, self.position-10):self.position]
|
|
244
|
-
if 'embedded' in lookback:
|
|
245
|
-
self.in_embedded_block = True
|
|
246
|
-
tok = Token(LBRACE, self.ch)
|
|
247
|
-
tok.line = current_line
|
|
248
|
-
tok.column = current_column
|
|
249
|
-
elif self.ch == '}':
|
|
250
|
-
if self.in_embedded_block:
|
|
251
|
-
self.in_embedded_block = False
|
|
252
|
-
tok = Token(RBRACE, self.ch)
|
|
253
|
-
tok.line = current_line
|
|
254
|
-
tok.column = current_column
|
|
255
|
-
elif self.ch == ',':
|
|
256
|
-
tok = Token(COMMA, self.ch)
|
|
257
|
-
tok.line = current_line
|
|
258
|
-
tok.column = current_column
|
|
259
|
-
elif self.ch == ';':
|
|
260
|
-
tok = Token(SEMICOLON, self.ch)
|
|
261
|
-
tok.line = current_line
|
|
262
|
-
tok.column = current_column
|
|
263
|
-
elif self.ch == ':':
|
|
264
|
-
tok = Token(COLON, self.ch)
|
|
265
|
-
tok.line = current_line
|
|
266
|
-
tok.column = current_column
|
|
267
|
-
elif self.ch == '+':
|
|
268
|
-
tok = Token(PLUS, self.ch)
|
|
269
|
-
tok.line = current_line
|
|
270
|
-
tok.column = current_column
|
|
271
|
-
elif self.ch == '-':
|
|
272
|
-
tok = Token(MINUS, self.ch)
|
|
273
|
-
tok.line = current_line
|
|
274
|
-
tok.column = current_column
|
|
275
|
-
elif self.ch == '*':
|
|
276
|
-
tok = Token(STAR, self.ch)
|
|
277
|
-
tok.line = current_line
|
|
278
|
-
tok.column = current_column
|
|
279
|
-
elif self.ch == '/':
|
|
280
|
-
# Check if this is division or comment
|
|
281
|
-
if self.peek_char() == '/':
|
|
282
|
-
# It's a // comment, handle above
|
|
283
|
-
self.skip_double_slash_comment()
|
|
284
|
-
return self.next_token()
|
|
285
555
|
else:
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
tok.column = current_column
|
|
289
|
-
elif self.ch == '%':
|
|
290
|
-
tok = Token(MOD, self.ch)
|
|
291
|
-
tok.line = current_line
|
|
292
|
-
tok.column = current_column
|
|
293
|
-
elif self.ch == '.':
|
|
294
|
-
tok = Token(DOT, self.ch)
|
|
295
|
-
tok.line = current_line
|
|
296
|
-
tok.column = current_column
|
|
297
|
-
elif self.ch == "":
|
|
298
|
-
tok = Token(EOF, "")
|
|
299
|
-
tok.line = current_line
|
|
300
|
-
tok.column = current_column
|
|
301
|
-
else:
|
|
302
|
-
if self.is_letter(self.ch):
|
|
303
|
-
literal = self.read_identifier()
|
|
556
|
+
if self.is_letter(self.ch):
|
|
557
|
+
literal = self.read_identifier()
|
|
304
558
|
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
559
|
+
if self.in_embedded_block:
|
|
560
|
+
token_type = IDENT
|
|
561
|
+
else:
|
|
562
|
+
token_type = self.lookup_ident(literal)
|
|
309
563
|
|
|
310
|
-
|
|
311
|
-
tok.line = current_line
|
|
312
|
-
tok.column = current_column
|
|
313
|
-
self.last_token_type = tok.type
|
|
314
|
-
return tok
|
|
315
|
-
elif self.is_digit(self.ch):
|
|
316
|
-
num_literal = self.read_number()
|
|
317
|
-
if '.' in num_literal:
|
|
318
|
-
tok = Token(FLOAT, num_literal)
|
|
319
|
-
else:
|
|
320
|
-
tok = Token(INT, num_literal)
|
|
321
|
-
tok.line = current_line
|
|
322
|
-
tok.column = current_column
|
|
323
|
-
self.last_token_type = tok.type
|
|
324
|
-
return tok
|
|
325
|
-
else:
|
|
326
|
-
if self.ch in ['\n', '\r']:
|
|
327
|
-
self.read_char()
|
|
328
|
-
return self.next_token()
|
|
329
|
-
# For embedded code, treat unknown printable chars as IDENT
|
|
330
|
-
if self.ch.isprintable():
|
|
331
|
-
literal = self.read_embedded_char()
|
|
332
|
-
tok = Token(IDENT, literal)
|
|
564
|
+
tok = Token(token_type, literal)
|
|
333
565
|
tok.line = current_line
|
|
334
566
|
tok.column = current_column
|
|
335
|
-
self.
|
|
567
|
+
self._finalize_token(tok)
|
|
336
568
|
return tok
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
569
|
+
elif self.is_digit(self.ch):
|
|
570
|
+
num_literal = self.read_number()
|
|
571
|
+
if '.' in num_literal:
|
|
572
|
+
tok = Token(FLOAT, num_literal)
|
|
573
|
+
else:
|
|
574
|
+
tok = Token(INT, num_literal)
|
|
575
|
+
tok.line = current_line
|
|
576
|
+
tok.column = current_column
|
|
577
|
+
self._finalize_token(tok)
|
|
578
|
+
return tok
|
|
579
|
+
else:
|
|
580
|
+
if self.ch in ['\n', '\r']:
|
|
581
|
+
self.read_char()
|
|
582
|
+
continue
|
|
583
|
+
# For embedded code, treat unknown printable chars as IDENT
|
|
584
|
+
if self.ch.isprintable():
|
|
585
|
+
literal = self.read_embedded_char()
|
|
586
|
+
tok = Token(IDENT, literal)
|
|
587
|
+
tok.line = current_line
|
|
588
|
+
tok.column = current_column
|
|
589
|
+
self._finalize_token(tok)
|
|
590
|
+
return tok
|
|
591
|
+
# Unknown character - report helpful error
|
|
592
|
+
char_desc = f"'{self.ch}'" if self.ch.isprintable() else f"'\\x{ord(self.ch):02x}'"
|
|
593
|
+
error = self.error_reporter.report_error(
|
|
594
|
+
ZexusSyntaxError,
|
|
595
|
+
f"Unexpected character {char_desc}",
|
|
596
|
+
line=current_line,
|
|
597
|
+
column=current_column,
|
|
598
|
+
filename=self.filename,
|
|
599
|
+
suggestion="Remove or replace this character with valid Zexus syntax."
|
|
600
|
+
)
|
|
601
|
+
raise error
|
|
348
602
|
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
603
|
+
self.read_char()
|
|
604
|
+
self._finalize_token(tok)
|
|
605
|
+
return tok
|
|
606
|
+
|
|
607
|
+
def _finalize_token(self, tok):
    """Refresh lexer bookkeeping once a token has been produced.

    Adjusts the paren/bracket/brace nesting counters, stores the token's
    type in ``last_token_type`` and recomputes ``at_statement_boundary``.
    A ``None`` token is ignored.
    """
    if tok is None:
        return

    kind = tok.type

    # Nesting counters (used for newline handling): an opener always
    # increments; a closer decrements only while the counter is positive,
    # so an unmatched closer cannot push a depth below zero.
    if kind == LPAREN:
        self.paren_depth += 1
    elif kind == RPAREN:
        if self.paren_depth > 0:
            self.paren_depth -= 1
    elif kind == LBRACKET:
        self.bracket_depth += 1
    elif kind == RBRACKET:
        if self.bracket_depth > 0:
            self.bracket_depth -= 1
    elif kind == LBRACE:
        self.brace_depth += 1
    elif kind == RBRACE:
        if self.brace_depth > 0:
            self.brace_depth -= 1

    # Remembered for context-aware keyword handling.
    self.last_token_type = kind

    # Only SEMICOLON, RBRACE, LBRACE and EOF put the lexer at a statement
    # boundary. Every other token -- continuation tokens such as COMMA, DOT,
    # ASSIGN, COLON, LPAREN, LBRACKET and AT, the LET/CONST declarators
    # (which expect an identifier next), and anything else -- stays inside
    # the current statement.
    self.at_statement_boundary = kind in {SEMICOLON, RBRACE, LBRACE, EOF}
|
|
353
645
|
|
|
354
646
|
def read_embedded_char(self):
|
|
355
647
|
"""Read a single character as identifier for embedded code compatibility"""
|
|
@@ -379,6 +671,8 @@ class Lexer:
|
|
|
379
671
|
start_line = self.line
|
|
380
672
|
start_column = self.column
|
|
381
673
|
result = []
|
|
674
|
+
has_interpolation = False
|
|
675
|
+
parts = [] # list of ("str", text) or ("expr", text)
|
|
382
676
|
while True:
|
|
383
677
|
self.read_char()
|
|
384
678
|
if self.ch == "":
|
|
@@ -412,16 +706,170 @@ class Lexer:
|
|
|
412
706
|
'r': '\r',
|
|
413
707
|
'\\': '\\',
|
|
414
708
|
'"': '"',
|
|
415
|
-
"'": "'"
|
|
709
|
+
"'": "'",
|
|
710
|
+
'$': '$'
|
|
416
711
|
}
|
|
417
712
|
result.append(escape_map.get(self.ch, self.ch))
|
|
713
|
+
elif self.ch == '$' and self.peek_char() == '{':
|
|
714
|
+
# String interpolation: ${expr}
|
|
715
|
+
has_interpolation = True
|
|
716
|
+
# Save current string part
|
|
717
|
+
if result:
|
|
718
|
+
parts.append(("str", ''.join(result)))
|
|
719
|
+
result = []
|
|
720
|
+
else:
|
|
721
|
+
parts.append(("str", ""))
|
|
722
|
+
# Skip the '{'
|
|
723
|
+
self.read_char()
|
|
724
|
+
# Read expression until matching '}'
|
|
725
|
+
expr_chars = []
|
|
726
|
+
brace_depth = 1
|
|
727
|
+
while brace_depth > 0:
|
|
728
|
+
self.read_char()
|
|
729
|
+
if self.ch == "":
|
|
730
|
+
error = self.error_reporter.report_error(
|
|
731
|
+
ZexusSyntaxError,
|
|
732
|
+
"Unterminated interpolation expression in string",
|
|
733
|
+
line=start_line,
|
|
734
|
+
column=start_column,
|
|
735
|
+
filename=self.filename,
|
|
736
|
+
suggestion="Add a closing } to terminate the interpolation."
|
|
737
|
+
)
|
|
738
|
+
raise error
|
|
739
|
+
elif self.ch == '{':
|
|
740
|
+
brace_depth += 1
|
|
741
|
+
expr_chars.append(self.ch)
|
|
742
|
+
elif self.ch == '}':
|
|
743
|
+
brace_depth -= 1
|
|
744
|
+
if brace_depth > 0:
|
|
745
|
+
expr_chars.append(self.ch)
|
|
746
|
+
else:
|
|
747
|
+
expr_chars.append(self.ch)
|
|
748
|
+
parts.append(("expr", ''.join(expr_chars)))
|
|
418
749
|
elif self.ch == '"':
|
|
419
750
|
# End of string
|
|
420
751
|
break
|
|
421
752
|
else:
|
|
422
753
|
result.append(self.ch)
|
|
754
|
+
|
|
755
|
+
if has_interpolation:
|
|
756
|
+
# Add trailing string part
|
|
757
|
+
parts.append(("str", ''.join(result)))
|
|
758
|
+
return parts # Return list of parts for interpolation
|
|
423
759
|
return ''.join(result)
|
|
424
760
|
|
|
761
|
+
def read_single_quoted_string(self):
    """Read a single-quoted string literal ('...') and return its text.

    The loop advances before inspecting a character, so the current
    character on entry is skipped (presumably the opening quote -- confirm
    against the caller). On return ``self.ch`` is the closing quote.

    Backslash escapes ``\\n``, ``\\t``, ``\\r``, ``\\\\``, ``\\'`` and ``\\"``
    are decoded; any other escaped character is kept without the backslash.

    Raises the error built by ``self.error_reporter.report_error`` when the
    input ends before the closing quote or right after a backslash.
    """
    opening_line = self.line
    opening_column = self.column
    decoded_escapes = {
        'n': '\n', 't': '\t', 'r': '\r',
        '\\': '\\', "'": "'", '"': '"'
    }
    pieces = []

    while True:
        self.read_char()
        current = self.ch

        if current == "'":
            # Closing quote reached.
            break

        if current == "":
            # Ran off the end of the input; report where the string began.
            raise self.error_reporter.report_error(
                ZexusSyntaxError,
                "Unterminated string literal",
                line=opening_line,
                column=opening_column,
                filename=self.filename,
                suggestion="Add a closing quote ' to terminate the string."
            )

        if current != '\\':
            pieces.append(current)
            continue

        # Escape sequence: look at the character after the backslash.
        self.read_char()
        if self.ch == '':
            raise self.error_reporter.report_error(
                ZexusSyntaxError,
                "Incomplete escape sequence at end of file",
                line=self.line,
                column=self.column,
                filename=self.filename,
                suggestion="Remove the backslash or complete the escape sequence."
            )
        pieces.append(decoded_escapes.get(self.ch, self.ch))

    return ''.join(pieces)
|
|
800
|
+
|
|
801
|
+
def read_multiline_string(self, quote_char='"'):
    """Read a triple-quoted multiline string and return its text.

    Args:
        quote_char: The quote character that is tripled to delimit the
            string, ``'"'`` (default) or ``"'"``.

    The first two ``read_char`` calls step over the 2nd and 3rd opening
    quotes, so the current character on entry is expected to be the first
    opening quote. One or two consecutive quote characters inside the body
    are kept as content; only three in a row close the string.

    Backslash escapes ``\\n``, ``\\t``, ``\\r``, ``\\\\`` and the escaped
    ``quote_char`` are decoded; any other escaped character is kept without
    the backslash.

    Raises the error built by ``self.error_reporter.report_error`` when the
    input ends before the closing delimiter, including when it ends right
    after a backslash. That case used to return the partial text silently;
    it now fails the same way the single-quoted reader does.
    """
    start_line = self.line
    start_column = self.column

    # Skip the three opening quotes
    self.read_char()  # skip 2nd quote
    self.read_char()  # skip 3rd quote

    # Built once instead of on every escape sequence.
    escape_map = {
        'n': '\n', 't': '\t', 'r': '\r',
        '\\': '\\', quote_char: quote_char
    }
    result = []

    while True:
        self.read_char()
        if self.ch == "":
            raise self.error_reporter.report_error(
                ZexusSyntaxError,
                "Unterminated multiline string literal",
                line=start_line,
                column=start_column,
                filename=self.filename,
                suggestion=f"Add closing {quote_char}{quote_char}{quote_char} to terminate the multiline string."
            )
        elif self.ch == '\\':
            self.read_char()
            if self.ch == '':
                # A dangling backslash at end of input is still an
                # unterminated string; report it rather than returning
                # whatever was collected so far.
                raise self.error_reporter.report_error(
                    ZexusSyntaxError,
                    "Incomplete escape sequence at end of file",
                    line=self.line,
                    column=self.column,
                    filename=self.filename,
                    suggestion="Remove the backslash or complete the escape sequence."
                )
            result.append(escape_map.get(self.ch, self.ch))
        elif self.ch == quote_char:
            # Closing delimiter only if the next two characters are also
            # quote characters.
            closes = (
                self.peek_char() == quote_char
                and self.read_position + 1 < len(self.input)
                and self.input[self.read_position + 1] == quote_char
            )
            if closes:
                self.read_char()  # skip 2nd closing quote
                self.read_char()  # skip 3rd closing quote
                break
            result.append(self.ch)
        else:
            result.append(self.ch)

    return ''.join(result)
|
|
841
|
+
|
|
842
|
+
def skip_block_comment(self):
    """Consume a ``/* ... */`` block comment, honouring nested comments.

    The first two ``read_char`` calls step over the opening ``/*``, so the
    current character on entry is expected to be the leading ``/``. After
    the matching ``*/`` has been consumed, trailing whitespace is skipped
    via ``skip_whitespace``.

    Raises the error built by ``self.error_reporter.report_error`` when the
    input ends while a comment is still open.
    """
    opened_line = self.line
    opened_column = self.column

    # Step over the two characters of the opening delimiter.
    self.read_char()
    self.read_char()

    nesting = 1
    while nesting > 0:
        current = self.ch
        if current == "":
            raise self.error_reporter.report_error(
                ZexusSyntaxError,
                "Unterminated block comment",
                line=opened_line,
                column=opened_column,
                filename=self.filename,
                suggestion="Add closing */ to terminate the block comment."
            )

        # Only '/' and '*' can start a two-character delimiter, so the
        # lookahead is taken just for those.
        upcoming = self.peek_char() if current in ('/', '*') else None

        if current == '/' and upcoming == '*':
            nesting += 1      # nested opener
            advance = 2
        elif current == '*' and upcoming == '/':
            nesting -= 1      # closer for the innermost open comment
            advance = 2
        else:
            advance = 1       # ordinary comment text

        for _ in range(advance):
            self.read_char()

    self.skip_whitespace()
|
|
872
|
+
|
|
425
873
|
def read_identifier(self):
|
|
426
874
|
start_position = self.position
|
|
427
875
|
while self.is_letter(self.ch) or self.is_digit(self.ch):
|
|
@@ -448,228 +896,32 @@ class Lexer:
|
|
|
448
896
|
return number_str
|
|
449
897
|
|
|
450
898
|
def lookup_ident(self, ident):
    """Classify an identifier-shaped word as a keyword token type or IDENT.

    Resolution order:
      1. Words in ``_LITERAL_KEYWORDS`` are always reserved.
      2. Words missing from ``_KEYWORDS`` are plain identifiers.
      3. Words in ``_FUNCTION_DECL_KEYWORDS`` are keywords only when the
         previous token type is in ``_FUNCTION_STATEMENT_BOUNDARIES``.
      4. ``data`` is a keyword only when the previous token type is in
         ``_DATA_KEYWORD_CONTRACT_CONTEXTS``.
      5. Words in ``_STRICT_KEYWORDS`` are always keywords.
      6. Any other keyword is downgraded to IDENT when the lexer is not at
         a statement boundary and the previous token type is in
         ``_CONTEXTS_ALLOWING_KEYWORD_IDENTS``.
    """
    # Literal keywords stay reserved regardless of context.
    reserved_literal = _LITERAL_KEYWORDS.get(ident)
    if reserved_literal is not None:
        return reserved_literal

    keyword = _KEYWORDS.get(ident)
    if keyword is None:
        return IDENT

    if ident in _FUNCTION_DECL_KEYWORDS:
        at_decl_start = self.last_token_type in _FUNCTION_STATEMENT_BOUNDARIES
        return keyword if at_decl_start else IDENT

    if ident == "data":
        in_contract_context = self.last_token_type in _DATA_KEYWORD_CONTRACT_CONTEXTS
        return keyword if in_contract_context else IDENT

    if ident in _STRICT_KEYWORDS:
        return keyword

    # Remaining keywords may double as identifiers, but only mid-statement
    # after a token that permits it.
    if self.at_statement_boundary or self.last_token_type not in _CONTEXTS_ALLOWING_KEYWORD_IDENTS:
        return keyword
    return IDENT
|
|
673
925
|
|
|
674
926
|
def is_letter(self, char):
    """Return True when ``char`` is an ASCII letter or an underscore."""
    if char == '_':
        return True
    if 'A' <= char <= 'Z':
        return True
    return 'a' <= char <= 'z'
|
|
@@ -679,4 +931,8 @@ class Lexer:
|
|
|
679
931
|
|
|
680
932
|
def skip_whitespace(self):
|
|
681
933
|
while self.ch in [' ', '\t', '\n', '\r']:
|
|
934
|
+
if self.ch in ['\n', '\r']:
|
|
935
|
+
# Treat newline as potential statement boundary when not inside paren/bracket expressions
|
|
936
|
+
if self.paren_depth == 0 and self.bracket_depth == 0:
|
|
937
|
+
self.at_statement_boundary = True
|
|
682
938
|
self.read_char()
|