zexus 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227)
  1. package/LICENSE +0 -0
  2. package/README.md +2513 -0
  3. package/bin/zexus +2 -0
  4. package/bin/zpics +2 -0
  5. package/bin/zpm +2 -0
  6. package/bin/zx +2 -0
  7. package/bin/zx-deploy +2 -0
  8. package/bin/zx-dev +2 -0
  9. package/bin/zx-run +2 -0
  10. package/package.json +66 -0
  11. package/scripts/README.md +24 -0
  12. package/scripts/postinstall.js +44 -0
  13. package/shared_config.json +24 -0
  14. package/src/README.md +1525 -0
  15. package/src/tests/run_zexus_tests.py +117 -0
  16. package/src/tests/test_all_phases.zx +346 -0
  17. package/src/tests/test_blockchain_features.zx +306 -0
  18. package/src/tests/test_complexity_features.zx +321 -0
  19. package/src/tests/test_core_integration.py +185 -0
  20. package/src/tests/test_phase10_ecosystem.zx +177 -0
  21. package/src/tests/test_phase1_modifiers.zx +87 -0
  22. package/src/tests/test_phase2_plugins.zx +80 -0
  23. package/src/tests/test_phase3_security.zx +97 -0
  24. package/src/tests/test_phase4_vfs.zx +116 -0
  25. package/src/tests/test_phase5_types.zx +117 -0
  26. package/src/tests/test_phase6_metaprogramming.zx +125 -0
  27. package/src/tests/test_phase7_optimization.zx +132 -0
  28. package/src/tests/test_phase9_advanced_types.zx +157 -0
  29. package/src/tests/test_security_features.py +419 -0
  30. package/src/tests/test_security_features.zx +276 -0
  31. package/src/tests/test_simple_zx.zx +1 -0
  32. package/src/tests/test_verification_simple.zx +69 -0
  33. package/src/zexus/__init__.py +28 -0
  34. package/src/zexus/__main__.py +5 -0
  35. package/src/zexus/__pycache__/__init__.cpython-312.pyc +0 -0
  36. package/src/zexus/__pycache__/advanced_types.cpython-312.pyc +0 -0
  37. package/src/zexus/__pycache__/builtin_modules.cpython-312.pyc +0 -0
  38. package/src/zexus/__pycache__/capability_system.cpython-312.pyc +0 -0
  39. package/src/zexus/__pycache__/complexity_system.cpython-312.pyc +0 -0
  40. package/src/zexus/__pycache__/concurrency_system.cpython-312.pyc +0 -0
  41. package/src/zexus/__pycache__/config.cpython-312.pyc +0 -0
  42. package/src/zexus/__pycache__/dependency_injection.cpython-312.pyc +0 -0
  43. package/src/zexus/__pycache__/ecosystem.cpython-312.pyc +0 -0
  44. package/src/zexus/__pycache__/environment.cpython-312.pyc +0 -0
  45. package/src/zexus/__pycache__/error_reporter.cpython-312.pyc +0 -0
  46. package/src/zexus/__pycache__/hybrid_orchestrator.cpython-312.pyc +0 -0
  47. package/src/zexus/__pycache__/lexer.cpython-312.pyc +0 -0
  48. package/src/zexus/__pycache__/metaprogramming.cpython-312.pyc +0 -0
  49. package/src/zexus/__pycache__/module_cache.cpython-312.pyc +0 -0
  50. package/src/zexus/__pycache__/object.cpython-312.pyc +0 -0
  51. package/src/zexus/__pycache__/optimization.cpython-312.pyc +0 -0
  52. package/src/zexus/__pycache__/plugin_system.cpython-312.pyc +0 -0
  53. package/src/zexus/__pycache__/policy_engine.cpython-312.pyc +0 -0
  54. package/src/zexus/__pycache__/security.cpython-312.pyc +0 -0
  55. package/src/zexus/__pycache__/stdlib_integration.cpython-312.pyc +0 -0
  56. package/src/zexus/__pycache__/strategy_recovery.cpython-312.pyc +0 -0
  57. package/src/zexus/__pycache__/syntax_validator.cpython-312.pyc +0 -0
  58. package/src/zexus/__pycache__/type_system.cpython-312.pyc +0 -0
  59. package/src/zexus/__pycache__/virtual_filesystem.cpython-312.pyc +0 -0
  60. package/src/zexus/__pycache__/zexus_ast.cpython-312.pyc +0 -0
  61. package/src/zexus/__pycache__/zexus_token.cpython-312.pyc +0 -0
  62. package/src/zexus/advanced_types.py +401 -0
  63. package/src/zexus/blockchain/__init__.py +40 -0
  64. package/src/zexus/blockchain/__pycache__/__init__.cpython-312.pyc +0 -0
  65. package/src/zexus/blockchain/__pycache__/crypto.cpython-312.pyc +0 -0
  66. package/src/zexus/blockchain/__pycache__/ledger.cpython-312.pyc +0 -0
  67. package/src/zexus/blockchain/__pycache__/transaction.cpython-312.pyc +0 -0
  68. package/src/zexus/blockchain/crypto.py +463 -0
  69. package/src/zexus/blockchain/ledger.py +255 -0
  70. package/src/zexus/blockchain/transaction.py +267 -0
  71. package/src/zexus/builtin_modules.py +284 -0
  72. package/src/zexus/builtin_plugins.py +317 -0
  73. package/src/zexus/capability_system.py +372 -0
  74. package/src/zexus/cli/__init__.py +2 -0
  75. package/src/zexus/cli/__pycache__/__init__.cpython-312.pyc +0 -0
  76. package/src/zexus/cli/__pycache__/main.cpython-312.pyc +0 -0
  77. package/src/zexus/cli/main.py +707 -0
  78. package/src/zexus/cli/zpm.py +203 -0
  79. package/src/zexus/compare_interpreter_compiler.py +146 -0
  80. package/src/zexus/compiler/__init__.py +169 -0
  81. package/src/zexus/compiler/__pycache__/__init__.cpython-312.pyc +0 -0
  82. package/src/zexus/compiler/__pycache__/lexer.cpython-312.pyc +0 -0
  83. package/src/zexus/compiler/__pycache__/parser.cpython-312.pyc +0 -0
  84. package/src/zexus/compiler/__pycache__/zexus_ast.cpython-312.pyc +0 -0
  85. package/src/zexus/compiler/bytecode.py +266 -0
  86. package/src/zexus/compiler/compat_runtime.py +277 -0
  87. package/src/zexus/compiler/lexer.py +257 -0
  88. package/src/zexus/compiler/parser.py +779 -0
  89. package/src/zexus/compiler/semantic.py +118 -0
  90. package/src/zexus/compiler/zexus_ast.py +454 -0
  91. package/src/zexus/complexity_system.py +575 -0
  92. package/src/zexus/concurrency_system.py +493 -0
  93. package/src/zexus/config.py +201 -0
  94. package/src/zexus/crypto_bridge.py +19 -0
  95. package/src/zexus/dependency_injection.py +423 -0
  96. package/src/zexus/ecosystem.py +434 -0
  97. package/src/zexus/environment.py +101 -0
  98. package/src/zexus/environment_manager.py +119 -0
  99. package/src/zexus/error_reporter.py +314 -0
  100. package/src/zexus/evaluator/__init__.py +12 -0
  101. package/src/zexus/evaluator/__pycache__/__init__.cpython-312.pyc +0 -0
  102. package/src/zexus/evaluator/__pycache__/bytecode_compiler.cpython-312.pyc +0 -0
  103. package/src/zexus/evaluator/__pycache__/core.cpython-312.pyc +0 -0
  104. package/src/zexus/evaluator/__pycache__/expressions.cpython-312.pyc +0 -0
  105. package/src/zexus/evaluator/__pycache__/functions.cpython-312.pyc +0 -0
  106. package/src/zexus/evaluator/__pycache__/integration.cpython-312.pyc +0 -0
  107. package/src/zexus/evaluator/__pycache__/statements.cpython-312.pyc +0 -0
  108. package/src/zexus/evaluator/__pycache__/utils.cpython-312.pyc +0 -0
  109. package/src/zexus/evaluator/bytecode_compiler.py +700 -0
  110. package/src/zexus/evaluator/core.py +891 -0
  111. package/src/zexus/evaluator/expressions.py +827 -0
  112. package/src/zexus/evaluator/functions.py +3989 -0
  113. package/src/zexus/evaluator/integration.py +396 -0
  114. package/src/zexus/evaluator/statements.py +4303 -0
  115. package/src/zexus/evaluator/utils.py +126 -0
  116. package/src/zexus/evaluator_original.py +2041 -0
  117. package/src/zexus/external_bridge.py +16 -0
  118. package/src/zexus/find_affected_imports.sh +155 -0
  119. package/src/zexus/hybrid_orchestrator.py +152 -0
  120. package/src/zexus/input_validation.py +259 -0
  121. package/src/zexus/lexer.py +571 -0
  122. package/src/zexus/logging.py +89 -0
  123. package/src/zexus/lsp/__init__.py +9 -0
  124. package/src/zexus/lsp/completion_provider.py +207 -0
  125. package/src/zexus/lsp/definition_provider.py +22 -0
  126. package/src/zexus/lsp/hover_provider.py +71 -0
  127. package/src/zexus/lsp/server.py +269 -0
  128. package/src/zexus/lsp/symbol_provider.py +31 -0
  129. package/src/zexus/metaprogramming.py +321 -0
  130. package/src/zexus/module_cache.py +89 -0
  131. package/src/zexus/module_manager.py +107 -0
  132. package/src/zexus/object.py +973 -0
  133. package/src/zexus/optimization.py +424 -0
  134. package/src/zexus/parser/__init__.py +31 -0
  135. package/src/zexus/parser/__pycache__/__init__.cpython-312.pyc +0 -0
  136. package/src/zexus/parser/__pycache__/parser.cpython-312.pyc +0 -0
  137. package/src/zexus/parser/__pycache__/strategy_context.cpython-312.pyc +0 -0
  138. package/src/zexus/parser/__pycache__/strategy_structural.cpython-312.pyc +0 -0
  139. package/src/zexus/parser/integration.py +86 -0
  140. package/src/zexus/parser/parser.py +3977 -0
  141. package/src/zexus/parser/strategy_context.py +7254 -0
  142. package/src/zexus/parser/strategy_structural.py +1033 -0
  143. package/src/zexus/persistence.py +391 -0
  144. package/src/zexus/plugin_system.py +290 -0
  145. package/src/zexus/policy_engine.py +365 -0
  146. package/src/zexus/profiler/__init__.py +5 -0
  147. package/src/zexus/profiler/profiler.py +233 -0
  148. package/src/zexus/purity_system.py +398 -0
  149. package/src/zexus/runtime/__init__.py +20 -0
  150. package/src/zexus/runtime/async_runtime.py +324 -0
  151. package/src/zexus/search_old_imports.sh +65 -0
  152. package/src/zexus/security.py +1407 -0
  153. package/src/zexus/stack_trace.py +233 -0
  154. package/src/zexus/stdlib/__init__.py +27 -0
  155. package/src/zexus/stdlib/blockchain.py +341 -0
  156. package/src/zexus/stdlib/compression.py +167 -0
  157. package/src/zexus/stdlib/crypto.py +124 -0
  158. package/src/zexus/stdlib/datetime.py +163 -0
  159. package/src/zexus/stdlib/db_mongo.py +199 -0
  160. package/src/zexus/stdlib/db_mysql.py +162 -0
  161. package/src/zexus/stdlib/db_postgres.py +163 -0
  162. package/src/zexus/stdlib/db_sqlite.py +133 -0
  163. package/src/zexus/stdlib/encoding.py +230 -0
  164. package/src/zexus/stdlib/fs.py +195 -0
  165. package/src/zexus/stdlib/http.py +219 -0
  166. package/src/zexus/stdlib/http_server.py +248 -0
  167. package/src/zexus/stdlib/json_module.py +61 -0
  168. package/src/zexus/stdlib/math.py +360 -0
  169. package/src/zexus/stdlib/os_module.py +265 -0
  170. package/src/zexus/stdlib/regex.py +148 -0
  171. package/src/zexus/stdlib/sockets.py +253 -0
  172. package/src/zexus/stdlib/test_framework.zx +208 -0
  173. package/src/zexus/stdlib/test_runner.zx +119 -0
  174. package/src/zexus/stdlib_integration.py +341 -0
  175. package/src/zexus/strategy_recovery.py +256 -0
  176. package/src/zexus/syntax_validator.py +356 -0
  177. package/src/zexus/testing/zpics.py +407 -0
  178. package/src/zexus/testing/zpics_runtime.py +369 -0
  179. package/src/zexus/type_system.py +374 -0
  180. package/src/zexus/validation_system.py +569 -0
  181. package/src/zexus/virtual_filesystem.py +355 -0
  182. package/src/zexus/vm/__init__.py +8 -0
  183. package/src/zexus/vm/__pycache__/__init__.cpython-312.pyc +0 -0
  184. package/src/zexus/vm/__pycache__/async_optimizer.cpython-312.pyc +0 -0
  185. package/src/zexus/vm/__pycache__/bytecode.cpython-312.pyc +0 -0
  186. package/src/zexus/vm/__pycache__/cache.cpython-312.pyc +0 -0
  187. package/src/zexus/vm/__pycache__/jit.cpython-312.pyc +0 -0
  188. package/src/zexus/vm/__pycache__/memory_manager.cpython-312.pyc +0 -0
  189. package/src/zexus/vm/__pycache__/memory_pool.cpython-312.pyc +0 -0
  190. package/src/zexus/vm/__pycache__/optimizer.cpython-312.pyc +0 -0
  191. package/src/zexus/vm/__pycache__/parallel_vm.cpython-312.pyc +0 -0
  192. package/src/zexus/vm/__pycache__/peephole_optimizer.cpython-312.pyc +0 -0
  193. package/src/zexus/vm/__pycache__/profiler.cpython-312.pyc +0 -0
  194. package/src/zexus/vm/__pycache__/register_allocator.cpython-312.pyc +0 -0
  195. package/src/zexus/vm/__pycache__/register_vm.cpython-312.pyc +0 -0
  196. package/src/zexus/vm/__pycache__/ssa_converter.cpython-312.pyc +0 -0
  197. package/src/zexus/vm/__pycache__/vm.cpython-312.pyc +0 -0
  198. package/src/zexus/vm/async_optimizer.py +420 -0
  199. package/src/zexus/vm/bytecode.py +428 -0
  200. package/src/zexus/vm/bytecode_converter.py +297 -0
  201. package/src/zexus/vm/cache.py +532 -0
  202. package/src/zexus/vm/jit.py +720 -0
  203. package/src/zexus/vm/memory_manager.py +520 -0
  204. package/src/zexus/vm/memory_pool.py +511 -0
  205. package/src/zexus/vm/optimizer.py +478 -0
  206. package/src/zexus/vm/parallel_vm.py +899 -0
  207. package/src/zexus/vm/peephole_optimizer.py +452 -0
  208. package/src/zexus/vm/profiler.py +527 -0
  209. package/src/zexus/vm/register_allocator.py +462 -0
  210. package/src/zexus/vm/register_vm.py +520 -0
  211. package/src/zexus/vm/ssa_converter.py +757 -0
  212. package/src/zexus/vm/vm.py +1392 -0
  213. package/src/zexus/zexus_ast.py +1782 -0
  214. package/src/zexus/zexus_token.py +253 -0
  215. package/src/zexus/zpm/__init__.py +15 -0
  216. package/src/zexus/zpm/installer.py +116 -0
  217. package/src/zexus/zpm/package_manager.py +208 -0
  218. package/src/zexus/zpm/publisher.py +98 -0
  219. package/src/zexus/zpm/registry.py +110 -0
  220. package/src/zexus.egg-info/PKG-INFO +2235 -0
  221. package/src/zexus.egg-info/SOURCES.txt +876 -0
  222. package/src/zexus.egg-info/dependency_links.txt +1 -0
  223. package/src/zexus.egg-info/entry_points.txt +3 -0
  224. package/src/zexus.egg-info/not-zip-safe +1 -0
  225. package/src/zexus.egg-info/requires.txt +14 -0
  226. package/src/zexus.egg-info/top_level.txt +2 -0
  227. package/zexus.json +14 -0
package/src/zexus/parser/strategy_structural.py
@@ -0,0 +1,1033 @@
1
+ # src/zexus/strategy_structural.py
2
+ from ..zexus_token import *
3
+ from typing import List, Dict
4
+ from ..config import config as zexus_config
5
+
6
+ class StructuralAnalyzer:
7
+ """Lightweight structural analyzer that splits token stream into top-level blocks.
8
+ Special handling for try/catch to avoid merging statements inside try blocks.
9
+ """
10
+
11
+ def __init__(self):
12
+ # blocks: id -> block_info
13
+ self.blocks = {}
14
+
15
+ def analyze(self, tokens: List):
16
+ """Analyze tokens and produce a block map used by the context parser.
17
+
18
+ block_info keys:
19
+ - id: unique id
20
+ - type/subtype: block type (e.g. 'try', 'let', 'print', 'block')
21
+ - tokens: list of tokens that belong to the block
22
+ - start_token: token object where block starts
23
+ - start_index / end_index: indices in original token stream
24
+ - parent: optional parent block id
25
+ """
26
+ self.blocks = {}
27
+ i = 0
28
+ block_id = 0
29
+ n = len(tokens)
30
+
31
+ # helper sets for stopping heuristics (mirrors context parser)
32
+ stop_types = {SEMICOLON, RBRACE}
33
+
34
+ # Modifier tokens that should be merged with the following statement
35
+ modifier_tokens = {PUBLIC, PRIVATE, SEALED, ASYNC, NATIVE, INLINE, SECURE, PURE, VIEW, PAYABLE}
36
+
37
+ # Statement starters (keywords that begin a new statement)
38
+ # NOTE: SEND and RECEIVE removed - they can be used as function calls in expressions
39
+ statement_starters = {
40
+ LET, CONST, DATA, PRINT, FOR, IF, WHILE, RETURN, CONTINUE, BREAK, THROW, ACTION, FUNCTION, TRY, EXTERNAL,
41
+ SCREEN, EXPORT, USE, DEBUG, ENTITY, CONTRACT, VERIFY, PROTECT, SEAL, PERSISTENT, AUDIT,
42
+ RESTRICT, SANDBOX, TRAIL, GC, BUFFER, SIMD,
43
+ DEFER, PATTERN, ENUM, STREAM, WATCH,
44
+ CAPABILITY, GRANT, REVOKE, VALIDATE, SANITIZE, IMMUTABLE,
45
+ INTERFACE, TYPE_ALIAS, MODULE, PACKAGE, USING,
46
+ CHANNEL, ATOMIC,
47
+ # Blockchain keywords
48
+ LEDGER, STATE, REQUIRE, REVERT, LIMIT
49
+ }
50
+
51
+ while i < n:
52
+ t = tokens[i]
53
+ # skip EOF tokens
54
+ if t.type == EOF:
55
+ i += 1
56
+ continue
57
+
58
+ # Helper: skip tokens that are empty/whitespace-only literals when building blocks
59
+ def _is_empty_token(tok):
60
+ lit = getattr(tok, 'literal', None)
61
+ return (lit == '' or lit is None) and tok.type != STRING and tok.type != IDENT
62
+
63
+ # === FIXED: Enhanced USE statement detection ===
64
+ if t.type == USE:
65
+ start_idx = i
66
+ use_tokens = [t]
67
+ i += 1
68
+
69
+ # Handle use { ... } from ... syntax
70
+ if i < n and tokens[i].type == LBRACE:
71
+ # Collect until closing brace
72
+ brace_count = 1
73
+ use_tokens.append(tokens[i])
74
+ i += 1
75
+
76
+ while i < n and brace_count > 0:
77
+ use_tokens.append(tokens[i])
78
+ if tokens[i].type == LBRACE:
79
+ brace_count += 1
80
+ elif tokens[i].type == RBRACE:
81
+ brace_count -= 1
82
+ i += 1
83
+
84
+ # Look for 'from' and file path
85
+ # FIX: Stop if we hit a statement starter, semicolon, or EOF
86
+ while i < n and tokens[i].type not in stop_types and tokens[i].type not in statement_starters:
87
+ # FIX: Check for FROM token type OR identifier 'from'
88
+ is_from = (tokens[i].type == FROM) or (tokens[i].type == IDENT and tokens[i].literal == 'from')
89
+
90
+ if is_from:
91
+ # Include 'from' and the following string
92
+ use_tokens.append(tokens[i])
93
+ i += 1
94
+ if i < n and tokens[i].type == STRING:
95
+ use_tokens.append(tokens[i])
96
+ i += 1
97
+ break
98
+ else:
99
+ use_tokens.append(tokens[i])
100
+ i += 1
101
+ else:
102
+ # Simple use 'path' syntax
103
+ # FIX: Stop at statement starters to prevent greedy consumption
104
+ while i < n and tokens[i].type not in stop_types and tokens[i].type != EOF:
105
+ if tokens[i].type in statement_starters:
106
+ break
107
+ use_tokens.append(tokens[i])
108
+ i += 1
109
+
110
+ # Create block for this use statement
111
+ filtered_tokens = [tk for tk in use_tokens if not _is_empty_token(tk)]
112
+ self.blocks[block_id] = {
113
+ 'id': block_id,
114
+ 'type': 'statement',
115
+ 'subtype': 'use_statement',
116
+ 'tokens': filtered_tokens,
117
+ 'start_token': tokens[start_idx],
118
+ 'start_index': start_idx,
119
+ 'end_index': i - 1,
120
+ 'parent': None
121
+ }
122
+ block_id += 1
123
+ continue
124
+
125
+ # Enhanced ENTITY statement detection
126
+ elif t.type == ENTITY:
127
+ start_idx = i
128
+ entity_tokens = [t]
129
+ i += 1
130
+
131
+ # Collect entity name
132
+ if i < n and tokens[i].type == IDENT:
133
+ entity_tokens.append(tokens[i])
134
+ i += 1
135
+
136
+ # Collect until closing brace
137
+ brace_count = 0
138
+ while i < n:
139
+ # Check if we've found the opening brace
140
+ if tokens[i].type == LBRACE:
141
+ brace_count = 1
142
+ entity_tokens.append(tokens[i])
143
+ i += 1
144
+ break
145
+ entity_tokens.append(tokens[i])
146
+ i += 1
147
+
148
+ # Now collect until matching closing brace
149
+ while i < n and brace_count > 0:
150
+ entity_tokens.append(tokens[i])
151
+ if tokens[i].type == LBRACE:
152
+ brace_count += 1
153
+ elif tokens[i].type == RBRACE:
154
+ brace_count -= 1
155
+ i += 1
156
+
157
+ # Create block
158
+ filtered_tokens = [tk for tk in entity_tokens if not _is_empty_token(tk)]
159
+ self.blocks[block_id] = {
160
+ 'id': block_id,
161
+ 'type': 'statement',
162
+ 'subtype': 'entity_statement',
163
+ 'tokens': filtered_tokens,
164
+ 'start_token': tokens[start_idx],
165
+ 'start_index': start_idx,
166
+ 'end_index': i - 1,
167
+ 'parent': None
168
+ }
169
+ block_id += 1
170
+ continue
171
+
172
+ # CONTRACT statement detection
173
+ elif t.type == CONTRACT:
174
+ start_idx = i
175
+ contract_tokens = [t]
176
+ i += 1
177
+
178
+ # Collect contract name
179
+ if i < n and tokens[i].type == IDENT:
180
+ contract_tokens.append(tokens[i])
181
+ i += 1
182
+
183
+ # Collect until closing brace
184
+ brace_count = 0
185
+ while i < n:
186
+ if tokens[i].type == LBRACE:
187
+ brace_count = 1
188
+ contract_tokens.append(tokens[i])
189
+ i += 1
190
+ break
191
+ contract_tokens.append(tokens[i])
192
+ i += 1
193
+
194
+ while i < n and brace_count > 0:
195
+ contract_tokens.append(tokens[i])
196
+ if tokens[i].type == LBRACE:
197
+ brace_count += 1
198
+ elif tokens[i].type == RBRACE:
199
+ brace_count -= 1
200
+ i += 1
201
+
202
+ filtered_tokens = [tk for tk in contract_tokens if not _is_empty_token(tk)]
203
+ self.blocks[block_id] = {
204
+ 'id': block_id,
205
+ 'type': 'statement',
206
+ 'subtype': 'contract_statement',
207
+ 'tokens': filtered_tokens,
208
+ 'start_token': tokens[start_idx],
209
+ 'start_index': start_idx,
210
+ 'end_index': i - 1,
211
+ 'parent': None
212
+ }
213
+ block_id += 1
214
+ continue
215
+
216
+ # VERIFY statement detection - handle verify { ... }, "message" pattern
217
+ elif t.type == VERIFY:
218
+ start_idx = i
219
+ verify_tokens = [t]
220
+ i += 1
221
+
222
+ # Check if next token is LBRACE (block form)
223
+ if i < n and tokens[i].type == LBRACE:
224
+ # Collect until matching closing brace
225
+ brace_count = 1
226
+ verify_tokens.append(tokens[i])
227
+ i += 1
228
+
229
+ while i < n and brace_count > 0:
230
+ verify_tokens.append(tokens[i])
231
+ if tokens[i].type == LBRACE:
232
+ brace_count += 1
233
+ elif tokens[i].type == RBRACE:
234
+ brace_count -= 1
235
+ i += 1
236
+
237
+ # Check for comma and message after the block
238
+ if i < n and tokens[i].type == COMMA:
239
+ verify_tokens.append(tokens[i])
240
+ i += 1
241
+
242
+ # Collect the message (until semicolon, EOF, or next statement starter)
243
+ while i < n and tokens[i].type not in stop_types and tokens[i].type not in statement_starters:
244
+ verify_tokens.append(tokens[i])
245
+ i += 1
246
+
247
+ # Create block for verify statement
248
+ filtered_tokens = [tk for tk in verify_tokens if not _is_empty_token(tk)]
249
+ self.blocks[block_id] = {
250
+ 'id': block_id,
251
+ 'type': 'statement',
252
+ 'subtype': VERIFY,
253
+ 'tokens': filtered_tokens,
254
+ 'start_token': tokens[start_idx],
255
+ 'start_index': start_idx,
256
+ 'end_index': i - 1,
257
+ 'parent': None
258
+ }
259
+ block_id += 1
260
+ continue
261
+ else:
262
+ # Not a block form, let it fall through to generic handling
263
+ i = start_idx
264
+
265
+ # Try-catch: collect the try block and catch block TOGETHER
266
+ if t.type == TRY:
267
+ start_idx = i
268
+ # collect try token + following block tokens (brace-aware)
269
+ try_block_tokens, next_idx = self._collect_brace_block(tokens, i + 1)
270
+
271
+ # Check for catch block
272
+ catch_tokens = []
273
+ final_idx = next_idx
274
+
275
+ if next_idx < n and tokens[next_idx].type == CATCH:
276
+ catch_token = tokens[next_idx]
277
+
278
+ # Collect tokens between CATCH and LBRACE (e.g. (e))
279
+ pre_brace_tokens = []
280
+ curr = next_idx + 1
281
+ while curr < n and tokens[curr].type != LBRACE and tokens[curr].type != EOF:
282
+ pre_brace_tokens.append(tokens[curr])
283
+ curr += 1
284
+
285
+ catch_block_tokens, after_catch_idx = self._collect_brace_block(tokens, curr)
286
+ catch_tokens = [catch_token] + pre_brace_tokens + catch_block_tokens
287
+ final_idx = after_catch_idx
288
+
289
+ # Combine all tokens
290
+ full_tokens = [t] + try_block_tokens + catch_tokens
291
+ full_tokens = [tk for tk in full_tokens if not _is_empty_token(tk)]
292
+
293
+ # Create the main try-catch block
294
+ self.blocks[block_id] = {
295
+ 'id': block_id,
296
+ 'type': 'statement',
297
+ 'subtype': 'try_catch_statement',
298
+ 'tokens': full_tokens,
299
+ 'start_token': t,
300
+ 'start_index': start_idx,
301
+ 'end_index': final_idx - 1,
302
+ 'parent': None
303
+ }
304
+ parent_id = block_id
305
+ block_id += 1
306
+ i = final_idx
307
+
308
+ # Process inner statements of TRY block
309
+ inner = try_block_tokens[1:-1] if try_block_tokens and len(try_block_tokens) >= 2 else []
310
+ inner = [tk for tk in inner if not _is_empty_token(tk)]
311
+ if inner:
312
+ if self._is_map_literal(inner):
313
+ # ... map literal handling ...
314
+ pass
315
+ else:
316
+ stmts = self._split_into_statements(inner)
317
+ for stmt_tokens in stmts:
318
+ self.blocks[block_id] = {
319
+ 'id': block_id,
320
+ 'type': 'statement',
321
+ 'subtype': stmt_tokens[0].type if stmt_tokens else 'unknown',
322
+ 'tokens': [tk for tk in stmt_tokens if not _is_empty_token(tk)],
323
+ 'start_token': (stmt_tokens[0] if stmt_tokens else try_block_tokens[0]),
324
+ 'start_index': start_idx, # Approximate
325
+ 'end_index': start_idx, # Approximate
326
+ 'parent': parent_id
327
+ }
328
+ block_id += 1
329
+
330
+ # Process inner statements of CATCH block
331
+ if catch_tokens:
332
+ # catch_tokens[0] is CATCH
333
+ # catch_tokens[1] might be (error) or {
334
+ # We need to find the brace block inside catch_tokens
335
+ catch_brace_tokens = []
336
+ for k, ctk in enumerate(catch_tokens):
337
+ if ctk.type == LBRACE:
338
+ catch_brace_tokens = catch_tokens[k:]
339
+ break
340
+
341
+ inner_catch = catch_brace_tokens[1:-1] if catch_brace_tokens and len(catch_brace_tokens) >= 2 else []
342
+ inner_catch = [tk for tk in inner_catch if not _is_empty_token(tk)]
343
+
344
+ if inner_catch:
345
+ stmts = self._split_into_statements(inner_catch)
346
+ for stmt_tokens in stmts:
347
+ self.blocks[block_id] = {
348
+ 'id': block_id,
349
+ 'type': 'statement',
350
+ 'subtype': stmt_tokens[0].type if stmt_tokens else 'unknown',
351
+ 'tokens': [tk for tk in stmt_tokens if not _is_empty_token(tk)],
352
+ 'start_token': (stmt_tokens[0] if stmt_tokens else catch_tokens[0]),
353
+ 'start_index': next_idx, # Approximate
354
+ 'end_index': next_idx, # Approximate
355
+ 'parent': parent_id
356
+ }
357
+ block_id += 1
358
+ continue
359
+
360
+ # Brace-delimited top-level block
361
+ if t.type == LBRACE:
362
+ block_tokens, next_idx = self._collect_brace_block(tokens, i)
363
+ this_block_id = block_id
364
+ # filter empty tokens before storing
365
+ filtered_block_tokens = [tk for tk in block_tokens if not _is_empty_token(tk)]
366
+ self.blocks[this_block_id] = {
367
+ 'id': this_block_id,
368
+ 'type': 'block',
369
+ 'subtype': 'brace_block',
370
+ 'tokens': filtered_block_tokens,
371
+ 'start_token': tokens[i],
372
+ 'start_index': i,
373
+ 'end_index': next_idx - 1,
374
+ 'parent': None
375
+ }
376
+ block_id += 1
377
+
378
+ # split inner tokens into child blocks unless it's a map literal
379
+ inner = block_tokens[1:-1] if block_tokens and len(block_tokens) >= 2 else []
380
+ inner = [tk for tk in inner if not _is_empty_token(tk)]
381
+ if inner:
382
+ if self._is_map_literal(inner):
383
+ self.blocks[block_id] = {
384
+ 'id': block_id,
385
+ 'type': 'map_literal',
386
+ 'subtype': 'map_literal',
387
+ 'tokens': [tk for tk in block_tokens if not _is_empty_token(tk)], # keep full braces
388
+ 'start_token': block_tokens[0],
389
+ 'start_index': i,
390
+ 'end_index': next_idx - 1,
391
+ 'parent': this_block_id
392
+ }
393
+ block_id += 1
394
+ else:
395
+ stmts = self._split_into_statements(inner)
396
+ for stmt_tokens in stmts:
397
+ self.blocks[block_id] = {
398
+ 'id': block_id,
399
+ 'type': 'statement',
400
+ 'subtype': stmt_tokens[0].type if stmt_tokens else 'unknown',
401
+ 'tokens': [tk for tk in stmt_tokens if not _is_empty_token(tk)],
402
+ 'start_token': (stmt_tokens[0] if stmt_tokens else block_tokens[0]),
403
+ 'start_index': i,
404
+ 'end_index': i + len(stmt_tokens),
405
+ 'parent': this_block_id
406
+ }
407
+ block_id += 1
408
+
409
+ i = next_idx
410
+ continue
411
+
412
+ # Modifier tokens: merge with the following statement
413
+ if t.type in modifier_tokens:
414
+ start_idx = i
415
+ modifier_list = []
416
+
417
+ # Collect consecutive modifiers
418
+ while i < n and tokens[i].type in modifier_tokens:
419
+ modifier_list.append(tokens[i])
420
+ i += 1
421
+
422
+ # Skip EOF/whitespace
423
+ while i < n and tokens[i].type == EOF:
424
+ i += 1
425
+
426
+ # If followed by a statement starter, continue to statement parsing
427
+ # by falling through to the elif below
428
+ if i < n and tokens[i].type in statement_starters:
429
+ # Update t to point to the statement starter
430
+ t = tokens[i]
431
+ # Don't increment i - let the statement parsing handle it
432
+ else:
433
+ # Modifiers without a following statement - this is an async expression!
434
+ # Collect the modifiers AND the following expression into one block
435
+ # Example: "async producer()" should be one block
436
+
437
+ # Start collecting the expression that follows
438
+ j = i
439
+ expr_tokens = modifier_list[:] # Include modifiers in the block
440
+ nesting = 0
441
+ started_expr = False
442
+
443
+ # Collect tokens for the expression
444
+ while j < n:
445
+ tj = tokens[j]
446
+
447
+ # Track nesting
448
+ if tj.type in {LPAREN, LBRACKET, LBRACE}:
449
+ nesting += 1
450
+ started_expr = True
451
+ elif tj.type in {RPAREN, RBRACKET, RBRACE}:
452
+ nesting -= 1
453
+
454
+ expr_tokens.append(tj)
455
+ j += 1
456
+
457
+ # Stop at semicolon when at nesting 0
458
+ if nesting == 0 and tj.type == SEMICOLON:
459
+ break
460
+
461
+ # Stop after completing a simple expression at nesting 0
462
+ # (identifier with optional call, or after closing all parens)
463
+ if started_expr and nesting == 0:
464
+ break
465
+
466
+ # Create block for async expression
467
+ self.blocks[block_id] = {
468
+ 'id': block_id,
469
+ 'type': 'statement',
470
+ 'subtype': modifier_list[0].type, # ASYNC
471
+ 'tokens': expr_tokens,
472
+ 'start_token': modifier_list[0],
473
+ 'start_index': start_idx,
474
+ 'end_index': j
475
+ }
476
+ block_id += 1
477
+ i = j
478
+ # Clear modifier_list so it doesn't affect next statement
479
+ del modifier_list
480
+ continue
481
+
482
+ # Statement-like tokens: try to collect tokens up to a statement boundary
483
+ # DUAL-MODE DEBUG: skip if this is debug(...), which is a function call, not a statement
484
+ if t.type in statement_starters and not (t.type == DEBUG and i + 1 < n and tokens[i + 1].type == LPAREN):
485
+ # Check if we just processed modifiers
486
+ if 'modifier_list' in locals() and start_idx < i:
487
+ # Start from modifier position, include modifiers in stmt_tokens
488
+ stmt_start_idx = start_idx
489
+ stmt_tokens = modifier_list + [t]
490
+ j = i + 1
491
+ del modifier_list # Clear for next iteration
492
+ else:
493
+ stmt_start_idx = i
494
+ stmt_tokens = [t] # Start with the statement starter token
495
+ j = i + 1
496
+ nesting = 0 # Track nesting level for (), [], {}
497
+ found_brace_block = False # Did we encounter a { ... } block?
498
+ found_colon_block = False # Did we encounter a : (tolerant syntax)?
499
+ baseline_column = None # Track indentation for colon-based blocks
500
+ in_assignment = (t.type in {LET, CONST}) # Are we in an assignment RHS?
501
+ seen_assign = False # Track if we've seen the main ASSIGN in LET/CONST
502
+
503
+ while j < n:
504
+ tj = tokens[j]
505
+
506
+ # Check if this is a statement terminator at nesting 0 BEFORE updating nesting
507
+ if nesting == 0 and tj.type in stop_types and not found_colon_block:
508
+ break
509
+
510
+ # Track when we see the main ASSIGN in LET/CONST statements
511
+ if in_assignment and tj.type == ASSIGN and nesting == 0:
512
+ seen_assign = True
513
+
514
+ # CRITICAL FIX: Check if next token starts a new statement (assignment or function call)
515
+ # BUT: Don't break if we're in a LET/CONST before the main ASSIGN (type annotation case)
516
+ # ALSO: Don't break if we're in the middle of a property access chain (obj.prop = ...)
517
+ if nesting == 0 and len(stmt_tokens) > 1: # Only check if we've collected some tokens
518
+ # Pattern 1: IDENT followed by ASSIGN is an assignment statement
519
+ # EXCEPT: In LET/CONST before main assign (e.g., "let x : string =" - string is type, not new var)
520
+ # EXCEPT: After DOT (property access within same statement: obj.prop = ...)
521
+ if tj.type == IDENT and j + 1 < n and tokens[j + 1].type == ASSIGN:
522
+ # Check if previous token was DOT (we're in property chain)
523
+ prev_token = stmt_tokens[-1] if stmt_tokens else None
524
+ is_property_access = prev_token and prev_token.type == DOT
525
+
526
+ # Only break if:
527
+ # 1. NOT in property access chain, AND
528
+ # 2. (NOT in LET/CONST, OR we've already seen the main assign)
529
+ if not is_property_access and (not in_assignment or seen_assign):
530
+ break
531
+
532
+ # Pattern 2: IDENT followed by DOT could be start of property assignment (obj.prop = ...)
533
+ # This is a NEW statement if we're in LET/CONST and have seen the main assign
534
+ elif tj.type == IDENT and j + 1 < n and tokens[j + 1].type == DOT:
535
+ # Check if this is on a new line (likely a new statement)
536
+ if stmt_tokens:
537
+ last_line = stmt_tokens[-1].line
538
+ if tj.line > last_line and in_assignment and seen_assign:
539
+ # New line after completed assignment - this is a new statement
540
+ break
541
+
542
+ # Look ahead to see if this becomes a property assignment
543
+ # Pattern: IDENT DOT IDENT ASSIGN
544
+ if j + 3 < n and tokens[j + 2].type == IDENT and tokens[j + 3].type == ASSIGN:
545
+ # This is a property assignment starting!
546
+ # Break if we've already completed the LET/CONST
547
+ if in_assignment and seen_assign:
548
+ break
549
+ # IDENT followed by LPAREN is a function call (already handled below, but listed for clarity)
550
+
551
+ # Detect colon-based block (tolerable syntax for action/function/if/while etc.)
552
+ if tj.type == COLON and nesting == 0 and t.type in {ACTION, FUNCTION, IF, WHILE, FOR}:
553
+ found_colon_block = True
554
+ stmt_tokens.append(tj)
555
+ j += 1
556
+ # Record the baseline column for dedent detection
557
+ # This is the column of the first token AFTER the colon
558
+ if j < n:
559
+ baseline_column = tokens[j].column if hasattr(tokens[j], 'column') else 1
560
+ continue
561
+
562
+ # Track nesting level BEFORE dedent check (so we don't break inside {...} or [...] or (...))
563
+ if tj.type in {LPAREN, LBRACE, LBRACKET}:
564
+ # Only mark as brace block if NOT already in colon block (to distinguish code blocks from data literals)
565
+ if tj.type == LBRACE and not found_colon_block:
566
+ found_brace_block = True
567
+ nesting += 1
568
+ elif tj.type in {RPAREN, RBRACE, RBRACKET}:
569
+ nesting -= 1
570
+
571
+ # If we're in a colon block, collect until dedent
572
+ if found_colon_block and nesting == 0:
573
+ current_column = tj.column if hasattr(tj, 'column') else 1
574
+ # Stop if we hit a dedent (token BEFORE baseline column, indicating unindent)
575
+ # This works because baseline_column is the indented level (e.g., 6)
576
+ # and when we see column 2, that's < 6, so we stop
577
+ #print(f" [DEDENT CHECK] token={tj.type} col={current_column} baseline={baseline_column} nesting={nesting}")
578
+ if current_column < baseline_column and tj.type in statement_starters:
579
+ #print(f" [DEDENT BREAK] Breaking on dedent: {tj.type} at col {current_column}")
580
+ break
581
+
582
+ # Stop at new statement starters only if we're at nesting 0
583
+ # BUT: for LET/CONST, allow function expressions in the RHS
584
+ # ALSO: for WHILE/FOR/IF, don't break until we've found the opening brace (control structures need their conditions)
585
+ if nesting == 0 and tj.type in statement_starters and not found_colon_block:
586
+ # Exception: allow chained method calls
587
+ prev = tokens[j-1] if j > 0 else None
588
+ if not (prev and prev.type == DOT):
589
+ # CRITICAL FIX: For control flow statements (WHILE/FOR/IF), don't break on statement_starters
590
+ # until we've found the opening brace. This allows variable names that happen to be keywords
591
+ # (like "limit") to appear in the condition without being treated as new statements.
592
+ is_control_flow = t.type in {WHILE, FOR, IF}
593
+ if is_control_flow and not found_brace_block:
594
+ # We're still parsing the condition - don't break yet
595
+ pass
596
+ else:
597
+ # For LET/CONST, allow FUNCTION, SANDBOX, SANITIZE as RHS (expressions)
598
+ # Also allow DEBUG when followed by ( for debug(x) function calls in assignments
599
+ # Also allow IF when followed by THEN (if-then-else expression)
600
+ allow_in_assignment = tj.type in {FUNCTION, SANDBOX, SANITIZE}
601
+ allow_debug_call = tj.type == DEBUG and j + 1 < n and tokens[j + 1].type == LPAREN
602
+ allow_if_then_else = False
603
+ if tj.type == IF:
604
+ # Look ahead for THEN to detect if-then-else expression
605
+ for k in range(j + 1, min(j + 20, n)): # Look ahead up to 20 tokens
606
+ if tokens[k].type == THEN:
607
+ allow_if_then_else = True
608
+ break
609
+ elif tokens[k].type in {LBRACE, COLON}:
610
+ # Found statement form indicators
611
+ break
612
+ if not (in_assignment and (allow_in_assignment or allow_debug_call or allow_if_then_else)):
613
+ break
614
+
615
+ # CRITICAL FIX: Also break on modifier tokens at nesting 0 when followed by statement keywords
616
+ # This prevents previous statements from consuming modifiers like "async action foo()"
617
+ # But ALLOWS "async foo()" expressions to stay together
618
+ if nesting == 0 and tj.type in modifier_tokens and not found_colon_block and len(stmt_tokens) > 0:
619
+ # Look ahead to see if modifier is followed by a statement keyword
620
+ next_idx = j + 1
621
+ while next_idx < n and tokens[next_idx].type in modifier_tokens:
622
+ next_idx += 1
623
+ if next_idx < n and tokens[next_idx].type in statement_starters:
624
+ # Modifier followed by statement keyword - break here
625
+ break
626
+ # ALSO break if this is an ASYNC modifier followed by IDENT+LPAREN (async expression)
627
+ # This prevents LET statements from consuming "async func()" on the next line
628
+ if tj.type == ASYNC and next_idx < n and tokens[next_idx].type == IDENT:
629
+ if next_idx + 1 < n and tokens[next_idx + 1].type == LPAREN:
630
+ # This is "async ident(" - an async expression
631
+ break
632
+ # Otherwise, continue collecting (async expression case)
633
+
634
+ # FIX: Also break at expression statements (IDENT followed by LPAREN) when we're at nesting 0
635
+ # and not in an assignment context
636
+ # EXCEPTION: Don't break if we're parsing ACTION/FUNCTION (their names are followed by LPAREN for parameters)
637
+ # ALSO EXCEPTION: Don't break if we're parsing WHILE/FOR/IF and haven't found the brace yet (function calls in conditions)
638
+ is_control_flow = t.type in {WHILE, FOR, IF}
639
+ if nesting == 0 and not in_assignment and not found_colon_block and not found_brace_block and t.type not in {ACTION, FUNCTION} and not (is_control_flow and not found_brace_block):
640
+ if tj.type == IDENT and j + 1 < n and tokens[j + 1].type == LPAREN:
641
+ # This looks like a function call starting a new expression statement
642
+ # Only break if we've already collected some tokens (not the first token)
643
+ if len(stmt_tokens) > 1:
644
+ break
645
+
646
+ # Always collect tokens
647
+ stmt_tokens.append(tj)
648
+ j += 1
649
+
650
+ # MODIFIED: For RETURN, CONTINUE, and PRINT, stop after closing parens at nesting 0
651
+ # PRINT can have multiple comma-separated arguments inside the parens
652
+ if t.type in {RETURN, CONTINUE, PRINT} and nesting == 0 and tj.type == RPAREN:
653
+ break
654
+
655
+ # If we just closed a brace block and are back at nesting 0, stop
656
+ if found_brace_block and nesting == 0:
657
+ # CRITICAL FIX: For IF statements, check if followed by ELSE or ELIF
658
+ if t.type == IF:
659
+ # Look ahead for else/elif
660
+ if j < n and tokens[j].type in {ELSE, ELIF}:
661
+ # Found else/elif - continue collecting
662
+ found_brace_block = False
663
+ continue
664
+
665
+ # REQUIRE tolerance block: the {...} is part of the statement, not separate
666
+ # Don't break yet - the brace block is the tolerance logic
667
+ if t.type == REQUIRE:
668
+ found_brace_block = False
669
+ continue
670
+
671
+ break
672
+
673
+ # Skip any trailing semicolons
674
+ while j < n and tokens[j].type == SEMICOLON:
675
+ j += 1
676
+
677
+ # Create block for the collected statement
678
+ filtered_stmt_tokens = [tk for tk in stmt_tokens if not _is_empty_token(tk)]
679
+ if filtered_stmt_tokens: # Only create block if we have meaningful tokens
680
+ self.blocks[block_id] = {
681
+ 'id': block_id,
682
+ 'type': 'statement',
683
+ 'subtype': t.type,
684
+ 'tokens': filtered_stmt_tokens,
685
+ 'start_token': tokens[stmt_start_idx],
686
+ 'start_index': stmt_start_idx,
687
+ 'end_index': j,
688
+ 'parent': None
689
+ }
690
+ block_id += 1
691
+ i = j
692
+ continue
693
+
694
+ # Fallback: collect a run of tokens until a clear statement boundary
695
+ # Respect nesting so that constructs inside parentheses/braces aren't split
696
+ # FIX: Handle expression statements (function calls not assigned to variables)
697
+ start_idx = i
698
+ run_tokens = [t]
699
+ j = i + 1
700
+ nesting = 0
701
+
702
+ # Check if this is a simple function call expression statement: ident(...)
703
+ is_function_call_start = (t.type == IDENT and j < n and tokens[j].type == LPAREN)
704
+
705
+ while j < n:
706
+ tj = tokens[j]
707
+
708
+ # Update nesting for parentheses/brackets/braces
709
+ if tj.type in {LPAREN, LBRACE, LBRACKET}:
710
+ nesting += 1
711
+ elif tj.type in {RPAREN, RBRACE, RBRACKET}:
712
+ if nesting > 0:
713
+ nesting -= 1
714
+
715
+ # Only consider these as boundaries when at top-level (nesting == 0)
716
+ if nesting == 0:
717
+ # NEW: Line-based statement boundary detection
718
+ # If we have balanced parens and the next token is on a new line and could start a new statement, create boundary
719
+ last_line = run_tokens[-1].line if run_tokens else 0
720
+ if tj.line > last_line:
721
+ # Check if we have balanced parens in run_tokens (statement is syntactically complete)
722
+ paren_count = sum(1 if tok.type == LPAREN else -1 if tok.type == RPAREN else 0 for tok in run_tokens)
723
+ if paren_count == 0:
724
+ # Check if run_tokens contains an assignment (this is a complete assignment statement)
725
+ has_assign = any(tok.type == ASSIGN for tok in run_tokens)
726
+ if has_assign:
727
+ # Current token is on a new line and could start a new statement
728
+ # Check if it's IDENT (could be method call, function call, or property access)
729
+ if tj.type == IDENT:
730
+ # CRITICAL FIX: Don't break if the previous token was ASSIGN
731
+ # This means the IDENT is the RHS value, not a new statement
732
+ prev_tok = run_tokens[-1] if run_tokens else None
733
+ if prev_tok and prev_tok.type == ASSIGN:
734
+ # This IDENT is the RHS of the assignment, not a new statement
735
+ pass # Don't break, continue collecting
736
+ else:
737
+ # This is likely a new statement on a new line
738
+ # Don't add tj to run_tokens, break here
739
+ break
740
+
741
+ # Check if current token (tj) starts a new statement
742
+ # CRITICAL FIX: IDENT followed by ASSIGN is an assignment statement
743
+ # BUT: Don't treat it as a new statement if the previous token was DOT (property access)
744
+ is_assignment_start = False
745
+ if tj.type == IDENT and j + 1 < n and tokens[j + 1].type == ASSIGN:
746
+ # Check if previous token was DOT (part of property access)
747
+ prev_is_dot = (j > 0 and tokens[j - 1].type == DOT)
748
+ if not prev_is_dot:
749
+ is_assignment_start = True
750
+ # Pattern 2: IDENT followed by DOT could be property assignment (obj.prop = ...)
751
+ elif tj.type == IDENT and j + 1 < n and tokens[j + 1].type == DOT:
752
+ # Look ahead: IDENT DOT IDENT ASSIGN is a property assignment
753
+ if j + 3 < n and tokens[j + 2].type == IDENT and tokens[j + 3].type == ASSIGN:
754
+ is_assignment_start = True
755
+
756
+ is_new_statement = (
757
+ tj.type in stop_types or
758
+ tj.type in statement_starters or
759
+ tj.type == LBRACE or
760
+ tj.type == TRY or
761
+ is_assignment_start
762
+ )
763
+ if is_new_statement and j > start_idx: # Only break if we've collected at least one token
764
+ break
765
+
766
+ # FIX: If this is a function call and nesting just became 0 (closed all parens),
767
+ # check if next token looks like start of new statement
768
+ if is_function_call_start and nesting == 0 and j > start_idx + 1:
769
+ # We've closed the function call parens
770
+ # Check if next token starts a new statement (IDENT followed by LPAREN, or a statement keyword)
771
+ next_idx = j + 1
772
+ # Skip semicolons
773
+ while next_idx < n and tokens[next_idx].type == SEMICOLON:
774
+ next_idx += 1
775
+ if next_idx < n:
776
+ next_tok = tokens[next_idx]
777
+ # If next token is a statement starter OR an identifier followed by (, it's a new statement
778
+ if next_tok.type in statement_starters:
779
+ run_tokens.append(tj)
780
+ j += 1
781
+ break
782
+ elif next_tok.type == IDENT and next_idx + 1 < n and tokens[next_idx + 1].type == LPAREN:
783
+ # Next statement is also a function call
784
+ run_tokens.append(tj)
785
+ j += 1
786
+ break
787
+
788
+ run_tokens.append(tj)
789
+ j += 1
790
+
791
+ # Skip trailing semicolons (they're statement terminators, not part of the statement)
792
+ while j < n and tokens[j].type == SEMICOLON:
793
+ j += 1
794
+
795
+ filtered_run_tokens = [tk for tk in run_tokens if not _is_empty_token(tk)]
796
+ if filtered_run_tokens: # Only create block if we have meaningful tokens
797
+ self.blocks[block_id] = {
798
+ 'id': block_id,
799
+ 'type': 'statement',
800
+ 'subtype': (filtered_run_tokens[0].type if filtered_run_tokens else (run_tokens[0].type if run_tokens else 'token_run')),
801
+ 'tokens': filtered_run_tokens,
802
+ 'start_token': (filtered_run_tokens[0] if filtered_run_tokens else (run_tokens[0] if run_tokens else t)),
803
+ 'start_index': start_idx,
804
+ 'end_index': j - 1,
805
+ 'parent': None
806
+ }
807
+ block_id += 1
808
+ i = j
809
+
810
+ return self.blocks
811
+
812
+ def _collect_brace_block(self, tokens: List, start_index: int):
813
+ """Collect tokens comprising a brace-delimited block.
814
+ start_index should point at the token immediately after the 'try' or at a LBRACE.
815
+ Returns (collected_tokens_including_braces, next_index_after_block)
816
+ """
817
+ n = len(tokens)
818
+ # find the opening brace if start_index points to something else
819
+ i = start_index
820
+ # if the next token is not a LBRACE, try to find it
821
+ if i < n and tokens[i].type != LBRACE:
822
+ # scan forward to first LBRACE or EOF
823
+ while i < n and tokens[i].type != LBRACE and tokens[i].type != EOF:
824
+ i += 1
825
+ if i >= n or tokens[i].type != LBRACE:
826
+ # no brace, return empty block
827
+ return [], start_index
828
+
829
+ # i points to LBRACE
830
+ depth = 0
831
+ collected = []
832
+ while i < n:
833
+ tok = tokens[i]
834
+ collected.append(tok)
835
+ if tok.type == LBRACE:
836
+ depth += 1
837
+ elif tok.type == RBRACE:
838
+ depth -= 1
839
+ if depth == 0:
840
+ return collected, i + 1
841
+ i += 1
842
+
843
+ # Reached EOF without closing brace - return what we have (tolerant)
844
+ return collected, i
845
+
846
+ def _split_into_statements(self, tokens: List):
847
+ """Split a flat list of tokens into a list of statement token lists using statement boundaries."""
848
+ results = []
849
+ if not tokens:
850
+ return results
851
+
852
+ stop_types = {SEMICOLON, RBRACE}
853
+ # NOTE: SEND and RECEIVE removed - they can be used as function calls in expressions
854
+ statement_starters = {
855
+ LET, CONST, DATA, PRINT, FOR, IF, WHILE, RETURN, CONTINUE, BREAK, THROW, ACTION, FUNCTION, TRY, EXTERNAL,
856
+ SCREEN, EXPORT, USE, DEBUG, ENTITY, CONTRACT, VERIFY, PROTECT, SEAL, AUDIT,
857
+ RESTRICT, SANDBOX, TRAIL, NATIVE, GC, INLINE, BUFFER, SIMD,
858
+ DEFER, PATTERN, ENUM, STREAM, WATCH,
859
+ CAPABILITY, GRANT, REVOKE, VALIDATE, SANITIZE, IMMUTABLE,
860
+ INTERFACE, TYPE_ALIAS, MODULE, PACKAGE, USING,
861
+ CHANNEL, ATOMIC, ASYNC # Added ASYNC to recognize async expressions as statement boundaries
862
+ }
863
+
864
+ cur = []
865
+ i = 0
866
+ n = len(tokens)
867
+
868
+ while i < n:
869
+ t = tokens[i]
870
+
871
+ # Enhanced use statement detection (with braces) in inner blocks
872
+ if t.type == USE:
873
+ if cur: # Finish current statement
874
+ results.append(cur)
875
+ cur = []
876
+
877
+ # Collect the entire use statement
878
+ use_tokens = [t]
879
+ i += 1
880
+ brace_count = 0
881
+
882
+ # FIX: Check for statement starters here too to be safe
883
+ while i < n:
884
+ if brace_count == 0 and tokens[i].type in statement_starters:
885
+ break
886
+
887
+ use_tokens.append(tokens[i])
888
+ if tokens[i].type == LBRACE:
889
+ brace_count += 1
890
+ elif tokens[i].type == RBRACE:
891
+ brace_count -= 1
892
+ if brace_count == 0:
893
+ # Look for 'from' after closing brace
894
+ # FIX: Check FROM token type
895
+ if i + 1 < n and (tokens[i + 1].type == FROM or (tokens[i + 1].type == IDENT and tokens[i + 1].literal == 'from')):
896
+ use_tokens.append(tokens[i + 1])
897
+ i += 1
898
+ if i + 1 < n and tokens[i + 1].type == STRING:
899
+ use_tokens.append(tokens[i + 1])
900
+ i += 1
901
+ break
902
+ elif brace_count == 0 and tokens[i].type in stop_types:
903
+ break
904
+ i += 1
905
+
906
+ results.append(use_tokens)
907
+ i += 1
908
+ continue
909
+
910
+ # Entity/Contract statement detection (generic brace collector)
911
+ if t.type == ENTITY or t.type == CONTRACT:
912
+ if cur:
913
+ results.append(cur)
914
+ cur = []
915
+
916
+ # Collect until closing brace
917
+ entity_tokens = [t]
918
+ i += 1
919
+ brace_count = 0
920
+
921
+ while i < n:
922
+ entity_tokens.append(tokens[i])
923
+ if tokens[i].type == LBRACE:
924
+ brace_count += 1
925
+ elif tokens[i].type == RBRACE:
926
+ brace_count -= 1
927
+ if brace_count == 0:
928
+ break
929
+ i += 1
930
+
931
+ results.append(entity_tokens)
932
+ i += 1
933
+ continue
934
+
935
+ # start of a statement
936
+ if not cur:
937
+ cur.append(t)
938
+ i += 1
939
+ continue
940
+
941
+ # accumulate until boundary
942
+ if t.type in stop_types:
943
+ # end current statement (do not include terminator)
944
+ results.append(cur)
945
+ cur = []
946
+ i += 1
947
+ continue
948
+
949
+ if t.type in statement_starters:
950
+ # boundary: emit current and start new
951
+ results.append(cur)
952
+ cur = [t]
953
+ i += 1
954
+ continue
955
+
956
+ # Assignment RHS vs function-call heuristic:
957
+ # if current token is IDENT followed by LPAREN and the previous token was RPAREN (end of prev call), new statement
958
+ if t.type == IDENT and i + 1 < n and tokens[i + 1].type == LPAREN:
959
+ # New heuristic: if previous token was RPAREN (completing a call), this is likely a new statement
960
+ # BUT: if the token before RPAREN is DOT+IDENT (method call), don't create boundary
961
+ if cur and cur[-1].type == RPAREN:
962
+ # Check if this is a method call continuation (e.g., obj.method1().method2())
963
+ # Look for pattern: ... DOT IDENT LPAREN ... RPAREN <-- we are here
964
+ # Find the LPAREN that matches this RPAREN
965
+ paren_depth = 0
966
+ is_method_chain = False
967
+ for j in range(len(cur) - 1, -1, -1):
968
+ if cur[j].type == RPAREN:
969
+ paren_depth += 1
970
+ elif cur[j].type == LPAREN:
971
+ if paren_depth == 0:
972
+ # This is the matching LPAREN
973
+ # Check if it's preceded by DOT+IDENT (method call)
974
+ if j >= 2 and cur[j-1].type == IDENT and cur[j-2].type == DOT:
975
+ is_method_chain = True
976
+ break
977
+ else:
978
+ paren_depth -= 1
979
+
980
+ if not is_method_chain:
981
+ # Previous call is complete, and next is IDENT+LPAREN, so new statement
982
+ results.append(cur)
983
+ cur = [t]
984
+ i += 1
985
+ continue
986
+
987
+ # NEW: Check for line-based statement boundaries
988
+ # If we have balanced parens and the next token is on a new line and could start a new statement, create boundary
989
+ if cur:
990
+ # Check if parens are balanced
991
+ paren_count = sum(1 if tok.type == LPAREN else -1 if tok.type == RPAREN else 0 for tok in cur)
992
+ if paren_count == 0:
993
+ # Check if there's an ASSIGN in cur (this is a complete assignment statement)
994
+ has_assign = any(tok.type == ASSIGN for tok in cur)
995
+ if has_assign:
996
+ # Check if current token is on a new line
997
+ last_line = cur[-1].line if cur else 0
998
+ if t.line > last_line:
999
+ # Check if current token could start a new statement
1000
+ # IDENT followed by DOT or LPAREN could be a new statement
1001
+ if t.type == IDENT:
1002
+ # This is likely a new statement on a new line
1003
+ results.append(cur)
1004
+ cur = [t]
1005
+ i += 1
1006
+ continue
1007
+
1008
+ cur.append(t)
1009
+ i += 1
1010
+
1011
+ if cur:
1012
+ results.append(cur)
1013
+ return results
1014
+
1015
+ def _is_map_literal(self, inner_tokens: List):
1016
+ """Detect simple map/object literal pattern: STRING/IDENT followed by COLON somewhere early."""
1017
+ if not inner_tokens:
1018
+ return False
1019
+ # look at the first few tokens: key(:)value pairs
1020
+ for i in range(min(len(inner_tokens)-1, 8)):
1021
+ if inner_tokens[i].type in (STRING, IDENT) and i+1 < len(inner_tokens) and inner_tokens[i+1].type == COLON:
1022
+ return True
1023
+ return False
1024
+
1025
+ def print_structure(self):
1026
+ print("🔎 Structural Analyzer - Blocks:")
1027
+ for bid, info in self.blocks.items():
1028
+ start = info.get('start_index')
1029
+ end = info.get('end_index')
1030
+ ttype = info.get('type')
1031
+ subtype = info.get('subtype')
1032
+ token_literals = [t.literal for t in info.get('tokens', []) if getattr(t, 'literal', None)]
1033
+ print(f" [{bid}] {ttype}/{subtype} @ {start}-{end}: {token_literals}")