zexus 1.6.2
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in those public registries.
- package/LICENSE +0 -0
- package/README.md +2513 -0
- package/bin/zexus +2 -0
- package/bin/zpics +2 -0
- package/bin/zpm +2 -0
- package/bin/zx +2 -0
- package/bin/zx-deploy +2 -0
- package/bin/zx-dev +2 -0
- package/bin/zx-run +2 -0
- package/package.json +66 -0
- package/scripts/README.md +24 -0
- package/scripts/postinstall.js +44 -0
- package/shared_config.json +24 -0
- package/src/README.md +1525 -0
- package/src/tests/run_zexus_tests.py +117 -0
- package/src/tests/test_all_phases.zx +346 -0
- package/src/tests/test_blockchain_features.zx +306 -0
- package/src/tests/test_complexity_features.zx +321 -0
- package/src/tests/test_core_integration.py +185 -0
- package/src/tests/test_phase10_ecosystem.zx +177 -0
- package/src/tests/test_phase1_modifiers.zx +87 -0
- package/src/tests/test_phase2_plugins.zx +80 -0
- package/src/tests/test_phase3_security.zx +97 -0
- package/src/tests/test_phase4_vfs.zx +116 -0
- package/src/tests/test_phase5_types.zx +117 -0
- package/src/tests/test_phase6_metaprogramming.zx +125 -0
- package/src/tests/test_phase7_optimization.zx +132 -0
- package/src/tests/test_phase9_advanced_types.zx +157 -0
- package/src/tests/test_security_features.py +419 -0
- package/src/tests/test_security_features.zx +276 -0
- package/src/tests/test_simple_zx.zx +1 -0
- package/src/tests/test_verification_simple.zx +69 -0
- package/src/zexus/__init__.py +28 -0
- package/src/zexus/__main__.py +5 -0
- package/src/zexus/__pycache__/__init__.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/advanced_types.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/builtin_modules.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/capability_system.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/complexity_system.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/concurrency_system.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/config.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/dependency_injection.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/ecosystem.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/environment.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/error_reporter.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/hybrid_orchestrator.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/lexer.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/metaprogramming.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/module_cache.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/object.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/optimization.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/plugin_system.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/policy_engine.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/security.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/stdlib_integration.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/strategy_recovery.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/syntax_validator.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/type_system.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/virtual_filesystem.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/zexus_ast.cpython-312.pyc +0 -0
- package/src/zexus/__pycache__/zexus_token.cpython-312.pyc +0 -0
- package/src/zexus/advanced_types.py +401 -0
- package/src/zexus/blockchain/__init__.py +40 -0
- package/src/zexus/blockchain/__pycache__/__init__.cpython-312.pyc +0 -0
- package/src/zexus/blockchain/__pycache__/crypto.cpython-312.pyc +0 -0
- package/src/zexus/blockchain/__pycache__/ledger.cpython-312.pyc +0 -0
- package/src/zexus/blockchain/__pycache__/transaction.cpython-312.pyc +0 -0
- package/src/zexus/blockchain/crypto.py +463 -0
- package/src/zexus/blockchain/ledger.py +255 -0
- package/src/zexus/blockchain/transaction.py +267 -0
- package/src/zexus/builtin_modules.py +284 -0
- package/src/zexus/builtin_plugins.py +317 -0
- package/src/zexus/capability_system.py +372 -0
- package/src/zexus/cli/__init__.py +2 -0
- package/src/zexus/cli/__pycache__/__init__.cpython-312.pyc +0 -0
- package/src/zexus/cli/__pycache__/main.cpython-312.pyc +0 -0
- package/src/zexus/cli/main.py +707 -0
- package/src/zexus/cli/zpm.py +203 -0
- package/src/zexus/compare_interpreter_compiler.py +146 -0
- package/src/zexus/compiler/__init__.py +169 -0
- package/src/zexus/compiler/__pycache__/__init__.cpython-312.pyc +0 -0
- package/src/zexus/compiler/__pycache__/lexer.cpython-312.pyc +0 -0
- package/src/zexus/compiler/__pycache__/parser.cpython-312.pyc +0 -0
- package/src/zexus/compiler/__pycache__/zexus_ast.cpython-312.pyc +0 -0
- package/src/zexus/compiler/bytecode.py +266 -0
- package/src/zexus/compiler/compat_runtime.py +277 -0
- package/src/zexus/compiler/lexer.py +257 -0
- package/src/zexus/compiler/parser.py +779 -0
- package/src/zexus/compiler/semantic.py +118 -0
- package/src/zexus/compiler/zexus_ast.py +454 -0
- package/src/zexus/complexity_system.py +575 -0
- package/src/zexus/concurrency_system.py +493 -0
- package/src/zexus/config.py +201 -0
- package/src/zexus/crypto_bridge.py +19 -0
- package/src/zexus/dependency_injection.py +423 -0
- package/src/zexus/ecosystem.py +434 -0
- package/src/zexus/environment.py +101 -0
- package/src/zexus/environment_manager.py +119 -0
- package/src/zexus/error_reporter.py +314 -0
- package/src/zexus/evaluator/__init__.py +12 -0
- package/src/zexus/evaluator/__pycache__/__init__.cpython-312.pyc +0 -0
- package/src/zexus/evaluator/__pycache__/bytecode_compiler.cpython-312.pyc +0 -0
- package/src/zexus/evaluator/__pycache__/core.cpython-312.pyc +0 -0
- package/src/zexus/evaluator/__pycache__/expressions.cpython-312.pyc +0 -0
- package/src/zexus/evaluator/__pycache__/functions.cpython-312.pyc +0 -0
- package/src/zexus/evaluator/__pycache__/integration.cpython-312.pyc +0 -0
- package/src/zexus/evaluator/__pycache__/statements.cpython-312.pyc +0 -0
- package/src/zexus/evaluator/__pycache__/utils.cpython-312.pyc +0 -0
- package/src/zexus/evaluator/bytecode_compiler.py +700 -0
- package/src/zexus/evaluator/core.py +891 -0
- package/src/zexus/evaluator/expressions.py +827 -0
- package/src/zexus/evaluator/functions.py +3989 -0
- package/src/zexus/evaluator/integration.py +396 -0
- package/src/zexus/evaluator/statements.py +4303 -0
- package/src/zexus/evaluator/utils.py +126 -0
- package/src/zexus/evaluator_original.py +2041 -0
- package/src/zexus/external_bridge.py +16 -0
- package/src/zexus/find_affected_imports.sh +155 -0
- package/src/zexus/hybrid_orchestrator.py +152 -0
- package/src/zexus/input_validation.py +259 -0
- package/src/zexus/lexer.py +571 -0
- package/src/zexus/logging.py +89 -0
- package/src/zexus/lsp/__init__.py +9 -0
- package/src/zexus/lsp/completion_provider.py +207 -0
- package/src/zexus/lsp/definition_provider.py +22 -0
- package/src/zexus/lsp/hover_provider.py +71 -0
- package/src/zexus/lsp/server.py +269 -0
- package/src/zexus/lsp/symbol_provider.py +31 -0
- package/src/zexus/metaprogramming.py +321 -0
- package/src/zexus/module_cache.py +89 -0
- package/src/zexus/module_manager.py +107 -0
- package/src/zexus/object.py +973 -0
- package/src/zexus/optimization.py +424 -0
- package/src/zexus/parser/__init__.py +31 -0
- package/src/zexus/parser/__pycache__/__init__.cpython-312.pyc +0 -0
- package/src/zexus/parser/__pycache__/parser.cpython-312.pyc +0 -0
- package/src/zexus/parser/__pycache__/strategy_context.cpython-312.pyc +0 -0
- package/src/zexus/parser/__pycache__/strategy_structural.cpython-312.pyc +0 -0
- package/src/zexus/parser/integration.py +86 -0
- package/src/zexus/parser/parser.py +3977 -0
- package/src/zexus/parser/strategy_context.py +7254 -0
- package/src/zexus/parser/strategy_structural.py +1033 -0
- package/src/zexus/persistence.py +391 -0
- package/src/zexus/plugin_system.py +290 -0
- package/src/zexus/policy_engine.py +365 -0
- package/src/zexus/profiler/__init__.py +5 -0
- package/src/zexus/profiler/profiler.py +233 -0
- package/src/zexus/purity_system.py +398 -0
- package/src/zexus/runtime/__init__.py +20 -0
- package/src/zexus/runtime/async_runtime.py +324 -0
- package/src/zexus/search_old_imports.sh +65 -0
- package/src/zexus/security.py +1407 -0
- package/src/zexus/stack_trace.py +233 -0
- package/src/zexus/stdlib/__init__.py +27 -0
- package/src/zexus/stdlib/blockchain.py +341 -0
- package/src/zexus/stdlib/compression.py +167 -0
- package/src/zexus/stdlib/crypto.py +124 -0
- package/src/zexus/stdlib/datetime.py +163 -0
- package/src/zexus/stdlib/db_mongo.py +199 -0
- package/src/zexus/stdlib/db_mysql.py +162 -0
- package/src/zexus/stdlib/db_postgres.py +163 -0
- package/src/zexus/stdlib/db_sqlite.py +133 -0
- package/src/zexus/stdlib/encoding.py +230 -0
- package/src/zexus/stdlib/fs.py +195 -0
- package/src/zexus/stdlib/http.py +219 -0
- package/src/zexus/stdlib/http_server.py +248 -0
- package/src/zexus/stdlib/json_module.py +61 -0
- package/src/zexus/stdlib/math.py +360 -0
- package/src/zexus/stdlib/os_module.py +265 -0
- package/src/zexus/stdlib/regex.py +148 -0
- package/src/zexus/stdlib/sockets.py +253 -0
- package/src/zexus/stdlib/test_framework.zx +208 -0
- package/src/zexus/stdlib/test_runner.zx +119 -0
- package/src/zexus/stdlib_integration.py +341 -0
- package/src/zexus/strategy_recovery.py +256 -0
- package/src/zexus/syntax_validator.py +356 -0
- package/src/zexus/testing/zpics.py +407 -0
- package/src/zexus/testing/zpics_runtime.py +369 -0
- package/src/zexus/type_system.py +374 -0
- package/src/zexus/validation_system.py +569 -0
- package/src/zexus/virtual_filesystem.py +355 -0
- package/src/zexus/vm/__init__.py +8 -0
- package/src/zexus/vm/__pycache__/__init__.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/async_optimizer.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/bytecode.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/cache.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/jit.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/memory_manager.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/memory_pool.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/optimizer.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/parallel_vm.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/peephole_optimizer.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/profiler.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/register_allocator.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/register_vm.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/ssa_converter.cpython-312.pyc +0 -0
- package/src/zexus/vm/__pycache__/vm.cpython-312.pyc +0 -0
- package/src/zexus/vm/async_optimizer.py +420 -0
- package/src/zexus/vm/bytecode.py +428 -0
- package/src/zexus/vm/bytecode_converter.py +297 -0
- package/src/zexus/vm/cache.py +532 -0
- package/src/zexus/vm/jit.py +720 -0
- package/src/zexus/vm/memory_manager.py +520 -0
- package/src/zexus/vm/memory_pool.py +511 -0
- package/src/zexus/vm/optimizer.py +478 -0
- package/src/zexus/vm/parallel_vm.py +899 -0
- package/src/zexus/vm/peephole_optimizer.py +452 -0
- package/src/zexus/vm/profiler.py +527 -0
- package/src/zexus/vm/register_allocator.py +462 -0
- package/src/zexus/vm/register_vm.py +520 -0
- package/src/zexus/vm/ssa_converter.py +757 -0
- package/src/zexus/vm/vm.py +1392 -0
- package/src/zexus/zexus_ast.py +1782 -0
- package/src/zexus/zexus_token.py +253 -0
- package/src/zexus/zpm/__init__.py +15 -0
- package/src/zexus/zpm/installer.py +116 -0
- package/src/zexus/zpm/package_manager.py +208 -0
- package/src/zexus/zpm/publisher.py +98 -0
- package/src/zexus/zpm/registry.py +110 -0
- package/src/zexus.egg-info/PKG-INFO +2235 -0
- package/src/zexus.egg-info/SOURCES.txt +876 -0
- package/src/zexus.egg-info/dependency_links.txt +1 -0
- package/src/zexus.egg-info/entry_points.txt +3 -0
- package/src/zexus.egg-info/not-zip-safe +1 -0
- package/src/zexus.egg-info/requires.txt +14 -0
- package/src/zexus.egg-info/top_level.txt +2 -0
- package/zexus.json +14 -0
--- /dev/null
+++ b/package/src/zexus/parser/strategy_structural.py
@@ -0,0 +1,1033 @@
+# src/zexus/strategy_structural.py
+from ..zexus_token import *
+from typing import List, Dict
+from ..config import config as zexus_config
+
+class StructuralAnalyzer:
+    """Lightweight structural analyzer that splits token stream into top-level blocks.
+    Special handling for try/catch to avoid merging statements inside try blocks.
+    """
+
+    def __init__(self):
+        # blocks: id -> block_info
+        self.blocks = {}
+
+    def analyze(self, tokens: List):
+        """Analyze tokens and produce a block map used by the context parser.
+
+        block_info keys:
+        - id: unique id
+        - type/subtype: block type (e.g. 'try', 'let', 'print', 'block')
+        - tokens: list of tokens that belong to the block
+        - start_token: token object where block starts
+        - start_index / end_index: indices in original token stream
+        - parent: optional parent block id
+        """
+        self.blocks = {}
+        i = 0
+        block_id = 0
+        n = len(tokens)
+
+        # helper sets for stopping heuristics (mirrors context parser)
+        stop_types = {SEMICOLON, RBRACE}
+
+        # Modifier tokens that should be merged with the following statement
+        modifier_tokens = {PUBLIC, PRIVATE, SEALED, ASYNC, NATIVE, INLINE, SECURE, PURE, VIEW, PAYABLE}
+
+        # Statement starters (keywords that begin a new statement)
+        # NOTE: SEND and RECEIVE removed - they can be used as function calls in expressions
+        statement_starters = {
+            LET, CONST, DATA, PRINT, FOR, IF, WHILE, RETURN, CONTINUE, BREAK, THROW, ACTION, FUNCTION, TRY, EXTERNAL,
+            SCREEN, EXPORT, USE, DEBUG, ENTITY, CONTRACT, VERIFY, PROTECT, SEAL, PERSISTENT, AUDIT,
+            RESTRICT, SANDBOX, TRAIL, GC, BUFFER, SIMD,
+            DEFER, PATTERN, ENUM, STREAM, WATCH,
+            CAPABILITY, GRANT, REVOKE, VALIDATE, SANITIZE, IMMUTABLE,
+            INTERFACE, TYPE_ALIAS, MODULE, PACKAGE, USING,
+            CHANNEL, ATOMIC,
+            # Blockchain keywords
+            LEDGER, STATE, REQUIRE, REVERT, LIMIT
+        }
+
+        while i < n:
+            t = tokens[i]
+            # skip EOF tokens
+            if t.type == EOF:
+                i += 1
+                continue
+
+            # Helper: skip tokens that are empty/whitespace-only literals when building blocks
+            def _is_empty_token(tok):
+                lit = getattr(tok, 'literal', None)
+                return (lit == '' or lit is None) and tok.type != STRING and tok.type != IDENT
+
+            # === FIXED: Enhanced USE statement detection ===
+            if t.type == USE:
+                start_idx = i
+                use_tokens = [t]
+                i += 1
+
+                # Handle use { ... } from ... syntax
+                if i < n and tokens[i].type == LBRACE:
+                    # Collect until closing brace
+                    brace_count = 1
+                    use_tokens.append(tokens[i])
+                    i += 1
+
+                    while i < n and brace_count > 0:
+                        use_tokens.append(tokens[i])
+                        if tokens[i].type == LBRACE:
+                            brace_count += 1
+                        elif tokens[i].type == RBRACE:
+                            brace_count -= 1
+                        i += 1
+
+                    # Look for 'from' and file path
+                    # FIX: Stop if we hit a statement starter, semicolon, or EOF
+                    while i < n and tokens[i].type not in stop_types and tokens[i].type not in statement_starters:
+                        # FIX: Check for FROM token type OR identifier 'from'
+                        is_from = (tokens[i].type == FROM) or (tokens[i].type == IDENT and tokens[i].literal == 'from')
+
+                        if is_from:
+                            # Include 'from' and the following string
+                            use_tokens.append(tokens[i])
+                            i += 1
+                            if i < n and tokens[i].type == STRING:
+                                use_tokens.append(tokens[i])
+                                i += 1
+                            break
+                        else:
+                            use_tokens.append(tokens[i])
+                            i += 1
+                else:
+                    # Simple use 'path' syntax
+                    # FIX: Stop at statement starters to prevent greedy consumption
+                    while i < n and tokens[i].type not in stop_types and tokens[i].type != EOF:
+                        if tokens[i].type in statement_starters:
+                            break
+                        use_tokens.append(tokens[i])
+                        i += 1
+
+                # Create block for this use statement
+                filtered_tokens = [tk for tk in use_tokens if not _is_empty_token(tk)]
+                self.blocks[block_id] = {
+                    'id': block_id,
+                    'type': 'statement',
+                    'subtype': 'use_statement',
+                    'tokens': filtered_tokens,
+                    'start_token': tokens[start_idx],
+                    'start_index': start_idx,
+                    'end_index': i - 1,
+                    'parent': None
+                }
+                block_id += 1
+                continue
+
+            # Enhanced ENTITY statement detection
+            elif t.type == ENTITY:
+                start_idx = i
+                entity_tokens = [t]
+                i += 1
+
+                # Collect entity name
+                if i < n and tokens[i].type == IDENT:
+                    entity_tokens.append(tokens[i])
+                    i += 1
+
+                # Collect until closing brace
+                brace_count = 0
+                while i < n:
+                    # Check if we've found the opening brace
+                    if tokens[i].type == LBRACE:
+                        brace_count = 1
+                        entity_tokens.append(tokens[i])
+                        i += 1
+                        break
+                    entity_tokens.append(tokens[i])
+                    i += 1
+
+                # Now collect until matching closing brace
+                while i < n and brace_count > 0:
+                    entity_tokens.append(tokens[i])
+                    if tokens[i].type == LBRACE:
+                        brace_count += 1
+                    elif tokens[i].type == RBRACE:
+                        brace_count -= 1
+                    i += 1
+
+                # Create block
+                filtered_tokens = [tk for tk in entity_tokens if not _is_empty_token(tk)]
+                self.blocks[block_id] = {
+                    'id': block_id,
+                    'type': 'statement',
+                    'subtype': 'entity_statement',
+                    'tokens': filtered_tokens,
+                    'start_token': tokens[start_idx],
+                    'start_index': start_idx,
+                    'end_index': i - 1,
+                    'parent': None
+                }
+                block_id += 1
+                continue
+
+            # CONTRACT statement detection
+            elif t.type == CONTRACT:
+                start_idx = i
+                contract_tokens = [t]
+                i += 1
+
+                # Collect contract name
+                if i < n and tokens[i].type == IDENT:
+                    contract_tokens.append(tokens[i])
+                    i += 1
+
+                # Collect until closing brace
+                brace_count = 0
+                while i < n:
+                    if tokens[i].type == LBRACE:
+                        brace_count = 1
+                        contract_tokens.append(tokens[i])
+                        i += 1
+                        break
+                    contract_tokens.append(tokens[i])
+                    i += 1
+
+                while i < n and brace_count > 0:
+                    contract_tokens.append(tokens[i])
+                    if tokens[i].type == LBRACE:
+                        brace_count += 1
+                    elif tokens[i].type == RBRACE:
+                        brace_count -= 1
+                    i += 1
+
+                filtered_tokens = [tk for tk in contract_tokens if not _is_empty_token(tk)]
+                self.blocks[block_id] = {
+                    'id': block_id,
+                    'type': 'statement',
+                    'subtype': 'contract_statement',
+                    'tokens': filtered_tokens,
+                    'start_token': tokens[start_idx],
+                    'start_index': start_idx,
+                    'end_index': i - 1,
+                    'parent': None
+                }
+                block_id += 1
+                continue
+
+            # VERIFY statement detection - handle verify { ... }, "message" pattern
+            elif t.type == VERIFY:
+                start_idx = i
+                verify_tokens = [t]
+                i += 1
+
+                # Check if next token is LBRACE (block form)
+                if i < n and tokens[i].type == LBRACE:
+                    # Collect until matching closing brace
+                    brace_count = 1
+                    verify_tokens.append(tokens[i])
+                    i += 1
+
+                    while i < n and brace_count > 0:
+                        verify_tokens.append(tokens[i])
+                        if tokens[i].type == LBRACE:
+                            brace_count += 1
+                        elif tokens[i].type == RBRACE:
+                            brace_count -= 1
+                        i += 1
+
+                    # Check for comma and message after the block
+                    if i < n and tokens[i].type == COMMA:
+                        verify_tokens.append(tokens[i])
+                        i += 1
+
+                        # Collect the message (until semicolon, EOF, or next statement starter)
+                        while i < n and tokens[i].type not in stop_types and tokens[i].type not in statement_starters:
+                            verify_tokens.append(tokens[i])
+                            i += 1
+
+                    # Create block for verify statement
+                    filtered_tokens = [tk for tk in verify_tokens if not _is_empty_token(tk)]
+                    self.blocks[block_id] = {
+                        'id': block_id,
+                        'type': 'statement',
+                        'subtype': VERIFY,
+                        'tokens': filtered_tokens,
+                        'start_token': tokens[start_idx],
+                        'start_index': start_idx,
+                        'end_index': i - 1,
+                        'parent': None
+                    }
+                    block_id += 1
+                    continue
+                else:
+                    # Not a block form, let it fall through to generic handling
+                    i = start_idx
+
+            # Try-catch: collect the try block and catch block TOGETHER
+            if t.type == TRY:
+                start_idx = i
+                # collect try token + following block tokens (brace-aware)
+                try_block_tokens, next_idx = self._collect_brace_block(tokens, i + 1)
+
+                # Check for catch block
+                catch_tokens = []
+                final_idx = next_idx
+
+                if next_idx < n and tokens[next_idx].type == CATCH:
+                    catch_token = tokens[next_idx]
+
+                    # Collect tokens between CATCH and LBRACE (e.g. (e))
+                    pre_brace_tokens = []
+                    curr = next_idx + 1
+                    while curr < n and tokens[curr].type != LBRACE and tokens[curr].type != EOF:
+                        pre_brace_tokens.append(tokens[curr])
+                        curr += 1
+
+                    catch_block_tokens, after_catch_idx = self._collect_brace_block(tokens, curr)
+                    catch_tokens = [catch_token] + pre_brace_tokens + catch_block_tokens
+                    final_idx = after_catch_idx
+
+                # Combine all tokens
+                full_tokens = [t] + try_block_tokens + catch_tokens
+                full_tokens = [tk for tk in full_tokens if not _is_empty_token(tk)]
+
+                # Create the main try-catch block
+                self.blocks[block_id] = {
+                    'id': block_id,
+                    'type': 'statement',
+                    'subtype': 'try_catch_statement',
+                    'tokens': full_tokens,
+                    'start_token': t,
+                    'start_index': start_idx,
+                    'end_index': final_idx - 1,
+                    'parent': None
+                }
+                parent_id = block_id
+                block_id += 1
+                i = final_idx
+
+                # Process inner statements of TRY block
+                inner = try_block_tokens[1:-1] if try_block_tokens and len(try_block_tokens) >= 2 else []
+                inner = [tk for tk in inner if not _is_empty_token(tk)]
+                if inner:
+                    if self._is_map_literal(inner):
+                        # ... map literal handling ...
+                        pass
+                    else:
+                        stmts = self._split_into_statements(inner)
+                        for stmt_tokens in stmts:
+                            self.blocks[block_id] = {
+                                'id': block_id,
+                                'type': 'statement',
+                                'subtype': stmt_tokens[0].type if stmt_tokens else 'unknown',
+                                'tokens': [tk for tk in stmt_tokens if not _is_empty_token(tk)],
+                                'start_token': (stmt_tokens[0] if stmt_tokens else try_block_tokens[0]),
+                                'start_index': start_idx,  # Approximate
+                                'end_index': start_idx,  # Approximate
+                                'parent': parent_id
+                            }
+                            block_id += 1
+
+                # Process inner statements of CATCH block
+                if catch_tokens:
+                    # catch_tokens[0] is CATCH
+                    # catch_tokens[1] might be (error) or {
+                    # We need to find the brace block inside catch_tokens
+                    catch_brace_tokens = []
+                    for k, ctk in enumerate(catch_tokens):
+                        if ctk.type == LBRACE:
+                            catch_brace_tokens = catch_tokens[k:]
+                            break
+
+                    inner_catch = catch_brace_tokens[1:-1] if catch_brace_tokens and len(catch_brace_tokens) >= 2 else []
+                    inner_catch = [tk for tk in inner_catch if not _is_empty_token(tk)]
+
+                    if inner_catch:
+                        stmts = self._split_into_statements(inner_catch)
+                        for stmt_tokens in stmts:
+                            self.blocks[block_id] = {
+                                'id': block_id,
+                                'type': 'statement',
+                                'subtype': stmt_tokens[0].type if stmt_tokens else 'unknown',
+                                'tokens': [tk for tk in stmt_tokens if not _is_empty_token(tk)],
+                                'start_token': (stmt_tokens[0] if stmt_tokens else catch_tokens[0]),
+                                'start_index': next_idx,  # Approximate
+                                'end_index': next_idx,  # Approximate
+                                'parent': parent_id
+                            }
+                            block_id += 1
+                continue
+
+            # Brace-delimited top-level block
+            if t.type == LBRACE:
+                block_tokens, next_idx = self._collect_brace_block(tokens, i)
+                this_block_id = block_id
+                # filter empty tokens before storing
+                filtered_block_tokens = [tk for tk in block_tokens if not _is_empty_token(tk)]
+                self.blocks[this_block_id] = {
+                    'id': this_block_id,
+                    'type': 'block',
+                    'subtype': 'brace_block',
+                    'tokens': filtered_block_tokens,
+                    'start_token': tokens[i],
+                    'start_index': i,
+                    'end_index': next_idx - 1,
+                    'parent': None
+                }
+                block_id += 1
+
+                # split inner tokens into child blocks unless it's a map literal
+                inner = block_tokens[1:-1] if block_tokens and len(block_tokens) >= 2 else []
+                inner = [tk for tk in inner if not _is_empty_token(tk)]
+                if inner:
+                    if self._is_map_literal(inner):
+                        self.blocks[block_id] = {
+                            'id': block_id,
+                            'type': 'map_literal',
+                            'subtype': 'map_literal',
+                            'tokens': [tk for tk in block_tokens if not _is_empty_token(tk)],  # keep full braces
+                            'start_token': block_tokens[0],
+                            'start_index': i,
+                            'end_index': next_idx - 1,
+                            'parent': this_block_id
+                        }
+                        block_id += 1
+                    else:
+                        stmts = self._split_into_statements(inner)
+                        for stmt_tokens in stmts:
+                            self.blocks[block_id] = {
+                                'id': block_id,
+                                'type': 'statement',
+                                'subtype': stmt_tokens[0].type if stmt_tokens else 'unknown',
+                                'tokens': [tk for tk in stmt_tokens if not _is_empty_token(tk)],
+                                'start_token': (stmt_tokens[0] if stmt_tokens else block_tokens[0]),
+                                'start_index': i,
+                                'end_index': i + len(stmt_tokens),
+                                'parent': this_block_id
+                            }
+                            block_id += 1
+
+                i = next_idx
+                continue
+
+            # Modifier tokens: merge with the following statement
+            if t.type in modifier_tokens:
+                start_idx = i
+                modifier_list = []
+
+                # Collect consecutive modifiers
+                while i < n and tokens[i].type in modifier_tokens:
+                    modifier_list.append(tokens[i])
+                    i += 1
+
+                # Skip EOF/whitespace
+                while i < n and tokens[i].type == EOF:
+                    i += 1
+
+                # If followed by a statement starter, continue to statement parsing
+                # by falling through to the elif below
+                if i < n and tokens[i].type in statement_starters:
+                    # Update t to point to the statement starter
+                    t = tokens[i]
+                    # Don't increment i - let the statement parsing handle it
+                else:
+                    # Modifiers without a following statement - this is an async expression!
+                    # Collect the modifiers AND the following expression into one block
+                    # Example: "async producer()" should be one block
+
+                    # Start collecting the expression that follows
+                    j = i
+                    expr_tokens = modifier_list[:]  # Include modifiers in the block
+                    nesting = 0
+                    started_expr = False
+
+                    # Collect tokens for the expression
+                    while j < n:
+                        tj = tokens[j]
+
+                        # Track nesting
+                        if tj.type in {LPAREN, LBRACKET, LBRACE}:
+                            nesting += 1
+                            started_expr = True
+                        elif tj.type in {RPAREN, RBRACKET, RBRACE}:
+                            nesting -= 1
+
+                        expr_tokens.append(tj)
+                        j += 1
+
+                        # Stop at semicolon when at nesting 0
+                        if nesting == 0 and tj.type == SEMICOLON:
+                            break
+
+                        # Stop after completing a simple expression at nesting 0
+                        # (identifier with optional call, or after closing all parens)
+                        if started_expr and nesting == 0:
+                            break
+
+                    # Create block for async expression
+                    self.blocks[block_id] = {
+                        'id': block_id,
+                        'type': 'statement',
+                        'subtype': modifier_list[0].type,  # ASYNC
+                        'tokens': expr_tokens,
+                        'start_token': modifier_list[0],
+                        'start_index': start_idx,
+                        'end_index': j
+                    }
+                    block_id += 1
+                    i = j
+                    # Clear modifier_list so it doesn't affect next statement
+                    del modifier_list
+                    continue
+
+            # Statement-like tokens: try to collect tokens up to a statement boundary
+            # DUAL-MODE DEBUG: skip if debug( ) which is a function call, not statement
+            if t.type in statement_starters and not (t.type == DEBUG and i + 1 < n and tokens[i + 1].type == LPAREN):
+                # Check if we just processed modifiers
+                if 'modifier_list' in locals() and start_idx < i:
+                    # Start from modifier position, include modifiers in stmt_tokens
+                    stmt_start_idx = start_idx
+                    stmt_tokens = modifier_list + [t]
+                    j = i + 1
+                    del modifier_list  # Clear for next iteration
+                else:
+                    stmt_start_idx = i
+                    stmt_tokens = [t]  # Start with the statement starter token
+                    j = i + 1
+                nesting = 0  # Track nesting level for (), [], {}
+                found_brace_block = False  # Did we encounter a { ... } block?
+                found_colon_block = False  # Did we encounter a : (tolerable syntax)?
+                baseline_column = None  # Track indentation for colon-based blocks
+                in_assignment = (t.type in {LET, CONST})  # Are we in an assignment RHS?
+                seen_assign = False  # Track if we've seen the main ASSIGN in LET/CONST
+
+                while j < n:
+                    tj = tokens[j]
+
+                    # Check if this is a statement terminator at nesting 0 BEFORE updating nesting
+                    if nesting == 0 and tj.type in stop_types and not found_colon_block:
+                        break
+
+                    # Track when we see the main ASSIGN in LET/CONST statements
+                    if in_assignment and tj.type == ASSIGN and nesting == 0:
+                        seen_assign = True
+
+                    # CRITICAL FIX: Check if next token starts a new statement (assignment or function call)
+                    # BUT: Don't break if we're in a LET/CONST before the main ASSIGN (type annotation case)
+                    # ALSO: Don't break if we're in the middle of a property access chain (obj.prop = ...)
+                    if nesting == 0 and len(stmt_tokens) > 1:  # Only check if we've collected some tokens
+                        # Pattern 1: IDENT followed by ASSIGN is an assignment statement
+                        # EXCEPT: In LET/CONST before main assign (e.g., "let x : string =" - string is type, not new var)
+                        # EXCEPT: After DOT (property access within same statement: obj.prop = ...)
+                        if tj.type == IDENT and j + 1 < n and tokens[j + 1].type == ASSIGN:
+                            # Check if previous token was DOT (we're in property chain)
+                            prev_token = stmt_tokens[-1] if stmt_tokens else None
+                            is_property_access = prev_token and prev_token.type == DOT
+
+                            # Only break if:
+                            # 1. NOT in property access chain, AND
+                            # 2. (NOT in LET/CONST, OR we've already seen the main assign)
+                            if not is_property_access and (not in_assignment or seen_assign):
+                                break
+
+                        # Pattern 2: IDENT followed by DOT could be start of property assignment (obj.prop = ...)
+                        # This is a NEW statement if we're in LET/CONST and have seen the main assign
+                        elif tj.type == IDENT and j + 1 < n and tokens[j + 1].type == DOT:
+                            # Check if this is on a new line (likely a new statement)
+                            if stmt_tokens:
+                                last_line = stmt_tokens[-1].line
+                                if tj.line > last_line and in_assignment and seen_assign:
+                                    # New line after completed assignment - this is a new statement
+                                    break
+
+                            # Look ahead to see if this becomes a property assignment
+                            # Pattern: IDENT DOT IDENT ASSIGN
+                            if j + 3 < n and tokens[j + 2].type == IDENT and tokens[j + 3].type == ASSIGN:
+                                # This is a property assignment starting!
+                                # Break if we've already completed the LET/CONST
+                                if in_assignment and seen_assign:
+                                    break
+                        # IDENT followed by LPAREN is a function call (already handled below, but listed for clarity)
+
+                    # Detect colon-based block (tolerable syntax for action/function/if/while etc.)
+                    if tj.type == COLON and nesting == 0 and t.type in {ACTION, FUNCTION, IF, WHILE, FOR}:
+                        found_colon_block = True
+                        stmt_tokens.append(tj)
+                        j += 1
+                        # Record the baseline column for dedent detection
+                        # This is the column of the first token AFTER the colon
+                        if j < n:
+                            baseline_column = tokens[j].column if hasattr(tokens[j], 'column') else 1
+                        continue
+
+                    # Track nesting level BEFORE dedent check (so we don't break inside {...} or [...] or (...))
+                    if tj.type in {LPAREN, LBRACE, LBRACKET}:
+                        # Only mark as brace block if NOT already in colon block (to distinguish code blocks from data literals)
+                        if tj.type == LBRACE and not found_colon_block:
+                            found_brace_block = True
+                        nesting += 1
+                    elif tj.type in {RPAREN, RBRACE, RBRACKET}:
+                        nesting -= 1
+
+                    # If we're in a colon block, collect until dedent
+                    if found_colon_block and nesting == 0:
+                        current_column = tj.column if hasattr(tj, 'column') else 1
+                        # Stop if we hit a dedent (token BEFORE baseline column, indicating unindent)
+                        # This works because baseline_column is the indented level (e.g., 6)
+                        # and when we see column 2, that's < 6, so we stop
+                        #print(f"  [DEDENT CHECK] token={tj.type} col={current_column} baseline={baseline_column} nesting={nesting}")
+                        if current_column < baseline_column and tj.type in statement_starters:
+                            #print(f"  [DEDENT BREAK] Breaking on dedent: {tj.type} at col {current_column}")
+                            break
+
+                    # Stop at new statement starters only if we're at nesting 0
+                    # BUT: for LET/CONST, allow function expressions in the RHS
+                    # ALSO: for WHILE/FOR/IF, don't break until we've found the opening brace (control structures need their conditions)
+                    if nesting == 0 and tj.type in statement_starters and not found_colon_block:
+                        # Exception: allow chained method calls
+                        prev = tokens[j-1] if j > 0 else None
+                        if not (prev and prev.type == DOT):
+                            # CRITICAL FIX: For control flow statements (WHILE/FOR/IF), don't break on statement_starters
+                            # until we've found the opening brace. This allows variable names that happen to be keywords
+                            # (like "limit") to appear in the condition without being treated as new statements.
+                            is_control_flow = t.type in {WHILE, FOR, IF}
+                            if is_control_flow and not found_brace_block:
+                                # We're still parsing the condition - don't break yet
+                                pass
+                            else:
+                                # For LET/CONST, allow FUNCTION, SANDBOX, SANITIZE as RHS (expressions)
+                                # Also allow DEBUG when followed by ( for debug(x) function calls in assignments
+                                # Also allow IF when followed by THEN (if-then-else expression)
+                                allow_in_assignment = tj.type in {FUNCTION, SANDBOX, SANITIZE}
+                                allow_debug_call = tj.type == DEBUG and j + 1 < n and tokens[j + 1].type == LPAREN
+                                allow_if_then_else = False
+                                if tj.type == IF:
+                                    # Look ahead for THEN to detect if-then-else expression
+                                    for k in range(j + 1, min(j + 20, n)):  # Look ahead up to 20 tokens
+                                        if tokens[k].type == THEN:
+                                            allow_if_then_else = True
+                                            break
+                                        elif tokens[k].type in {LBRACE, COLON}:
+                                            # Found statement form indicators
+                                            break
+                                if not (in_assignment and (allow_in_assignment or allow_debug_call or allow_if_then_else)):
+                                    break
+
+                    # CRITICAL FIX: Also break on modifier tokens at nesting 0 when followed by statement keywords
+                    # This prevents previous statements from consuming modifiers like "async action foo()"
+                    # But ALLOWS "async foo()" expressions to stay together
+                    if nesting == 0 and tj.type in modifier_tokens and not found_colon_block and len(stmt_tokens) > 0:
+                        # Look ahead to see if modifier is followed by a statement keyword
+                        next_idx = j + 1
+                        while next_idx < n and tokens[next_idx].type in modifier_tokens:
+                            next_idx += 1
+                        if next_idx < n and tokens[next_idx].type in statement_starters:
+                            # Modifier followed by statement keyword - break here
+                            break
+                        # ALSO break if this is an ASYNC modifier followed by IDENT+LPAREN (async expression)
+                        # This prevents LET statements from consuming "async func()" on the next line
+                        if tj.type == ASYNC and next_idx < n and tokens[next_idx].type == IDENT:
+                            if next_idx + 1 < n and tokens[next_idx + 1].type == LPAREN:
+                                # This is "async ident(" - an async expression
+                                break
+                        # Otherwise, continue collecting (async expression case)
+
+                    # FIX: Also break at expression statements (IDENT followed by LPAREN) when we're at nesting 0
+                    # and not in an assignment context
+                    # EXCEPTION: Don't break if we're parsing ACTION/FUNCTION (their names are followed by LPAREN for parameters)
+                    # ALSO EXCEPTION: Don't break if we're parsing WHILE/FOR/IF and haven't found the brace yet (function calls in conditions)
+                    is_control_flow = t.type in {WHILE, FOR, IF}
+                    if nesting == 0 and not in_assignment and not found_colon_block and not found_brace_block and t.type not in {ACTION, FUNCTION} and not (is_control_flow and not found_brace_block):
+                        if tj.type == IDENT and j + 1 < n and tokens[j + 1].type == LPAREN:
+                            # This looks like a function call starting a new expression statement
+                            # Only break if we've already collected some tokens (not the first token)
+                            if len(stmt_tokens) > 1:
+                                break
+
+                    # Always collect tokens
+                    stmt_tokens.append(tj)
+                    j += 1
+
+                    # MODIFIED: For RETURN, CONTINUE, and PRINT, stop after closing parens at nesting 0
+                    # PRINT can have multiple comma-separated arguments inside the parens
+                    if t.type in {RETURN, CONTINUE, PRINT} and nesting == 0 and tj.type == RPAREN:
+                        break
+
+                    # If we just closed a brace block and are back at nesting 0, stop
+                    if found_brace_block and nesting == 0:
+                        # CRITICAL FIX: For IF statements, check if followed by ELSE or ELIF
+                        if t.type == IF:
+                            # Look ahead for else/elif
+                            if j < n and tokens[j].type in {ELSE, ELIF}:
+                                # Found else/elif - continue collecting
+                                found_brace_block = False
+                                continue
+
+                        # REQUIRE tolerance block: the {...} is part of the statement, not separate
+                        # Don't break yet - the brace block is the tolerance logic
+                        if t.type == REQUIRE:
+                            found_brace_block = False
+                            continue
+
+                        break
+
+                # Skip any trailing semicolons
+                while j < n and tokens[j].type == SEMICOLON:
+                    j += 1
+
+                # Create block for the collected statement
+                filtered_stmt_tokens = [tk for tk in stmt_tokens if not _is_empty_token(tk)]
+                if filtered_stmt_tokens:  # Only create block if we have meaningful tokens
+                    self.blocks[block_id] = {
+                        'id': block_id,
+                        'type': 'statement',
+                        'subtype': t.type,
+                        'tokens': filtered_stmt_tokens,
+                        'start_token': tokens[stmt_start_idx],
+                        'start_index': stmt_start_idx,
+                        'end_index': j,
+                        'parent': None
+                    }
+                    block_id += 1
+                i = j
+                continue
+
+            # Fallback: collect a run of tokens until a clear statement boundary
+            # Respect nesting so that constructs inside parentheses/braces aren't split
+            # FIX: Handle expression statements (function calls not assigned to variables)
+            start_idx = i
+            run_tokens = [t]
+            j = i + 1
+            nesting = 0
+
+            # Check if this is a simple function call expression statement: ident(...)
+            is_function_call_start = (t.type == IDENT and j < n and tokens[j].type == LPAREN)
+
+            while j < n:
+                tj = tokens[j]
+
+                # Update nesting for parentheses/brackets/braces
+                if tj.type in {LPAREN, LBRACE, LBRACKET}:
+                    nesting += 1
+                elif tj.type in {RPAREN, RBRACE, RBRACKET}:
+                    if nesting > 0:
+                        nesting -= 1
+
+                # Only consider these as boundaries when at top-level (nesting == 0)
+                if nesting == 0:
+                    # NEW: Line-based statement boundary detection
+                    # If we have balanced parens and the next token is on a new line and could start a new statement, create boundary
+                    last_line = run_tokens[-1].line if run_tokens else 0
+                    if tj.line > last_line:
+                        # Check if we have balanced parens in run_tokens (statement is syntactically complete)
+                        paren_count = sum(1 if tok.type == LPAREN else -1 if tok.type == RPAREN else 0 for tok in run_tokens)
+                        if paren_count == 0:
+                            # Check if run_tokens contains an assignment (this is a complete assignment statement)
+                            has_assign = any(tok.type == ASSIGN for tok in run_tokens)
+                            if has_assign:
+                                # Current token is on a new line and could start a new statement
+                                # Check if it's IDENT (could be method call, function call, or property access)
+                                if tj.type == IDENT:
+                                    # CRITICAL FIX: Don't break if the previous token was ASSIGN
+                                    # This means the IDENT is the RHS value, not a new statement
+                                    prev_tok = run_tokens[-1] if run_tokens else None
+                                    if prev_tok and prev_tok.type == ASSIGN:
+                                        # This IDENT is the RHS of the assignment, not a new statement
+                                        pass  # Don't break, continue collecting
+                                    else:
+                                        # This is likely a new statement on a new line
+                                        # Don't add tj to run_tokens, break here
+                                        break
+
+                    # Check if current token (tj) starts a new statement
+                    # CRITICAL FIX: IDENT followed by ASSIGN is an assignment statement
+                    # BUT: Don't treat it as a new statement if the previous token was DOT (property access)
+                    is_assignment_start = False
+                    if tj.type == IDENT and j + 1 < n and tokens[j + 1].type == ASSIGN:
+                        # Check if previous token was DOT (part of property access)
+                        prev_is_dot = (j > 0 and tokens[j - 1].type == DOT)
+                        if not prev_is_dot:
+                            is_assignment_start = True
+                    # Pattern 2: IDENT followed by DOT could be property assignment (obj.prop = ...)
+                    elif tj.type == IDENT and j + 1 < n and tokens[j + 1].type == DOT:
+                        # Look ahead: IDENT DOT IDENT ASSIGN is a property assignment
+                        if j + 3 < n and tokens[j + 2].type == IDENT and tokens[j + 3].type == ASSIGN:
+                            is_assignment_start = True
+
+                    is_new_statement = (
+                        tj.type in stop_types or
+                        tj.type in statement_starters or
+                        tj.type == LBRACE or
+                        tj.type == TRY or
+                        is_assignment_start
+                    )
+                    if is_new_statement and j > start_idx:  # Only break if we've collected at least one token
+                        break
+
+                # FIX: If this is a function call and nesting just became 0 (closed all parens),
+                # check if next token looks like start of new statement
+                if is_function_call_start and nesting == 0 and j > start_idx + 1:
+                    # We've closed the function call parens
+                    # Check if next token starts a new statement (IDENT followed by LPAREN, or a statement keyword)
+                    next_idx = j + 1
+                    # Skip semicolons
+                    while next_idx < n and tokens[next_idx].type == SEMICOLON:
+                        next_idx += 1
+                    if next_idx < n:
+                        next_tok = tokens[next_idx]
+                        # If next token is a statement starter OR an identifier followed by (, it's a new statement
+                        if next_tok.type in statement_starters:
+                            run_tokens.append(tj)
+                            j += 1
+                            break
+                        elif next_tok.type == IDENT and next_idx + 1 < n and tokens[next_idx + 1].type == LPAREN:
+                            # Next statement is also a function call
+                            run_tokens.append(tj)
+                            j += 1
+                            break
+
+                run_tokens.append(tj)
+                j += 1
+
+            # Skip trailing semicolons (they're statement terminators, not part of the statement)
+            while j < n and tokens[j].type == SEMICOLON:
+                j += 1
+
+            filtered_run_tokens = [tk for tk in run_tokens if not _is_empty_token(tk)]
+            if filtered_run_tokens:  # Only create block if we have meaningful tokens
+                self.blocks[block_id] = {
+                    'id': block_id,
+                    'type': 'statement',
+                    'subtype': (filtered_run_tokens[0].type if filtered_run_tokens else (run_tokens[0].type if run_tokens else 'token_run')),
+                    'tokens': filtered_run_tokens,
+                    'start_token': (filtered_run_tokens[0] if filtered_run_tokens else (run_tokens[0] if run_tokens else t)),
+                    'start_index': start_idx,
+                    'end_index': j - 1,
+                    'parent': None
+                }
+                block_id += 1
+            i = j
+
+        return self.blocks
+
+    def _collect_brace_block(self, tokens: List, start_index: int):
+        """Collect tokens comprising a brace-delimited block.
+        start_index should point at the token immediately after the 'try' or at a LBRACE.
+        Returns (collected_tokens_including_braces, next_index_after_block)
+        """
+        n = len(tokens)
+        # find the opening brace if start_index points to something else
+        i = start_index
+        # if the next token is not a LBRACE, try to find it
+        if i < n and tokens[i].type != LBRACE:
+            # scan forward to first LBRACE or EOF
+            while i < n and tokens[i].type != LBRACE and tokens[i].type != EOF:
+                i += 1
+        if i >= n or tokens[i].type != LBRACE:
+            # no brace, return empty block
+            return [], start_index
+
+        # i points to LBRACE
+        depth = 0
+        collected = []
+        while i < n:
+            tok = tokens[i]
+            collected.append(tok)
+            if tok.type == LBRACE:
+                depth += 1
+            elif tok.type == RBRACE:
+                depth -= 1
+                if depth == 0:
+                    return collected, i + 1
+            i += 1
+
+        # Reached EOF without closing brace - return what we have (tolerant)
+        return collected, i
+
+    def _split_into_statements(self, tokens: List):
+        """Split a flat list of tokens into a list of statement token lists using statement boundaries."""
+        results = []
+        if not tokens:
+            return results
+
+        stop_types = {SEMICOLON, RBRACE}
+        # NOTE: SEND and RECEIVE removed - they can be used as function calls in expressions
+        statement_starters = {
+            LET, CONST, DATA, PRINT, FOR, IF, WHILE, RETURN, CONTINUE, BREAK, THROW, ACTION, FUNCTION, TRY, EXTERNAL,
+            SCREEN, EXPORT, USE, DEBUG, ENTITY, CONTRACT, VERIFY, PROTECT, SEAL, AUDIT,
+            RESTRICT, SANDBOX, TRAIL, NATIVE, GC, INLINE, BUFFER, SIMD,
+            DEFER, PATTERN, ENUM, STREAM, WATCH,
+            CAPABILITY, GRANT, REVOKE, VALIDATE, SANITIZE, IMMUTABLE,
+            INTERFACE, TYPE_ALIAS, MODULE, PACKAGE, USING,
+            CHANNEL, ATOMIC, ASYNC  # Added ASYNC to recognize async expressions as statement boundaries
+        }
+
+        cur = []
+        i = 0
+        n = len(tokens)
+
+        while i < n:
+            t = tokens[i]
+
+            # Enhanced use statement detection (with braces) in inner blocks
+            if t.type == USE:
+                if cur:  # Finish current statement
+                    results.append(cur)
+                    cur = []
+
+                # Collect the entire use statement
+                use_tokens = [t]
+                i += 1
+                brace_count = 0
+
+                # FIX: Check for statement starters here too to be safe
+                while i < n:
+                    if brace_count == 0 and tokens[i].type in statement_starters:
+                        break
+
+                    use_tokens.append(tokens[i])
+                    if tokens[i].type == LBRACE:
+                        brace_count += 1
+                    elif tokens[i].type == RBRACE:
+                        brace_count -= 1
+                        if brace_count == 0:
+                            # Look for 'from' after closing brace
+                            # FIX: Check FROM token type
+                            if i + 1 < n and (tokens[i + 1].type == FROM or (tokens[i + 1].type == IDENT and tokens[i + 1].literal == 'from')):
+                                use_tokens.append(tokens[i + 1])
+                                i += 1
+                                if i + 1 < n and tokens[i + 1].type == STRING:
+                                    use_tokens.append(tokens[i + 1])
+                                    i += 1
+                            break
+                    elif brace_count == 0 and tokens[i].type in stop_types:
+                        break
+                    i += 1
+
+                results.append(use_tokens)
+                i += 1
+                continue
+
+            # Entity/Contract statement detection (generic brace collector)
+            if t.type == ENTITY or t.type == CONTRACT:
+                if cur:
+                    results.append(cur)
+                    cur = []
+
+                # Collect until closing brace
+                entity_tokens = [t]
+                i += 1
+                brace_count = 0
+
+                while i < n:
+                    entity_tokens.append(tokens[i])
+                    if tokens[i].type == LBRACE:
+                        brace_count += 1
+                    elif tokens[i].type == RBRACE:
+                        brace_count -= 1
+                        if brace_count == 0:
+                            break
+                    i += 1
+
+                results.append(entity_tokens)
+                i += 1
+                continue
+
+            # start of a statement
+            if not cur:
+                cur.append(t)
+                i += 1
+                continue
+
+            # accumulate until boundary
+            if t.type in stop_types:
+                # end current statement (do not include terminator)
+                results.append(cur)
+                cur = []
+                i += 1
+                continue
+
+            if t.type in statement_starters:
+                # boundary: emit current and start new
+                results.append(cur)
+                cur = [t]
+                i += 1
+                continue
+
+            # Assignment RHS vs function-call heuristic:
+            # if current token is IDENT followed by LPAREN and the previous token was RPAREN (end of prev call), new statement
+            if t.type == IDENT and i + 1 < n and tokens[i + 1].type == LPAREN:
+                # New heuristic: if previous token was RPAREN (completing a call), this is likely a new statement
+                # BUT: if the token before RPAREN is DOT+IDENT (method call), don't create boundary
+                if cur and cur[-1].type == RPAREN:
+                    # Check if this is a method call continuation (e.g., obj.method1().method2())
+                    # Look for pattern: ... DOT IDENT LPAREN ... RPAREN <-- we are here
+                    # Find the LPAREN that matches this RPAREN
+                    paren_depth = 0
+                    is_method_chain = False
+                    for j in range(len(cur) - 1, -1, -1):
+                        if cur[j].type == RPAREN:
+                            paren_depth += 1
+                        elif cur[j].type == LPAREN:
+                            if paren_depth == 0:
+                                # This is the matching LPAREN
+                                # Check if it's preceded by DOT+IDENT (method call)
+                                if j >= 2 and cur[j-1].type == IDENT and cur[j-2].type == DOT:
+                                    is_method_chain = True
+                                break
+                            else:
+                                paren_depth -= 1
+
+                    if not is_method_chain:
+                        # Previous call is complete, and next is IDENT+LPAREN, so new statement
+                        results.append(cur)
+                        cur = [t]
+                        i += 1
+                        continue
+
+            # NEW: Check for line-based statement boundaries
+            # If we have balanced parens and the next token is on a new line and could start a new statement, create boundary
+            if cur:
+                # Check if parens are balanced
+                paren_count = sum(1 if tok.type == LPAREN else -1 if tok.type == RPAREN else 0 for tok in cur)
+                if paren_count == 0:
+                    # Check if there's an ASSIGN in cur (this is a complete assignment statement)
+                    has_assign = any(tok.type == ASSIGN for tok in cur)
+                    if has_assign:
+                        # Check if current token is on a new line
+                        last_line = cur[-1].line if cur else 0
+                        if t.line > last_line:
+                            # Check if current token could start a new statement
+                            # IDENT followed by DOT or LPAREN could be a new statement
+                            if t.type == IDENT:
+                                # This is likely a new statement on a new line
+                                results.append(cur)
+                                cur = [t]
+                                i += 1
+                                continue
+
+            cur.append(t)
+            i += 1
+
+        if cur:
+            results.append(cur)
+        return results
+
+    def _is_map_literal(self, inner_tokens: List):
+        """Detect simple map/object literal pattern: STRING/IDENT followed by COLON somewhere early."""
+        if not inner_tokens:
+            return False
+        # look at the first few tokens: key(:)value pairs
+        for i in range(min(len(inner_tokens)-1, 8)):
+            if inner_tokens[i].type in (STRING, IDENT) and i+1 < len(inner_tokens) and inner_tokens[i+1].type == COLON:
+                return True
+        return False
+
+    def print_structure(self):
+        print("🔎 Structural Analyzer - Blocks:")
+        for bid, info in self.blocks.items():
+            start = info.get('start_index')
+            end = info.get('end_index')
+            ttype = info.get('type')
+            subtype = info.get('subtype')
+            token_literals = [t.literal for t in info.get('tokens', []) if getattr(t, 'literal', None)]
+            print(f"  [{bid}] {ttype}/{subtype} @ {start}-{end}: {token_literals}")