zexus 1.6.2 → 1.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -5
- package/package.json +1 -1
- package/src/zexus/__init__.py +1 -1
- package/src/zexus/access_control_system/__init__.py +38 -0
- package/src/zexus/access_control_system/access_control.py +237 -0
- package/src/zexus/cli/main.py +1 -1
- package/src/zexus/cli/zpm.py +1 -1
- package/src/zexus/debug_sanitizer.py +250 -0
- package/src/zexus/error_reporter.py +22 -2
- package/src/zexus/evaluator/core.py +7 -2
- package/src/zexus/evaluator/expressions.py +116 -57
- package/src/zexus/evaluator/functions.py +586 -170
- package/src/zexus/evaluator/resource_limiter.py +291 -0
- package/src/zexus/evaluator/statements.py +31 -3
- package/src/zexus/evaluator/utils.py +12 -6
- package/src/zexus/lsp/server.py +1 -1
- package/src/zexus/object.py +21 -2
- package/src/zexus/parser/parser.py +39 -1
- package/src/zexus/parser/strategy_context.py +29 -4
- package/src/zexus/parser/strategy_structural.py +12 -4
- package/src/zexus/persistence.py +105 -6
- package/src/zexus/security_enforcement.py +237 -0
- package/src/zexus/stdlib/fs.py +120 -22
- package/src/zexus/zpm/package_manager.py +1 -1
- package/src/zexus.egg-info/PKG-INFO +499 -13
- package/src/zexus.egg-info/SOURCES.txt +242 -152
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
# src/zexus/evaluator/resource_limiter.py
|
|
2
|
+
"""
|
|
3
|
+
Resource Limiter for Zexus Interpreter
|
|
4
|
+
|
|
5
|
+
Prevents resource exhaustion attacks by enforcing limits on:
|
|
6
|
+
- Loop iterations (prevents infinite loops)
|
|
7
|
+
- Execution time (prevents DoS via slow operations)
|
|
8
|
+
- Call stack depth (prevents stack overflow)
|
|
9
|
+
- Memory usage (prevents memory exhaustion)
|
|
10
|
+
|
|
11
|
+
Security Fix #7: Resource Limits
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import time
|
|
15
|
+
import signal
|
|
16
|
+
import sys
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ResourceError(Exception):
    """Base error signalling that an interpreter resource limit was exceeded."""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class TimeoutError(ResourceError):
    """Error signalling that the execution time budget was exceeded.

    NOTE: inside this module the name shadows Python's built-in
    ``TimeoutError``; code that needs the built-in must reach it through
    ``builtins.TimeoutError``.
    """
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ResourceLimiter:
    """
    Enforces resource limits during program execution.

    Limits:
    - max_iterations: Maximum loop iterations across all loops (default: 1,000,000)
    - timeout_seconds: Maximum execution time (default: 30 seconds)
    - max_call_depth: Maximum call stack depth (default: 100)
    - max_memory_mb: Maximum memory usage (default: 500 MB, not enforced by default)

    Usage:
        limiter = ResourceLimiter(max_iterations=100000, timeout_seconds=10)
        limiter.start()  # Start timeout timer

        # In loops:
        limiter.check_iterations()

        # On function calls:
        limiter.enter_call()
        # ... function body ...
        limiter.exit_call()

        limiter.stop()  # Stop timeout timer
    """

    # Default limits
    DEFAULT_MAX_ITERATIONS = 1_000_000  # 1 million iterations
    DEFAULT_TIMEOUT_SECONDS = 30        # 30 seconds
    DEFAULT_MAX_CALL_DEPTH = 100        # 100 nested calls (Python interpreter uses many stack frames per Zexus call)
    DEFAULT_MAX_MEMORY_MB = 500         # 500 MB (not enforced by default)

    def __init__(self,
                 max_iterations=None,
                 timeout_seconds=None,
                 max_call_depth=None,
                 max_memory_mb=None,
                 enable_timeout=False,
                 enable_memory_check=False):
        """
        Initialize resource limiter.

        Any limit passed as a falsy value (``None`` or ``0``) falls back to
        the corresponding class default.

        Args:
            max_iterations: Maximum total loop iterations (default: 1,000,000)
            timeout_seconds: Maximum execution time in seconds (default: 30)
            max_call_depth: Maximum call stack depth (default: 100)
            max_memory_mb: Maximum memory usage in MB (default: 500)
            enable_timeout: Arm a SIGALRM-based timer in start() (default:
                False; only effective where the signal module has SIGALRM)
            enable_memory_check: Enable memory checking (default: False,
                requires psutil)
        """
        self.max_iterations = max_iterations or self.DEFAULT_MAX_ITERATIONS
        self.timeout_seconds = timeout_seconds or self.DEFAULT_TIMEOUT_SECONDS
        self.max_call_depth = max_call_depth or self.DEFAULT_MAX_CALL_DEPTH
        self.max_memory_mb = max_memory_mb or self.DEFAULT_MAX_MEMORY_MB

        # Feature flags
        self.enable_timeout = enable_timeout
        self.enable_memory_check = enable_memory_check

        # Runtime counters
        self.iteration_count = 0
        self.call_depth = 0
        self.start_time = None
        self.timeout_handler = None

        # Memory checking (optional, requires psutil)
        self.psutil = None
        self.process = None
        self.psutil_available = False
        if enable_memory_check:
            try:
                import psutil
            except ImportError:
                print("⚠️ Warning: psutil not available, memory checking disabled")
                self.enable_memory_check = False
            else:
                self.psutil = psutil
                self.process = psutil.Process()
                self.psutil_available = True

    def start(self):
        """
        Start resource monitoring (timeout timer, etc.)
        Should be called at the beginning of script execution.

        Resets the iteration and call-depth counters and records the start
        time used by check_timeout() and get_stats().
        """
        self.start_time = time.time()
        self.iteration_count = 0
        self.call_depth = 0

        # The alarm-based timer only exists on platforms exposing SIGALRM.
        if self.enable_timeout and hasattr(signal, 'SIGALRM'):
            self._set_timeout_alarm()

    def stop(self):
        """
        Stop resource monitoring and cleanup.
        Should be called at the end of script execution.
        """
        if self.enable_timeout and hasattr(signal, 'SIGALRM'):
            # A zero interval disarms the real-time timer armed by
            # _set_timeout_alarm().
            signal.setitimer(signal.ITIMER_REAL, 0)

    def reset(self):
        """Reset the iteration counter, call depth and start time
        (useful for multiple script executions)."""
        self.iteration_count = 0
        self.call_depth = 0
        self.start_time = None

    def check_iterations(self):
        """
        Count one loop iteration and check the iteration limit.
        Should be called at the beginning of each loop iteration.

        Raises:
            ResourceError: If iteration limit exceeded
        """
        self.iteration_count += 1

        if self.iteration_count > self.max_iterations:
            raise ResourceError(
                f"Iteration limit exceeded: {self.max_iterations:,} iterations\n"
                "This prevents infinite loops and resource exhaustion.\n\n"
                "Suggestion: Review your loop conditions or increase the limit with:\n"
                " zx-run --max-iterations 10000000 script.zx"
            )

    def check_timeout(self):
        """
        Check if execution timeout has been exceeded.
        Should be called periodically during long operations.

        Does nothing until start() has recorded a start time.

        Raises:
            TimeoutError: If timeout exceeded
        """
        if self.start_time is None:
            return

        elapsed = time.time() - self.start_time
        if elapsed > self.timeout_seconds:
            raise TimeoutError(
                f"Execution timeout exceeded: {elapsed:.2f}s > {self.timeout_seconds}s\n"
                "This prevents denial-of-service via slow operations.\n\n"
                "Suggestion: Optimize your code or increase timeout with:\n"
                " zx-run --timeout 60 script.zx"
            )

    def check_memory(self):
        """
        Check if memory limit has been exceeded.
        Should be called periodically (e.g., every 1000 iterations).

        A failure to *measure* memory is reported as a warning and ignored;
        an exceeded limit always raises.

        Raises:
            ResourceError: If memory limit exceeded
        """
        if not self.enable_memory_check or not self.psutil_available:
            return

        # Only the measurement is best-effort. The limit check below must sit
        # outside this try block, otherwise the broad handler would swallow
        # the ResourceError and the limit would never be enforced.
        try:
            memory_mb = self.process.memory_info().rss / 1024 / 1024
        except Exception as e:
            # Don't crash on memory check failure
            print(f"⚠️ Warning: Memory check failed: {e}")
            return

        if memory_mb > self.max_memory_mb:
            raise ResourceError(
                f"Memory limit exceeded: {memory_mb:.2f}MB > {self.max_memory_mb}MB\n"
                "This prevents memory exhaustion attacks.\n\n"
                "Suggestion: Reduce memory usage or increase limit with:\n"
                " zx-run --max-memory 1000 script.zx"
            )

    def enter_call(self, function_name=None):
        """
        Called when entering a function/action call.
        Tracks call depth to prevent stack overflow.

        The depth counter is incremented before the limit is checked, so it
        stays incremented when the error is raised.

        Args:
            function_name: Optional name of function being called

        Raises:
            ResourceError: If call depth limit exceeded
        """
        self.call_depth += 1

        if self.call_depth > self.max_call_depth:
            func_info = f" ({function_name})" if function_name else ""
            raise ResourceError(
                f"Call depth limit exceeded: {self.max_call_depth} nested calls{func_info}\n"
                "This prevents stack overflow from excessive recursion.\n\n"
                "Suggestion: Review your recursion or increase limit with:\n"
                " zx-run --max-call-depth 5000 script.zx"
            )

    def exit_call(self):
        """
        Called when exiting a function/action call.
        Decrements call depth counter (never below zero).
        """
        if self.call_depth > 0:
            self.call_depth -= 1

    def get_stats(self):
        """
        Get current resource usage statistics.

        Returns:
            dict: Iteration and call-depth figures always; elapsed-time
            figures once start() has run; memory figures when memory
            checking is active and the measurement succeeds.
        """
        stats = {
            'iterations': self.iteration_count,
            'max_iterations': self.max_iterations,
            'iteration_percent': (self.iteration_count / self.max_iterations) * 100,
            'call_depth': self.call_depth,
            'max_call_depth': self.max_call_depth,
        }

        if self.start_time is not None:
            elapsed = time.time() - self.start_time
            stats['elapsed_seconds'] = elapsed
            stats['timeout_seconds'] = self.timeout_seconds
            stats['timeout_percent'] = (elapsed / self.timeout_seconds) * 100

        if self.enable_memory_check and self.psutil_available:
            try:
                memory_mb = self.process.memory_info().rss / 1024 / 1024
                stats['memory_mb'] = memory_mb
                stats['max_memory_mb'] = self.max_memory_mb
                stats['memory_percent'] = (memory_mb / self.max_memory_mb) * 100
            except Exception:
                # Statistics are informational only: omit the memory figures
                # rather than fail. (Not a bare except, so KeyboardInterrupt
                # and SystemExit still propagate.)
                pass

        return stats

    def _set_timeout_alarm(self):
        """
        Install the SIGALRM handler and arm the timer (Unix only).
        This is automatically called by start() if enable_timeout is True.

        The installed handler is left in place after stop(); only the timer
        is disarmed.
        """
        def timeout_handler(signum, frame):
            raise TimeoutError(
                f"Execution timeout: {self.timeout_seconds}s exceeded\n"
                "This prevents denial-of-service via slow operations.\n\n"
                "Suggestion: Optimize your code or increase timeout with:\n"
                " zx-run --timeout 60 script.zx"
            )

        self.timeout_handler = timeout_handler
        signal.signal(signal.SIGALRM, timeout_handler)
        # setitimer accepts fractional seconds, whereas signal.alarm() raises
        # TypeError for a non-integer timeout. ITIMER_REAL delivers SIGALRM.
        signal.setitimer(signal.ITIMER_REAL, self.timeout_seconds)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
# Default global limiter (can be overridden).
# Starts out unset; get_default_limiter() creates a ResourceLimiter on first
# use and set_default_limiter() replaces whatever is stored here.
_default_limiter = None
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def get_default_limiter():
    """Return the module-wide limiter, building one with default limits on first use."""
    global _default_limiter
    limiter = _default_limiter
    if limiter is None:
        # Lazy creation: nothing is constructed until somebody asks for it.
        limiter = _default_limiter = ResourceLimiter()
    return limiter
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def set_default_limiter(limiter):
    """Install ``limiter`` as the module-wide limiter returned by get_default_limiter()."""
    global _default_limiter
    _default_limiter = limiter
|
|
@@ -877,6 +877,16 @@ class StatementEvaluatorMixin:
|
|
|
877
877
|
def eval_while_statement(self, node, env, stack_trace):
|
|
878
878
|
result = NULL
|
|
879
879
|
while True:
|
|
880
|
+
# Resource limit check (Security Fix #7)
|
|
881
|
+
try:
|
|
882
|
+
self.resource_limiter.check_iterations()
|
|
883
|
+
except Exception as e:
|
|
884
|
+
# Convert ResourceError to EvaluationError
|
|
885
|
+
from ..evaluator.resource_limiter import ResourceError, TimeoutError
|
|
886
|
+
if isinstance(e, (ResourceError, TimeoutError)):
|
|
887
|
+
return EvaluationError(str(e))
|
|
888
|
+
raise # Re-raise if not a resource error
|
|
889
|
+
|
|
880
890
|
cond = self.eval_node(node.condition, env, stack_trace)
|
|
881
891
|
if is_error(cond):
|
|
882
892
|
return cond
|
|
@@ -904,6 +914,16 @@ class StatementEvaluatorMixin:
|
|
|
904
914
|
|
|
905
915
|
result = NULL
|
|
906
916
|
for item in iterable.elements:
|
|
917
|
+
# Resource limit check (Security Fix #7)
|
|
918
|
+
try:
|
|
919
|
+
self.resource_limiter.check_iterations()
|
|
920
|
+
except Exception as e:
|
|
921
|
+
# Convert ResourceError to EvaluationError
|
|
922
|
+
from ..evaluator.resource_limiter import ResourceError, TimeoutError
|
|
923
|
+
if isinstance(e, (ResourceError, TimeoutError)):
|
|
924
|
+
return EvaluationError(str(e))
|
|
925
|
+
raise # Re-raise if not a resource error
|
|
926
|
+
|
|
907
927
|
env.set(node.item.value, item)
|
|
908
928
|
result = self.eval_node(node.body, env, stack_trace)
|
|
909
929
|
if isinstance(result, ReturnValue):
|
|
@@ -3115,12 +3135,14 @@ class StatementEvaluatorMixin:
|
|
|
3115
3135
|
else:
|
|
3116
3136
|
data_str = str(data)
|
|
3117
3137
|
|
|
3118
|
-
# Determine encoding
|
|
3138
|
+
# Determine encoding/context
|
|
3119
3139
|
encoding = Encoding.HTML # Default
|
|
3140
|
+
context_name = "html" # For marking sanitized_for
|
|
3120
3141
|
if node.encoding:
|
|
3121
3142
|
enc_val = self.eval_node(node.encoding, env, stack_trace)
|
|
3122
3143
|
if hasattr(enc_val, 'value'):
|
|
3123
3144
|
enc_name = enc_val.value.upper()
|
|
3145
|
+
context_name = enc_val.value.lower()
|
|
3124
3146
|
try:
|
|
3125
3147
|
encoding = Encoding[enc_name]
|
|
3126
3148
|
except KeyError:
|
|
@@ -3130,10 +3152,16 @@ class StatementEvaluatorMixin:
|
|
|
3130
3152
|
try:
|
|
3131
3153
|
sanitized = Sanitizer.sanitize_string(data_str, encoding)
|
|
3132
3154
|
debug_log("eval_sanitize_statement", f"Sanitized {len(data_str)} chars with {encoding.value}")
|
|
3133
|
-
|
|
3155
|
+
result = String(sanitized)
|
|
3156
|
+
# SECURITY ENFORCEMENT: Mark as sanitized for this context
|
|
3157
|
+
result.mark_sanitized(context_name)
|
|
3158
|
+
return result
|
|
3134
3159
|
except Exception as e:
|
|
3135
3160
|
debug_log("eval_sanitize_statement", f"Sanitization error: {e}")
|
|
3136
|
-
|
|
3161
|
+
# Even on error, mark as sanitized to prevent double-sanitization loops
|
|
3162
|
+
result = String(data_str)
|
|
3163
|
+
result.mark_sanitized(context_name)
|
|
3164
|
+
return result
|
|
3137
3165
|
|
|
3138
3166
|
def eval_inject_statement(self, node, env, stack_trace):
|
|
3139
3167
|
"""Evaluate inject statement - full dependency injection with mode-aware resolution."""
|
|
@@ -89,20 +89,26 @@ def _zexus_to_python(value):
|
|
|
89
89
|
else:
|
|
90
90
|
return str(value)
|
|
91
91
|
|
|
92
|
-
def _python_to_zexus(value):
|
|
93
|
-
"""Convert Python native types to Zexus objects
|
|
92
|
+
def _python_to_zexus(value, mark_untrusted=False):
|
|
93
|
+
"""Convert Python native types to Zexus objects
|
|
94
|
+
|
|
95
|
+
Args:
|
|
96
|
+
value: Python value to convert
|
|
97
|
+
mark_untrusted: If True, mark strings as untrusted (external data)
|
|
98
|
+
"""
|
|
94
99
|
from ..object import Map, List, String, Integer, Float, Boolean as BooleanObj
|
|
95
100
|
|
|
96
101
|
if isinstance(value, dict):
|
|
97
102
|
pairs = {}
|
|
98
103
|
for k, v in value.items():
|
|
99
|
-
pairs[k] = _python_to_zexus(v)
|
|
104
|
+
pairs[k] = _python_to_zexus(v, mark_untrusted)
|
|
100
105
|
return Map(pairs)
|
|
101
106
|
elif isinstance(value, list):
|
|
102
|
-
zexus_list = List([_python_to_zexus(item) for item in value])
|
|
107
|
+
zexus_list = List([_python_to_zexus(item, mark_untrusted) for item in value])
|
|
103
108
|
return zexus_list
|
|
104
109
|
elif isinstance(value, str):
|
|
105
|
-
|
|
110
|
+
# Mark strings as untrusted if from external source (HTTP, DB, etc.)
|
|
111
|
+
return String(value, is_trusted=not mark_untrusted)
|
|
106
112
|
elif isinstance(value, int):
|
|
107
113
|
return Integer(value)
|
|
108
114
|
elif isinstance(value, float):
|
|
@@ -112,7 +118,7 @@ def _python_to_zexus(value):
|
|
|
112
118
|
elif value is None:
|
|
113
119
|
return NULL
|
|
114
120
|
else:
|
|
115
|
-
return String(str(value))
|
|
121
|
+
return String(str(value), is_trusted=not mark_untrusted)
|
|
116
122
|
|
|
117
123
|
def _to_str(obj):
|
|
118
124
|
"""Helper to convert Zexus object to string"""
|
package/src/zexus/lsp/server.py
CHANGED
|
@@ -87,7 +87,7 @@ if PYGLS_AVAILABLE:
|
|
|
87
87
|
"""Zexus Language Server implementation."""
|
|
88
88
|
|
|
89
89
|
def __init__(self):
|
|
90
|
-
super().__init__('zexus-language-server', 'v1.6.
|
|
90
|
+
super().__init__('zexus-language-server', 'v1.6.3')
|
|
91
91
|
self.completion_provider = CompletionProvider()
|
|
92
92
|
self.symbol_provider = SymbolProvider()
|
|
93
93
|
self.hover_provider = HoverProvider()
|
package/src/zexus/object.py
CHANGED
|
@@ -31,7 +31,12 @@ class Null(Object):
|
|
|
31
31
|
def type(self): return "NULL"
|
|
32
32
|
|
|
33
33
|
class String(Object):
|
|
34
|
-
def __init__(self, value
|
|
34
|
+
def __init__(self, value, sanitized_for=None, is_trusted=False):
|
|
35
|
+
self.value = value
|
|
36
|
+
# Track sanitization status for security enforcement
|
|
37
|
+
self.sanitized_for = sanitized_for # None, 'sql', 'html', 'url', 'shell', etc.
|
|
38
|
+
self.is_trusted = is_trusted # True for literals, False for external input
|
|
39
|
+
|
|
35
40
|
def inspect(self): return self.value
|
|
36
41
|
def type(self): return "STRING"
|
|
37
42
|
def __str__(self): return self.value
|
|
@@ -43,6 +48,19 @@ class String(Object):
|
|
|
43
48
|
def __hash__(self):
|
|
44
49
|
"""Enable String objects to be used as dict keys"""
|
|
45
50
|
return hash(self.value)
|
|
51
|
+
|
|
52
|
+
def mark_sanitized(self, context):
    """Record ``context`` as the context this string was sanitized for.

    Overwrites any previously recorded context and returns ``self`` so the
    call can be chained.
    """
    setattr(self, "sanitized_for", context)
    return self
|
|
56
|
+
|
|
57
|
+
def is_safe_for(self, context):
    """Tell whether this string may be used in ``context``.

    Trusted strings are safe everywhere; any other string is safe only in
    the exact context it was last marked as sanitized for.
    """
    return True if self.is_trusted else self.sanitized_for == context
|
|
46
64
|
|
|
47
65
|
class List(Object):
|
|
48
66
|
def __init__(self, elements): self.elements = elements
|
|
@@ -425,7 +443,8 @@ class File(Object):
|
|
|
425
443
|
if isinstance(path, String):
|
|
426
444
|
path = path.value
|
|
427
445
|
with open(path, 'r', encoding='utf-8') as f:
|
|
428
|
-
return
|
|
446
|
+
# Files are external data sources - return untrusted strings
|
|
447
|
+
return String(f.read(), is_trusted=False)
|
|
429
448
|
except Exception as e:
|
|
430
449
|
return EvaluationError(f"File read error: {str(e)}")
|
|
431
450
|
|
|
@@ -82,6 +82,7 @@ class UltimateParser:
|
|
|
82
82
|
TRY: self.parse_try_catch_statement,
|
|
83
83
|
EXTERNAL: self.parse_external_declaration,
|
|
84
84
|
ASYNC: self.parse_async_expression, # Support async <expression>
|
|
85
|
+
SANITIZE: self.parse_sanitize_expression, # FIX #4: Support sanitize as expression
|
|
85
86
|
}
|
|
86
87
|
self.infix_parse_fns = {
|
|
87
88
|
PLUS: self.parse_infix_expression,
|
|
@@ -511,7 +512,8 @@ class UltimateParser:
|
|
|
511
512
|
elif self.cur_token_is(STATE):
|
|
512
513
|
print(f"[PARSE_STMT] Matched STATE", file=sys.stderr, flush=True)
|
|
513
514
|
node = self.parse_state_statement()
|
|
514
|
-
|
|
515
|
+
elif self.cur_token_is(REQUIRE):
|
|
516
|
+
node = self.parse_require_statement()
|
|
515
517
|
elif self.cur_token_is(REVERT):
|
|
516
518
|
print(f"[PARSE_STMT] Matched REVERT", file=sys.stderr, flush=True)
|
|
517
519
|
node = self.parse_revert_statement()
|
|
@@ -3299,6 +3301,40 @@ class UltimateParser:
|
|
|
3299
3301
|
|
|
3300
3302
|
return ValidateStatement(data=data_expr, schema=schema_expr)
|
|
3301
3303
|
|
|
3304
|
+
def parse_sanitize_expression(self):
    """Parse ``sanitize`` in expression position so it can be used in assignments.

    Accepted forms:
        let safe = sanitize data, "sql"
        let safe = sanitize data as sql

    Returns:
        A SanitizeStatement carrying the data expression and the optional
        encoding expression (``rules`` is always None here), or None after
        recording a parse error when no data expression could be parsed.
    """
    start_token = self.cur_token
    self.next_token()  # step past the 'sanitize' keyword

    data_expr = self.parse_expression(LOWEST)
    if data_expr is None:
        self.errors.append(f"Line {start_token.line}:{start_token.column} - Expected expression to sanitize")
        return None

    # NOTE(review): the separator is looked up on the *current* token right
    # after parse_expression() returns - confirm that parse_expression leaves
    # the parser positioned on the token that follows the expression.
    encoding = None
    if self.cur_token_is(COMMA):
        self.next_token()
        # The encoding may be any expression (string literal or identifier).
        encoding = self.parse_expression(LOWEST)
    elif self.cur_token_is(IDENT) and self.cur_token.literal == 'as':
        self.next_token()
        if self.cur_token_is(IDENT):
            # A bare identifier after 'as' is treated as a string literal.
            encoding = StringLiteral(value=self.cur_token.literal)
            self.next_token()
        elif self.cur_token_is(STRING):
            encoding = self.parse_string_literal()

    return SanitizeStatement(data=data_expr, rules=None, encoding=encoding)
|
|
3337
|
+
|
|
3302
3338
|
def parse_sanitize_statement(self):
|
|
3303
3339
|
"""Parse sanitize statement - sanitize data"""
|
|
3304
3340
|
token = self.cur_token
|
|
@@ -3698,6 +3734,7 @@ class UltimateParser:
|
|
|
3698
3734
|
|
|
3699
3735
|
Asserts condition, reverts transaction if false.
|
|
3700
3736
|
"""
|
|
3737
|
+
print(f"[DEBUG PARSER] parse_require_statement called", flush=True)
|
|
3701
3738
|
token = self.cur_token
|
|
3702
3739
|
|
|
3703
3740
|
if not self.expect_peek(LPAREN):
|
|
@@ -3721,6 +3758,7 @@ class UltimateParser:
|
|
|
3721
3758
|
if self.peek_token_is(SEMICOLON):
|
|
3722
3759
|
self.next_token()
|
|
3723
3760
|
|
|
3761
|
+
print(f"[DEBUG PARSER] Creating RequireStatement with condition={condition}, message={message}", flush=True)
|
|
3724
3762
|
return RequireStatement(condition=condition, message=message)
|
|
3725
3763
|
|
|
3726
3764
|
def parse_revert_statement(self):
|
|
@@ -3234,9 +3234,10 @@ class ContextStackParser:
|
|
|
3234
3234
|
# Parse REQUIRE statement: require(condition, message) or require condition { tolerance_block }
|
|
3235
3235
|
j = i + 1
|
|
3236
3236
|
|
|
3237
|
-
# Collect tokens until semicolon OR until after tolerance block closes
|
|
3237
|
+
# Collect tokens until semicolon OR until after tolerance block closes OR after closing paren
|
|
3238
3238
|
require_tokens = [token]
|
|
3239
3239
|
brace_nest = 0
|
|
3240
|
+
paren_nest = 0
|
|
3240
3241
|
while j < len(tokens):
|
|
3241
3242
|
tj = tokens[j]
|
|
3242
3243
|
|
|
@@ -3246,6 +3247,12 @@ class ContextStackParser:
|
|
|
3246
3247
|
elif tj.type == RBRACE:
|
|
3247
3248
|
brace_nest -= 1
|
|
3248
3249
|
|
|
3250
|
+
# Track paren nesting for require(condition, message) form
|
|
3251
|
+
if tj.type == LPAREN:
|
|
3252
|
+
paren_nest += 1
|
|
3253
|
+
elif tj.type == RPAREN:
|
|
3254
|
+
paren_nest -= 1
|
|
3255
|
+
|
|
3249
3256
|
require_tokens.append(tj)
|
|
3250
3257
|
j += 1
|
|
3251
3258
|
|
|
@@ -3253,6 +3260,10 @@ class ContextStackParser:
|
|
|
3253
3260
|
if tj.type == SEMICOLON and brace_nest == 0:
|
|
3254
3261
|
break
|
|
3255
3262
|
|
|
3263
|
+
# Stop after closing paren of require(...) form (when paren_nest returns to 0)
|
|
3264
|
+
if tj.type == RPAREN and paren_nest == 0 and brace_nest == 0:
|
|
3265
|
+
break
|
|
3266
|
+
|
|
3256
3267
|
# Stop after tolerance block closes (if there was one)
|
|
3257
3268
|
if brace_nest == 0 and len(require_tokens) > 1 and require_tokens[-2].type == RBRACE:
|
|
3258
3269
|
break
|
|
@@ -4597,7 +4608,7 @@ class ContextStackParser:
|
|
|
4597
4608
|
|
|
4598
4609
|
Returns a SanitizeStatement which can be evaluated as an expression.
|
|
4599
4610
|
"""
|
|
4600
|
-
print(" 🔧 [Sanitize Expression] Parsing sanitize expression")
|
|
4611
|
+
# print(" 🔧 [Sanitize Expression] Parsing sanitize expression")
|
|
4601
4612
|
if not tokens or tokens[0].type != SANITIZE:
|
|
4602
4613
|
return None
|
|
4603
4614
|
|
|
@@ -6155,8 +6166,22 @@ class ContextStackParser:
|
|
|
6155
6166
|
|
|
6156
6167
|
# Check for parenthesized form: require(condition, message)
|
|
6157
6168
|
if start_idx < len(tokens) and tokens[start_idx].type == LPAREN:
|
|
6158
|
-
#
|
|
6159
|
-
|
|
6169
|
+
# Find matching RPAREN
|
|
6170
|
+
paren_depth = 1
|
|
6171
|
+
end_idx = start_idx + 1
|
|
6172
|
+
while end_idx < len(tokens) and paren_depth > 0:
|
|
6173
|
+
if tokens[end_idx].type == LPAREN:
|
|
6174
|
+
paren_depth += 1
|
|
6175
|
+
elif tokens[end_idx].type == RPAREN:
|
|
6176
|
+
paren_depth -= 1
|
|
6177
|
+
end_idx += 1
|
|
6178
|
+
|
|
6179
|
+
if paren_depth != 0:
|
|
6180
|
+
parser_debug(" ❌ Unmatched parentheses in require")
|
|
6181
|
+
return None
|
|
6182
|
+
|
|
6183
|
+
# Extract tokens between LPAREN and matching RPAREN
|
|
6184
|
+
inner = tokens[start_idx+1:end_idx-1]
|
|
6160
6185
|
|
|
6161
6186
|
# Split by comma to get condition and optional message
|
|
6162
6187
|
args = self._parse_argument_list(inner)
|
|
@@ -609,7 +609,14 @@ class StructuralAnalyzer:
|
|
|
609
609
|
elif tokens[k].type in {LBRACE, COLON}:
|
|
610
610
|
# Found statement form indicators
|
|
611
611
|
break
|
|
612
|
-
|
|
612
|
+
|
|
613
|
+
# FIX #4: After seeing SANITIZE in assignment, also check if previous token was SANITIZE
|
|
614
|
+
# This allows collecting the sanitize expression arguments
|
|
615
|
+
prev_was_sanitize = False
|
|
616
|
+
if j > 0 and tokens[j - 1].type == SANITIZE:
|
|
617
|
+
prev_was_sanitize = True
|
|
618
|
+
|
|
619
|
+
if not (in_assignment and (allow_in_assignment or allow_debug_call or allow_if_then_else or prev_was_sanitize)):
|
|
613
620
|
break
|
|
614
621
|
|
|
615
622
|
# CRITICAL FIX: Also break on modifier tokens at nesting 0 when followed by statement keywords
|
|
@@ -647,9 +654,10 @@ class StructuralAnalyzer:
|
|
|
647
654
|
stmt_tokens.append(tj)
|
|
648
655
|
j += 1
|
|
649
656
|
|
|
650
|
-
# MODIFIED: For RETURN, CONTINUE, and
|
|
651
|
-
#
|
|
652
|
-
|
|
657
|
+
# MODIFIED: For RETURN, CONTINUE, PRINT, and REQUIRE, stop after closing parens at nesting 0
|
|
658
|
+
# These can have multiple comma-separated arguments inside the parens
|
|
659
|
+
# NOTE: 't' is the statement starter token (first token), 'tj' is the just-collected token
|
|
660
|
+
if t.type in {RETURN, CONTINUE, PRINT, REQUIRE} and nesting == 0 and tj.type == RPAREN:
|
|
653
661
|
break
|
|
654
662
|
|
|
655
663
|
# If we just closed a brace block and are back at nesting 0, stop
|