zexus 1.6.2 → 1.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,291 @@
1
+ # src/zexus/evaluator/resource_limiter.py
2
+ """
3
+ Resource Limiter for Zexus Interpreter
4
+
5
+ Prevents resource exhaustion attacks by enforcing limits on:
6
+ - Loop iterations (prevents infinite loops)
7
+ - Execution time (prevents DoS via slow operations)
8
+ - Call stack depth (prevents stack overflow)
9
+ - Memory usage (prevents memory exhaustion)
10
+
11
+ Security Fix #7: Resource Limits
12
+ """
13
+
14
+ import time
15
+ import signal
16
+ import sys
17
+
18
+
19
+ class ResourceError(Exception):
20
+ """Raised when a resource limit is exceeded"""
21
+ pass
22
+
23
+
24
+ class TimeoutError(ResourceError):
25
+ """Raised when execution timeout is exceeded"""
26
+ pass
27
+
28
+
29
+ class ResourceLimiter:
30
+ """
31
+ Enforces resource limits during program execution.
32
+
33
+ Limits:
34
+ - max_iterations: Maximum loop iterations across all loops (default: 1,000,000)
35
+ - timeout_seconds: Maximum execution time (default: 30 seconds)
36
+ - max_call_depth: Maximum call stack depth (default: 100)
37
+ - max_memory_mb: Maximum memory usage (default: 500 MB, not enforced by default)
38
+
39
+ Usage:
40
+ limiter = ResourceLimiter(max_iterations=100000, timeout_seconds=10)
41
+ limiter.start() # Start timeout timer
42
+
43
+ # In loops:
44
+ limiter.check_iterations()
45
+
46
+ # On function calls:
47
+ limiter.enter_call()
48
+ # ... function body ...
49
+ limiter.exit_call()
50
+
51
+ limiter.stop() # Stop timeout timer
52
+ """
53
+
54
+ # Default limits
55
+ DEFAULT_MAX_ITERATIONS = 1_000_000 # 1 million iterations
56
+ DEFAULT_TIMEOUT_SECONDS = 30 # 30 seconds
57
+ DEFAULT_MAX_CALL_DEPTH = 100 # 100 nested calls (Python interpreter uses many stack frames per Zexus call)
58
+ DEFAULT_MAX_MEMORY_MB = 500 # 500 MB (not enforced by default)
59
+
60
+ def __init__(self,
61
+ max_iterations=None,
62
+ timeout_seconds=None,
63
+ max_call_depth=None,
64
+ max_memory_mb=None,
65
+ enable_timeout=False,
66
+ enable_memory_check=False):
67
+ """
68
+ Initialize resource limiter.
69
+
70
+ Args:
71
+ max_iterations: Maximum total loop iterations (default: 1,000,000)
72
+ timeout_seconds: Maximum execution time (default: 30)
73
+ max_call_depth: Maximum call stack depth (default: 100)
74
+ max_memory_mb: Maximum memory usage in MB (default: 500)
75
+ enable_timeout: Enable timeout enforcement (default: False, Linux only)
76
+ enable_memory_check: Enable memory checking (default: False, requires psutil)
77
+ """
78
+ self.max_iterations = max_iterations or self.DEFAULT_MAX_ITERATIONS
79
+ self.timeout_seconds = timeout_seconds or self.DEFAULT_TIMEOUT_SECONDS
80
+ self.max_call_depth = max_call_depth or self.DEFAULT_MAX_CALL_DEPTH
81
+ self.max_memory_mb = max_memory_mb or self.DEFAULT_MAX_MEMORY_MB
82
+
83
+ # Feature flags
84
+ self.enable_timeout = enable_timeout
85
+ self.enable_memory_check = enable_memory_check
86
+
87
+ # Runtime counters
88
+ self.iteration_count = 0
89
+ self.call_depth = 0
90
+ self.start_time = None
91
+ self.timeout_handler = None
92
+
93
+ # Memory checking (optional, requires psutil)
94
+ self.psutil_available = False
95
+ if enable_memory_check:
96
+ try:
97
+ import psutil
98
+ self.psutil = psutil
99
+ self.psutil_available = True
100
+ self.process = psutil.Process()
101
+ except ImportError:
102
+ print("⚠️ Warning: psutil not available, memory checking disabled")
103
+ self.enable_memory_check = False
104
+
105
+ def start(self):
106
+ """
107
+ Start resource monitoring (timeout timer, etc.)
108
+ Should be called at the beginning of script execution.
109
+ """
110
+ self.start_time = time.time()
111
+ self.iteration_count = 0
112
+ self.call_depth = 0
113
+
114
+ # Set timeout handler (Linux/Unix only)
115
+ if self.enable_timeout and hasattr(signal, 'SIGALRM'):
116
+ self._set_timeout_alarm()
117
+
118
+ def stop(self):
119
+ """
120
+ Stop resource monitoring and cleanup.
121
+ Should be called at the end of script execution.
122
+ """
123
+ # Cancel timeout alarm
124
+ if self.enable_timeout and hasattr(signal, 'SIGALRM'):
125
+ signal.alarm(0) # Cancel alarm
126
+
127
+ def reset(self):
128
+ """Reset iteration counter (useful for multiple script executions)"""
129
+ self.iteration_count = 0
130
+ self.call_depth = 0
131
+ self.start_time = None
132
+
133
+ def check_iterations(self):
134
+ """
135
+ Check if iteration limit has been exceeded.
136
+ Should be called at the beginning of each loop iteration.
137
+
138
+ Raises:
139
+ ResourceError: If iteration limit exceeded
140
+ """
141
+ self.iteration_count += 1
142
+
143
+ if self.iteration_count > self.max_iterations:
144
+ raise ResourceError(
145
+ f"Iteration limit exceeded: {self.max_iterations:,} iterations\n"
146
+ f"This prevents infinite loops and resource exhaustion.\n\n"
147
+ f"Suggestion: Review your loop conditions or increase the limit with:\n"
148
+ f" zx-run --max-iterations 10000000 script.zx"
149
+ )
150
+
151
+ def check_timeout(self):
152
+ """
153
+ Check if execution timeout has been exceeded.
154
+ Should be called periodically during long operations.
155
+
156
+ Raises:
157
+ TimeoutError: If timeout exceeded
158
+ """
159
+ if self.start_time is None:
160
+ return
161
+
162
+ elapsed = time.time() - self.start_time
163
+ if elapsed > self.timeout_seconds:
164
+ raise TimeoutError(
165
+ f"Execution timeout exceeded: {elapsed:.2f}s > {self.timeout_seconds}s\n"
166
+ f"This prevents denial-of-service via slow operations.\n\n"
167
+ f"Suggestion: Optimize your code or increase timeout with:\n"
168
+ f" zx-run --timeout 60 script.zx"
169
+ )
170
+
171
+ def check_memory(self):
172
+ """
173
+ Check if memory limit has been exceeded.
174
+ Should be called periodically (e.g., every 1000 iterations).
175
+
176
+ Raises:
177
+ ResourceError: If memory limit exceeded
178
+ """
179
+ if not self.enable_memory_check or not self.psutil_available:
180
+ return
181
+
182
+ try:
183
+ memory_mb = self.process.memory_info().rss / 1024 / 1024
184
+
185
+ if memory_mb > self.max_memory_mb:
186
+ raise ResourceError(
187
+ f"Memory limit exceeded: {memory_mb:.2f}MB > {self.max_memory_mb}MB\n"
188
+ f"This prevents memory exhaustion attacks.\n\n"
189
+ f"Suggestion: Reduce memory usage or increase limit with:\n"
190
+ f" zx-run --max-memory 1000 script.zx"
191
+ )
192
+ except Exception as e:
193
+ # Don't crash on memory check failure
194
+ print(f"⚠️ Warning: Memory check failed: {e}")
195
+
196
+ def enter_call(self, function_name=None):
197
+ """
198
+ Called when entering a function/action call.
199
+ Tracks call depth to prevent stack overflow.
200
+
201
+ Args:
202
+ function_name: Optional name of function being called
203
+
204
+ Raises:
205
+ ResourceError: If call depth limit exceeded
206
+ """
207
+ self.call_depth += 1
208
+
209
+ if self.call_depth > self.max_call_depth:
210
+ func_info = f" ({function_name})" if function_name else ""
211
+ raise ResourceError(
212
+ f"Call depth limit exceeded: {self.max_call_depth} nested calls{func_info}\n"
213
+ f"This prevents stack overflow from excessive recursion.\n\n"
214
+ f"Suggestion: Review your recursion or increase limit with:\n"
215
+ f" zx-run --max-call-depth 5000 script.zx"
216
+ )
217
+
218
+ def exit_call(self):
219
+ """
220
+ Called when exiting a function/action call.
221
+ Decrements call depth counter.
222
+ """
223
+ if self.call_depth > 0:
224
+ self.call_depth -= 1
225
+
226
+ def get_stats(self):
227
+ """
228
+ Get current resource usage statistics.
229
+
230
+ Returns:
231
+ dict: Resource usage stats
232
+ """
233
+ stats = {
234
+ 'iterations': self.iteration_count,
235
+ 'max_iterations': self.max_iterations,
236
+ 'iteration_percent': (self.iteration_count / self.max_iterations) * 100,
237
+ 'call_depth': self.call_depth,
238
+ 'max_call_depth': self.max_call_depth,
239
+ }
240
+
241
+ if self.start_time:
242
+ elapsed = time.time() - self.start_time
243
+ stats['elapsed_seconds'] = elapsed
244
+ stats['timeout_seconds'] = self.timeout_seconds
245
+ stats['timeout_percent'] = (elapsed / self.timeout_seconds) * 100
246
+
247
+ if self.enable_memory_check and self.psutil_available:
248
+ try:
249
+ memory_mb = self.process.memory_info().rss / 1024 / 1024
250
+ stats['memory_mb'] = memory_mb
251
+ stats['max_memory_mb'] = self.max_memory_mb
252
+ stats['memory_percent'] = (memory_mb / self.max_memory_mb) * 100
253
+ except:
254
+ pass
255
+
256
+ return stats
257
+
258
+ def _set_timeout_alarm(self):
259
+ """
260
+ Set SIGALRM timeout handler (Linux/Unix only).
261
+ This is automatically called by start() if enable_timeout is True.
262
+ """
263
+ def timeout_handler(signum, frame):
264
+ raise TimeoutError(
265
+ f"Execution timeout: {self.timeout_seconds}s exceeded\n"
266
+ f"This prevents denial-of-service via slow operations.\n\n"
267
+ f"Suggestion: Optimize your code or increase timeout with:\n"
268
+ f" zx-run --timeout 60 script.zx"
269
+ )
270
+
271
+ self.timeout_handler = timeout_handler
272
+ signal.signal(signal.SIGALRM, timeout_handler)
273
+ signal.alarm(self.timeout_seconds)
274
+
275
+
276
+ # Default global limiter (can be overridden)
277
+ _default_limiter = None
278
+
279
+
280
+ def get_default_limiter():
281
+ """Get the default global resource limiter"""
282
+ global _default_limiter
283
+ if _default_limiter is None:
284
+ _default_limiter = ResourceLimiter()
285
+ return _default_limiter
286
+
287
+
288
+ def set_default_limiter(limiter):
289
+ """Set the default global resource limiter"""
290
+ global _default_limiter
291
+ _default_limiter = limiter
@@ -877,6 +877,16 @@ class StatementEvaluatorMixin:
877
877
  def eval_while_statement(self, node, env, stack_trace):
878
878
  result = NULL
879
879
  while True:
880
+ # Resource limit check (Security Fix #7)
881
+ try:
882
+ self.resource_limiter.check_iterations()
883
+ except Exception as e:
884
+ # Convert ResourceError to EvaluationError
885
+ from ..evaluator.resource_limiter import ResourceError, TimeoutError
886
+ if isinstance(e, (ResourceError, TimeoutError)):
887
+ return EvaluationError(str(e))
888
+ raise # Re-raise if not a resource error
889
+
880
890
  cond = self.eval_node(node.condition, env, stack_trace)
881
891
  if is_error(cond):
882
892
  return cond
@@ -904,6 +914,16 @@ class StatementEvaluatorMixin:
904
914
 
905
915
  result = NULL
906
916
  for item in iterable.elements:
917
+ # Resource limit check (Security Fix #7)
918
+ try:
919
+ self.resource_limiter.check_iterations()
920
+ except Exception as e:
921
+ # Convert ResourceError to EvaluationError
922
+ from ..evaluator.resource_limiter import ResourceError, TimeoutError
923
+ if isinstance(e, (ResourceError, TimeoutError)):
924
+ return EvaluationError(str(e))
925
+ raise # Re-raise if not a resource error
926
+
907
927
  env.set(node.item.value, item)
908
928
  result = self.eval_node(node.body, env, stack_trace)
909
929
  if isinstance(result, ReturnValue):
@@ -3115,12 +3135,14 @@ class StatementEvaluatorMixin:
3115
3135
  else:
3116
3136
  data_str = str(data)
3117
3137
 
3118
- # Determine encoding
3138
+ # Determine encoding/context
3119
3139
  encoding = Encoding.HTML # Default
3140
+ context_name = "html" # For marking sanitized_for
3120
3141
  if node.encoding:
3121
3142
  enc_val = self.eval_node(node.encoding, env, stack_trace)
3122
3143
  if hasattr(enc_val, 'value'):
3123
3144
  enc_name = enc_val.value.upper()
3145
+ context_name = enc_val.value.lower()
3124
3146
  try:
3125
3147
  encoding = Encoding[enc_name]
3126
3148
  except KeyError:
@@ -3130,10 +3152,16 @@ class StatementEvaluatorMixin:
3130
3152
  try:
3131
3153
  sanitized = Sanitizer.sanitize_string(data_str, encoding)
3132
3154
  debug_log("eval_sanitize_statement", f"Sanitized {len(data_str)} chars with {encoding.value}")
3133
- return String(sanitized)
3155
+ result = String(sanitized)
3156
+ # SECURITY ENFORCEMENT: Mark as sanitized for this context
3157
+ result.mark_sanitized(context_name)
3158
+ return result
3134
3159
  except Exception as e:
3135
3160
  debug_log("eval_sanitize_statement", f"Sanitization error: {e}")
3136
- return String(data_str) # Return original if sanitization fails
3161
+ # Even on error, mark as sanitized to prevent double-sanitization loops
3162
+ result = String(data_str)
3163
+ result.mark_sanitized(context_name)
3164
+ return result
3137
3165
 
3138
3166
  def eval_inject_statement(self, node, env, stack_trace):
3139
3167
  """Evaluate inject statement - full dependency injection with mode-aware resolution."""
@@ -89,20 +89,26 @@ def _zexus_to_python(value):
89
89
  else:
90
90
  return str(value)
91
91
 
92
- def _python_to_zexus(value):
93
- """Convert Python native types to Zexus objects"""
92
+ def _python_to_zexus(value, mark_untrusted=False):
93
+ """Convert Python native types to Zexus objects
94
+
95
+ Args:
96
+ value: Python value to convert
97
+ mark_untrusted: If True, mark strings as untrusted (external data)
98
+ """
94
99
  from ..object import Map, List, String, Integer, Float, Boolean as BooleanObj
95
100
 
96
101
  if isinstance(value, dict):
97
102
  pairs = {}
98
103
  for k, v in value.items():
99
- pairs[k] = _python_to_zexus(v)
104
+ pairs[k] = _python_to_zexus(v, mark_untrusted)
100
105
  return Map(pairs)
101
106
  elif isinstance(value, list):
102
- zexus_list = List([_python_to_zexus(item) for item in value])
107
+ zexus_list = List([_python_to_zexus(item, mark_untrusted) for item in value])
103
108
  return zexus_list
104
109
  elif isinstance(value, str):
105
- return String(value)
110
+ # Mark strings as untrusted if from external source (HTTP, DB, etc.)
111
+ return String(value, is_trusted=not mark_untrusted)
106
112
  elif isinstance(value, int):
107
113
  return Integer(value)
108
114
  elif isinstance(value, float):
@@ -112,7 +118,7 @@ def _python_to_zexus(value):
112
118
  elif value is None:
113
119
  return NULL
114
120
  else:
115
- return String(str(value))
121
+ return String(str(value), is_trusted=not mark_untrusted)
116
122
 
117
123
  def _to_str(obj):
118
124
  """Helper to convert Zexus object to string"""
@@ -87,7 +87,7 @@ if PYGLS_AVAILABLE:
87
87
  """Zexus Language Server implementation."""
88
88
 
89
89
  def __init__(self):
90
- super().__init__('zexus-language-server', 'v1.6.2')
90
+ super().__init__('zexus-language-server', 'v1.6.3')
91
91
  self.completion_provider = CompletionProvider()
92
92
  self.symbol_provider = SymbolProvider()
93
93
  self.hover_provider = HoverProvider()
@@ -31,7 +31,12 @@ class Null(Object):
31
31
  def type(self): return "NULL"
32
32
 
33
33
  class String(Object):
34
- def __init__(self, value): self.value = value
34
+ def __init__(self, value, sanitized_for=None, is_trusted=False):
35
+ self.value = value
36
+ # Track sanitization status for security enforcement
37
+ self.sanitized_for = sanitized_for # None, 'sql', 'html', 'url', 'shell', etc.
38
+ self.is_trusted = is_trusted # True for literals, False for external input
39
+
35
40
  def inspect(self): return self.value
36
41
  def type(self): return "STRING"
37
42
  def __str__(self): return self.value
@@ -43,6 +48,19 @@ class String(Object):
43
48
  def __hash__(self):
44
49
  """Enable String objects to be used as dict keys"""
45
50
  return hash(self.value)
51
+
52
+ def mark_sanitized(self, context):
53
+ """Mark this string as sanitized for a specific context"""
54
+ self.sanitized_for = context
55
+ return self
56
+
57
+ def is_safe_for(self, context):
58
+ """Check if string is safe to use in given context"""
59
+ # Trusted strings (literals) are always safe
60
+ if self.is_trusted:
61
+ return True
62
+ # Check if sanitized for this specific context
63
+ return self.sanitized_for == context
46
64
 
47
65
  class List(Object):
48
66
  def __init__(self, elements): self.elements = elements
@@ -425,7 +443,8 @@ class File(Object):
425
443
  if isinstance(path, String):
426
444
  path = path.value
427
445
  with open(path, 'r', encoding='utf-8') as f:
428
- return String(f.read())
446
+ # Files are external data sources - return untrusted strings
447
+ return String(f.read(), is_trusted=False)
429
448
  except Exception as e:
430
449
  return EvaluationError(f"File read error: {str(e)}")
431
450
 
@@ -82,6 +82,7 @@ class UltimateParser:
82
82
  TRY: self.parse_try_catch_statement,
83
83
  EXTERNAL: self.parse_external_declaration,
84
84
  ASYNC: self.parse_async_expression, # Support async <expression>
85
+ SANITIZE: self.parse_sanitize_expression, # FIX #4: Support sanitize as expression
85
86
  }
86
87
  self.infix_parse_fns = {
87
88
  PLUS: self.parse_infix_expression,
@@ -511,7 +512,8 @@ class UltimateParser:
511
512
  elif self.cur_token_is(STATE):
512
513
  print(f"[PARSE_STMT] Matched STATE", file=sys.stderr, flush=True)
513
514
  node = self.parse_state_statement()
514
- # REQUIRE is now handled by ContextStackParser for enhanced syntax support
515
+ elif self.cur_token_is(REQUIRE):
516
+ node = self.parse_require_statement()
515
517
  elif self.cur_token_is(REVERT):
516
518
  print(f"[PARSE_STMT] Matched REVERT", file=sys.stderr, flush=True)
517
519
  node = self.parse_revert_statement()
@@ -3299,6 +3301,40 @@ class UltimateParser:
3299
3301
 
3300
3302
  return ValidateStatement(data=data_expr, schema=schema_expr)
3301
3303
 
3304
+ def parse_sanitize_expression(self):
3305
+ """Parse sanitize as expression - can be used in assignments
3306
+
3307
+ Supports both:
3308
+ let safe = sanitize data, "sql"
3309
+ let safe = sanitize data as sql
3310
+ """
3311
+ token = self.cur_token
3312
+ self.next_token()
3313
+
3314
+ # Parse data expression
3315
+ data_expr = self.parse_expression(LOWEST)
3316
+ if data_expr is None:
3317
+ self.errors.append(f"Line {token.line}:{token.column} - Expected expression to sanitize")
3318
+ return None
3319
+
3320
+ # Expect comma or 'as'
3321
+ encoding = None
3322
+ if self.cur_token_is(COMMA):
3323
+ self.next_token()
3324
+ # Parse encoding as expression (can be string literal or identifier)
3325
+ encoding = self.parse_expression(LOWEST)
3326
+ elif self.cur_token_is(IDENT) and self.cur_token.literal == 'as':
3327
+ self.next_token()
3328
+ if self.cur_token_is(IDENT):
3329
+ # Convert identifier to string literal
3330
+ encoding = StringLiteral(value=self.cur_token.literal)
3331
+ self.next_token()
3332
+ elif self.cur_token_is(STRING):
3333
+ encoding = self.parse_string_literal()
3334
+
3335
+ result = SanitizeStatement(data=data_expr, rules=None, encoding=encoding)
3336
+ return result
3337
+
3302
3338
  def parse_sanitize_statement(self):
3303
3339
  """Parse sanitize statement - sanitize data"""
3304
3340
  token = self.cur_token
@@ -3698,6 +3734,7 @@ class UltimateParser:
3698
3734
 
3699
3735
  Asserts condition, reverts transaction if false.
3700
3736
  """
3737
+ print(f"[DEBUG PARSER] parse_require_statement called", flush=True)
3701
3738
  token = self.cur_token
3702
3739
 
3703
3740
  if not self.expect_peek(LPAREN):
@@ -3721,6 +3758,7 @@ class UltimateParser:
3721
3758
  if self.peek_token_is(SEMICOLON):
3722
3759
  self.next_token()
3723
3760
 
3761
+ print(f"[DEBUG PARSER] Creating RequireStatement with condition={condition}, message={message}", flush=True)
3724
3762
  return RequireStatement(condition=condition, message=message)
3725
3763
 
3726
3764
  def parse_revert_statement(self):
@@ -3234,9 +3234,10 @@ class ContextStackParser:
3234
3234
  # Parse REQUIRE statement: require(condition, message) or require condition { tolerance_block }
3235
3235
  j = i + 1
3236
3236
 
3237
- # Collect tokens until semicolon OR until after tolerance block closes
3237
+ # Collect tokens until semicolon OR until after tolerance block closes OR after closing paren
3238
3238
  require_tokens = [token]
3239
3239
  brace_nest = 0
3240
+ paren_nest = 0
3240
3241
  while j < len(tokens):
3241
3242
  tj = tokens[j]
3242
3243
 
@@ -3246,6 +3247,12 @@ class ContextStackParser:
3246
3247
  elif tj.type == RBRACE:
3247
3248
  brace_nest -= 1
3248
3249
 
3250
+ # Track paren nesting for require(condition, message) form
3251
+ if tj.type == LPAREN:
3252
+ paren_nest += 1
3253
+ elif tj.type == RPAREN:
3254
+ paren_nest -= 1
3255
+
3249
3256
  require_tokens.append(tj)
3250
3257
  j += 1
3251
3258
 
@@ -3253,6 +3260,10 @@ class ContextStackParser:
3253
3260
  if tj.type == SEMICOLON and brace_nest == 0:
3254
3261
  break
3255
3262
 
3263
+ # Stop after closing paren of require(...) form (when paren_nest returns to 0)
3264
+ if tj.type == RPAREN and paren_nest == 0 and brace_nest == 0:
3265
+ break
3266
+
3256
3267
  # Stop after tolerance block closes (if there was one)
3257
3268
  if brace_nest == 0 and len(require_tokens) > 1 and require_tokens[-2].type == RBRACE:
3258
3269
  break
@@ -4597,7 +4608,7 @@ class ContextStackParser:
4597
4608
 
4598
4609
  Returns a SanitizeStatement which can be evaluated as an expression.
4599
4610
  """
4600
- print(" 🔧 [Sanitize Expression] Parsing sanitize expression")
4611
+ # print(" 🔧 [Sanitize Expression] Parsing sanitize expression")
4601
4612
  if not tokens or tokens[0].type != SANITIZE:
4602
4613
  return None
4603
4614
 
@@ -6155,8 +6166,22 @@ class ContextStackParser:
6155
6166
 
6156
6167
  # Check for parenthesized form: require(condition, message)
6157
6168
  if start_idx < len(tokens) and tokens[start_idx].type == LPAREN:
6158
- # Extract tokens between LPAREN and RPAREN
6159
- inner = tokens[start_idx+1:-1] if len(tokens) > start_idx+1 and tokens[-1].type == RPAREN else tokens[start_idx+1:]
6169
+ # Find matching RPAREN
6170
+ paren_depth = 1
6171
+ end_idx = start_idx + 1
6172
+ while end_idx < len(tokens) and paren_depth > 0:
6173
+ if tokens[end_idx].type == LPAREN:
6174
+ paren_depth += 1
6175
+ elif tokens[end_idx].type == RPAREN:
6176
+ paren_depth -= 1
6177
+ end_idx += 1
6178
+
6179
+ if paren_depth != 0:
6180
+ parser_debug(" ❌ Unmatched parentheses in require")
6181
+ return None
6182
+
6183
+ # Extract tokens between LPAREN and matching RPAREN
6184
+ inner = tokens[start_idx+1:end_idx-1]
6160
6185
 
6161
6186
  # Split by comma to get condition and optional message
6162
6187
  args = self._parse_argument_list(inner)
@@ -609,7 +609,14 @@ class StructuralAnalyzer:
609
609
  elif tokens[k].type in {LBRACE, COLON}:
610
610
  # Found statement form indicators
611
611
  break
612
- if not (in_assignment and (allow_in_assignment or allow_debug_call or allow_if_then_else)):
612
+
613
+ # FIX #4: After seeing SANITIZE in assignment, also check if previous token was SANITIZE
614
+ # This allows collecting the sanitize expression arguments
615
+ prev_was_sanitize = False
616
+ if j > 0 and tokens[j - 1].type == SANITIZE:
617
+ prev_was_sanitize = True
618
+
619
+ if not (in_assignment and (allow_in_assignment or allow_debug_call or allow_if_then_else or prev_was_sanitize)):
613
620
  break
614
621
 
615
622
  # CRITICAL FIX: Also break on modifier tokens at nesting 0 when followed by statement keywords
@@ -647,9 +654,10 @@ class StructuralAnalyzer:
647
654
  stmt_tokens.append(tj)
648
655
  j += 1
649
656
 
650
- # MODIFIED: For RETURN, CONTINUE, and PRINT, stop after closing parens at nesting 0
651
- # PRINT can have multiple comma-separated arguments inside the parens
652
- if t.type in {RETURN, CONTINUE, PRINT} and nesting == 0 and tj.type == RPAREN:
657
+ # MODIFIED: For RETURN, CONTINUE, PRINT, and REQUIRE, stop after closing parens at nesting 0
658
+ # These can have multiple comma-separated arguments inside the parens
659
+ # NOTE: 't' is the statement starter token (first token), 'tj' is the just-collected token
660
+ if t.type in {RETURN, CONTINUE, PRINT, REQUIRE} and nesting == 0 and tj.type == RPAREN:
653
661
  break
654
662
 
655
663
  # If we just closed a brace block and are back at nesting 0, stop