claude-evolve 1.8.12 → 1.8.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1641,7 +1641,7 @@ echo "[INFO] Starting ideation for generation $CURRENT_GENERATION"
 # Main execution with retry logic and exponential backoff
 retry_count=0
 wait_seconds=300 # Start with 5 minutes
-max_wait_seconds=1800 # Cap at 30 minutes
+max_wait_seconds=300 # Cap at 5 minutes
 
 while true; do
   if [[ $use_strategies == true ]]; then
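
Both this hunk and the matching change in call_ai_for_evolution further down drop the backoff ceiling from 1800s to 300s; since the starting wait is already 300s, the "exponential" backoff effectively becomes a flat 5-minute retry interval. A minimal sketch of the resulting wait schedule (hypothetical helper, assuming the usual doubling scheme; not code from the package):

```python
# Hypothetical illustration of a capped doubling backoff. With the new
# max_wait of 300s the doubling never takes effect, so every retry waits
# the same 5 minutes.
def wait_schedule(retries: int, start: int = 300, max_wait: int = 300) -> list:
    waits, wait = [], start
    for _ in range(retries):
        waits.append(wait)
        wait = min(wait * 2, max_wait)  # cap applied after doubling
    return waits

print(wait_schedule(4))                 # [300, 300, 300, 300] (new cap)
print(wait_schedule(4, max_wait=1800))  # [300, 600, 1200, 1800] (old cap)
```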
@@ -275,29 +275,34 @@ count_pending_candidates() {
 # Function to get CSV stats
 get_csv_stats() {
   local csv_path="${1:-$FULL_CSV_PATH}"
-
+
   if [[ ! -f "$csv_path" ]]; then
     echo "[ERROR] CSV not found at: $csv_path" >&2
     echo "0 0 0"
     return
   fi
-
+
   local total_rows complete_count pending_count
   total_rows=$(wc -l < "$csv_path" | tr -d '[:space:]')
   complete_count=$(grep ',complete' "$csv_path" 2>/dev/null | wc -l | tr -d '[:space:]')
-
+
   # Count pending using UNIFIED CSV logic
   pending_count=$("$PYTHON_CMD" "$SCRIPT_DIR/../lib/evolution_csv.py" "$csv_path" count)
-
+
   echo "$total_rows $complete_count $pending_count"
 }
 
-echo "[DISPATCHER] Starting unified evolution engine"
-echo "[DISPATCHER] Configuration: max_workers=$MAX_WORKERS, timeout=${timeout_seconds:-none}"
+# Function to perform full CSV cleanup (duplicates, stuck statuses, missing files, etc.)
+cleanup_csv_full() {
+  if [[ ! -f "$FULL_CSV_PATH" ]]; then
+    echo "[DISPATCHER] No CSV file to clean up" >&2
+    return 0
+  fi
+
+  echo "[DISPATCHER] Performing full CSV cleanup..." >&2
 
-# Clean up any stuck 'running' statuses and duplicates at startup
-if [[ -f "$FULL_CSV_PATH" ]]; then
-  echo "[DISPATCHER] Checking for duplicate candidates..."
+  # Remove duplicate candidates
+  echo "[DISPATCHER] Checking for duplicate candidates..." >&2
   "$PYTHON_CMD" -c "
 import sys
 sys.path.insert(0, '$SCRIPT_DIR/..')
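
get_csv_stats delegates the pending count to lib/evolution_csv.py so every caller uses the same definition of "pending". A rough sketch of what that unified count likely does, assuming the column layout visible elsewhere in this diff (column 0 = id, column 4 = status, blank status treated as pending); this is an illustration, not the package's actual module:

```python
import csv

def count_pending(csv_path: str) -> int:
    """Count rows whose status column is blank or 'pending' (header skipped)."""
    with open(csv_path, newline="") as f:
        rows = list(csv.reader(f))
    pending = 0
    for row in rows[1:]:
        status = row[4] if len(row) > 4 else ""
        if status.strip() in ("", "pending"):
            pending += 1
    return pending
```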
@@ -312,53 +317,49 @@ except Exception as e:
     print(f'[ERROR] Failed to remove duplicates: {e}', file=sys.stderr)
 " 2>&1 || true
 
-  echo "[DISPATCHER] Resetting any stuck 'running' candidates to 'pending'..."
+  # Reset stuck 'running' candidates
+  echo "[DISPATCHER] Resetting any stuck 'running' candidates to 'pending'..." >&2
   if "$SCRIPT_DIR/claude-evolve-edit" running pending >/dev/null 2>&1; then
-    echo "[DISPATCHER] Successfully reset stuck candidates"
+    echo "[DISPATCHER] Successfully reset stuck candidates" >&2
   else
-    echo "[DISPATCHER] No stuck candidates found or edit command not available"
+    echo "[DISPATCHER] No stuck candidates found or edit command not available" >&2
   fi
-fi
 
-# Validate CSV and clean up stuck statuses and duplicates
-if [[ -f "$FULL_CSV_PATH" ]]; then
-  echo "[DISPATCHER] Validating CSV and cleaning up..."
-
-  # First check for and clean up duplicates
-  echo "[DISPATCHER] Checking for duplicate entries..."
+  # Clean up duplicates with the dedicated cleanup script
+  echo "[DISPATCHER] Checking for duplicate entries..." >&2
   duplicate_check_output=$("$PYTHON_CMD" "$SCRIPT_DIR/claude-evolve-cleanup-duplicates" "$FULL_CSV_PATH" 2>&1)
   if echo "$duplicate_check_output" | grep -q "Found.*duplicate"; then
-    echo "[DISPATCHER] WARNING: Duplicate entries detected in CSV!"
-    echo "$duplicate_check_output"
-    echo "[DISPATCHER] Automatically cleaning up duplicates..."
+    echo "[DISPATCHER] WARNING: Duplicate entries detected in CSV!" >&2
+    echo "$duplicate_check_output" >&2
+    echo "[DISPATCHER] Automatically cleaning up duplicates..." >&2
     if "$PYTHON_CMD" "$SCRIPT_DIR/claude-evolve-cleanup-duplicates" "$FULL_CSV_PATH" --fix; then
-      echo "[DISPATCHER] Duplicates cleaned up successfully"
+      echo "[DISPATCHER] Duplicates cleaned up successfully" >&2
     else
       echo "[ERROR] Failed to clean up duplicates" >&2
-      exit 1
+      return 1
     fi
   else
-    echo "[DISPATCHER] No duplicates found"
+    echo "[DISPATCHER] No duplicates found" >&2
   fi
-
-  # Check for and clean up invalid entries
-  echo "[DISPATCHER] Checking for invalid entries..."
+
+  # Clean up invalid entries
+  echo "[DISPATCHER] Checking for invalid entries..." >&2
   invalid_check_output=$("$PYTHON_CMD" "$SCRIPT_DIR/claude-evolve-clean-invalid" "$FULL_CSV_PATH" --dry-run 2>&1)
   if echo "$invalid_check_output" | grep -q "Found.*invalid"; then
-    echo "[DISPATCHER] WARNING: Invalid entries detected in CSV!"
-    echo "$invalid_check_output"
-    echo "[DISPATCHER] Automatically cleaning up invalid entries..."
+    echo "[DISPATCHER] WARNING: Invalid entries detected in CSV!" >&2
+    echo "$invalid_check_output" >&2
+    echo "[DISPATCHER] Automatically cleaning up invalid entries..." >&2
     if "$PYTHON_CMD" "$SCRIPT_DIR/claude-evolve-clean-invalid" "$FULL_CSV_PATH"; then
-      echo "[DISPATCHER] Invalid entries cleaned up successfully"
+      echo "[DISPATCHER] Invalid entries cleaned up successfully" >&2
     else
       echo "[ERROR] Failed to clean up invalid entries" >&2
-      exit 1
+      return 1
     fi
   else
-    echo "[DISPATCHER] No invalid entries found"
+    echo "[DISPATCHER] No invalid entries found" >&2
   fi
-
-  # Then validate and clean stuck statuses
+
+  # Clean stuck statuses and missing files
   if ! "$PYTHON_CMD" -c "
 import csv
 import sys
@@ -366,90 +367,88 @@ import os
 from pathlib import Path
 
 csv_file = '$FULL_CSV_PATH'
-full_output_dir = '$FULL_OUTPUT_DIR' # Pass FULL_OUTPUT_DIR to Python script
-script_dir = '$SCRIPT_DIR' # Pass SCRIPT_DIR for sys.path.append
+full_output_dir = '$FULL_OUTPUT_DIR'
+script_dir = '$SCRIPT_DIR'
 
 try:
-    # Read CSV - let Python's csv module handle all the complexity
     with open(csv_file, 'r') as f:
         reader = csv.reader(f)
         rows = list(reader)
-
+
     if not rows:
         print('[ERROR] CSV is empty')
         sys.exit(1)
-
-    # Basic sanity checks
-    header = rows[0]
-    num_fields = len(header)
-
+
     if len(rows) == 1:
         print('[INFO] CSV has no data rows (only header)')
-
+
     changed_count = 0
-
+
     # Clean up any stuck 'running' statuses
     for i in range(1, len(rows)):
         if len(rows[i]) > 4 and rows[i][4] == 'running':
             rows[i][4] = ''
             changed_count += 1
 
-    # Reset failed-parent-missing to pending at startup - give them another chance
+    # Reset failed-parent-missing to pending - give them another chance
    for i in range(1, len(rows)):
         if len(rows[i]) > 4 and rows[i][4] == 'failed-parent-missing':
             rows[i][4] = 'pending'
             changed_count += 1
-
+
     # Check for missing Python files for completed/failed candidates
     for i in range(1, len(rows)):
         if len(rows[i]) > 4:
             candidate_id = rows[i][0]
             status = rows[i][4]
-
-            # Only check if status implies a file should exist
+
             if status in ['complete', 'failed', 'failed-ai-retry', 'failed-retry1', 'failed-retry2', 'failed-retry3']:
                 expected_file = Path(full_output_dir) / f'evolution_{candidate_id}.py'
                 if not expected_file.is_file():
                     print(f'[INFO] Detected missing file for {candidate_id} (status: {status}). Resetting to pending.')
-                    rows[i][4] = 'pending' # Reset status to pending
-                    # Clear performance and other fields if desired, for a clean retry
-                    if len(rows[i]) > 3: rows[i][3] = '' # Performance
-                    if len(rows[i]) > 5: rows[i][5] = '' # LLM used for run
+                    rows[i][4] = 'pending'
+                    if len(rows[i]) > 3: rows[i][3] = ''
+                    if len(rows[i]) > 5: rows[i][5] = ''
                     changed_count += 1
-
+
     if changed_count > 0:
-        # Write back
         with open(csv_file + '.tmp', 'w', newline='') as f:
             writer = csv.writer(f)
             writer.writerows(rows)
         os.rename(csv_file + '.tmp', csv_file)
         print(f'[INFO] Reset {changed_count} candidates (stuck running or missing files) to pending')
-
-    # Count pending candidates using UNIFIED logic
+
     sys.path.append(script_dir + '/..')
     from lib.evolution_csv import EvolutionCSV
-
+
     with EvolutionCSV(csv_file) as csv_ops:
-        # Auto-fix any corrupted status fields before counting
         fixed = csv_ops.cleanup_corrupted_status_fields()
         if fixed > 0:
             print(f'[INFO] Auto-fixed {fixed} corrupted status field(s)', file=sys.stderr)
         pending = csv_ops.count_pending_candidates()
 
     print(f'[INFO] CSV loaded: {len(rows)-1} total candidates, {pending} pending')
-
+
 except csv.Error as e:
     print(f'[ERROR] CSV parsing error: {e}')
-    print('[ERROR] The CSV file appears to be malformed')
     sys.exit(1)
 except Exception as e:
     print(f'[ERROR] Failed to read CSV: {e}')
     sys.exit(1)
 " 2>&1; then
-    echo "[ERROR] CSV validation failed. Please check the error message above."
-    exit 1
+    echo "[ERROR] CSV validation failed during cleanup" >&2
+    return 1
   fi
-fi
+
+  echo "[DISPATCHER] Full CSV cleanup complete" >&2
+  return 0
+}
+
+echo "[DISPATCHER] Starting unified evolution engine"
+echo "[DISPATCHER] Configuration: max_workers=$MAX_WORKERS, timeout=${timeout_seconds:-none}"
+
+# Perform full CSV cleanup at startup
+cleanup_csv_full || exit 1
 
 # Automatic cleanup detection - check for unchanged algorithms and warn user
 echo "[DISPATCHER] Checking for duplicate/unchanged algorithms..."
@@ -585,8 +584,28 @@ while true; do
 
   # Check if API limit was reached
   if [[ "$api_limit_reached" == "true" ]]; then
-    echo "[DISPATCHER] Stopping evolution run due to API usage limits" >&2
-    break
+    echo "[DISPATCHER] All AI models hit usage limits" >&2
+    echo "[DISPATCHER] Waiting 5 minutes before restarting the run process..." >&2
+
+    # Wait 5 minutes with countdown
+    remaining=300
+    while [[ $remaining -gt 0 ]]; do
+      if [[ $((remaining % 60)) -eq 0 ]]; then
+        echo "[DISPATCHER] Restarting in $((remaining / 60)) minutes..." >&2
+      fi
+      sleep 60
+      remaining=$((remaining - 60))
+    done
+
+    echo "[DISPATCHER] Restarting run process to clear stuck states and retry..." >&2
+    # Perform full CSV cleanup to reset stuck states, just like at startup
+    cleanup_csv_full || {
+      echo "[ERROR] CSV cleanup failed after API limit wait" >&2
+      # Continue anyway - better to try than to stop completely
+    }
+    # Clear the flag and continue - this restarts the main loop
+    api_limit_reached=false
+    continue
   fi
 
   # Periodic cleanup of stuck candidates (every 5 iterations, ~25 seconds)
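
The behavioral change here: hitting API limits no longer breaks out of the main loop. The dispatcher sleeps five minutes (announcing the minutes left), re-runs cleanup_csv_full, clears the flag, and continues. The same control flow sketched in Python for clarity (the package implements it in shell):

```python
import time

def wait_with_countdown(total_seconds: int = 300, step: int = 60) -> None:
    """Sleep in one-minute steps, printing the minutes left before each step."""
    remaining = total_seconds
    while remaining > 0:
        if remaining % 60 == 0:
            print(f"[DISPATCHER] Restarting in {remaining // 60} minutes...")
        time.sleep(step)
        remaining -= step
```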
@@ -104,7 +104,7 @@ call_ai_for_evolution() {
   local retry_count=0
   local max_retries=3
   local wait_seconds=300 # Start with 5 minutes
-  local max_wait_seconds=1800 # Cap at 30 minutes
+  local max_wait_seconds=300 # Cap at 5 minutes
 
   while true; do
     # Capture file state before AI call
@@ -407,19 +407,41 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
     eval_arg=""
   fi
   local eval_cmd=("$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$eval_arg")
-
+
   # Add memory limiting if configured
+  # CRITICAL: Use multiple layers of protection (ulimit + Python wrapper + monitoring)
+  local memory_protection=""
   if [[ -n "$MEMORY_LIMIT_MB" ]] && [[ "$MEMORY_LIMIT_MB" -gt 0 ]]; then
+    # Layer 1: ulimit for hard memory limit (kernel-enforced)
+    # IMPORTANT: Use -m (RSS) not -v (virtual memory) because:
+    # - Neural networks use mmap() which bypasses RLIMIT_AS (-v)
+    # - RSS limit is more reliable for actual memory consumption
+    # Convert MB to KB for ulimit
+    local memory_limit_kb=$((MEMORY_LIMIT_MB * 1024))
+
+    # Try -m first (RSS limit), fall back to -v if not supported
+    if ulimit -m $memory_limit_kb 2>/dev/null; then
+      memory_protection="ulimit -m $memory_limit_kb 2>/dev/null; "
+      echo "[MEMORY] Layer 1: ulimit -m ${memory_limit_kb}KB (RSS limit - catches neural networks)" >&2
+    else
+      memory_protection="ulimit -v $memory_limit_kb 2>/dev/null; "
+      echo "[MEMORY] Layer 1: ulimit -v ${memory_limit_kb}KB (fallback - may not catch neural networks)" >&2
+    fi
+
+    # Layer 2: Python wrapper with PROCESS TREE monitoring (backup protection)
     eval_cmd=("$PYTHON_CMD" "$SCRIPT_DIR/../lib/memory_limit_wrapper.py" "$MEMORY_LIMIT_MB" "${eval_cmd[@]}")
+
+    echo "[MEMORY] Layer 2: Python process tree monitoring (kills entire subprocess tree)" >&2
   fi
-
+
   # Add timeout if configured
   [[ -n "$timeout_seconds" ]] && eval_cmd=(timeout "$timeout_seconds" "${eval_cmd[@]}")
-
-  # Run evaluation with tee to both display and capture output
+
+  # Run evaluation with memory protection, tee to both display and capture output
   # Use stdbuf to disable buffering for real-time output
   # IMPORTANT: Use PIPESTATUS to get the exit code of the evaluation command, not tee
-  stdbuf -o0 -e0 "${eval_cmd[@]}" 2>&1 | tee "$eval_output_file" >&2
+  # The subshell ensures ulimit is applied before the command runs
+  stdbuf -o0 -e0 bash -c "${memory_protection}$(printf '%q ' "${eval_cmd[@]}")" 2>&1 | tee "$eval_output_file" >&2
   local eval_exit_code=${PIPESTATUS[0]} # Get exit code of first command in pipe
 
   if [[ $eval_exit_code -eq 0 ]]; then
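
The `bash -c "${memory_protection}$(printf '%q ' ...)"` construction exists because ulimit only affects the shell that runs it, so the limit and the evaluator must execute in the same subshell; printf '%q ' re-quotes the argv into a single safe command string. A Python analogue of that quoting trick, using shlex.join (an illustration assuming `bash` is available; note that whether `ulimit -m` is actually enforced varies by OS):

```python
import shlex
import subprocess

def run_with_rss_ulimit(argv: list, limit_kb: int) -> int:
    """Run argv under `ulimit -m` in a single bash subshell.
    shlex.join plays the role of printf '%q ' here."""
    script = f"ulimit -m {limit_kb} 2>/dev/null; " + shlex.join(argv)
    return subprocess.run(["bash", "-c", script]).returncode
```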
@@ -4,6 +4,16 @@ Memory-limited execution wrapper for claude-evolve evaluations.
 
 This script runs a command with memory limits to prevent runaway algorithms
 from consuming all system memory and crashing the machine.
+
+CRITICAL: Multi-layer protection approach (both must work together):
+1. ulimit -m (RSS limit) set by calling shell script - kernel-enforced, catches neural networks
+2. This Python wrapper monitors ENTIRE PROCESS TREE every 0.1s and kills if limit exceeded
+
+AIDEV-NOTE: Previous bugs fixed:
+- ulimit -v (virtual memory) doesn't catch neural networks that use mmap()
+- Was only monitoring direct child, not entire process tree (missed grandchildren)
+- Monitoring interval was 0.5s - too slow for fast memory allocations
+- Resource limit failures were silently ignored instead of failing fast
 """
 import sys
 import os
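
For reference, the shell hunk above shows the wrapper's call shape: the limit in MB comes first, followed by the command to supervise. A hypothetical standalone invocation (the evaluator path and candidate id are made up for illustration):

```python
import subprocess
import sys

limit_mb = 2048  # hypothetical limit
cmd = [sys.executable, "evaluator.py", "candidate_001"]  # hypothetical evaluator
subprocess.run(
    [sys.executable, "lib/memory_limit_wrapper.py", str(limit_mb), *cmd]
)
```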
@@ -11,119 +21,197 @@ import subprocess
 import signal
 import time
 import resource
-from typing import Optional
+from typing import Optional, Tuple
 
-def set_memory_limit(limit_mb: int) -> None:
-    """Set memory limit in MB using resource module."""
+def verify_memory_limit_set(limit_mb: int) -> Tuple[bool, str]:
+    """Verify that memory limits are actually enforced."""
+    try:
+        limit_bytes = limit_mb * 1024 * 1024
+
+        # Check RLIMIT_AS (virtual memory)
+        soft_as, hard_as = resource.getrlimit(resource.RLIMIT_AS)
+        if soft_as != resource.RLIM_INFINITY and soft_as <= limit_bytes * 1.1:
+            return True, f"RLIMIT_AS set to {soft_as / (1024*1024):.0f}MB"
+
+        # Check RLIMIT_DATA (data segment)
+        try:
+            soft_data, hard_data = resource.getrlimit(resource.RLIMIT_DATA)
+            if soft_data != resource.RLIM_INFINITY and soft_data <= limit_bytes * 1.1:
+                return True, f"RLIMIT_DATA set to {soft_data / (1024*1024):.0f}MB"
+        except (OSError, ValueError):
+            pass
+
+        return False, "No hard memory limits detected"
+    except Exception as e:
+        return False, f"Error checking limits: {e}"
+
+def set_memory_limit(limit_mb: int) -> bool:
+    """
+    Set memory limit in MB using resource module.
+    Returns True if successful, False otherwise.
+    """
     try:
         # Convert MB to bytes
         limit_bytes = limit_mb * 1024 * 1024
-
+
         # Set virtual memory limit (address space)
         # On macOS this is the most reliable way to limit memory
         resource.setrlimit(resource.RLIMIT_AS, (limit_bytes, limit_bytes))
-
+
         # Also try to set data segment limit if available
         try:
             resource.setrlimit(resource.RLIMIT_DATA, (limit_bytes, limit_bytes))
         except (OSError, ValueError):
             # Not available on all systems
             pass
-
-        print(f"[MEMORY] Set memory limit to {limit_mb}MB", file=sys.stderr)
-
+
+        # Verify it was actually set
+        is_set, msg = verify_memory_limit_set(limit_mb)
+        if is_set:
+            print(f"[MEMORY] ✓ Hard limit enforced: {msg}", file=sys.stderr)
+            return True
+        else:
+            print(f"[MEMORY] ✗ Hard limit NOT enforced: {msg}", file=sys.stderr)
+            return False
+
     except (OSError, ValueError) as e:
-        print(f"[MEMORY] Warning: Could not set memory limit: {e}", file=sys.stderr)
+        print(f"[MEMORY] Could not set memory limit: {e}", file=sys.stderr)
+        return False
+
+def get_process_tree_memory_native(pid: int) -> float:
+    """Get total memory usage of entire process group using native ps command."""
+    try:
+        # Get the process group ID
+        pgid = os.getpgid(pid)
+
+        # Get all processes in the process group
+        ps_result = subprocess.run(
+            ["ps", "-o", "rss=", "-g", str(pgid)],
+            capture_output=True,
+            text=True,
+            timeout=1
+        )
+
+        if ps_result.returncode != 0:
+            return 0.0
+
+        # Sum all RSS values from the process group
+        total_rss_kb = 0
+        for line in ps_result.stdout.strip().split('\n'):
+            line = line.strip()
+            if line:
+                try:
+                    total_rss_kb += int(line)
+                except ValueError:
+                    continue
+
+        # Convert KB to MB
+        return total_rss_kb / 1024.0
+    except Exception:
+        return 0.0
 
 def monitor_memory_usage_native(process: subprocess.Popen, limit_mb: int) -> Optional[str]:
-    """Monitor process memory usage using native tools and kill if it exceeds limits."""
-    # print(f"[MEMORY] Starting native monitoring for PID {process.pid} with limit {limit_mb}MB", file=sys.stderr)
-
+    """Monitor ENTIRE PROCESS TREE memory usage using native tools and kill if it exceeds limits."""
+    print(f"[MEMORY] Monitoring process tree from root PID {process.pid} (limit: {limit_mb}MB)", file=sys.stderr)
+
     while process.poll() is None:
         try:
-            # Use ps command to get memory usage
-            ps_result = subprocess.run(
-                ["ps", "-o", "rss=", "-p", str(process.pid)],
-                capture_output=True,
-                text=True,
-                timeout=1
-            )
-
-            if ps_result.returncode == 0 and ps_result.stdout.strip():
-                # ps returns RSS in KB, convert to MB
-                memory_kb = int(ps_result.stdout.strip())
-                memory_mb = memory_kb / 1024
-
-                # print(f"[MEMORY] PID {process.pid} using {memory_mb:.1f}MB (limit: {limit_mb}MB)", file=sys.stderr)
-
-                if memory_mb > limit_mb:
-                    print(f"[MEMORY] Process exceeded {limit_mb}MB limit (using {memory_mb:.1f}MB), terminating", file=sys.stderr)
-                    # Kill the entire process group - fix race condition
-                    try:
-                        pgid = os.getpgid(process.pid)
-                        os.killpg(pgid, signal.SIGTERM)
-                    except ProcessLookupError:
-                        return f"Memory limit exceeded: {memory_mb:.1f}MB > {limit_mb}MB"
-
-                    time.sleep(2) # Give it time to cleanup
-
-                    try:
-                        if process.poll() is None:
-                            pgid = os.getpgid(process.pid)
-                            os.killpg(pgid, signal.SIGKILL)
-                    except ProcessLookupError:
-                        pass
+            # Get total memory for entire process tree
+            memory_mb = get_process_tree_memory_native(process.pid)
+
+            if memory_mb > limit_mb:
+                print(f"[MEMORY] Process tree exceeded {limit_mb}MB limit (using {memory_mb:.1f}MB), terminating entire tree", file=sys.stderr)
+                # Kill the entire process group
+                try:
+                    pgid = os.getpgid(process.pid)
+                    os.killpg(pgid, signal.SIGTERM)
+                except ProcessLookupError:
                     return f"Memory limit exceeded: {memory_mb:.1f}MB > {limit_mb}MB"
-
-            time.sleep(0.5) # Check every 500ms
-
+
+                time.sleep(2) # Give it time to cleanup
+
+                try:
+                    if process.poll() is None:
+                        pgid = os.getpgid(process.pid)
+                        os.killpg(pgid, signal.SIGKILL)
+                except ProcessLookupError:
+                    pass
+                return f"Memory limit exceeded: {memory_mb:.1f}MB > {limit_mb}MB"
+
+            time.sleep(0.1) # Check every 100ms for faster response
+
         except (subprocess.TimeoutExpired, ValueError, ProcessLookupError):
             # Process might have terminated or ps command failed
-            time.sleep(0.5)
+            time.sleep(0.1)
             continue
-
-    # print(f"[MEMORY] Monitoring stopped for PID {process.pid}", file=sys.stderr)
+
     return None
 
+def get_process_tree_memory_psutil(ps_process) -> float:
+    """Get total memory usage of entire process tree using psutil."""
+    try:
+        import psutil
+        total_mb = 0.0
+
+        # Get memory of root process
+        try:
+            total_mb += ps_process.memory_info().rss / (1024 * 1024)
+        except (psutil.NoSuchProcess, psutil.AccessDenied):
+            return 0.0
+
+        # Get memory of all children (recursive)
+        try:
+            for child in ps_process.children(recursive=True):
+                try:
+                    total_mb += child.memory_info().rss / (1024 * 1024)
+                except (psutil.NoSuchProcess, psutil.AccessDenied):
+                    continue
+        except (psutil.NoSuchProcess, psutil.AccessDenied):
+            pass
+
+        return total_mb
+    except ImportError:
+        return 0.0
+
 def monitor_memory_usage(process: subprocess.Popen, limit_mb: int) -> Optional[str]:
-    """Monitor process memory usage and kill if it exceeds limits."""
+    """Monitor ENTIRE PROCESS TREE memory usage and kill if it exceeds limits."""
     try:
         import psutil
         ps_process = psutil.Process(process.pid)
-
+        print(f"[MEMORY] Monitoring process tree from root PID {process.pid} (limit: {limit_mb}MB, using psutil)", file=sys.stderr)
+
         while process.poll() is None:
             try:
-                # Get memory usage in MB
-                memory_info = ps_process.memory_info()
-                memory_mb = memory_info.rss / (1024 * 1024)
-
+                # Get total memory for entire process tree
+                memory_mb = get_process_tree_memory_psutil(ps_process)
+
                 if memory_mb > limit_mb:
-                    print(f"[MEMORY] Process exceeded {limit_mb}MB limit (using {memory_mb:.1f}MB), terminating", file=sys.stderr)
-                    # Kill the entire process group - fix race condition
+                    print(f"[MEMORY] Process tree exceeded {limit_mb}MB limit (using {memory_mb:.1f}MB), terminating entire tree", file=sys.stderr)
+                    # Kill the entire process group
                     try:
                         pgid = os.getpgid(process.pid)
                         os.killpg(pgid, signal.SIGTERM)
                     except ProcessLookupError:
                         return f"Memory limit exceeded: {memory_mb:.1f}MB > {limit_mb}MB"
-
+
                     time.sleep(2) # Give it time to cleanup
-
+
                     try:
                         if process.poll() is None:
-                             pgid = os.getpgid(process.pid)
+                            pgid = os.getpgid(process.pid)
                             os.killpg(pgid, signal.SIGKILL)
                     except ProcessLookupError:
                         pass
                     return f"Memory limit exceeded: {memory_mb:.1f}MB > {limit_mb}MB"
-
-                time.sleep(0.5) # Check every 500ms
+
+                time.sleep(0.1) # Check every 100ms for faster response
             except (psutil.NoSuchProcess, psutil.AccessDenied):
                 # Process already terminated
                 break
     except ImportError:
         # psutil not available, use native monitoring
         return monitor_memory_usage_native(process, limit_mb)
-
+
     return None
 
 def validate_memory_limit(limit_mb: int) -> bool:
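
Both monitors share the same kill escalation: SIGTERM to the process group, a two-second grace period, then SIGKILL for anything still alive. Extracted into a standalone sketch (assumes the child was started in its own process group, e.g. with start_new_session=True, so killpg targets only its tree):

```python
import os
import signal
import subprocess
import time

def kill_process_group(process: subprocess.Popen, grace_seconds: float = 2.0) -> None:
    """SIGTERM the child's process group, wait, then SIGKILL survivors."""
    try:
        pgid = os.getpgid(process.pid)
        os.killpg(pgid, signal.SIGTERM)  # polite shutdown request
    except ProcessLookupError:
        return  # already gone
    time.sleep(grace_seconds)  # give handlers time to clean up
    if process.poll() is None:
        try:
            os.killpg(pgid, signal.SIGKILL)  # force-kill stragglers
        except ProcessLookupError:
            pass
```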
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "claude-evolve",
-  "version": "1.8.12",
+  "version": "1.8.14",
   "bin": {
     "claude-evolve": "./bin/claude-evolve",
     "claude-evolve-main": "./bin/claude-evolve-main",