claude-evolve 1.9.7 → 1.9.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/ai_cli.py CHANGED
@@ -10,9 +10,16 @@ import subprocess
  import sys
  import tempfile
  import time
+ from datetime import datetime
  from pathlib import Path
  from typing import Optional, Tuple, List

+
+ def _log(msg: str):
+     """Log with timestamp. AI CLI uses its own logging to avoid import cycles."""
+     ts = datetime.now().strftime("%H:%M:%S")
+     print(f"[{ts}] [AI] {msg}", file=sys.stderr, flush=True)
+
  # Path to ai-cli.sh relative to this file
  SCRIPT_DIR = Path(__file__).parent
  AI_CLI_PATH = SCRIPT_DIR / "ai-cli.sh"
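The new module-level `_log` helper gives `ai_cli.py` timestamped, flushed stderr output without importing the shared `lib/log.py` module (avoiding an import cycle). A minimal, self-contained sketch of the output format, with the helper copied from the hunk above and an illustrative timestamp in the comment:

```python
# Sketch: the _log helper added above, copied here so the snippet runs standalone.
import sys
from datetime import datetime

def _log(msg: str):
    ts = datetime.now().strftime("%H:%M:%S")
    print(f"[{ts}] [AI] {msg}", file=sys.stderr, flush=True)

_log("Round 1/10: trying 3 models")
# stderr: [14:02:31] [AI] Round 1/10: trying 3 models   (timestamp illustrative)
```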
@@ -305,21 +312,25 @@ def call_ai_with_backoff(
  shuffled_models = models.copy()
  random.shuffle(shuffled_models)

- print(f"[AI] Round {round_num + 1}/{max_rounds}: trying {len(shuffled_models)} models", file=sys.stderr)
+ _log(f"Round {round_num + 1}/{max_rounds}: trying {len(shuffled_models)} models")

  for model in shuffled_models:
  try:
+ _log(f"Trying {model}...")
  output, model_name = call_ai_model(prompt, model, working_dir, env_vars)
  if round_num > 0:
- print(f"[AI] Succeeded on round {round_num + 1} with {model}", file=sys.stderr)
+ _log(f"Succeeded on round {round_num + 1} with {model}")
+ else:
+ _log(f"Success with {model}")
  return output, model_name
  except AIError as e:
+ _log(f"{model} failed: {str(e)[:60]}...")
  last_errors[model] = str(e)
  # Continue to next model

  # All models failed in this round
  if round_num < max_rounds - 1:
- print(f"[AI] All models failed in round {round_num + 1}, waiting {wait_time}s before retry...", file=sys.stderr)
+ _log(f"All models failed in round {round_num + 1}, waiting {wait_time}s...")
  time.sleep(wait_time)
  # Exponential backoff: 60 -> 120 -> 240 -> 480 (capped at max_wait)
  wait_time = min(wait_time * 2, max_wait)
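The hunk above replaces the old per-round prints with `_log` calls and keeps the exponential backoff between failed rounds. A small sketch of the resulting wait schedule, assuming the defaults `initial_wait=60` and `max_wait=600` that appear elsewhere in this diff:

```python
# Sketch of the backoff schedule when every round fails,
# assuming initial_wait=60 and max_wait=600 (defaults shown in this diff).
wait_time, max_wait = 60, 600
schedule = []
for _ in range(6):                       # six failed rounds, for illustration
    schedule.append(wait_time)
    wait_time = min(wait_time * 2, max_wait)
print(schedule)                          # [60, 120, 240, 480, 600, 600]
```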
@@ -778,16 +778,22 @@ class EvolutionCSV:

  return f"{gen_prefix}-{max_id + 1:03d}"

- def get_next_ids(self, generation: int, count: int) -> List[str]:
+ def get_next_ids(self, generation: int, count: int, claimed_ids: Optional[List[str]] = None) -> List[str]:
  """
  Get multiple next available IDs for a generation.

  Args:
  generation: Generation number
  count: Number of IDs to generate
+ claimed_ids: Optional list of IDs already claimed in this session
+ (not yet written to CSV). Prevents duplicate IDs.

  Returns:
  List of ID strings
+
+ AIDEV-NOTE: The claimed_ids parameter is critical for ideation where
+ multiple strategies run before writing to CSV. Without it, each strategy
+ would get overlapping IDs like gen75-001, gen75-002 for each strategy.
  """
  rows = self._read_csv()
  gen_prefix = f"gen{generation:02d}"
@@ -796,6 +802,7 @@ class EvolutionCSV:
  has_header = rows and rows[0] and rows[0][0].lower() == 'id'
  start_idx = 1 if has_header else 0

+ # Check CSV for existing IDs
  for row in rows[start_idx:]:
  if not self.is_valid_candidate_row(row):
  continue
@@ -808,6 +815,16 @@ class EvolutionCSV:
  except (ValueError, IndexError):
  pass

+ # Also check claimed IDs (not yet in CSV)
+ if claimed_ids:
+ for claimed_id in claimed_ids:
+ if claimed_id.startswith(gen_prefix + '-'):
+ try:
+ id_num = int(claimed_id.split('-')[1])
+ max_id = max(max_id, id_num)
+ except (ValueError, IndexError):
+ pass
+
  return [f"{gen_prefix}-{max_id + 1 + i:03d}" for i in range(count)]

  def append_candidates(self, candidates: List[Dict[str, str]]) -> int:
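The `claimed_ids` parameter lets several ideation strategies allocate IDs in one session before anything is written back to the CSV. A toy, standalone sketch of the allocation rule (not the `EvolutionCSV` method itself) showing how a second strategy skips IDs the first one claimed:

```python
# Toy sketch of the ID-allocation rule above: the highest index is taken across
# both existing CSV IDs and claimed-but-unwritten IDs, so strategies never overlap.
from typing import List, Optional

def next_ids(existing: List[str], generation: int, count: int,
             claimed_ids: Optional[List[str]] = None) -> List[str]:
    gen_prefix = f"gen{generation:02d}"
    max_id = 0
    for cid in list(existing) + list(claimed_ids or []):
        if cid.startswith(gen_prefix + '-'):
            try:
                max_id = max(max_id, int(cid.split('-')[1]))
            except (ValueError, IndexError):
                pass
    return [f"{gen_prefix}-{max_id + 1 + i:03d}" for i in range(count)]

claimed: List[str] = []
first = next_ids(["gen75-001"], 75, 2, claimed)    # ['gen75-002', 'gen75-003']
claimed.extend(first)
second = next_ids(["gen75-001"], 75, 2, claimed)   # ['gen75-004', 'gen75-005']
```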
@@ -855,6 +872,9 @@ class EvolutionCSV:

  Returns:
  Dict with total, pending, complete, failed counts
+
+ AIDEV-NOTE: Uses is_pending_candidate() for pending count to ensure
+ consistency between stats and what workers actually find.
  """
  rows = self._read_csv()
  if not rows:
@@ -870,16 +890,19 @@ class EvolutionCSV:
  continue

  stats['total'] += 1
- status = row[4].strip().lower() if len(row) > 4 else ''

- if status == 'complete':
- stats['complete'] += 1
- elif status == 'running':
- stats['running'] += 1
- elif status.startswith('failed'):
- stats['failed'] += 1
- else:
+ # Use is_pending_candidate for consistency with workers
+ if self.is_pending_candidate(row):
  stats['pending'] += 1
+ else:
+ status = row[4].strip().lower() if len(row) > 4 else ''
+ if status == 'complete':
+ stats['complete'] += 1
+ elif status == 'running':
+ stats['running'] += 1
+ elif status.startswith('failed'):
+ stats['failed'] += 1
+ # Anything else that's not pending gets counted as failed/other

  return stats

@@ -93,20 +93,28 @@ class IdeationStrategy(ABC):
  pass

  def generate(self, context: IdeationContext, count: int,
- max_rounds: int = 10, initial_wait: int = 60, max_wait: int = 600) -> List[Idea]:
+ max_rounds: int = 10, initial_wait: int = 60, max_wait: int = 600,
+ claimed_ids: List[str] = None) -> List[Idea]:
  """Generate ideas using this strategy with round-based retry and backoff.

  AIDEV-NOTE: Uses call_ai_with_backoff for robust retry handling.
  Each round tries ALL models. If all fail, waits with exponential backoff.
+ claimed_ids tracks IDs already claimed by previous strategies in this run.
+ IDs are added to claimed_ids immediately to prevent reuse even on failure.
  """
  if count <= 0:
  return []
+ if claimed_ids is None:
+ claimed_ids = []

- print(f"[IDEATE] Running {self.name} strategy for {count} ideas", file=sys.stderr)
+ print(f"[IDEATE] Running {self.name} strategy for {count} ideas", file=sys.stderr, flush=True)

- # Get next IDs
- ids = self.csv.get_next_ids(context.generation, count)
- print(f"[IDEATE] Using IDs: {', '.join(ids)}", file=sys.stderr)
+ # Get next IDs, avoiding any already claimed in this ideation run
+ ids = self.csv.get_next_ids(context.generation, count, claimed_ids=claimed_ids)
+ print(f"[IDEATE] Using IDs: {', '.join(ids)}", file=sys.stderr, flush=True)
+
+ # Immediately claim these IDs (even if AI fails, don't reuse them)
+ claimed_ids.extend(ids)

  # Create temp CSV with stub rows
  temp_csv = Path(self.config.evolution_dir) / f"temp-csv-{os.getpid()}.csv"
@@ -398,6 +406,7 @@ class Ideator:
  print(f"[IDEATE] Top performers: {len(context.top_performers)}", file=sys.stderr)

  all_ideas: List[Idea] = []
+ claimed_ids: List[str] = [] # Track IDs claimed across all strategies
  strategies_succeeded = 0

  for strategy, count in self.strategies:
@@ -408,10 +417,12 @@ class Ideator:
  context, count,
  max_rounds=self.config.max_rounds,
  initial_wait=self.config.initial_wait,
- max_wait=self.config.max_wait
+ max_wait=self.config.max_wait,
+ claimed_ids=claimed_ids # Pass already-claimed IDs
  )

  if ideas:
+ # IDs are already tracked in generate(), just count success
  strategies_succeeded += 1

  # Filter for novelty
package/lib/evolve_run.py CHANGED
@@ -26,6 +26,8 @@ SCRIPT_DIR = Path(__file__).parent
  sys.path.insert(0, str(SCRIPT_DIR.parent))

  from lib.evolution_csv import EvolutionCSV
+ from lib.log import log, log_error, log_warn, set_prefix
+ set_prefix("RUN")


  @dataclass
@@ -56,7 +58,8 @@ class WorkerPool:
  if len(self.workers) >= self.max_workers:
  return None

- cmd = [sys.executable, str(self.worker_script)]
+ # Use -u for unbuffered output so logs stream in real-time
+ cmd = [sys.executable, '-u', str(self.worker_script)]
  if self.config_path:
  cmd.extend(['--config', self.config_path])
  if self.timeout:
@@ -67,10 +70,10 @@ class WorkerPool:
  # This provides real-time visibility into which models are being used
  proc = subprocess.Popen(cmd)
  self.workers[proc.pid] = proc
- print(f"[RUN] Spawned worker {proc.pid}", file=sys.stderr)
+ log(f"Spawned worker {proc.pid}")
  return proc.pid
  except Exception as e:
- print(f"[RUN] Failed to spawn worker: {e}", file=sys.stderr)
+ log_error(f"Failed to spawn worker: {e}")
  return None

  def cleanup_finished(self) -> List[int]:
@@ -83,7 +86,7 @@ class WorkerPool:
  if ret is not None:
  finished_pids.append(pid)
  exit_codes.append(ret)
- print(f"[RUN] Worker {pid} exited with code {ret}", file=sys.stderr)
+ log(f"Worker {pid} exited with code {ret}")

  for pid in finished_pids:
  del self.workers[pid]
@@ -95,7 +98,7 @@ class WorkerPool:
  if not self.workers:
  return

- print(f"[RUN] Shutting down {len(self.workers)} workers...", file=sys.stderr)
+ log(f"Shutting down {len(self.workers)} workers...")

  # Send SIGTERM
  for pid, proc in self.workers.items():
@@ -115,7 +118,7 @@ class WorkerPool:
  for pid, proc in list(self.workers.items()):
  try:
  proc.kill()
- print(f"[RUN] Force killed worker {pid}", file=sys.stderr)
+ log(f"Force killed worker {pid}")
  except Exception:
  pass

@@ -151,36 +154,36 @@ class EvolutionRunner:
  def _handle_signal(self, signum, frame):
  """Handle termination signal."""
  sig_name = signal.Signals(signum).name
- print(f"\n[RUN] Received {sig_name}, shutting down...", file=sys.stderr)
+ log(f"Received {sig_name}, shutting down...")
  self.shutdown_requested = True
  self.pool.shutdown()
  sys.exit(128 + signum)

  def cleanup_csv(self):
  """Clean up CSV at startup."""
- print("[RUN] Cleaning up CSV...", file=sys.stderr)
+ log("Cleaning up CSV...")
  with EvolutionCSV(self.config.csv_path) as csv:
  # Remove duplicates
  removed = csv.remove_duplicate_candidates()
  if removed:
- print(f"[RUN] Removed {removed} duplicate candidates", file=sys.stderr)
+ log(f"Removed {removed} duplicate candidates")

  # Reset stuck candidates
  reset = csv.reset_stuck_candidates()
  if reset:
- print(f"[RUN] Reset {reset} stuck candidates", file=sys.stderr)
+ log(f"Reset {reset} stuck candidates")

  # Clean corrupted status fields
  fixed = csv.cleanup_corrupted_status_fields()
  if fixed:
- print(f"[RUN] Fixed {fixed} corrupted status fields", file=sys.stderr)
+ log(f"Fixed {fixed} corrupted status fields")

  def ensure_baseline(self):
  """Ensure baseline entry exists in CSV."""
  with EvolutionCSV(self.config.csv_path) as csv:
  info = csv.get_candidate_info('baseline-000')
  if not info:
- print("[RUN] Adding baseline-000 entry", file=sys.stderr)
+ log("Adding baseline-000 entry")
  csv.append_candidates([{
  'id': 'baseline-000',
  'basedOnId': '',
@@ -200,14 +203,14 @@ class EvolutionRunner:

  # Need minimum completed algorithms to learn from
  if stats['complete'] < self.config.min_completed_for_ideation:
- print(f"[RUN] Not enough completed ({stats['complete']} < {self.config.min_completed_for_ideation})", file=sys.stderr)
+ log(f"Not enough completed ({stats['complete']} < {self.config.min_completed_for_ideation})")
  return False

  return True

  def run_ideation(self) -> bool:
  """Run ideation. Returns True on success."""
- print("[RUN] Running ideation...", file=sys.stderr)
+ log("Running ideation...")

  cmd = [sys.executable, str(self.ideate_script)]
  if self.config.config_path:
@@ -221,18 +224,18 @@ class EvolutionRunner:
  cwd=self.config.evolution_dir
  )

- # Log output
+ # Forward ideation output (already has timestamps from ideate module)
  if result.stdout:
  for line in result.stdout.strip().split('\n'):
- print(f"[IDEATE] {line}", file=sys.stderr)
+ print(line, file=sys.stderr, flush=True)
  if result.stderr:
  for line in result.stderr.strip().split('\n'):
- print(f"[IDEATE] {line}", file=sys.stderr)
+ print(line, file=sys.stderr, flush=True)

  return result.returncode == 0

  except Exception as e:
- print(f"[RUN] Ideation failed: {e}", file=sys.stderr)
+ log_error(f"Ideation failed: {e}")
  return False

  def run(self) -> int:
@@ -242,9 +245,9 @@ class EvolutionRunner:
  Returns:
  Exit code
  """
- print("[RUN] Starting evolution run", file=sys.stderr)
- print(f"[RUN] Max workers: {self.config.max_workers}", file=sys.stderr)
- print(f"[RUN] Auto ideate: {self.config.auto_ideate}", file=sys.stderr)
+ log("Starting evolution run")
+ log(f"Max workers: {self.config.max_workers}")
+ log(f"Auto ideate: {self.config.auto_ideate}")

  # Startup cleanup
  self.cleanup_csv()
@@ -260,7 +263,7 @@ class EvolutionRunner:

  # Check for API limit
  if 2 in exit_codes or 3 in exit_codes:
- print("[RUN] API limit reached, waiting 5 minutes...", file=sys.stderr)
+ log("API limit reached, waiting 5 minutes...")
  self.api_limit_reached = True
  time.sleep(300) # 5 minute wait
  self.api_limit_reached = False
@@ -274,7 +277,7 @@ class EvolutionRunner:

  # Get stats
  stats = self.get_stats()
- print(f"[RUN] Stats: {stats['pending']} pending, {stats['complete']} complete, {stats['running']} running", file=sys.stderr)
+ log(f"Stats: {stats['pending']} pending, {stats['complete']} complete, {stats['running']} running")

  # Check if we need ideation
  if stats['pending'] == 0 and self.pool.active_count == 0:
@@ -290,11 +293,11 @@ class EvolutionRunner:
  if self.run_ideation():
  continue # Loop back to check for new work
  else:
- print("[RUN] Ideation failed, waiting...", file=sys.stderr)
+ log_warn("Ideation failed, waiting...")
  time.sleep(30)
  continue
  else:
- print("[RUN] Evolution complete!", file=sys.stderr)
+ log("Evolution complete!")
  break

  # Spawn workers for pending work
@@ -309,7 +312,7 @@ class EvolutionRunner:

  # Cleanup
  self.pool.shutdown()
- print("[RUN] Exiting", file=sys.stderr)
+ log("Exiting")
  return 0


@@ -378,10 +381,10 @@ def main():
  sys.exit(runner.run())

  except FileNotFoundError as e:
- print(f"Error: {e}", file=sys.stderr)
+ log_error(f"Config error: {e}")
  sys.exit(1)
  except Exception as e:
- print(f"Error: {e}", file=sys.stderr)
+ log_error(f"Error: {e}")
  import traceback
  traceback.print_exc()
  sys.exit(1)
@@ -30,6 +30,9 @@ from typing import Optional, Tuple, Dict, Any
  SCRIPT_DIR = Path(__file__).parent
  sys.path.insert(0, str(SCRIPT_DIR.parent))

+ from lib.log import log, log_error, log_warn, log_debug, set_prefix
+ set_prefix("WORKER")
+
  from lib.evolution_csv import EvolutionCSV
  from lib.ai_cli import call_ai_with_backoff, get_git_protection_warning, AIError

@@ -47,6 +50,7 @@ class Config:
  memory_limit_mb: int = 0
  timeout_seconds: int = 600
  max_candidates: int = 5
+ max_validation_retries: int = 3 # Max attempts to fix validation errors (if validator.py exists)
  # Retry configuration with exponential backoff
  max_rounds: int = 10
  initial_wait: int = 60
@@ -78,10 +82,10 @@ class Worker:
  def _handle_signal(self, signum, frame):
  """Handle termination signal - reset current candidate to pending."""
  sig_name = signal.Signals(signum).name
- print(f"[WORKER-{os.getpid()}] Received {sig_name}", file=sys.stderr)
+ log(f"Received {sig_name}")

  if self.current_candidate_id:
- print(f"[WORKER-{os.getpid()}] Resetting {self.current_candidate_id} to pending", file=sys.stderr)
+ log(f"Resetting {self.current_candidate_id} to pending")
  try:
  with EvolutionCSV(self.config.csv_path) as csv:
  info = csv.get_candidate_info(self.current_candidate_id)
@@ -90,7 +94,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
  if status not in ('complete', 'failed', 'failed-ai-retry', 'failed-parent-missing'):
  csv.update_candidate_status(self.current_candidate_id, 'pending')
  except Exception as e:
- print(f"[WORKER-{os.getpid()}] Error resetting status: {e}", file=sys.stderr)
+ log(f"Error resetting status: {e}")

  sys.exit(128 + signum)

@@ -169,14 +173,14 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
  hash_after = self._file_hash(target_file) if target_file.exists() else None

  if hash_before != hash_after and hash_after is not None:
- print(f"[WORKER-{os.getpid()}] AI successfully modified file (model: {model})", file=sys.stderr)
+ log(f"AI successfully modified file (model: {model})")
  return True, model
  else:
- print(f"[WORKER-{os.getpid()}] AI completed but did not modify file", file=sys.stderr)
+ log(f"AI completed but did not modify file")
  return False, model

  except AIError as e:
- print(f"[WORKER-{os.getpid()}] All AI retries exhausted: {e}", file=sys.stderr)
+ log_error(f"All AI retries exhausted: {e}")
  return False, ""

  def _file_hash(self, path: Path) -> Optional[str]:
@@ -199,6 +203,136 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
  except Exception:
  return False

+ def _find_validator(self) -> Optional[Path]:
+ """
+ Auto-detect validator.py in the evolution directory.
+ No config required - if validator.py exists, we use it.
+ """
+ validator_path = Path(self.config.evolution_dir) / "validator.py"
+ if validator_path.exists():
+ return validator_path
+ return None
+
+ def _run_validator(self, candidate_id: str) -> Tuple[bool, Dict[str, Any]]:
+ """
+ Run the validator (fast smoke test) before full evaluation.
+
+ AIDEV-NOTE: Auto-detects validator.py in evolution directory.
+ Returns exit code 0 on success, non-zero on failure.
+ Resilient to any output format - handles JSON, plain text, or nothing.
+
+ Returns:
+ Tuple of (success, error_info_dict)
+ - success: True if validation passed
+ - error_info: Dict with whatever info we could extract from output
+ """
+ validator_path = self._find_validator()
+ if not validator_path:
+ return True, {} # No validator found, skip
+
+ cmd = [self.config.python_cmd, str(validator_path), candidate_id]
+ log(f"Running validator: {' '.join(cmd)}")
+
+ try:
+ result = subprocess.run(
+ cmd,
+ capture_output=True,
+ text=True,
+ timeout=30, # Validator should be fast (~3 seconds)
+ cwd=self.config.evolution_dir
+ )
+
+ # Combine stdout and stderr for full context
+ stdout = result.stdout.strip() if result.stdout else ""
+ stderr = result.stderr.strip() if result.stderr else ""
+ combined_output = f"{stdout}\n{stderr}".strip()
+
+ # Try to extract structured info, but be resilient to any format
+ error_info = {'raw_output': combined_output}
+
+ # Try to parse JSON from stdout (validator may output JSON)
+ if stdout.startswith('{'):
+ try:
+ parsed = json.loads(stdout)
+ if isinstance(parsed, dict):
+ error_info.update(parsed)
+ except json.JSONDecodeError:
+ pass # Not valid JSON, that's fine
+
+ # If no structured error, use the raw output
+ if 'error' not in error_info and combined_output:
+ error_info['error'] = combined_output
+
+ if result.returncode == 0:
+ log("Validation passed")
+ return True, error_info
+ else:
+ error_type = error_info.get('error_type', 'validation_failed')
+ log_warn(f"Validation failed: {error_type}")
+ return False, error_info
+
+ except subprocess.TimeoutExpired:
+ log_error("Validator timed out")
+ return False, {'error': 'Validator timed out after 30 seconds', 'error_type': 'timeout'}
+ except Exception as e:
+ log_error(f"Validator error: {e}")
+ return False, {'error': str(e), 'error_type': 'exception'}
+
+ def _build_fix_prompt(self, candidate: Candidate, target_basename: str, error_info: Dict[str, Any]) -> str:
+ """
+ Build AI prompt to fix validation errors.
+
+ AIDEV-NOTE: Resilient to any error_info structure - uses whatever is available.
+ """
+ prompt = f"""{get_git_protection_warning()}
+
+ The code in {target_basename} failed validation. Please fix the errors and try again.
+
+ ## Validator Output
+
+ """
+ # Include whatever structured fields we have
+ if error_info.get('error_type'):
+ prompt += f"**Error Type:** {error_info['error_type']}\n\n"
+
+ if error_info.get('error'):
+ prompt += f"**Error:**\n{error_info['error']}\n\n"
+
+ if error_info.get('suggestion'):
+ prompt += f"**Suggested Fix:**\n{error_info['suggestion']}\n\n"
+
+ if error_info.get('traceback'):
+ tb = error_info['traceback']
+ # Truncate if too long
+ if len(tb) > 1500:
+ tb = "..." + tb[-1500:]
+ prompt += f"**Traceback:**\n```\n{tb}\n```\n\n"
+
+ # If we only have raw output (no structured fields), show that
+ if not any(error_info.get(k) for k in ('error', 'error_type', 'suggestion', 'traceback')):
+ raw = error_info.get('raw_output', 'No output captured')
+ # Truncate if needed
+ if len(raw) > 2000:
+ raw = raw[:2000] + "\n... (truncated)"
+ prompt += f"```\n{raw}\n```\n\n"
+
+ prompt += f"""## Instructions
+
+ 1. Read the file {target_basename} to understand the current code
+ 2. Identify the issue based on the validator output above
+ 3. Fix the code to resolve the validation error
+ 4. The fix should still implement: {candidate.description}
+
+ **CRITICAL:** Make sure to actually fix the error. Do not just add comments or make cosmetic changes.
+
+ To help debug, you can run the validator yourself:
+ ```
+ python validator.py {target_basename}
+ ```
+ """
+
+ return prompt
+
  def _run_evaluator(self, candidate_id: str, is_baseline: bool) -> Tuple[Optional[float], Dict[str, Any]]:
  """
  Run the evaluator.
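The new validation hooks expect an optional `validator.py` in the evolution directory: it is invoked as `python validator.py <candidate_id>`, must exit 0 on success and non-zero on failure, should finish well inside the 30-second timeout, and may print a JSON object on stdout whose `error`, `error_type`, `suggestion`, and `traceback` keys feed `_build_fix_prompt`. A hypothetical example of such a validator (the `evolution_<id>.py` filename and the `run()` entry point are assumptions, not part of the package contract):

```python
#!/usr/bin/env python3
# Hypothetical validator.py matching the contract _run_validator expects:
# exit 0 = pass, non-zero = fail, optional JSON error report on stdout.
import importlib.util
import json
import sys
import traceback

def main() -> int:
    candidate_id = sys.argv[1]
    path = f"evolution_{candidate_id}.py"     # assumed layout; adjust to your project
    try:
        spec = importlib.util.spec_from_file_location("candidate", path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)        # smoke test: does the file even import?
        if not hasattr(module, "run"):         # assumed project-specific entry point
            print(json.dumps({
                "error": "missing run() entry point",
                "error_type": "missing_entry_point",
                "suggestion": "define a top-level run() function",
            }))
            return 1
        return 0
    except Exception as exc:
        print(json.dumps({
            "error": str(exc),
            "error_type": type(exc).__name__,
            "traceback": traceback.format_exc(),
        }))
        return 1

if __name__ == "__main__":
    sys.exit(main())
```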
@@ -217,7 +351,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ

  cmd.extend([self.config.evaluator_path, eval_arg])

- print(f"[WORKER-{os.getpid()}] Running evaluator: {' '.join(cmd)}", file=sys.stderr)
+ log(f"Running evaluator: {' '.join(cmd)}")

  try:
  result = subprocess.run(
@@ -229,17 +363,17 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
  )

  if result.returncode != 0:
- print(f"[WORKER-{os.getpid()}] Evaluator failed: {result.stderr}", file=sys.stderr)
+ log_error(f"Evaluator failed: {result.stderr}")
  return None, {}

  output = result.stdout + result.stderr
  return self._parse_evaluator_output(output)

  except subprocess.TimeoutExpired:
- print(f"[WORKER-{os.getpid()}] Evaluator timed out", file=sys.stderr)
+ log_error("Evaluator timed out")
  return None, {}
  except Exception as e:
- print(f"[WORKER-{os.getpid()}] Evaluator error: {e}", file=sys.stderr)
+ log_error(f"Evaluator error: {e}")
  return None, {}

  def _parse_evaluator_output(self, output: str) -> Tuple[Optional[float], Dict[str, Any]]:
@@ -297,9 +431,9 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
  Exit code (0=success, 77=AI failed, 78=missing parent, etc.)
  """
  self.current_candidate_id = candidate.id
- print(f"[WORKER-{os.getpid()}] Processing: {candidate.id}", file=sys.stderr)
- print(f"[WORKER-{os.getpid()}] Description: {candidate.description}", file=sys.stderr)
- print(f"[WORKER-{os.getpid()}] Based on: {candidate.based_on_id or 'baseline'}", file=sys.stderr)
+ log(f"Processing: {candidate.id}")
+ log(f"Description: {candidate.description[:80]}..." if len(candidate.description) > 80 else f"Description: {candidate.description}")
+ log(f"Based on: {candidate.based_on_id or 'baseline'}")

  is_baseline = self._is_baseline(candidate.id, candidate.based_on_id)
  target_file = Path(self.config.output_dir) / f"evolution_{candidate.id}.py"
@@ -308,7 +442,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
  resolved_parent, source_file = self._resolve_parent_id(candidate.based_on_id)

  if source_file is None and not is_baseline:
- print(f"[WORKER-{os.getpid()}] ERROR: Parent not found: {candidate.based_on_id}", file=sys.stderr)
+ log_error(f"Parent not found: {candidate.based_on_id}")
  return 78 # Missing parent

  if source_file is None:
@@ -316,10 +450,10 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ

  # Check if target already exists
  if target_file.exists():
- print(f"[WORKER-{os.getpid()}] File already exists, running evaluation only", file=sys.stderr)
+ log("File already exists, running evaluation only")
  elif not is_baseline:
  # Copy source to target
- print(f"[WORKER-{os.getpid()}] Copying {source_file} to {target_file}", file=sys.stderr)
+ log(f"Copying {source_file.name} to {target_file.name}")
  shutil.copy(source_file, target_file)

  # Call AI to modify (uses round-based retry with backoff)
@@ -327,7 +461,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
  success, model = self._call_ai_with_backoff(prompt, target_file)

  if not success:
- print(f"[WORKER-{os.getpid()}] AI failed after all retries", file=sys.stderr)
+ log_error("AI failed after all retries")
  target_file.unlink(missing_ok=True)
  return 77 # AI generation failed

@@ -338,23 +472,69 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ

  # Check syntax
  if not self._check_syntax(target_file):
- print(f"[WORKER-{os.getpid()}] Syntax error in generated file", file=sys.stderr)
+ log_error("Syntax error in generated file")
  target_file.unlink(missing_ok=True)
  with EvolutionCSV(self.config.csv_path) as csv:
  csv.update_candidate_status(candidate.id, 'pending')
  return 0 # Will retry

+ # Run validator with retry loop
+ # AIDEV-NOTE: Validator catches structural errors before expensive full evaluation.
+ # If validation fails, we give the AI feedback and ask it to fix the code.
+ validation_passed = False
+ for validation_attempt in range(self.config.max_validation_retries + 1):
+ valid, error_info = self._run_validator(candidate.id)
+
+ if valid:
+ validation_passed = True
+ break
+
+ if validation_attempt >= self.config.max_validation_retries:
+ log_error(f"Validation failed after {self.config.max_validation_retries} fix attempts")
+ break
+
+ # Ask AI to fix the validation error
+ log(f"Validation failed (attempt {validation_attempt + 1}), asking AI to fix...")
+ fix_prompt = self._build_fix_prompt(candidate, target_file.name, error_info)
+ success, fix_model = self._call_ai_with_backoff(fix_prompt, target_file)
+
+ if not success:
+ log_error("AI failed to fix validation error")
+ break
+
+ # Record that we used an additional model call for fixing
+ if fix_model:
+ with EvolutionCSV(self.config.csv_path) as csv:
+ current_llm = csv.get_candidate_info(candidate.id).get('run-LLM', '')
+ new_llm = f"{current_llm}+{fix_model}" if current_llm else fix_model
+ csv.update_candidate_field(candidate.id, 'run-LLM', new_llm)
+
+ # Re-check syntax after fix
+ if not self._check_syntax(target_file):
+ log_error("Fix introduced syntax error")
+ # Don't break - try again if we have retries left
+
+ if not validation_passed:
+ # Validation failed after all retries
+ with EvolutionCSV(self.config.csv_path) as csv:
+ csv.update_candidate_status(candidate.id, 'failed-validation')
+ # Store the last error for debugging
+ if error_info:
+ error_summary = f"{error_info.get('error_type', 'unknown')}: {error_info.get('error', '')[:100]}"
+ csv.update_candidate_field(candidate.id, 'validation_error', error_summary)
+ return 1
+
  # Run evaluator
- print(f"[WORKER-{os.getpid()}] Running evaluator...", file=sys.stderr)
+ log("Running evaluator...")
  score, json_data = self._run_evaluator(candidate.id, is_baseline)

  if score is None:
- print(f"[WORKER-{os.getpid()}] Evaluation failed - no score", file=sys.stderr)
+ log_error("Evaluation failed - no score")
  with EvolutionCSV(self.config.csv_path) as csv:
  csv.update_candidate_status(candidate.id, 'failed')
  return 1

- print(f"[WORKER-{os.getpid()}] Score: {score}", file=sys.stderr)
+ log(f"Score: {score}")

  # Update CSV
  with EvolutionCSV(self.config.csv_path) as csv:
@@ -376,7 +556,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
  Returns:
  Exit code
  """
- print(f"[WORKER-{os.getpid()}] Started (max {self.config.max_candidates} candidates)", file=sys.stderr)
+ log(f"Started (max {self.config.max_candidates} candidates)")
  processed = 0

  while processed < self.config.max_candidates:
@@ -385,7 +565,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
  result = csv.get_next_pending_candidate()

  if not result:
- print(f"[WORKER-{os.getpid()}] No pending candidates", file=sys.stderr)
+ log("No pending candidates")
  break

  candidate_id, _ = result
@@ -395,7 +575,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
  info = csv.get_candidate_info(candidate_id)

  if not info:
- print(f"[WORKER-{os.getpid()}] Candidate info not found: {candidate_id}", file=sys.stderr)
+ log_warn(f"Candidate info not found: {candidate_id}")
  continue

  candidate = Candidate(
@@ -418,9 +598,9 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
  elif exit_code == 3: # API exhausted
  return 3

- print(f"[WORKER-{os.getpid()}] Processed {processed}/{self.config.max_candidates}", file=sys.stderr)
+ log(f"Processed {processed}/{self.config.max_candidates}")

- print(f"[WORKER-{os.getpid()}] Exiting", file=sys.stderr)
+ log("Exiting")
  return 0


@@ -467,6 +647,7 @@ def load_config_from_yaml(config_path: Optional[str] = None) -> Config:
  memory_limit_mb=data.get('memory_limit_mb', 0),
  timeout_seconds=data.get('timeout_seconds', 600),
  max_candidates=data.get('worker_max_candidates', 5),
+ max_validation_retries=data.get('max_validation_retries', 3),
  max_rounds=ideation.get('max_rounds', 10),
  initial_wait=ideation.get('initial_wait', 60),
  max_wait=ideation.get('max_wait', 600)
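The new `max_validation_retries` key is read from the top level of the YAML config with a default of 3. For orientation, a hypothetical config excerpt whose key names mirror the `data.get(...)` and `ideation.get(...)` calls above (the nesting of the `ideation` block is inferred, not shown in this diff):

```yaml
# Hypothetical excerpt of the evolution config; defaults match the code above.
memory_limit_mb: 0
timeout_seconds: 600
worker_max_candidates: 5
max_validation_retries: 3   # new in 1.9.9: AI fix attempts after a failed validation
ideation:
  max_rounds: 10
  initial_wait: 60
  max_wait: 600
```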
@@ -488,10 +669,10 @@ def main():
  sys.exit(worker.run())

  except FileNotFoundError as e:
- print(f"Error: {e}", file=sys.stderr)
+ log_error(f"Config error: {e}")
  sys.exit(1)
  except Exception as e:
- print(f"Error: {e}", file=sys.stderr)
+ log_error(f"Error: {e}")
  import traceback
  traceback.print_exc()
  sys.exit(1)
package/lib/log.py ADDED
@@ -0,0 +1,42 @@
+ #!/usr/bin/env python3
+ """
+ Simple timestamped logging for claude-evolve.
+ Uses stderr with flush=True for real-time output.
+ """
+
+ import os
+ import sys
+ from datetime import datetime
+
+ # Default prefix, can be set per-module
+ _prefix = "EVOLVE"
+
+
+ def set_prefix(prefix: str):
+     """Set the log prefix (e.g., 'WORKER', 'IDEATE', 'RUN')."""
+     global _prefix
+     _prefix = prefix
+
+
+ def log(msg: str, prefix: str = None):
+     """Log with timestamp. Always flushes for real-time output."""
+     ts = datetime.now().strftime("%H:%M:%S")
+     p = prefix or _prefix
+     pid = os.getpid()
+     print(f"[{ts}] [{p}-{pid}] {msg}", file=sys.stderr, flush=True)
+
+
+ def log_debug(msg: str, prefix: str = None):
+     """Log debug message (only if DEBUG env var set)."""
+     if os.environ.get('DEBUG') or os.environ.get('VERBOSE'):
+         log(f"[DEBUG] {msg}", prefix)
+
+
+ def log_error(msg: str, prefix: str = None):
+     """Log error message."""
+     log(f"[ERROR] {msg}", prefix)
+
+
+ def log_warn(msg: str, prefix: str = None):
+     """Log warning message."""
+     log(f"[WARN] {msg}", prefix)
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "claude-evolve",
- "version": "1.9.7",
+ "version": "1.9.9",
  "bin": {
  "claude-evolve": "bin/claude-evolve",
  "claude-evolve-main": "bin/claude-evolve-main",