claude-evolve 1.9.7 → 1.9.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/__pycache__/ai_cli.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolution_csv.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolve_ideate.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolve_run.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolve_worker.cpython-314.pyc +0 -0
- package/lib/ai_cli.py +14 -3
- package/lib/evolution_csv.py +32 -9
- package/lib/evolve_ideate.py +17 -6
- package/lib/evolve_run.py +31 -28
- package/lib/evolve_worker.py +209 -28
- package/lib/log.py +42 -0
- package/package.json +1 -1
Binary files (compiled `__pycache__/*.pyc` bytecode); the registry diff shows no textual changes for these.
package/lib/ai_cli.py
CHANGED
@@ -10,9 +10,16 @@ import subprocess
 import sys
 import tempfile
 import time
+from datetime import datetime
 from pathlib import Path
 from typing import Optional, Tuple, List
 
+
+def _log(msg: str):
+    """Log with timestamp. AI CLI uses its own logging to avoid import cycles."""
+    ts = datetime.now().strftime("%H:%M:%S")
+    print(f"[{ts}] [AI] {msg}", file=sys.stderr, flush=True)
+
 # Path to ai-cli.sh relative to this file
 SCRIPT_DIR = Path(__file__).parent
 AI_CLI_PATH = SCRIPT_DIR / "ai-cli.sh"

@@ -305,21 +312,25 @@ def call_ai_with_backoff(
         shuffled_models = models.copy()
         random.shuffle(shuffled_models)
 
-
+        _log(f"Round {round_num + 1}/{max_rounds}: trying {len(shuffled_models)} models")
 
         for model in shuffled_models:
             try:
+                _log(f"Trying {model}...")
                 output, model_name = call_ai_model(prompt, model, working_dir, env_vars)
                 if round_num > 0:
-
+                    _log(f"Succeeded on round {round_num + 1} with {model}")
+                else:
+                    _log(f"Success with {model}")
                 return output, model_name
             except AIError as e:
+                _log(f"{model} failed: {str(e)[:60]}...")
                 last_errors[model] = str(e)
                 # Continue to next model
 
         # All models failed in this round
         if round_num < max_rounds - 1:
-
+            _log(f"All models failed in round {round_num + 1}, waiting {wait_time}s...")
            time.sleep(wait_time)
            # Exponential backoff: 60 -> 120 -> 240 -> 480 (capped at max_wait)
            wait_time = min(wait_time * 2, max_wait)
package/lib/evolution_csv.py
CHANGED
@@ -778,16 +778,22 @@ class EvolutionCSV:
 
         return f"{gen_prefix}-{max_id + 1:03d}"
 
-    def get_next_ids(self, generation: int, count: int) -> List[str]:
+    def get_next_ids(self, generation: int, count: int, claimed_ids: Optional[List[str]] = None) -> List[str]:
         """
         Get multiple next available IDs for a generation.
 
         Args:
             generation: Generation number
             count: Number of IDs to generate
+            claimed_ids: Optional list of IDs already claimed in this session
+                (not yet written to CSV). Prevents duplicate IDs.
 
         Returns:
             List of ID strings
+
+        AIDEV-NOTE: The claimed_ids parameter is critical for ideation where
+        multiple strategies run before writing to CSV. Without it, each strategy
+        would get overlapping IDs like gen75-001, gen75-002 for each strategy.
         """
         rows = self._read_csv()
         gen_prefix = f"gen{generation:02d}"

@@ -796,6 +802,7 @@ class EvolutionCSV:
         has_header = rows and rows[0] and rows[0][0].lower() == 'id'
         start_idx = 1 if has_header else 0
 
+        # Check CSV for existing IDs
         for row in rows[start_idx:]:
             if not self.is_valid_candidate_row(row):
                 continue

@@ -808,6 +815,16 @@ class EvolutionCSV:
             except (ValueError, IndexError):
                 pass
 
+        # Also check claimed IDs (not yet in CSV)
+        if claimed_ids:
+            for claimed_id in claimed_ids:
+                if claimed_id.startswith(gen_prefix + '-'):
+                    try:
+                        id_num = int(claimed_id.split('-')[1])
+                        max_id = max(max_id, id_num)
+                    except (ValueError, IndexError):
+                        pass
+
         return [f"{gen_prefix}-{max_id + 1 + i:03d}" for i in range(count)]
 
     def append_candidates(self, candidates: List[Dict[str, str]]) -> int:

@@ -855,6 +872,9 @@ class EvolutionCSV:
 
         Returns:
             Dict with total, pending, complete, failed counts
+
+        AIDEV-NOTE: Uses is_pending_candidate() for pending count to ensure
+        consistency between stats and what workers actually find.
         """
         rows = self._read_csv()
         if not rows:

@@ -870,16 +890,19 @@ class EvolutionCSV:
                 continue
 
             stats['total'] += 1
-            status = row[4].strip().lower() if len(row) > 4 else ''
 
-
-
-            elif status == 'running':
-                stats['running'] += 1
-            elif status.startswith('failed'):
-                stats['failed'] += 1
-            else:
+            # Use is_pending_candidate for consistency with workers
+            if self.is_pending_candidate(row):
                 stats['pending'] += 1
+            else:
+                status = row[4].strip().lower() if len(row) > 4 else ''
+                if status == 'complete':
+                    stats['complete'] += 1
+                elif status == 'running':
+                    stats['running'] += 1
+                elif status.startswith('failed'):
+                    stats['failed'] += 1
+                # Anything else that's not pending gets counted as failed/other
 
         return stats
 
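The duplicate-ID bug this fixes is easiest to see in isolation. Here is a self-contained sketch of the allocation rule (not the real `EvolutionCSV`, which scans actual CSV rows): the next ID is one past the maximum of the IDs already in the CSV and the IDs already claimed in memory.

```python
# Sketch of the ID-allocation rule behind get_next_ids (toy reimplementation).
from typing import List, Optional

def next_ids(existing: List[str], generation: int, count: int,
             claimed_ids: Optional[List[str]] = None) -> List[str]:
    prefix = f"gen{generation:02d}"
    max_id = 0
    # Consider both IDs already in the CSV and IDs claimed but not yet written
    for cid in list(existing) + list(claimed_ids or []):
        if cid.startswith(prefix + '-'):
            try:
                max_id = max(max_id, int(cid.split('-')[1]))
            except (ValueError, IndexError):
                pass
    return [f"{prefix}-{max_id + 1 + i:03d}" for i in range(count)]

claimed: List[str] = []
first = next_ids(['gen75-001'], 75, 2, claimed)   # ['gen75-002', 'gen75-003']
claimed.extend(first)                              # claim before writing to CSV
second = next_ids(['gen75-001'], 75, 2, claimed)   # ['gen75-004', 'gen75-005'], no overlap
```

Without the `claimed_ids` check, the second call would see only the unchanged CSV and hand out `gen75-002`/`gen75-003` again.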
package/lib/evolve_ideate.py
CHANGED
@@ -93,20 +93,28 @@ class IdeationStrategy(ABC):
         pass
 
     def generate(self, context: IdeationContext, count: int,
-                 max_rounds: int = 10, initial_wait: int = 60, max_wait: int = 600
+                 max_rounds: int = 10, initial_wait: int = 60, max_wait: int = 600,
+                 claimed_ids: List[str] = None) -> List[Idea]:
         """Generate ideas using this strategy with round-based retry and backoff.
 
         AIDEV-NOTE: Uses call_ai_with_backoff for robust retry handling.
         Each round tries ALL models. If all fail, waits with exponential backoff.
+        claimed_ids tracks IDs already claimed by previous strategies in this run.
+        IDs are added to claimed_ids immediately to prevent reuse even on failure.
         """
         if count <= 0:
             return []
+        if claimed_ids is None:
+            claimed_ids = []
 
-        print(f"[IDEATE] Running {self.name} strategy for {count} ideas", file=sys.stderr)
+        print(f"[IDEATE] Running {self.name} strategy for {count} ideas", file=sys.stderr, flush=True)
 
-        # Get next IDs
-        ids = self.csv.get_next_ids(context.generation, count)
-        print(f"[IDEATE] Using IDs: {', '.join(ids)}", file=sys.stderr)
+        # Get next IDs, avoiding any already claimed in this ideation run
+        ids = self.csv.get_next_ids(context.generation, count, claimed_ids=claimed_ids)
+        print(f"[IDEATE] Using IDs: {', '.join(ids)}", file=sys.stderr, flush=True)
+
+        # Immediately claim these IDs (even if AI fails, don't reuse them)
+        claimed_ids.extend(ids)
 
         # Create temp CSV with stub rows
         temp_csv = Path(self.config.evolution_dir) / f"temp-csv-{os.getpid()}.csv"

@@ -398,6 +406,7 @@ class Ideator:
         print(f"[IDEATE] Top performers: {len(context.top_performers)}", file=sys.stderr)
 
         all_ideas: List[Idea] = []
+        claimed_ids: List[str] = []  # Track IDs claimed across all strategies
         strategies_succeeded = 0
 
         for strategy, count in self.strategies:

@@ -408,10 +417,12 @@ class Ideator:
                 context, count,
                 max_rounds=self.config.max_rounds,
                 initial_wait=self.config.initial_wait,
-                max_wait=self.config.max_wait
+                max_wait=self.config.max_wait,
+                claimed_ids=claimed_ids  # Pass already-claimed IDs
             )
 
             if ideas:
+                # IDs are already tracked in generate(), just count success
                 strategies_succeeded += 1
 
                 # Filter for novelty
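Because `generate()` extends `claimed_ids` in place, the `Ideator` only needs to create the list once and pass the same object to every strategy. A toy illustration with a hypothetical `DummyStrategy` that allocates IDs the same way (the real strategies also call the AI and may fail, which is why IDs are claimed before the AI call):

```python
# Toy illustration of one claimed_ids list shared across strategies.
# DummyStrategy is hypothetical; it only mimics the ID-claiming behavior.
from typing import List

class DummyStrategy:
    def __init__(self, name: str):
        self.name = name

    def generate(self, generation: int, count: int, claimed_ids: List[str]) -> List[str]:
        start = len(claimed_ids) + 1  # toy rule: IDs here are contiguous from 001
        ids = [f"gen{generation:02d}-{start + i:03d}" for i in range(count)]
        claimed_ids.extend(ids)  # claim immediately, even if a later AI call fails
        return ids

claimed: List[str] = []
for strategy, count in [(DummyStrategy("novelty"), 3), (DummyStrategy("crossover"), 2)]:
    print(strategy.name, strategy.generate(75, count, claimed))
# novelty ['gen75-001', 'gen75-002', 'gen75-003']
# crossover ['gen75-004', 'gen75-005']
```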
package/lib/evolve_run.py
CHANGED
@@ -26,6 +26,8 @@ SCRIPT_DIR = Path(__file__).parent
 sys.path.insert(0, str(SCRIPT_DIR.parent))
 
 from lib.evolution_csv import EvolutionCSV
+from lib.log import log, log_error, log_warn, set_prefix
+set_prefix("RUN")
 
 
 @dataclass

@@ -56,7 +58,8 @@ class WorkerPool:
         if len(self.workers) >= self.max_workers:
             return None
 
-
+        # Use -u for unbuffered output so logs stream in real-time
+        cmd = [sys.executable, '-u', str(self.worker_script)]
         if self.config_path:
             cmd.extend(['--config', self.config_path])
         if self.timeout:

@@ -67,10 +70,10 @@ class WorkerPool:
             # This provides real-time visibility into which models are being used
             proc = subprocess.Popen(cmd)
             self.workers[proc.pid] = proc
-
+            log(f"Spawned worker {proc.pid}")
             return proc.pid
         except Exception as e:
-
+            log_error(f"Failed to spawn worker: {e}")
             return None
 
     def cleanup_finished(self) -> List[int]:

@@ -83,7 +86,7 @@ class WorkerPool:
             if ret is not None:
                 finished_pids.append(pid)
                 exit_codes.append(ret)
-
+                log(f"Worker {pid} exited with code {ret}")
 
         for pid in finished_pids:
             del self.workers[pid]

@@ -95,7 +98,7 @@ class WorkerPool:
         if not self.workers:
             return
 
-
+        log(f"Shutting down {len(self.workers)} workers...")
 
         # Send SIGTERM
         for pid, proc in self.workers.items():

@@ -115,7 +118,7 @@ class WorkerPool:
         for pid, proc in list(self.workers.items()):
             try:
                 proc.kill()
-
+                log(f"Force killed worker {pid}")
             except Exception:
                 pass
 

@@ -151,36 +154,36 @@ class EvolutionRunner:
     def _handle_signal(self, signum, frame):
         """Handle termination signal."""
         sig_name = signal.Signals(signum).name
-
+        log(f"Received {sig_name}, shutting down...")
         self.shutdown_requested = True
         self.pool.shutdown()
         sys.exit(128 + signum)
 
     def cleanup_csv(self):
         """Clean up CSV at startup."""
-
+        log("Cleaning up CSV...")
         with EvolutionCSV(self.config.csv_path) as csv:
             # Remove duplicates
             removed = csv.remove_duplicate_candidates()
             if removed:
-
+                log(f"Removed {removed} duplicate candidates")
 
             # Reset stuck candidates
             reset = csv.reset_stuck_candidates()
             if reset:
-
+                log(f"Reset {reset} stuck candidates")
 
             # Clean corrupted status fields
             fixed = csv.cleanup_corrupted_status_fields()
             if fixed:
-
+                log(f"Fixed {fixed} corrupted status fields")
 
     def ensure_baseline(self):
         """Ensure baseline entry exists in CSV."""
         with EvolutionCSV(self.config.csv_path) as csv:
             info = csv.get_candidate_info('baseline-000')
             if not info:
-
+                log("Adding baseline-000 entry")
                 csv.append_candidates([{
                     'id': 'baseline-000',
                     'basedOnId': '',

@@ -200,14 +203,14 @@ class EvolutionRunner:
 
         # Need minimum completed algorithms to learn from
         if stats['complete'] < self.config.min_completed_for_ideation:
-
+            log(f"Not enough completed ({stats['complete']} < {self.config.min_completed_for_ideation})")
             return False
 
         return True
 
     def run_ideation(self) -> bool:
         """Run ideation. Returns True on success."""
-
+        log("Running ideation...")
 
         cmd = [sys.executable, str(self.ideate_script)]
         if self.config.config_path:

@@ -221,18 +224,18 @@ class EvolutionRunner:
                 cwd=self.config.evolution_dir
             )
 
-            #
+            # Forward ideation output (already has timestamps from ideate module)
             if result.stdout:
                 for line in result.stdout.strip().split('\n'):
-                    print(
+                    print(line, file=sys.stderr, flush=True)
             if result.stderr:
                 for line in result.stderr.strip().split('\n'):
-                    print(
+                    print(line, file=sys.stderr, flush=True)
 
             return result.returncode == 0
 
         except Exception as e:
-
+            log_error(f"Ideation failed: {e}")
             return False
 
     def run(self) -> int:

@@ -242,9 +245,9 @@ class EvolutionRunner:
         Returns:
             Exit code
         """
-
-
-
+        log("Starting evolution run")
+        log(f"Max workers: {self.config.max_workers}")
+        log(f"Auto ideate: {self.config.auto_ideate}")
 
         # Startup cleanup
         self.cleanup_csv()

@@ -260,7 +263,7 @@ class EvolutionRunner:
 
             # Check for API limit
             if 2 in exit_codes or 3 in exit_codes:
-
+                log("API limit reached, waiting 5 minutes...")
                 self.api_limit_reached = True
                 time.sleep(300)  # 5 minute wait
                 self.api_limit_reached = False

@@ -274,7 +277,7 @@ class EvolutionRunner:
 
             # Get stats
             stats = self.get_stats()
-
+            log(f"Stats: {stats['pending']} pending, {stats['complete']} complete, {stats['running']} running")
 
             # Check if we need ideation
             if stats['pending'] == 0 and self.pool.active_count == 0:

@@ -290,11 +293,11 @@ class EvolutionRunner:
                     if self.run_ideation():
                         continue  # Loop back to check for new work
                     else:
-
+                        log_warn("Ideation failed, waiting...")
                         time.sleep(30)
                         continue
                 else:
-
+                    log("Evolution complete!")
                     break
 
             # Spawn workers for pending work

@@ -309,7 +312,7 @@ class EvolutionRunner:
 
         # Cleanup
         self.pool.shutdown()
-
+        log("Exiting")
         return 0
 
 

@@ -378,10 +381,10 @@ def main():
         sys.exit(runner.run())
 
     except FileNotFoundError as e:
-
+        log_error(f"Config error: {e}")
         sys.exit(1)
     except Exception as e:
-
+        log_error(f"Error: {e}")
         import traceback
         traceback.print_exc()
         sys.exit(1)
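The `-u` flag added in `spawn_worker` is what lets the new log module's real-time behavior hold end to end: per the diff's own comment, it disables output buffering in the child process so worker logs stream as they happen rather than in bursts. A minimal sketch of the spawn; the script path and config name here are illustrative:

```python
# Sketch of the unbuffered worker spawn pattern from WorkerPool.
import subprocess
import sys

# -u makes the child Python run unbuffered; the child inherits the parent's
# stdout/stderr (no capture), so its log lines interleave in real time.
cmd = [sys.executable, '-u', 'lib/evolve_worker.py', '--config', 'config.yaml']
proc = subprocess.Popen(cmd)
print(f"spawned worker pid={proc.pid}")
```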
package/lib/evolve_worker.py
CHANGED
@@ -30,6 +30,9 @@ from typing import Optional, Tuple, Dict, Any
 SCRIPT_DIR = Path(__file__).parent
 sys.path.insert(0, str(SCRIPT_DIR.parent))
 
+from lib.log import log, log_error, log_warn, log_debug, set_prefix
+set_prefix("WORKER")
+
 from lib.evolution_csv import EvolutionCSV
 from lib.ai_cli import call_ai_with_backoff, get_git_protection_warning, AIError
 

@@ -47,6 +50,7 @@ class Config:
     memory_limit_mb: int = 0
     timeout_seconds: int = 600
     max_candidates: int = 5
+    max_validation_retries: int = 3  # Max attempts to fix validation errors (if validator.py exists)
     # Retry configuration with exponential backoff
     max_rounds: int = 10
     initial_wait: int = 60

@@ -78,10 +82,10 @@ class Worker:
     def _handle_signal(self, signum, frame):
         """Handle termination signal - reset current candidate to pending."""
         sig_name = signal.Signals(signum).name
-
+        log(f"Received {sig_name}")
 
         if self.current_candidate_id:
-
+            log(f"Resetting {self.current_candidate_id} to pending")
             try:
                 with EvolutionCSV(self.config.csv_path) as csv:
                     info = csv.get_candidate_info(self.current_candidate_id)

@@ -90,7 +94,7 @@ class Worker:
                     if status not in ('complete', 'failed', 'failed-ai-retry', 'failed-parent-missing'):
                         csv.update_candidate_status(self.current_candidate_id, 'pending')
             except Exception as e:
-
+                log(f"Error resetting status: {e}")
 
         sys.exit(128 + signum)
 

@@ -169,14 +173,14 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
             hash_after = self._file_hash(target_file) if target_file.exists() else None
 
             if hash_before != hash_after and hash_after is not None:
-
+                log(f"AI successfully modified file (model: {model})")
                 return True, model
             else:
-
+                log(f"AI completed but did not modify file")
                 return False, model
 
         except AIError as e:
-
+            log_error(f"All AI retries exhausted: {e}")
             return False, ""
 
     def _file_hash(self, path: Path) -> Optional[str]:

@@ -199,6 +203,136 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
         except Exception:
             return False
 
+    def _find_validator(self) -> Optional[Path]:
+        """
+        Auto-detect validator.py in the evolution directory.
+        No config required - if validator.py exists, we use it.
+        """
+        validator_path = Path(self.config.evolution_dir) / "validator.py"
+        if validator_path.exists():
+            return validator_path
+        return None
+
+    def _run_validator(self, candidate_id: str) -> Tuple[bool, Dict[str, Any]]:
+        """
+        Run the validator (fast smoke test) before full evaluation.
+
+        AIDEV-NOTE: Auto-detects validator.py in evolution directory.
+        Returns exit code 0 on success, non-zero on failure.
+        Resilient to any output format - handles JSON, plain text, or nothing.
+
+        Returns:
+            Tuple of (success, error_info_dict)
+            - success: True if validation passed
+            - error_info: Dict with whatever info we could extract from output
+        """
+        validator_path = self._find_validator()
+        if not validator_path:
+            return True, {}  # No validator found, skip
+
+        cmd = [self.config.python_cmd, str(validator_path), candidate_id]
+        log(f"Running validator: {' '.join(cmd)}")
+
+        try:
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True,
+                timeout=30,  # Validator should be fast (~3 seconds)
+                cwd=self.config.evolution_dir
+            )
+
+            # Combine stdout and stderr for full context
+            stdout = result.stdout.strip() if result.stdout else ""
+            stderr = result.stderr.strip() if result.stderr else ""
+            combined_output = f"{stdout}\n{stderr}".strip()
+
+            # Try to extract structured info, but be resilient to any format
+            error_info = {'raw_output': combined_output}
+
+            # Try to parse JSON from stdout (validator may output JSON)
+            if stdout.startswith('{'):
+                try:
+                    parsed = json.loads(stdout)
+                    if isinstance(parsed, dict):
+                        error_info.update(parsed)
+                except json.JSONDecodeError:
+                    pass  # Not valid JSON, that's fine
+
+            # If no structured error, use the raw output
+            if 'error' not in error_info and combined_output:
+                error_info['error'] = combined_output
+
+            if result.returncode == 0:
+                log("Validation passed")
+                return True, error_info
+            else:
+                error_type = error_info.get('error_type', 'validation_failed')
+                log_warn(f"Validation failed: {error_type}")
+                return False, error_info
+
+        except subprocess.TimeoutExpired:
+            log_error("Validator timed out")
+            return False, {'error': 'Validator timed out after 30 seconds', 'error_type': 'timeout'}
+        except Exception as e:
+            log_error(f"Validator error: {e}")
+            return False, {'error': str(e), 'error_type': 'exception'}
+
+    def _build_fix_prompt(self, candidate: Candidate, target_basename: str, error_info: Dict[str, Any]) -> str:
+        """
+        Build AI prompt to fix validation errors.
+
+        AIDEV-NOTE: Resilient to any error_info structure - uses whatever is available.
+        """
+        prompt = f"""{get_git_protection_warning()}
+
+The code in {target_basename} failed validation. Please fix the errors and try again.
+
+## Validator Output
+
+"""
+        # Include whatever structured fields we have
+        if error_info.get('error_type'):
+            prompt += f"**Error Type:** {error_info['error_type']}\n\n"
+
+        if error_info.get('error'):
+            prompt += f"**Error:**\n{error_info['error']}\n\n"
+
+        if error_info.get('suggestion'):
+            prompt += f"**Suggested Fix:**\n{error_info['suggestion']}\n\n"
+
+        if error_info.get('traceback'):
+            tb = error_info['traceback']
+            # Truncate if too long
+            if len(tb) > 1500:
+                tb = "..." + tb[-1500:]
+            prompt += f"**Traceback:**\n```\n{tb}\n```\n\n"
+
+        # If we only have raw output (no structured fields), show that
+        if not any(error_info.get(k) for k in ('error', 'error_type', 'suggestion', 'traceback')):
+            raw = error_info.get('raw_output', 'No output captured')
+            # Truncate if needed
+            if len(raw) > 2000:
+                raw = raw[:2000] + "\n... (truncated)"
+            prompt += f"```\n{raw}\n```\n\n"
+
+        prompt += f"""## Instructions
+
+1. Read the file {target_basename} to understand the current code
+2. Identify the issue based on the validator output above
+3. Fix the code to resolve the validation error
+4. The fix should still implement: {candidate.description}
+
+**CRITICAL:** Make sure to actually fix the error. Do not just add comments or make cosmetic changes.
+
+To help debug, you can run the validator yourself:
+```
+python validator.py {target_basename}
+```
+"""
+
+        return prompt
+
     def _run_evaluator(self, candidate_id: str, is_baseline: bool) -> Tuple[Optional[float], Dict[str, Any]]:
         """
         Run the evaluator.

@@ -217,7 +351,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
 
         cmd.extend([self.config.evaluator_path, eval_arg])
 
-
+        log(f"Running evaluator: {' '.join(cmd)}")
 
         try:
             result = subprocess.run(

@@ -229,17 +363,17 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
             )
 
             if result.returncode != 0:
-
+                log_error(f"Evaluator failed: {result.stderr}")
                 return None, {}
 
             output = result.stdout + result.stderr
             return self._parse_evaluator_output(output)
 
         except subprocess.TimeoutExpired:
-
+            log_error("Evaluator timed out")
             return None, {}
         except Exception as e:
-
+            log_error(f"Evaluator error: {e}")
             return None, {}
 
     def _parse_evaluator_output(self, output: str) -> Tuple[Optional[float], Dict[str, Any]]:

@@ -297,9 +431,9 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
             Exit code (0=success, 77=AI failed, 78=missing parent, etc.)
         """
         self.current_candidate_id = candidate.id
-
-
-
+        log(f"Processing: {candidate.id}")
+        log(f"Description: {candidate.description[:80]}..." if len(candidate.description) > 80 else f"Description: {candidate.description}")
+        log(f"Based on: {candidate.based_on_id or 'baseline'}")
 
         is_baseline = self._is_baseline(candidate.id, candidate.based_on_id)
         target_file = Path(self.config.output_dir) / f"evolution_{candidate.id}.py"

@@ -308,7 +442,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
         resolved_parent, source_file = self._resolve_parent_id(candidate.based_on_id)
 
         if source_file is None and not is_baseline:
-
+            log_error(f"Parent not found: {candidate.based_on_id}")
             return 78  # Missing parent
 
         if source_file is None:

@@ -316,10 +450,10 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
 
         # Check if target already exists
         if target_file.exists():
-
+            log("File already exists, running evaluation only")
         elif not is_baseline:
             # Copy source to target
-
+            log(f"Copying {source_file.name} to {target_file.name}")
             shutil.copy(source_file, target_file)
 
         # Call AI to modify (uses round-based retry with backoff)

@@ -327,7 +461,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
             success, model = self._call_ai_with_backoff(prompt, target_file)
 
             if not success:
-
+                log_error("AI failed after all retries")
                 target_file.unlink(missing_ok=True)
                 return 77  # AI generation failed
 

@@ -338,23 +472,69 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
 
         # Check syntax
         if not self._check_syntax(target_file):
-
+            log_error("Syntax error in generated file")
             target_file.unlink(missing_ok=True)
             with EvolutionCSV(self.config.csv_path) as csv:
                 csv.update_candidate_status(candidate.id, 'pending')
             return 0  # Will retry
 
+        # Run validator with retry loop
+        # AIDEV-NOTE: Validator catches structural errors before expensive full evaluation.
+        # If validation fails, we give the AI feedback and ask it to fix the code.
+        validation_passed = False
+        for validation_attempt in range(self.config.max_validation_retries + 1):
+            valid, error_info = self._run_validator(candidate.id)
+
+            if valid:
+                validation_passed = True
+                break
+
+            if validation_attempt >= self.config.max_validation_retries:
+                log_error(f"Validation failed after {self.config.max_validation_retries} fix attempts")
+                break
+
+            # Ask AI to fix the validation error
+            log(f"Validation failed (attempt {validation_attempt + 1}), asking AI to fix...")
+            fix_prompt = self._build_fix_prompt(candidate, target_file.name, error_info)
+            success, fix_model = self._call_ai_with_backoff(fix_prompt, target_file)
+
+            if not success:
+                log_error("AI failed to fix validation error")
+                break
+
+            # Record that we used an additional model call for fixing
+            if fix_model:
+                with EvolutionCSV(self.config.csv_path) as csv:
+                    current_llm = csv.get_candidate_info(candidate.id).get('run-LLM', '')
+                    new_llm = f"{current_llm}+{fix_model}" if current_llm else fix_model
+                    csv.update_candidate_field(candidate.id, 'run-LLM', new_llm)
+
+            # Re-check syntax after fix
+            if not self._check_syntax(target_file):
+                log_error("Fix introduced syntax error")
+                # Don't break - try again if we have retries left
+
+        if not validation_passed:
+            # Validation failed after all retries
+            with EvolutionCSV(self.config.csv_path) as csv:
+                csv.update_candidate_status(candidate.id, 'failed-validation')
+                # Store the last error for debugging
+                if error_info:
+                    error_summary = f"{error_info.get('error_type', 'unknown')}: {error_info.get('error', '')[:100]}"
+                    csv.update_candidate_field(candidate.id, 'validation_error', error_summary)
+            return 1
+
         # Run evaluator
-
+        log("Running evaluator...")
         score, json_data = self._run_evaluator(candidate.id, is_baseline)
 
         if score is None:
-
+            log_error("Evaluation failed - no score")
             with EvolutionCSV(self.config.csv_path) as csv:
                 csv.update_candidate_status(candidate.id, 'failed')
             return 1
 
-
+        log(f"Score: {score}")
 
         # Update CSV
         with EvolutionCSV(self.config.csv_path) as csv:

@@ -376,7 +556,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
         Returns:
             Exit code
         """
-
+        log(f"Started (max {self.config.max_candidates} candidates)")
         processed = 0
 
         while processed < self.config.max_candidates:

@@ -385,7 +565,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
             result = csv.get_next_pending_candidate()
 
             if not result:
-
+                log("No pending candidates")
                 break
 
             candidate_id, _ = result

@@ -395,7 +575,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
             info = csv.get_candidate_info(candidate_id)
 
             if not info:
-
+                log_warn(f"Candidate info not found: {candidate_id}")
                 continue
 
             candidate = Candidate(

@@ -418,9 +598,9 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
             elif exit_code == 3:  # API exhausted
                 return 3
 
-
+            log(f"Processed {processed}/{self.config.max_candidates}")
 
-
+        log("Exiting")
         return 0
 
 

@@ -467,6 +647,7 @@ def load_config_from_yaml(config_path: Optional[str] = None) -> Config:
         memory_limit_mb=data.get('memory_limit_mb', 0),
         timeout_seconds=data.get('timeout_seconds', 600),
         max_candidates=data.get('worker_max_candidates', 5),
+        max_validation_retries=data.get('max_validation_retries', 3),
         max_rounds=ideation.get('max_rounds', 10),
         initial_wait=ideation.get('initial_wait', 60),
         max_wait=ideation.get('max_wait', 600)

@@ -488,10 +669,10 @@ def main():
         sys.exit(worker.run())
 
     except FileNotFoundError as e:
-
+        log_error(f"Config error: {e}")
         sys.exit(1)
     except Exception as e:
-
+        log_error(f"Error: {e}")
         import traceback
         traceback.print_exc()
         sys.exit(1)
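The validator contract implied by `_run_validator` is small: the worker runs `<python_cmd> validator.py <candidate_id>` in the evolution directory, treats exit code 0 as a pass, and, if stdout starts with `{`, merges the JSON dict (keys such as `error`, `error_type`, `suggestion`, `traceback`) into the fix prompt. Below is a hypothetical `validator.py` matching that contract; the import smoke test and the `evolution_<id>.py` filename (taken from the worker's target-file naming) are one possible choice, not a requirement of the worker.

```python
# validator.py - hypothetical validator matching the contract _run_validator expects:
# invoked with the candidate ID, exit 0 on pass, non-zero on fail, optional
# JSON dict on stdout with error/error_type/suggestion/traceback fields.
import importlib.util
import json
import sys
import traceback

def main() -> int:
    candidate_id = sys.argv[1]
    path = f"evolution_{candidate_id}.py"  # assumes the worker's naming scheme
    try:
        spec = importlib.util.spec_from_file_location("candidate", path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)  # fails fast on import-time errors
    except Exception as e:
        print(json.dumps({
            'error': str(e),
            'error_type': type(e).__name__,
            'suggestion': 'Fix the import-time error before full evaluation.',
            'traceback': traceback.format_exc(),
        }))
        return 1
    return 0

if __name__ == '__main__':
    sys.exit(main())
```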
package/lib/log.py
ADDED
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+"""
+Simple timestamped logging for claude-evolve.
+Uses stderr with flush=True for real-time output.
+"""
+
+import os
+import sys
+from datetime import datetime
+
+# Default prefix, can be set per-module
+_prefix = "EVOLVE"
+
+
+def set_prefix(prefix: str):
+    """Set the log prefix (e.g., 'WORKER', 'IDEATE', 'RUN')."""
+    global _prefix
+    _prefix = prefix
+
+
+def log(msg: str, prefix: str = None):
+    """Log with timestamp. Always flushes for real-time output."""
+    ts = datetime.now().strftime("%H:%M:%S")
+    p = prefix or _prefix
+    pid = os.getpid()
+    print(f"[{ts}] [{p}-{pid}] {msg}", file=sys.stderr, flush=True)
+
+
+def log_debug(msg: str, prefix: str = None):
+    """Log debug message (only if DEBUG env var set)."""
+    if os.environ.get('DEBUG') or os.environ.get('VERBOSE'):
+        log(f"[DEBUG] {msg}", prefix)
+
+
+def log_error(msg: str, prefix: str = None):
+    """Log error message."""
+    log(f"[ERROR] {msg}", prefix)
+
+
+def log_warn(msg: str, prefix: str = None):
+    """Log warning message."""
+    log(f"[WARN] {msg}", prefix)
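Usage as the modules above adopt it; the PID baked into the prefix is what keeps parallel workers distinguishable on a shared stderr stream (timestamps and PID below are illustrative, and the import assumes you run from the package root):

```python
from lib.log import log, log_error, log_warn, set_prefix

set_prefix("WORKER")              # once per module/process
log("Processing: gen05-001")      # [14:02:31] [WORKER-12345] Processing: gen05-001
log_warn("Validation failed")     # [14:02:32] [WORKER-12345] [WARN] Validation failed
log_error("Evaluator timed out")  # [14:02:33] [WORKER-12345] [ERROR] Evaluator timed out
```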