claude-evolve 1.9.6 → 1.9.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/__pycache__/ai_cli.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolution_csv.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolve_ideate.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolve_run.cpython-314.pyc +0 -0
- package/lib/__pycache__/evolve_worker.cpython-314.pyc +0 -0
- package/lib/ai_cli.py +14 -3
- package/lib/evolution_csv.py +32 -9
- package/lib/evolve_ideate.py +17 -6
- package/lib/evolve_run.py +34 -42
- package/lib/evolve_worker.py +74 -88
- package/lib/log.py +42 -0
- package/package.json +1 -1
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/lib/ai_cli.py
CHANGED
|
@@ -10,9 +10,16 @@ import subprocess
|
|
|
10
10
|
import sys
|
|
11
11
|
import tempfile
|
|
12
12
|
import time
|
|
13
|
+
from datetime import datetime
|
|
13
14
|
from pathlib import Path
|
|
14
15
|
from typing import Optional, Tuple, List
|
|
15
16
|
|
|
17
|
+
|
|
18
|
+
def _log(msg: str) -> None:
    """Write a timestamped ``[AI]`` message to stderr.

    AI CLI uses its own logging to avoid import cycles.

    Args:
        msg: Text to emit. The line is written as ``[HH:MM:SS] [AI] <msg>``.
    """
    # Local wall-clock time, hour:minute:second only.
    ts = datetime.now().strftime("%H:%M:%S")
    # flush=True so the line is written out immediately rather than buffered.
    print(f"[{ts}] [AI] {msg}", file=sys.stderr, flush=True)
|
|
22
|
+
|
|
16
23
|
# Path to ai-cli.sh relative to this file
|
|
17
24
|
SCRIPT_DIR = Path(__file__).parent
|
|
18
25
|
AI_CLI_PATH = SCRIPT_DIR / "ai-cli.sh"
|
|
@@ -305,21 +312,25 @@ def call_ai_with_backoff(
|
|
|
305
312
|
shuffled_models = models.copy()
|
|
306
313
|
random.shuffle(shuffled_models)
|
|
307
314
|
|
|
308
|
-
|
|
315
|
+
_log(f"Round {round_num + 1}/{max_rounds}: trying {len(shuffled_models)} models")
|
|
309
316
|
|
|
310
317
|
for model in shuffled_models:
|
|
311
318
|
try:
|
|
319
|
+
_log(f"Trying {model}...")
|
|
312
320
|
output, model_name = call_ai_model(prompt, model, working_dir, env_vars)
|
|
313
321
|
if round_num > 0:
|
|
314
|
-
|
|
322
|
+
_log(f"Succeeded on round {round_num + 1} with {model}")
|
|
323
|
+
else:
|
|
324
|
+
_log(f"Success with {model}")
|
|
315
325
|
return output, model_name
|
|
316
326
|
except AIError as e:
|
|
327
|
+
_log(f"{model} failed: {str(e)[:60]}...")
|
|
317
328
|
last_errors[model] = str(e)
|
|
318
329
|
# Continue to next model
|
|
319
330
|
|
|
320
331
|
# All models failed in this round
|
|
321
332
|
if round_num < max_rounds - 1:
|
|
322
|
-
|
|
333
|
+
_log(f"All models failed in round {round_num + 1}, waiting {wait_time}s...")
|
|
323
334
|
time.sleep(wait_time)
|
|
324
335
|
# Exponential backoff: 60 -> 120 -> 240 -> 480 (capped at max_wait)
|
|
325
336
|
wait_time = min(wait_time * 2, max_wait)
|
package/lib/evolution_csv.py
CHANGED
|
@@ -778,16 +778,22 @@ class EvolutionCSV:
|
|
|
778
778
|
|
|
779
779
|
return f"{gen_prefix}-{max_id + 1:03d}"
|
|
780
780
|
|
|
781
|
-
def get_next_ids(self, generation: int, count: int) -> List[str]:
|
|
781
|
+
def get_next_ids(self, generation: int, count: int, claimed_ids: Optional[List[str]] = None) -> List[str]:
|
|
782
782
|
"""
|
|
783
783
|
Get multiple next available IDs for a generation.
|
|
784
784
|
|
|
785
785
|
Args:
|
|
786
786
|
generation: Generation number
|
|
787
787
|
count: Number of IDs to generate
|
|
788
|
+
claimed_ids: Optional list of IDs already claimed in this session
|
|
789
|
+
(not yet written to CSV). Prevents duplicate IDs.
|
|
788
790
|
|
|
789
791
|
Returns:
|
|
790
792
|
List of ID strings
|
|
793
|
+
|
|
794
|
+
AIDEV-NOTE: The claimed_ids parameter is critical for ideation where
|
|
795
|
+
multiple strategies run before writing to CSV. Without it, each strategy
|
|
796
|
+
would get overlapping IDs like gen75-001, gen75-002 for each strategy.
|
|
791
797
|
"""
|
|
792
798
|
rows = self._read_csv()
|
|
793
799
|
gen_prefix = f"gen{generation:02d}"
|
|
@@ -796,6 +802,7 @@ class EvolutionCSV:
|
|
|
796
802
|
has_header = rows and rows[0] and rows[0][0].lower() == 'id'
|
|
797
803
|
start_idx = 1 if has_header else 0
|
|
798
804
|
|
|
805
|
+
# Check CSV for existing IDs
|
|
799
806
|
for row in rows[start_idx:]:
|
|
800
807
|
if not self.is_valid_candidate_row(row):
|
|
801
808
|
continue
|
|
@@ -808,6 +815,16 @@ class EvolutionCSV:
|
|
|
808
815
|
except (ValueError, IndexError):
|
|
809
816
|
pass
|
|
810
817
|
|
|
818
|
+
# Also check claimed IDs (not yet in CSV)
|
|
819
|
+
if claimed_ids:
|
|
820
|
+
for claimed_id in claimed_ids:
|
|
821
|
+
if claimed_id.startswith(gen_prefix + '-'):
|
|
822
|
+
try:
|
|
823
|
+
id_num = int(claimed_id.split('-')[1])
|
|
824
|
+
max_id = max(max_id, id_num)
|
|
825
|
+
except (ValueError, IndexError):
|
|
826
|
+
pass
|
|
827
|
+
|
|
811
828
|
return [f"{gen_prefix}-{max_id + 1 + i:03d}" for i in range(count)]
|
|
812
829
|
|
|
813
830
|
def append_candidates(self, candidates: List[Dict[str, str]]) -> int:
|
|
@@ -855,6 +872,9 @@ class EvolutionCSV:
|
|
|
855
872
|
|
|
856
873
|
Returns:
|
|
857
874
|
Dict with total, pending, complete, failed counts
|
|
875
|
+
|
|
876
|
+
AIDEV-NOTE: Uses is_pending_candidate() for pending count to ensure
|
|
877
|
+
consistency between stats and what workers actually find.
|
|
858
878
|
"""
|
|
859
879
|
rows = self._read_csv()
|
|
860
880
|
if not rows:
|
|
@@ -870,16 +890,19 @@ class EvolutionCSV:
|
|
|
870
890
|
continue
|
|
871
891
|
|
|
872
892
|
stats['total'] += 1
|
|
873
|
-
status = row[4].strip().lower() if len(row) > 4 else ''
|
|
874
893
|
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
elif status == 'running':
|
|
878
|
-
stats['running'] += 1
|
|
879
|
-
elif status.startswith('failed'):
|
|
880
|
-
stats['failed'] += 1
|
|
881
|
-
else:
|
|
894
|
+
# Use is_pending_candidate for consistency with workers
|
|
895
|
+
if self.is_pending_candidate(row):
|
|
882
896
|
stats['pending'] += 1
|
|
897
|
+
else:
|
|
898
|
+
status = row[4].strip().lower() if len(row) > 4 else ''
|
|
899
|
+
if status == 'complete':
|
|
900
|
+
stats['complete'] += 1
|
|
901
|
+
elif status == 'running':
|
|
902
|
+
stats['running'] += 1
|
|
903
|
+
elif status.startswith('failed'):
|
|
904
|
+
stats['failed'] += 1
|
|
905
|
+
# Anything else that's not pending gets counted as failed/other
|
|
883
906
|
|
|
884
907
|
return stats
|
|
885
908
|
|
package/lib/evolve_ideate.py
CHANGED
|
@@ -93,20 +93,28 @@ class IdeationStrategy(ABC):
|
|
|
93
93
|
pass
|
|
94
94
|
|
|
95
95
|
def generate(self, context: IdeationContext, count: int,
|
|
96
|
-
max_rounds: int = 10, initial_wait: int = 60, max_wait: int = 600
|
|
96
|
+
max_rounds: int = 10, initial_wait: int = 60, max_wait: int = 600,
|
|
97
|
+
claimed_ids: List[str] = None) -> List[Idea]:
|
|
97
98
|
"""Generate ideas using this strategy with round-based retry and backoff.
|
|
98
99
|
|
|
99
100
|
AIDEV-NOTE: Uses call_ai_with_backoff for robust retry handling.
|
|
100
101
|
Each round tries ALL models. If all fail, waits with exponential backoff.
|
|
102
|
+
claimed_ids tracks IDs already claimed by previous strategies in this run.
|
|
103
|
+
IDs are added to claimed_ids immediately to prevent reuse even on failure.
|
|
101
104
|
"""
|
|
102
105
|
if count <= 0:
|
|
103
106
|
return []
|
|
107
|
+
if claimed_ids is None:
|
|
108
|
+
claimed_ids = []
|
|
104
109
|
|
|
105
|
-
print(f"[IDEATE] Running {self.name} strategy for {count} ideas", file=sys.stderr)
|
|
110
|
+
print(f"[IDEATE] Running {self.name} strategy for {count} ideas", file=sys.stderr, flush=True)
|
|
106
111
|
|
|
107
|
-
# Get next IDs
|
|
108
|
-
ids = self.csv.get_next_ids(context.generation, count)
|
|
109
|
-
print(f"[IDEATE] Using IDs: {', '.join(ids)}", file=sys.stderr)
|
|
112
|
+
# Get next IDs, avoiding any already claimed in this ideation run
|
|
113
|
+
ids = self.csv.get_next_ids(context.generation, count, claimed_ids=claimed_ids)
|
|
114
|
+
print(f"[IDEATE] Using IDs: {', '.join(ids)}", file=sys.stderr, flush=True)
|
|
115
|
+
|
|
116
|
+
# Immediately claim these IDs (even if AI fails, don't reuse them)
|
|
117
|
+
claimed_ids.extend(ids)
|
|
110
118
|
|
|
111
119
|
# Create temp CSV with stub rows
|
|
112
120
|
temp_csv = Path(self.config.evolution_dir) / f"temp-csv-{os.getpid()}.csv"
|
|
@@ -398,6 +406,7 @@ class Ideator:
|
|
|
398
406
|
print(f"[IDEATE] Top performers: {len(context.top_performers)}", file=sys.stderr)
|
|
399
407
|
|
|
400
408
|
all_ideas: List[Idea] = []
|
|
409
|
+
claimed_ids: List[str] = [] # Track IDs claimed across all strategies
|
|
401
410
|
strategies_succeeded = 0
|
|
402
411
|
|
|
403
412
|
for strategy, count in self.strategies:
|
|
@@ -408,10 +417,12 @@ class Ideator:
|
|
|
408
417
|
context, count,
|
|
409
418
|
max_rounds=self.config.max_rounds,
|
|
410
419
|
initial_wait=self.config.initial_wait,
|
|
411
|
-
max_wait=self.config.max_wait
|
|
420
|
+
max_wait=self.config.max_wait,
|
|
421
|
+
claimed_ids=claimed_ids # Pass already-claimed IDs
|
|
412
422
|
)
|
|
413
423
|
|
|
414
424
|
if ideas:
|
|
425
|
+
# IDs are already tracked in generate(), just count success
|
|
415
426
|
strategies_succeeded += 1
|
|
416
427
|
|
|
417
428
|
# Filter for novelty
|
package/lib/evolve_run.py
CHANGED
|
@@ -26,6 +26,8 @@ SCRIPT_DIR = Path(__file__).parent
|
|
|
26
26
|
sys.path.insert(0, str(SCRIPT_DIR.parent))
|
|
27
27
|
|
|
28
28
|
from lib.evolution_csv import EvolutionCSV
|
|
29
|
+
from lib.log import log, log_error, log_warn, set_prefix
|
|
30
|
+
set_prefix("RUN")
|
|
29
31
|
|
|
30
32
|
|
|
31
33
|
@dataclass
|
|
@@ -56,24 +58,22 @@ class WorkerPool:
|
|
|
56
58
|
if len(self.workers) >= self.max_workers:
|
|
57
59
|
return None
|
|
58
60
|
|
|
59
|
-
|
|
61
|
+
# Use -u for unbuffered output so logs stream in real-time
|
|
62
|
+
cmd = [sys.executable, '-u', str(self.worker_script)]
|
|
60
63
|
if self.config_path:
|
|
61
64
|
cmd.extend(['--config', self.config_path])
|
|
62
65
|
if self.timeout:
|
|
63
66
|
cmd.extend(['--timeout', str(self.timeout)])
|
|
64
67
|
|
|
65
68
|
try:
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
stderr=subprocess.STDOUT,
|
|
70
|
-
text=True
|
|
71
|
-
)
|
|
69
|
+
# Don't capture output - let it stream directly to terminal
|
|
70
|
+
# This provides real-time visibility into which models are being used
|
|
71
|
+
proc = subprocess.Popen(cmd)
|
|
72
72
|
self.workers[proc.pid] = proc
|
|
73
|
-
|
|
73
|
+
log(f"Spawned worker {proc.pid}")
|
|
74
74
|
return proc.pid
|
|
75
75
|
except Exception as e:
|
|
76
|
-
|
|
76
|
+
log_error(f"Failed to spawn worker: {e}")
|
|
77
77
|
return None
|
|
78
78
|
|
|
79
79
|
def cleanup_finished(self) -> List[int]:
|
|
@@ -86,15 +86,7 @@ class WorkerPool:
|
|
|
86
86
|
if ret is not None:
|
|
87
87
|
finished_pids.append(pid)
|
|
88
88
|
exit_codes.append(ret)
|
|
89
|
-
|
|
90
|
-
# Log output
|
|
91
|
-
if proc.stdout:
|
|
92
|
-
output = proc.stdout.read()
|
|
93
|
-
if output:
|
|
94
|
-
for line in output.strip().split('\n'):
|
|
95
|
-
print(f"[WORKER-{pid}] {line}", file=sys.stderr)
|
|
96
|
-
|
|
97
|
-
print(f"[RUN] Worker {pid} exited with code {ret}", file=sys.stderr)
|
|
89
|
+
log(f"Worker {pid} exited with code {ret}")
|
|
98
90
|
|
|
99
91
|
for pid in finished_pids:
|
|
100
92
|
del self.workers[pid]
|
|
@@ -106,7 +98,7 @@ class WorkerPool:
|
|
|
106
98
|
if not self.workers:
|
|
107
99
|
return
|
|
108
100
|
|
|
109
|
-
|
|
101
|
+
log(f"Shutting down {len(self.workers)} workers...")
|
|
110
102
|
|
|
111
103
|
# Send SIGTERM
|
|
112
104
|
for pid, proc in self.workers.items():
|
|
@@ -126,7 +118,7 @@ class WorkerPool:
|
|
|
126
118
|
for pid, proc in list(self.workers.items()):
|
|
127
119
|
try:
|
|
128
120
|
proc.kill()
|
|
129
|
-
|
|
121
|
+
log(f"Force killed worker {pid}")
|
|
130
122
|
except Exception:
|
|
131
123
|
pass
|
|
132
124
|
|
|
@@ -162,36 +154,36 @@ class EvolutionRunner:
|
|
|
162
154
|
def _handle_signal(self, signum, frame):
|
|
163
155
|
"""Handle termination signal."""
|
|
164
156
|
sig_name = signal.Signals(signum).name
|
|
165
|
-
|
|
157
|
+
log(f"Received {sig_name}, shutting down...")
|
|
166
158
|
self.shutdown_requested = True
|
|
167
159
|
self.pool.shutdown()
|
|
168
160
|
sys.exit(128 + signum)
|
|
169
161
|
|
|
170
162
|
def cleanup_csv(self):
|
|
171
163
|
"""Clean up CSV at startup."""
|
|
172
|
-
|
|
164
|
+
log("Cleaning up CSV...")
|
|
173
165
|
with EvolutionCSV(self.config.csv_path) as csv:
|
|
174
166
|
# Remove duplicates
|
|
175
167
|
removed = csv.remove_duplicate_candidates()
|
|
176
168
|
if removed:
|
|
177
|
-
|
|
169
|
+
log(f"Removed {removed} duplicate candidates")
|
|
178
170
|
|
|
179
171
|
# Reset stuck candidates
|
|
180
172
|
reset = csv.reset_stuck_candidates()
|
|
181
173
|
if reset:
|
|
182
|
-
|
|
174
|
+
log(f"Reset {reset} stuck candidates")
|
|
183
175
|
|
|
184
176
|
# Clean corrupted status fields
|
|
185
177
|
fixed = csv.cleanup_corrupted_status_fields()
|
|
186
178
|
if fixed:
|
|
187
|
-
|
|
179
|
+
log(f"Fixed {fixed} corrupted status fields")
|
|
188
180
|
|
|
189
181
|
def ensure_baseline(self):
|
|
190
182
|
"""Ensure baseline entry exists in CSV."""
|
|
191
183
|
with EvolutionCSV(self.config.csv_path) as csv:
|
|
192
184
|
info = csv.get_candidate_info('baseline-000')
|
|
193
185
|
if not info:
|
|
194
|
-
|
|
186
|
+
log("Adding baseline-000 entry")
|
|
195
187
|
csv.append_candidates([{
|
|
196
188
|
'id': 'baseline-000',
|
|
197
189
|
'basedOnId': '',
|
|
@@ -211,14 +203,14 @@ class EvolutionRunner:
|
|
|
211
203
|
|
|
212
204
|
# Need minimum completed algorithms to learn from
|
|
213
205
|
if stats['complete'] < self.config.min_completed_for_ideation:
|
|
214
|
-
|
|
206
|
+
log(f"Not enough completed ({stats['complete']} < {self.config.min_completed_for_ideation})")
|
|
215
207
|
return False
|
|
216
208
|
|
|
217
209
|
return True
|
|
218
210
|
|
|
219
211
|
def run_ideation(self) -> bool:
|
|
220
212
|
"""Run ideation. Returns True on success."""
|
|
221
|
-
|
|
213
|
+
log("Running ideation...")
|
|
222
214
|
|
|
223
215
|
cmd = [sys.executable, str(self.ideate_script)]
|
|
224
216
|
if self.config.config_path:
|
|
@@ -232,18 +224,18 @@ class EvolutionRunner:
|
|
|
232
224
|
cwd=self.config.evolution_dir
|
|
233
225
|
)
|
|
234
226
|
|
|
235
|
-
#
|
|
227
|
+
# Forward ideation output (already has timestamps from ideate module)
|
|
236
228
|
if result.stdout:
|
|
237
229
|
for line in result.stdout.strip().split('\n'):
|
|
238
|
-
print(
|
|
230
|
+
print(line, file=sys.stderr, flush=True)
|
|
239
231
|
if result.stderr:
|
|
240
232
|
for line in result.stderr.strip().split('\n'):
|
|
241
|
-
print(
|
|
233
|
+
print(line, file=sys.stderr, flush=True)
|
|
242
234
|
|
|
243
235
|
return result.returncode == 0
|
|
244
236
|
|
|
245
237
|
except Exception as e:
|
|
246
|
-
|
|
238
|
+
log_error(f"Ideation failed: {e}")
|
|
247
239
|
return False
|
|
248
240
|
|
|
249
241
|
def run(self) -> int:
|
|
@@ -253,9 +245,9 @@ class EvolutionRunner:
|
|
|
253
245
|
Returns:
|
|
254
246
|
Exit code
|
|
255
247
|
"""
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
248
|
+
log("Starting evolution run")
|
|
249
|
+
log(f"Max workers: {self.config.max_workers}")
|
|
250
|
+
log(f"Auto ideate: {self.config.auto_ideate}")
|
|
259
251
|
|
|
260
252
|
# Startup cleanup
|
|
261
253
|
self.cleanup_csv()
|
|
@@ -271,7 +263,7 @@ class EvolutionRunner:
|
|
|
271
263
|
|
|
272
264
|
# Check for API limit
|
|
273
265
|
if 2 in exit_codes or 3 in exit_codes:
|
|
274
|
-
|
|
266
|
+
log("API limit reached, waiting 5 minutes...")
|
|
275
267
|
self.api_limit_reached = True
|
|
276
268
|
time.sleep(300) # 5 minute wait
|
|
277
269
|
self.api_limit_reached = False
|
|
@@ -285,7 +277,7 @@ class EvolutionRunner:
|
|
|
285
277
|
|
|
286
278
|
# Get stats
|
|
287
279
|
stats = self.get_stats()
|
|
288
|
-
|
|
280
|
+
log(f"Stats: {stats['pending']} pending, {stats['complete']} complete, {stats['running']} running")
|
|
289
281
|
|
|
290
282
|
# Check if we need ideation
|
|
291
283
|
if stats['pending'] == 0 and self.pool.active_count == 0:
|
|
@@ -301,11 +293,11 @@ class EvolutionRunner:
|
|
|
301
293
|
if self.run_ideation():
|
|
302
294
|
continue # Loop back to check for new work
|
|
303
295
|
else:
|
|
304
|
-
|
|
296
|
+
log_warn("Ideation failed, waiting...")
|
|
305
297
|
time.sleep(30)
|
|
306
298
|
continue
|
|
307
299
|
else:
|
|
308
|
-
|
|
300
|
+
log("Evolution complete!")
|
|
309
301
|
break
|
|
310
302
|
|
|
311
303
|
# Spawn workers for pending work
|
|
@@ -320,7 +312,7 @@ class EvolutionRunner:
|
|
|
320
312
|
|
|
321
313
|
# Cleanup
|
|
322
314
|
self.pool.shutdown()
|
|
323
|
-
|
|
315
|
+
log("Exiting")
|
|
324
316
|
return 0
|
|
325
317
|
|
|
326
318
|
|
|
@@ -389,10 +381,10 @@ def main():
|
|
|
389
381
|
sys.exit(runner.run())
|
|
390
382
|
|
|
391
383
|
except FileNotFoundError as e:
|
|
392
|
-
|
|
384
|
+
log_error(f"Config error: {e}")
|
|
393
385
|
sys.exit(1)
|
|
394
386
|
except Exception as e:
|
|
395
|
-
|
|
387
|
+
log_error(f"Error: {e}")
|
|
396
388
|
import traceback
|
|
397
389
|
traceback.print_exc()
|
|
398
390
|
sys.exit(1)
|
package/lib/evolve_worker.py
CHANGED
|
@@ -30,8 +30,11 @@ from typing import Optional, Tuple, Dict, Any
|
|
|
30
30
|
SCRIPT_DIR = Path(__file__).parent
|
|
31
31
|
sys.path.insert(0, str(SCRIPT_DIR.parent))
|
|
32
32
|
|
|
33
|
+
from lib.log import log, log_error, log_warn, log_debug, set_prefix
|
|
34
|
+
set_prefix("WORKER")
|
|
35
|
+
|
|
33
36
|
from lib.evolution_csv import EvolutionCSV
|
|
34
|
-
from lib.ai_cli import
|
|
37
|
+
from lib.ai_cli import call_ai_with_backoff, get_git_protection_warning, AIError
|
|
35
38
|
|
|
36
39
|
|
|
37
40
|
@dataclass
|
|
@@ -46,8 +49,11 @@ class Config:
|
|
|
46
49
|
python_cmd: str = "python3"
|
|
47
50
|
memory_limit_mb: int = 0
|
|
48
51
|
timeout_seconds: int = 600
|
|
49
|
-
max_ai_retries: int = 3
|
|
50
52
|
max_candidates: int = 5
|
|
53
|
+
# Retry configuration with exponential backoff
|
|
54
|
+
max_rounds: int = 10
|
|
55
|
+
initial_wait: int = 60
|
|
56
|
+
max_wait: int = 600
|
|
51
57
|
|
|
52
58
|
|
|
53
59
|
@dataclass
|
|
@@ -75,10 +81,10 @@ class Worker:
|
|
|
75
81
|
def _handle_signal(self, signum, frame):
|
|
76
82
|
"""Handle termination signal - reset current candidate to pending."""
|
|
77
83
|
sig_name = signal.Signals(signum).name
|
|
78
|
-
|
|
84
|
+
log(f"Received {sig_name}")
|
|
79
85
|
|
|
80
86
|
if self.current_candidate_id:
|
|
81
|
-
|
|
87
|
+
log(f"Resetting {self.current_candidate_id} to pending")
|
|
82
88
|
try:
|
|
83
89
|
with EvolutionCSV(self.config.csv_path) as csv:
|
|
84
90
|
info = csv.get_candidate_info(self.current_candidate_id)
|
|
@@ -87,7 +93,7 @@ class Worker:
|
|
|
87
93
|
if status not in ('complete', 'failed', 'failed-ai-retry', 'failed-parent-missing'):
|
|
88
94
|
csv.update_candidate_status(self.current_candidate_id, 'pending')
|
|
89
95
|
except Exception as e:
|
|
90
|
-
|
|
96
|
+
log(f"Error resetting status: {e}")
|
|
91
97
|
|
|
92
98
|
sys.exit(128 + signum)
|
|
93
99
|
|
|
@@ -139,52 +145,42 @@ This is especially important for models with smaller context windows (like GLM).
|
|
|
139
145
|
|
|
140
146
|
CRITICAL: If you do not know how to implement what was asked for, or if the requested change is unclear or not feasible, you MUST refuse to make any changes. DO NOT modify the code if you are uncertain about the implementation. Simply respond that you cannot implement the requested change and explain why. It is better to refuse than to make incorrect or random changes."""
|
|
141
147
|
|
|
142
|
-
def
|
|
148
|
+
def _call_ai_with_backoff(self, prompt: str, target_file: Path) -> Tuple[bool, str]:
|
|
143
149
|
"""
|
|
144
|
-
Call AI with
|
|
150
|
+
Call AI with round-based retry and exponential backoff.
|
|
151
|
+
|
|
152
|
+
AIDEV-NOTE: Uses call_ai_with_backoff which tries all models in the pool,
|
|
153
|
+
then waits with exponential backoff if all fail, and repeats.
|
|
145
154
|
|
|
146
155
|
Returns:
|
|
147
156
|
Tuple of (success, model_name)
|
|
148
157
|
"""
|
|
149
|
-
|
|
150
|
-
|
|
158
|
+
# Get file hash before AI call
|
|
159
|
+
hash_before = self._file_hash(target_file) if target_file.exists() else None
|
|
160
|
+
|
|
161
|
+
try:
|
|
162
|
+
output, model = call_ai_with_backoff(
|
|
163
|
+
prompt,
|
|
164
|
+
command="run",
|
|
165
|
+
working_dir=self.config.evolution_dir,
|
|
166
|
+
max_rounds=self.config.max_rounds,
|
|
167
|
+
initial_wait=self.config.initial_wait,
|
|
168
|
+
max_wait=self.config.max_wait
|
|
169
|
+
)
|
|
151
170
|
|
|
152
|
-
#
|
|
153
|
-
if
|
|
154
|
-
print(f"[WORKER-{os.getpid()}] Re-copying source file for retry", file=sys.stderr)
|
|
155
|
-
shutil.copy(source_file, target_file)
|
|
171
|
+
# Check if file was modified
|
|
172
|
+
hash_after = self._file_hash(target_file) if target_file.exists() else None
|
|
156
173
|
|
|
157
|
-
|
|
158
|
-
|
|
174
|
+
if hash_before != hash_after and hash_after is not None:
|
|
175
|
+
log(f"AI successfully modified file (model: {model})")
|
|
176
|
+
return True, model
|
|
177
|
+
else:
|
|
178
|
+
log(f"AI completed but did not modify file")
|
|
179
|
+
return False, model
|
|
159
180
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
# Check if file was modified
|
|
164
|
-
hash_after = self._file_hash(target_file) if target_file.exists() else None
|
|
165
|
-
|
|
166
|
-
if hash_before != hash_after and hash_after is not None:
|
|
167
|
-
print(f"[WORKER-{os.getpid()}] AI successfully modified file (model: {model})", file=sys.stderr)
|
|
168
|
-
return True, model
|
|
169
|
-
else:
|
|
170
|
-
print(f"[WORKER-{os.getpid()}] AI did not modify file", file=sys.stderr)
|
|
171
|
-
|
|
172
|
-
except RateLimitError as e:
|
|
173
|
-
print(f"[WORKER-{os.getpid()}] Rate limit: {e}", file=sys.stderr)
|
|
174
|
-
raise # Propagate to caller
|
|
175
|
-
except APIExhaustedError as e:
|
|
176
|
-
print(f"[WORKER-{os.getpid()}] API exhausted: {e}", file=sys.stderr)
|
|
177
|
-
raise # Propagate to caller
|
|
178
|
-
except TimeoutError as e:
|
|
179
|
-
print(f"[WORKER-{os.getpid()}] Timeout: {e}", file=sys.stderr)
|
|
180
|
-
except AIError as e:
|
|
181
|
-
print(f"[WORKER-{os.getpid()}] AI error: {e}", file=sys.stderr)
|
|
182
|
-
|
|
183
|
-
if attempt < self.config.max_ai_retries:
|
|
184
|
-
print(f"[WORKER-{os.getpid()}] Will retry with different model...", file=sys.stderr)
|
|
185
|
-
time.sleep(2)
|
|
186
|
-
|
|
187
|
-
return False, ""
|
|
181
|
+
except AIError as e:
|
|
182
|
+
log_error(f"All AI retries exhausted: {e}")
|
|
183
|
+
return False, ""
|
|
188
184
|
|
|
189
185
|
def _file_hash(self, path: Path) -> Optional[str]:
|
|
190
186
|
"""Get file hash."""
|
|
@@ -224,7 +220,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
|
|
|
224
220
|
|
|
225
221
|
cmd.extend([self.config.evaluator_path, eval_arg])
|
|
226
222
|
|
|
227
|
-
|
|
223
|
+
log(f"Running evaluator: {' '.join(cmd)}")
|
|
228
224
|
|
|
229
225
|
try:
|
|
230
226
|
result = subprocess.run(
|
|
@@ -236,17 +232,17 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
|
|
|
236
232
|
)
|
|
237
233
|
|
|
238
234
|
if result.returncode != 0:
|
|
239
|
-
|
|
235
|
+
log_error(f"Evaluator failed: {result.stderr}")
|
|
240
236
|
return None, {}
|
|
241
237
|
|
|
242
238
|
output = result.stdout + result.stderr
|
|
243
239
|
return self._parse_evaluator_output(output)
|
|
244
240
|
|
|
245
241
|
except subprocess.TimeoutExpired:
|
|
246
|
-
|
|
242
|
+
log_error("Evaluator timed out")
|
|
247
243
|
return None, {}
|
|
248
244
|
except Exception as e:
|
|
249
|
-
|
|
245
|
+
log_error(f"Evaluator error: {e}")
|
|
250
246
|
return None, {}
|
|
251
247
|
|
|
252
248
|
def _parse_evaluator_output(self, output: str) -> Tuple[Optional[float], Dict[str, Any]]:
|
|
@@ -304,9 +300,9 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
|
|
|
304
300
|
Exit code (0=success, 77=AI failed, 78=missing parent, etc.)
|
|
305
301
|
"""
|
|
306
302
|
self.current_candidate_id = candidate.id
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
303
|
+
log(f"Processing: {candidate.id}")
|
|
304
|
+
log(f"Description: {candidate.description[:80]}..." if len(candidate.description) > 80 else f"Description: {candidate.description}")
|
|
305
|
+
log(f"Based on: {candidate.based_on_id or 'baseline'}")
|
|
310
306
|
|
|
311
307
|
is_baseline = self._is_baseline(candidate.id, candidate.based_on_id)
|
|
312
308
|
target_file = Path(self.config.output_dir) / f"evolution_{candidate.id}.py"
|
|
@@ -315,7 +311,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
|
|
|
315
311
|
resolved_parent, source_file = self._resolve_parent_id(candidate.based_on_id)
|
|
316
312
|
|
|
317
313
|
if source_file is None and not is_baseline:
|
|
318
|
-
|
|
314
|
+
log_error(f"Parent not found: {candidate.based_on_id}")
|
|
319
315
|
return 78 # Missing parent
|
|
320
316
|
|
|
321
317
|
if source_file is None:
|
|
@@ -323,59 +319,45 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
|
|
|
323
319
|
|
|
324
320
|
# Check if target already exists
|
|
325
321
|
if target_file.exists():
|
|
326
|
-
|
|
322
|
+
log("File already exists, running evaluation only")
|
|
327
323
|
elif not is_baseline:
|
|
328
324
|
# Copy source to target
|
|
329
|
-
|
|
325
|
+
log(f"Copying {source_file.name} to {target_file.name}")
|
|
330
326
|
shutil.copy(source_file, target_file)
|
|
331
327
|
|
|
332
|
-
# Call AI to modify
|
|
328
|
+
# Call AI to modify (uses round-based retry with backoff)
|
|
333
329
|
prompt = self._build_prompt(candidate, target_file.name)
|
|
330
|
+
success, model = self._call_ai_with_backoff(prompt, target_file)
|
|
334
331
|
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
if not success:
|
|
339
|
-
print(f"[WORKER-{os.getpid()}] AI failed after all retries", file=sys.stderr)
|
|
340
|
-
target_file.unlink(missing_ok=True)
|
|
341
|
-
return 77 # AI generation failed
|
|
342
|
-
|
|
343
|
-
# Record model used
|
|
344
|
-
if model:
|
|
345
|
-
with EvolutionCSV(self.config.csv_path) as csv:
|
|
346
|
-
csv.update_candidate_field(candidate.id, 'run-LLM', model)
|
|
347
|
-
|
|
348
|
-
except RateLimitError:
|
|
332
|
+
if not success:
|
|
333
|
+
log_error("AI failed after all retries")
|
|
349
334
|
target_file.unlink(missing_ok=True)
|
|
350
|
-
|
|
351
|
-
csv.update_candidate_status(candidate.id, 'pending')
|
|
352
|
-
return 2 # Rate limit
|
|
335
|
+
return 77 # AI generation failed
|
|
353
336
|
|
|
354
|
-
|
|
355
|
-
|
|
337
|
+
# Record model used
|
|
338
|
+
if model:
|
|
356
339
|
with EvolutionCSV(self.config.csv_path) as csv:
|
|
357
|
-
csv.
|
|
358
|
-
return 3 # API exhausted
|
|
340
|
+
csv.update_candidate_field(candidate.id, 'run-LLM', model)
|
|
359
341
|
|
|
360
342
|
# Check syntax
|
|
361
343
|
if not self._check_syntax(target_file):
|
|
362
|
-
|
|
344
|
+
log_error("Syntax error in generated file")
|
|
363
345
|
target_file.unlink(missing_ok=True)
|
|
364
346
|
with EvolutionCSV(self.config.csv_path) as csv:
|
|
365
347
|
csv.update_candidate_status(candidate.id, 'pending')
|
|
366
348
|
return 0 # Will retry
|
|
367
349
|
|
|
368
350
|
# Run evaluator
|
|
369
|
-
|
|
351
|
+
log("Running evaluator...")
|
|
370
352
|
score, json_data = self._run_evaluator(candidate.id, is_baseline)
|
|
371
353
|
|
|
372
354
|
if score is None:
|
|
373
|
-
|
|
355
|
+
log_error("Evaluation failed - no score")
|
|
374
356
|
with EvolutionCSV(self.config.csv_path) as csv:
|
|
375
357
|
csv.update_candidate_status(candidate.id, 'failed')
|
|
376
358
|
return 1
|
|
377
359
|
|
|
378
|
-
|
|
360
|
+
log(f"Score: {score}")
|
|
379
361
|
|
|
380
362
|
# Update CSV
|
|
381
363
|
with EvolutionCSV(self.config.csv_path) as csv:
|
|
@@ -397,7 +379,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
|
|
|
397
379
|
Returns:
|
|
398
380
|
Exit code
|
|
399
381
|
"""
|
|
400
|
-
|
|
382
|
+
log(f"Started (max {self.config.max_candidates} candidates)")
|
|
401
383
|
processed = 0
|
|
402
384
|
|
|
403
385
|
while processed < self.config.max_candidates:
|
|
@@ -406,7 +388,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
|
|
|
406
388
|
result = csv.get_next_pending_candidate()
|
|
407
389
|
|
|
408
390
|
if not result:
|
|
409
|
-
|
|
391
|
+
log("No pending candidates")
|
|
410
392
|
break
|
|
411
393
|
|
|
412
394
|
candidate_id, _ = result
|
|
@@ -416,7 +398,7 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
|
|
|
416
398
|
info = csv.get_candidate_info(candidate_id)
|
|
417
399
|
|
|
418
400
|
if not info:
|
|
419
|
-
|
|
401
|
+
log_warn(f"Candidate info not found: {candidate_id}")
|
|
420
402
|
continue
|
|
421
403
|
|
|
422
404
|
candidate = Candidate(
|
|
@@ -439,9 +421,9 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
|
|
|
439
421
|
elif exit_code == 3: # API exhausted
|
|
440
422
|
return 3
|
|
441
423
|
|
|
442
|
-
|
|
424
|
+
log(f"Processed {processed}/{self.config.max_candidates}")
|
|
443
425
|
|
|
444
|
-
|
|
426
|
+
log("Exiting")
|
|
445
427
|
return 0
|
|
446
428
|
|
|
447
429
|
|
|
@@ -475,6 +457,8 @@ def load_config_from_yaml(config_path: Optional[str] = None) -> Config:
|
|
|
475
457
|
p = base_dir / p
|
|
476
458
|
return str(p.resolve())
|
|
477
459
|
|
|
460
|
+
ideation = data.get('ideation', {})
|
|
461
|
+
|
|
478
462
|
return Config(
|
|
479
463
|
csv_path=resolve(data.get('csv_file', 'evolution.csv')),
|
|
480
464
|
evolution_dir=str(base_dir.resolve()),
|
|
@@ -485,8 +469,10 @@ def load_config_from_yaml(config_path: Optional[str] = None) -> Config:
|
|
|
485
469
|
python_cmd=data.get('python_cmd', 'python3'),
|
|
486
470
|
memory_limit_mb=data.get('memory_limit_mb', 0),
|
|
487
471
|
timeout_seconds=data.get('timeout_seconds', 600),
|
|
488
|
-
|
|
489
|
-
|
|
472
|
+
max_candidates=data.get('worker_max_candidates', 5),
|
|
473
|
+
max_rounds=ideation.get('max_rounds', 10),
|
|
474
|
+
initial_wait=ideation.get('initial_wait', 60),
|
|
475
|
+
max_wait=ideation.get('max_wait', 600)
|
|
490
476
|
)
|
|
491
477
|
|
|
492
478
|
|
|
@@ -505,10 +491,10 @@ def main():
|
|
|
505
491
|
sys.exit(worker.run())
|
|
506
492
|
|
|
507
493
|
except FileNotFoundError as e:
|
|
508
|
-
|
|
494
|
+
log_error(f"Config error: {e}")
|
|
509
495
|
sys.exit(1)
|
|
510
496
|
except Exception as e:
|
|
511
|
-
|
|
497
|
+
log_error(f"Error: {e}")
|
|
512
498
|
import traceback
|
|
513
499
|
traceback.print_exc()
|
|
514
500
|
sys.exit(1)
|
package/lib/log.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
#!/usr/bin/env python3
"""
Simple timestamped logging for claude-evolve.

Uses stderr with flush=True for real-time output.

Every line has the form ``[HH:MM:SS] [PREFIX-PID] message``.
"""

import os
import sys
from datetime import datetime
from typing import Optional

# Default prefix, can be set per-module
_prefix = "EVOLVE"


def set_prefix(prefix: str) -> None:
    """Set the default log prefix (e.g., 'WORKER', 'IDEATE', 'RUN').

    Args:
        prefix: Label used by later log calls that do not pass their own.
    """
    global _prefix
    _prefix = prefix


def log(msg: str, prefix: Optional[str] = None) -> None:
    """Log with timestamp. Always flushes for real-time output.

    Args:
        msg: Text to emit.
        prefix: Label for this one message. ``None`` (or any falsy value,
            including the empty string) falls back to the default set via
            ``set_prefix``.
    """
    ts = datetime.now().strftime("%H:%M:%S")
    # `or` rather than an `is None` test: an empty prefix also uses the default.
    p = prefix or _prefix
    # The PID is part of the tag, e.g. "[RUN-1234]".
    pid = os.getpid()
    print(f"[{ts}] [{p}-{pid}] {msg}", file=sys.stderr, flush=True)


def log_debug(msg: str, prefix: Optional[str] = None) -> None:
    """Log a debug message, only if the DEBUG or VERBOSE env var is non-empty.

    Args:
        msg: Text to emit; it is tagged with ``[DEBUG]``.
        prefix: Optional per-message label, forwarded to ``log``.
    """
    # The environment is read on every call, not cached.
    if os.environ.get('DEBUG') or os.environ.get('VERBOSE'):
        log(f"[DEBUG] {msg}", prefix)


def log_error(msg: str, prefix: Optional[str] = None) -> None:
    """Log an error message, tagged with ``[ERROR]``.

    Args:
        msg: Text to emit.
        prefix: Optional per-message label, forwarded to ``log``.
    """
    log(f"[ERROR] {msg}", prefix)


def log_warn(msg: str, prefix: Optional[str] = None) -> None:
    """Log a warning message, tagged with ``[WARN]``.

    Args:
        msg: Text to emit.
        prefix: Optional per-message label, forwarded to ``log``.
    """
    log(f"[WARN] {msg}", prefix)
|