claude-evolve 1.4.11 → 1.4.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/claude-evolve-autostatus +192 -177
- package/bin/claude-evolve-edit +82 -6
- package/bin/claude-evolve-ideate +535 -218
- package/bin/claude-evolve-ideate.debug +907 -0
- package/bin/claude-evolve-run +49 -7
- package/bin/claude-evolve-worker +121 -21
- package/lib/__pycache__/evolution_csv.cpython-311.pyc +0 -0
- package/lib/__pycache__/evolution_csv.cpython-313.pyc +0 -0
- package/lib/evolution_csv.py +36 -2
- package/lib/validate_parent_ids.py +232 -0
- package/package.json +1 -1
package/bin/claude-evolve-run
CHANGED
@@ -13,16 +13,23 @@ if [[ -n ${CLAUDE_EVOLVE_CONFIG:-} ]]; then
 else
   # Check if config.yaml exists in current directory
   if [[ -f "config.yaml" ]]; then
-    export
-
+    # Don't export to avoid collision with parallel runs
+    CONFIG_FILE="$(pwd)/config.yaml"
+    load_config "$CONFIG_FILE"
   else
     load_config
   fi
 fi

-#
-if [[ -
-
+# Store the config path for workers (don't export to avoid collision)
+if [[ -n ${CLAUDE_EVOLVE_CONFIG:-} ]]; then
+  WORKER_CONFIG_PATH="$CLAUDE_EVOLVE_CONFIG"
+elif [[ -n ${CONFIG_FILE:-} ]]; then
+  WORKER_CONFIG_PATH="$CONFIG_FILE"
+elif [[ -f "config.yaml" ]]; then
+  WORKER_CONFIG_PATH="$(pwd)/config.yaml"
+else
+  WORKER_CONFIG_PATH=""
 fi

 # Validate configuration
@@ -229,6 +236,7 @@ start_worker() {

   local worker_args=()
   [[ -n $timeout_seconds ]] && worker_args+=(--timeout "$timeout_seconds")
+  [[ -n $WORKER_CONFIG_PATH ]] && worker_args+=(--config "$WORKER_CONFIG_PATH")

   echo "[DISPATCHER] Starting worker..."
   "$worker_script" "${worker_args[@]}" &
@@ -252,6 +260,12 @@ cleanup_workers() {
       if [[ $exit_code -eq 2 ]]; then
         echo "[DISPATCHER] Worker $pid hit rate limit, will retry later"
         # Rate limits don't count as consecutive failures
+      elif [[ $exit_code -eq 3 ]]; then
+        echo "[DISPATCHER] Worker $pid hit API usage limit - stopping all processing" >&2
+        echo "[DISPATCHER] Cannot continue evolution run due to API limits" >&2
+        echo "[DISPATCHER] Please wait for limits to reset before restarting" >&2
+        # Set a flag to stop the main loop
+        api_limit_reached=true
       else
         echo "[DISPATCHER] Worker $pid failed with exit code $exit_code"
         # With retry mechanism, failures are normal - just keep processing
@@ -290,6 +304,16 @@ get_csv_stats() {
 echo "[DISPATCHER] Starting unified evolution engine"
 echo "[DISPATCHER] Configuration: max_workers=$MAX_WORKERS, timeout=${timeout_seconds:-none}"

+# Clean up any stuck 'running' statuses at startup
+if [[ -f "$FULL_CSV_PATH" ]]; then
+  echo "[DISPATCHER] Resetting any stuck 'running' candidates to 'pending'..."
+  if "$SCRIPT_DIR/claude-evolve-edit" running pending >/dev/null 2>&1; then
+    echo "[DISPATCHER] Successfully reset stuck candidates"
+  else
+    echo "[DISPATCHER] No stuck candidates found or edit command not available"
+  fi
+fi
+
 # Validate CSV and clean up stuck statuses and duplicates
 if [[ -f "$FULL_CSV_PATH" ]]; then
   echo "[DISPATCHER] Validating CSV and cleaning up..."
@@ -451,11 +475,20 @@ ensure_baseline_entry
 # With retry mechanism, we don't need consecutive failure tracking
 # Failures are handled gracefully through the retry system

+# Flag to track API limit status
+api_limit_reached=false
+
 # Main dispatch loop
 while true; do
   # Clean up finished workers
   cleanup_workers

+  # Check if API limit was reached
+  if [[ "$api_limit_reached" == "true" ]]; then
+    echo "[DISPATCHER] Stopping evolution run due to API usage limits" >&2
+    break
+  fi
+
   # Get current status
   csv_stats=$(get_csv_stats "$FULL_CSV_PATH")
   read -r total_rows complete_count pending_count <<< "$csv_stats"
@@ -514,5 +547,14 @@ done

 # Clean shutdown
 shutdown_workers
-
-
+
+# Final status message
+if [[ "$api_limit_reached" == "true" ]]; then
+  echo "[DISPATCHER] Evolution run stopped due to API usage limits"
+  echo "[DISPATCHER] Wait for limits to reset, then run 'claude-evolve run' again"
+  echo "[DISPATCHER] Exiting with code 1 (API limits reached)"
+  exit 1
+else
+  echo "[DISPATCHER] Evolution run complete"
+  echo "[DISPATCHER] Exiting with code 0"
+fi
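
The changes above make the dispatcher distinguish worker exit codes: 2 means a rate limit (retry later), and the new code 3 means an API usage limit that stops the whole run. A minimal Python sketch of that convention follows; it is illustrative only, and the worker path and arguments are placeholders rather than part of the package.

import subprocess

# Exit-code convention used by the dispatcher above (illustrative sketch):
#   0 -> success, 2 -> rate limit (retry later), 3 -> API usage limit (stop run)
def classify_worker_exit(worker_cmd):
    result = subprocess.run(worker_cmd)
    if result.returncode == 2:
        return "rate-limited"   # does not count as a consecutive failure
    if result.returncode == 3:
        return "api-limit"      # dispatcher sets api_limit_reached and stops
    return "ok" if result.returncode == 0 else "failed"

# Hypothetical invocation mirroring the --config flag added in this release:
# classify_worker_exit(["claude-evolve-worker", "--config", "/path/to/config.yaml"])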
package/bin/claude-evolve-worker
CHANGED
@@ -6,21 +6,63 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"
 source "$SCRIPT_DIR/../lib/config.sh"
 source "$SCRIPT_DIR/../lib/csv-lock.sh"

-#
-
-
-else
-  load_config
-fi
+# Track current candidate for cleanup
+CURRENT_CANDIDATE_ID=""
+TERMINATION_SIGNAL=""

-#
+# Cleanup function to handle termination
+cleanup_on_exit() {
+  if [[ -n "$CURRENT_CANDIDATE_ID" ]]; then
+    # Only mark as failed if it was a timeout (SIGTERM from timeout command)
+    # For user interruption (Ctrl-C) or kill, leave it for retry
+    if [[ "$TERMINATION_SIGNAL" == "TERM" ]]; then
+      echo "[WORKER-$$] Timeout detected, marking $CURRENT_CANDIDATE_ID as failed" >&2
+      "$PYTHON_CMD" -c "
+import sys
+sys.path.insert(0, '$SCRIPT_DIR/..')
+from lib.evolution_csv import EvolutionCSV
+try:
+    with EvolutionCSV('$FULL_CSV_PATH') as csv:
+        csv.update_candidate_status('$CURRENT_CANDIDATE_ID', 'failed')
+except:
+    pass  # Best effort cleanup
+" 2>/dev/null || true
+    else
+      echo "[WORKER-$$] Interrupted, leaving $CURRENT_CANDIDATE_ID for retry" >&2
+      # Optionally reset to pending instead of leaving as running
+      "$PYTHON_CMD" -c "
+import sys
+sys.path.insert(0, '$SCRIPT_DIR/..')
+from lib.evolution_csv import EvolutionCSV
+try:
+    with EvolutionCSV('$FULL_CSV_PATH') as csv:
+        csv.update_candidate_status('$CURRENT_CANDIDATE_ID', 'pending')
+except:
+    pass  # Best effort cleanup
+" 2>/dev/null || true
+    fi
+  fi
+}
+
+# Set up signal handlers
+trap 'TERMINATION_SIGNAL="TERM"; cleanup_on_exit' TERM
+trap 'TERMINATION_SIGNAL="INT"; cleanup_on_exit' INT
+trap 'TERMINATION_SIGNAL="HUP"; cleanup_on_exit' HUP
+trap 'cleanup_on_exit' EXIT
+
+# Parse arguments first to get config path
 timeout_seconds=""
+config_path=""
 while [[ $# -gt 0 ]]; do
   case "$1" in
     --timeout)
       timeout_seconds="$2"
       shift 2
       ;;
+    --config)
+      config_path="$2"
+      shift 2
+      ;;
     *)
       echo "[ERROR] Unknown argument: $1" >&2
       exit 1
@@ -28,6 +70,15 @@ while [[ $# -gt 0 ]]; do
   esac
 done

+# Load config using the provided path, environment variable, or default
+if [[ -n $config_path ]]; then
+  load_config "$config_path"
+elif [[ -n ${CLAUDE_EVOLVE_CONFIG:-} ]]; then
+  load_config "$CLAUDE_EVOLVE_CONFIG"
+else
+  load_config
+fi
+
 # Validate paths
 if [[ ! -f "$FULL_CSV_PATH" ]]; then
   echo "[WORKER-$$] CSV file not found: $FULL_CSV_PATH" >&2
@@ -58,11 +109,21 @@ process_candidate() {
     fi
   fi

-  #
+  # Check if this is a baseline candidate (no parent and specific ID pattern)
+  local is_baseline=false
+  if [[ -z "$parent_id" ]] && [[ "$candidate_id" =~ ^(baseline|baseline-000|000|0|gen00-000)$ ]]; then
+    is_baseline=true
+    echo "[WORKER-$$] Detected baseline candidate - will run algorithm.py directly"
+  fi
+
+  # Target file for evolution (not used for baseline)
   local target_file="$FULL_OUTPUT_DIR/evolution_${candidate_id}.py"

   # Check if processing should be skipped
-  if [[
+  if [[ "$is_baseline" == "true" ]]; then
+    # For baseline, skip all file operations
+    echo "[WORKER-$$] Baseline candidate - skipping file operations"
+  elif [[ -f "$target_file" ]]; then
     echo "[WORKER-$$] Skipping copy - File already exists - skipping all processing"
     echo "[WORKER-$$] Skipping Claude processing - File already exists - skipping all processing"

@@ -100,13 +161,29 @@ The modification should be substantial and follow the description exactly. Make

 Important: Make meaningful changes that match the description. Don't just add comments or make trivial adjustments."

-    if
-
-
-
+    if [[ "$is_baseline" != "true" ]]; then
+      local claude_output
+      claude_output=$(echo "$evolution_prompt" | claude --dangerously-skip-permissions -p 2>&1)
+      local claude_exit_code=$?
+
+      # Check for usage limits
+      if echo "$claude_output" | grep -q "Claude AI usage limit reached"; then
+        echo "[WORKER-$$] ERROR: Claude AI usage limit reached!" >&2
+        echo "[WORKER-$$] ERROR: Cannot continue processing - API limits exceeded" >&2
+        rm -f "$target_file"  # Clean up on failure
+        # Exit with special code 3 to indicate API limit
+        exit 3
+      fi
+
+      if [[ $claude_exit_code -ne 0 ]]; then
+        echo "[WORKER-$$] ERROR: Claude evolution failed with exit code $claude_exit_code" >&2
+        echo "[WORKER-$$] ERROR: Claude output: $claude_output" >&2
+        rm -f "$target_file"  # Clean up on failure
+        return 1
+      fi
+
+      echo "[WORKER-$$] Evolution applied successfully"
     fi
-
-    echo "[WORKER-$$] Evolution applied successfully"
   fi

   # Run evaluation
@@ -115,7 +192,13 @@ Important: Make meaningful changes that match the description. Don't just add co
   local eval_start=$(date +%s)

   # Prepare evaluation command
-
+  # For baseline, pass "baseline" or empty string to evaluator to use algorithm.py
+  local eval_arg="$candidate_id"
+  if [[ "$is_baseline" == "true" ]]; then
+    # Evaluator should interpret this as "use algorithm.py directly"
+    eval_arg=""
+  fi
+  local eval_cmd=("$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$eval_arg")
   [[ -n "$timeout_seconds" ]] && eval_cmd=(timeout "$timeout_seconds" "${eval_cmd[@]}")

   # Run evaluation with tee to both display and capture output
@@ -228,17 +311,20 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
   else
     echo "[WORKER-$$] ERROR: No score found in evaluation output" >&2
     echo "[WORKER-$$] Output: $eval_output" >&2
-    rm -f "$eval_output_file"
+    # rm -f "$eval_output_file"  # Keep for debugging
+    echo "[WORKER-$$] Evaluation output saved to: $eval_output_file" >&2
     return 1
   fi

-  # Clean up temp file
-  rm -f "$eval_output_file"
+  # Clean up temp file (comment out to keep for debugging)
+  # rm -f "$eval_output_file"
+  echo "[WORKER-$$] Evaluation output saved to: $eval_output_file" >&2
 else
   local exit_code=$?
   # Read any output that was captured before failure
   eval_output=$(<"$eval_output_file")
-  rm -f "$eval_output_file"
+  # rm -f "$eval_output_file"  # Keep for debugging
+  echo "[WORKER-$$] Evaluation output saved to: $eval_output_file" >&2

   echo "[WORKER-$$] ERROR: Evaluation failed with exit code $exit_code" >&2
   echo "[WORKER-$$] Output: $eval_output" >&2
@@ -272,7 +358,7 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
 # Get full candidate info
 candidate = csv.get_candidate_info(candidate_id)
 if candidate:
-    print(f'{candidate[\"id\"]}|{candidate.get(\"
+    print(f'{candidate[\"id\"]}|{candidate.get(\"basedOnId\", \"\")}|{candidate[\"description\"]}')
 ")

 if [[ -z "$candidate_info" ]]; then
@@ -283,12 +369,26 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
   # Parse candidate info
   IFS='|' read -r candidate_id parent_id description <<< "$candidate_info"

+  # Set current candidate for cleanup
+  CURRENT_CANDIDATE_ID="$candidate_id"
+
   # Process the candidate
   if process_candidate "$candidate_id" "$parent_id" "$description"; then
     echo "[WORKER-$$] Successfully processed $candidate_id"
   else
     echo "[WORKER-$$] Failed to process $candidate_id"
+    # Ensure status is set to failed (might already be done in process_candidate)
+    "$PYTHON_CMD" -c "
+import sys
+sys.path.insert(0, '$SCRIPT_DIR/..')
+from lib.evolution_csv import EvolutionCSV
+with EvolutionCSV('$FULL_CSV_PATH') as csv:
+    csv.update_candidate_status('$candidate_id', 'failed')
+" 2>/dev/null || true
   fi
+
+  # Clear current candidate
+  CURRENT_CANDIDATE_ID=""
 done

 echo "[WORKER-$$] No more pending candidates, worker exiting"
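
The worker's new baseline detection relies on a bash regex applied to the candidate ID when there is no parent. For readability, the same rule is shown below as a small Python predicate; this is an illustrative mirror of the check, not code from the package.

import re

# Same ID patterns as the worker's bash test: ^(baseline|baseline-000|000|0|gen00-000)$
BASELINE_ID_RE = re.compile(r'^(baseline|baseline-000|000|0|gen00-000)$')

def is_baseline(candidate_id: str, parent_id: str) -> bool:
    # Baseline candidates have no parent and a baseline-style ID
    return parent_id == "" and bool(BASELINE_ID_RE.fullmatch(candidate_id))

print(is_baseline("gen00-000", ""))           # True  -> evaluator runs algorithm.py directly
print(is_baseline("gen03-012", "gen02-004"))  # False -> normal evolution candidate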
package/lib/__pycache__/evolution_csv.cpython-311.pyc
Binary file

package/lib/__pycache__/evolution_csv.cpython-313.pyc
Binary file
package/lib/evolution_csv.py
CHANGED
@@ -121,8 +121,9 @@ class EvolutionCSV:
             # Check status field (5th column, index 4)
             status = row[4].strip().lower() if row[4] else ''

-            #
-
+            # Only blank, missing, or "pending" mean pending
+            # "running" should NOT be considered pending to avoid duplicate processing
+            if not status or status == 'pending':
                 return True

             # Check for retry statuses
@@ -321,6 +322,39 @@ class EvolutionCSV:

         return None

+    def delete_candidate(self, candidate_id: str) -> bool:
+        """Delete a candidate from the CSV file."""
+        rows = self._read_csv()
+        if not rows:
+            return False
+
+        # Check if we have a header row
+        has_header = rows and rows[0] and rows[0][0].lower() == 'id'
+
+        # Find and remove the candidate
+        deleted = False
+        new_rows = []
+
+        # Keep header if it exists
+        if has_header:
+            new_rows.append(rows[0])
+            start_idx = 1
+        else:
+            start_idx = 0
+
+        for i in range(start_idx, len(rows)):
+            row = rows[i]
+            if self.is_valid_candidate_row(row) and row[0].strip() == candidate_id:
+                deleted = True
+                # Skip this row (delete it)
+                continue
+            new_rows.append(row)
+
+        if deleted:
+            self._write_csv(new_rows)
+
+        return deleted
+
     def has_pending_work(self) -> bool:
         """Check if there are any pending candidates. Used by dispatcher."""
         return self.count_pending_candidates() > 0
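
The new EvolutionCSV.delete_candidate method removes a single row by ID and returns True only if something was deleted. A minimal usage sketch follows, reusing the context-manager pattern the worker scripts already rely on; the sys.path entry, CSV path, and candidate ID are placeholders.

import sys
sys.path.insert(0, "/path/to/claude-evolve/package")  # placeholder: directory containing lib/

from lib.evolution_csv import EvolutionCSV

with EvolutionCSV("evolution/evolution.csv") as csv:  # placeholder CSV path
    if csv.delete_candidate("gen02-007"):             # placeholder candidate ID
        print("candidate removed")
    else:
        print("candidate not found")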
package/lib/validate_parent_ids.py
ADDED

@@ -0,0 +1,232 @@
+#!/usr/bin/env python3
+"""
+Validate parent IDs in AI-generated ideas for claude-evolve ideation.
+"""
+
+import csv
+import json
+import sys
+import re
+from typing import List, Set, Dict, Tuple, Optional
+
+
+def get_valid_parent_ids(csv_path: str) -> Set[str]:
+    """Extract all valid candidate IDs from the CSV that can be used as parents."""
+    valid_ids = set()
+    valid_ids.add("")  # Empty string is valid for novel ideas
+    valid_ids.add("000")  # Special ID for baseline algorithm
+    valid_ids.add("0")  # Alternative baseline ID
+    valid_ids.add("gen00-000")  # Another baseline format
+
+    try:
+        with open(csv_path, 'r') as f:
+            reader = csv.reader(f)
+            next(reader, None)  # Skip header
+            for row in reader:
+                if row and len(row) > 0:
+                    candidate_id = row[0].strip()
+                    if candidate_id:
+                        valid_ids.add(candidate_id)
+    except Exception as e:
+        print(f"[ERROR] Failed to read CSV: {e}", file=sys.stderr)
+
+    return valid_ids
+
+
+def validate_and_fix_parent_id(parent_id: str, valid_ids: Set[str], idea_type: str,
+                               top_performers: Optional[List[Tuple[str, str, float]]] = None) -> str:
+    """
+    Validate a parent ID and fix it if invalid.
+
+    Args:
+        parent_id: The parent ID to validate
+        valid_ids: Set of valid parent IDs
+        idea_type: Type of idea (novel, hill-climbing, structural, crossover)
+        top_performers: List of (id, description, score) tuples for non-novel ideas
+
+    Returns:
+        A valid parent ID (may be fixed)
+    """
+    # Novel ideas should have empty parent
+    if idea_type == "novel":
+        return ""
+
+    # Check if parent ID is valid
+    if parent_id in valid_ids:
+        return parent_id
+
+    # For non-novel ideas, we need a valid parent
+    if top_performers and len(top_performers) > 0:
+        # Return the first top performer's ID
+        return top_performers[0][0]
+
+    # If no top performers, return empty (will be caught as error later)
+    return ""
+
+
+def parse_ai_line(line: str, idea_type: str) -> Tuple[str, str]:
+    """
+    Parse a line from AI output to extract parent ID and description.
+
+    Returns:
+        Tuple of (parent_id, description)
+    """
+    line = line.strip()
+    parent_id = ""
+    description = line
+
+    if idea_type != "novel":
+        # Look for "From X:" pattern
+        match = re.match(r'^From\s+([^:]+):\s*(.+)$', line, re.IGNORECASE)
+        if match:
+            parent_id = match.group(1).strip()
+            description = match.group(2).strip()
+
+    return parent_id, description
+
+
+def validate_ai_output(ai_output: str, count: int, idea_type: str, csv_path: str,
+                       top_performers_str: str = "") -> List[Dict[str, str]]:
+    """
+    Validate AI output and return validated ideas.
+
+    Args:
+        ai_output: Raw AI output
+        count: Expected number of ideas
+        idea_type: Type of idea (novel, hill-climbing, structural, crossover)
+        csv_path: Path to CSV file
+        top_performers_str: String containing top performers (format: "id,description,score\n...")
+
+    Returns:
+        List of validated ideas with 'parent_id' and 'description' keys
+    """
+    # Get valid parent IDs
+    valid_ids = get_valid_parent_ids(csv_path)
+
+    # Parse top performers
+    top_performers = []
+    if top_performers_str:
+        for line in top_performers_str.strip().split('\n'):
+            if line:
+                parts = line.split(',', 2)
+                if len(parts) >= 3:
+                    try:
+                        top_performers.append((parts[0], parts[1], float(parts[2])))
+                    except ValueError:
+                        pass
+
+    # Process AI output
+    validated_ideas = []
+    lines = ai_output.strip().split('\n')
+
+    print(f"[DEBUG] Processing {len(lines)} lines from AI output for {idea_type} ideas", file=sys.stderr)
+
+    for line in lines:
+        # Skip empty lines and metadata
+        if not line or line.strip() == '' or line.startswith('#') or line.startswith('[') or line.startswith('=='):
+            continue
+
+        # Skip debug/info messages from AI tools
+        if line.strip().startswith('[INFO]') or line.strip().startswith('[WARN]') or line.strip().startswith('[ERROR]') or line.strip().startswith('[DEBUG]'):
+            continue
+
+        # Clean the line
+        line = line.strip()
+        line = re.sub(r'^[0-9]+\.?\s*', '', line)  # Remove numbering
+        line = re.sub(r'^-\s*', '', line)  # Remove bullet points
+
+        # Parse parent ID and description
+        parent_id, description = parse_ai_line(line, idea_type)
+
+        # Validate parent ID
+        if parent_id and parent_id not in valid_ids:
+            print(f"[WARN] Invalid parent ID '{parent_id}' for {idea_type} idea - fixing...", file=sys.stderr)
+            print(f"[INFO] Valid parent IDs are: {', '.join(sorted([id for id in valid_ids if id]))[:200]}...", file=sys.stderr)
+            parent_id = validate_and_fix_parent_id(parent_id, valid_ids, idea_type, top_performers)
+            print(f"[INFO] Fixed parent ID to: '{parent_id}'", file=sys.stderr)
+
+        # For non-novel ideas, ensure we have a parent
+        if idea_type != "novel" and not parent_id:
+            if top_performers:
+                parent_id = top_performers[0][0]
+                print(f"[INFO] Assigned parent ID '{parent_id}' to idea without parent", file=sys.stderr)
+            else:
+                print(f"[ERROR] Non-novel idea without parent and no top performers available", file=sys.stderr)
+                continue
+
+        # Skip if description is too short or contains shell artifacts
+        if len(description) < 20:
+            continue
+
+        if any(word in description for word in ['EOF', '/dev/null', '<<<', '>>>', '#!/bin/bash']):
+            print(f"[WARN] Skipping description with shell artifacts: {description[:50]}...", file=sys.stderr)
+            continue
+
+        validated_ideas.append({
+            'parent_id': parent_id,
+            'description': description
+        })
+
+        if len(validated_ideas) >= count:
+            break
+
+    return validated_ideas
+
+
+def main():
+    """Main entry point for validation script."""
+    if len(sys.argv) < 5:
+        print("Usage: validate_parent_ids.py <ai_output_file> <count> <idea_type> <csv_path> [top_performers_file]", file=sys.stderr)
+        sys.exit(1)
+
+    ai_output_file = sys.argv[1]
+    count = int(sys.argv[2])
+    idea_type = sys.argv[3]
+    csv_path = sys.argv[4]
+    top_performers_file = sys.argv[5] if len(sys.argv) > 5 else None
+
+    try:
+        # Read AI output
+        with open(ai_output_file, 'r') as f:
+            ai_output = f.read()
+    except Exception as e:
+        print(f"[ERROR] Failed to read AI output file {ai_output_file}: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    # Read top performers if provided
+    top_performers_str = ""
+    if top_performers_file and top_performers_file != "none":
+        try:
+            with open(top_performers_file, 'r') as f:
+                top_performers_str = f.read()
+        except Exception as e:
+            print(f"[WARN] Failed to read top performers file {top_performers_file}: {e}", file=sys.stderr)
+
+    # Check if AI output is empty or looks like an error
+    if not ai_output.strip():
+        print(f"[ERROR] AI output is empty", file=sys.stderr)
+        sys.exit(1)
+
+    if len(ai_output) < 50:
+        print(f"[WARN] AI output is suspiciously short: {ai_output}", file=sys.stderr)
+
+    # Validate
+    validated_ideas = validate_ai_output(ai_output, count, idea_type, csv_path, top_performers_str)
+
+    # Output validated ideas as JSON
+    print(json.dumps(validated_ideas))
+
+    # Return error ONLY if no valid ideas at all
+    if len(validated_ideas) == 0:
+        print(f"[ERROR] No valid ideas found in AI output. First 500 chars:", file=sys.stderr)
+        print(ai_output[:500], file=sys.stderr)
+        sys.exit(1)
+    elif len(validated_ideas) < count:
+        print(f"[WARN] Only validated {len(validated_ideas)} out of {count} requested {idea_type} ideas", file=sys.stderr)
+        print(f"[INFO] AI appears to have generated fewer ideas than requested.", file=sys.stderr)
+        print(f"[INFO] Proceeding with {len(validated_ideas)} valid ideas.", file=sys.stderr)
+        # Don't exit with error - we have some valid ideas!
+
+
+if __name__ == "__main__":
+    main()
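
validate_parent_ids.py is invoked as a CLI in the package (see the usage string in main()), but validate_ai_output can also be exercised directly. The example below is illustrative: the AI output line, top-performer entry, CSV path, and sys.path entry are made-up values, and only the function signature comes from the file above.

import sys
sys.path.insert(0, "/path/to/claude-evolve/package")  # placeholder: directory containing lib/

from lib.validate_parent_ids import validate_ai_output

ai_output = "From gen01-003: Add momentum to the gradient update for faster convergence"
top_performers = "gen01-003,Tuned learning rate baseline,0.87"

ideas = validate_ai_output(ai_output, count=1, idea_type="hill-climbing",
                           csv_path="evolution/evolution.csv",
                           top_performers_str=top_performers)
print(ideas)  # e.g. [{'parent_id': 'gen01-003', 'description': 'Add momentum to ...'}]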