claude-evolve 1.4.7 → 1.4.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/claude-evolve-autostatus +318 -0
- package/bin/claude-evolve-worker +15 -3
- package/package.json +1 -1
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Auto-updating status display for claude-evolve that fits to terminal size.
|
|
4
|
+
Updates in real-time without flicker using ANSI escape sequences.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
import time
|
|
10
|
+
import termios
|
|
11
|
+
import tty
|
|
12
|
+
import select
|
|
13
|
+
import signal
|
|
14
|
+
import argparse
|
|
15
|
+
import subprocess
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
|
|
18
|
+
# Add parent directory to path for imports
|
|
19
|
+
script_dir = os.path.dirname(os.path.abspath(__file__))
|
|
20
|
+
sys.path.insert(0, os.path.join(script_dir, '..'))
|
|
21
|
+
|
|
22
|
+
from lib.config import Config
|
|
23
|
+
from lib.evolution_csv import EvolutionCSV
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class TerminalDisplay:
    """Handles terminal display with ANSI escape sequences for flicker-free updates.

    The current terminal size is kept in ``self.rows`` / ``self.cols`` and is
    refreshed whenever the process receives ``SIGWINCH``. All drawing methods
    write escape sequences to stdout without a trailing newline.
    """

    # Size reported when no attached stream is a terminal (e.g. piped output).
    DEFAULT_ROWS = 24
    DEFAULT_COLS = 80

    def __init__(self):
        """Record the initial terminal size and install the resize handler."""
        self.rows, self.cols = self.get_terminal_size()
        signal.signal(signal.SIGWINCH, self.handle_resize)

    def get_terminal_size(self):
        """Get current terminal size.

        Returns:
            A ``(rows, cols)`` tuple of ints. Falls back to 24x80 when none of
            stdin/stdout/stderr is attached to a terminal.
        """
        # Query the terminal directly instead of spawning `stty size`: no
        # subprocess, no leaked pipe, and only the expected errors are caught
        # (a bare `except:` would also swallow KeyboardInterrupt/SystemExit).
        for stream in (sys.stdin, sys.stdout, sys.stderr):
            try:
                size = os.get_terminal_size(stream.fileno())
            except (AttributeError, OSError, ValueError):
                # Stream is missing, closed, replaced by a non-file object,
                # or not a terminal; try the next one.
                continue
            if size.lines > 0 and size.columns > 0:
                return size.lines, size.columns
        return self.DEFAULT_ROWS, self.DEFAULT_COLS

    def handle_resize(self, signum, frame):
        """Handle terminal resize signal by re-reading the terminal size.

        Args:
            signum: Signal number passed by the ``signal`` module (unused).
            frame: Interrupted stack frame (unused).
        """
        self.rows, self.cols = self.get_terminal_size()

    def clear_screen(self):
        """Clear the entire screen and move the cursor to the top-left corner."""
        print('\033[2J\033[H', end='')

    def move_cursor(self, row, col):
        """Move cursor to a specific position.

        Args:
            row: Row number, as used by the ANSI ``CSI row;col H`` sequence.
            col: Column number, as used by the same sequence.
        """
        print(f'\033[{row};{col}H', end='')

    def clear_line(self):
        """Clear current line."""
        print('\033[2K', end='')

    def hide_cursor(self):
        """Hide the cursor."""
        print('\033[?25l', end='')

    def show_cursor(self):
        """Show the cursor."""
        print('\033[?25h', end='')

    def reset(self):
        """Reset terminal to normal state: cursor visible, colors reset."""
        self.show_cursor()
        print('\033[0m', end='')  # Reset colors
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class AutoStatus:
    """Auto-updating status display.

    Loads the claude-evolve configuration, then repeatedly reads the evolution
    CSV and redraws a summary sized to the terminal until the user presses
    'q' or interrupts the process.
    """

    # ANSI color used for each known candidate status; anything else is drawn
    # with the plain reset sequence.
    _STATUS_COLORS = {
        'complete': '\033[32m',  # Green
        'running': '\033[36m',   # Cyan
        'failed': '\033[31m',    # Red
        'pending': '\033[33m',   # Yellow
    }

    def __init__(self, working_dir=None):
        """Load the configuration and create the terminal display.

        Args:
            working_dir: Optional directory passed to ``Config.load`` when the
                ``CLAUDE_EVOLVE_CONFIG`` environment variable is not set.
        """
        self.config = Config()

        # Load config using same mechanism as other commands:
        # the CLAUDE_EVOLVE_CONFIG env var takes precedence over working_dir.
        config_env = os.environ.get('CLAUDE_EVOLVE_CONFIG')
        if config_env:
            self.config.load(config_env)
        else:
            # Load from working directory or current directory
            self.config.load(working_dir=working_dir)

        self.display = TerminalDisplay()
        self.running = True

    def get_status_data(self):
        """Get current status data from CSV.

        NOTE(review): this relies on ``EvolutionCSV`` exposing a pandas-like
        DataFrame as ``.df`` (boolean-mask indexing, ``.empty``, ``.tail``);
        confirm against lib/evolution_csv.

        Returns:
            A dict with keys ``counts`` (per-status and total row counts),
            ``performance`` (min/max/mean/count of completed candidates, or
            ``None`` when unavailable), ``recent`` (the last rows that fit on
            screen) and ``csv_path``.
        """
        csv_path = self.config.resolve_path(self.config.data['csv_file'])

        with EvolutionCSV(csv_path) as csv:
            df = csv.df

            # Count by status
            status_counts = {
                name: len(df[df['status'] == name])
                for name in ('pending', 'running', 'complete', 'failed')
            }
            status_counts['total'] = len(df)

            # Get performance stats for completed candidates
            perf_stats = None
            completed_df = df[df['status'] == 'complete']
            if not completed_df.empty and 'performance' in completed_df.columns:
                perf_values = completed_df['performance'].dropna()
                if not perf_values.empty:
                    perf_stats = {
                        'min': perf_values.min(),
                        'max': perf_values.max(),
                        'mean': perf_values.mean(),
                        'count': len(perf_values),
                    }

            # Get recent candidates (last N that fit on screen);
            # 15 rows are reserved for the header and stats sections.
            max_candidates = max(1, self.display.rows - 15)
            recent = df.tail(max_candidates)

            return {
                'counts': status_counts,
                'performance': perf_stats,
                'recent': recent,
                'csv_path': csv_path,
            }

    def format_duration(self, seconds):
        """Format duration in human-readable form.

        Args:
            seconds: Duration in seconds; fractional values are truncated so
                the output never shows forms like ``2.0m 5.7s``.

        Returns:
            ``"Ns"`` below a minute, ``"Mm Ss"`` below an hour, otherwise
            ``"Hh Mm"``.
        """
        seconds = int(seconds)
        if seconds < 60:
            return f"{seconds}s"
        if seconds < 3600:
            mins, secs = divmod(seconds, 60)
            return f"{mins}m {secs}s"
        hours, remainder = divmod(seconds, 3600)
        return f"{hours}h {remainder // 60}m"

    @staticmethod
    def _as_text(value, default=''):
        """Return ``value`` as a string, mapping None/NaN-like cells to ``default``."""
        if isinstance(value, str):
            return value
        try:
            # NaN is the only common value that is not equal to itself.
            missing = value is None or bool(value != value)
        except (TypeError, ValueError):
            # Comparison result has no truth value; treat the cell as missing.
            missing = True
        return default if missing else str(value)

    def _format_candidate_row(self, candidate, cols):
        """Build one table row whose visible width never exceeds ``cols``."""
        status = self._as_text(candidate.get('status', 'unknown'), 'unknown')

        # Only completed candidates with a real numeric score show a value.
        perf = "-"
        if status == 'complete' and 'performance' in candidate:
            try:
                score = float(candidate['performance'])
            except (TypeError, ValueError):
                score = None
            if score is not None and score == score:  # skip NaN
                perf = f"{score:.4f}"

        ident = self._as_text(candidate['id'])
        id_part = f"{ident:>8} | "
        status_part = f"{status:^10}"
        rest_part = f" | {perf:>11} | "

        # Widths are measured on the text without color codes, so escape
        # sequences are never counted against the terminal width and are
        # never sliced in half.
        room = cols - (len(id_part) + len(status_part) + len(rest_part))
        if room < 0:
            # Terminal narrower than the fixed columns: plain, truncated row.
            return (id_part + status_part + rest_part)[:max(cols, 0)]

        desc = self._as_text(candidate.get('description', ''))
        if len(desc) > room:
            desc = desc[:room - 3] + "..." if room > 3 else desc[:room]

        color = self._STATUS_COLORS.get(status, '\033[0m')
        return f"{id_part}{color}{status_part}\033[0m{rest_part}{desc}"

    def render(self):
        """Render the current status to the terminal.

        On any failure to read the status data, the screen is cleared and the
        error message is shown instead; the next refresh will try again.
        """
        display = self.display

        try:
            data = self.get_status_data()
        except Exception as e:
            display.clear_screen()
            display.move_cursor(1, 1)
            print(f"Error reading status: {e}")
            sys.stdout.flush()
            return

        # Clear screen and start rendering
        display.clear_screen()
        row = 1

        # Header
        display.move_cursor(row, 1)
        header = "Claude Evolution Auto-Status"
        print(f"\033[1;36m{header.center(display.cols)}\033[0m")
        row += 1

        # Timestamp
        display.move_cursor(row, 1)
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f"Updated: {timestamp} | Press 'q' to quit")
        row += 2

        # File info
        display.move_cursor(row, 1)
        print(f"CSV: {data['csv_path']}")
        row += 2

        # Status summary
        display.move_cursor(row, 1)
        print("\033[1mStatus Summary:\033[0m")
        row += 1

        counts = data['counts']
        status_line = (f"  Total: {counts['total']} | "
                       f"\033[33mPending: {counts['pending']}\033[0m | "
                       f"\033[36mRunning: {counts['running']}\033[0m | "
                       f"\033[32mComplete: {counts['complete']}\033[0m | "
                       f"\033[31mFailed: {counts['failed']}\033[0m")

        display.move_cursor(row, 1)
        print(status_line)
        row += 2

        # Performance stats
        perf = data['performance']
        if perf:
            display.move_cursor(row, 1)
            print("\033[1mPerformance Stats:\033[0m")
            row += 1

            display.move_cursor(row, 1)
            print(f"  Min: {perf['min']:.4f} | Max: {perf['max']:.4f} | "
                  f"Mean: {perf['mean']:.4f} | Count: {perf['count']}")
            row += 2

        # Recent candidates
        if not data['recent'].empty:
            display.move_cursor(row, 1)
            print("\033[1mRecent Candidates:\033[0m")
            row += 1

            # Table header
            display.move_cursor(row, 1)
            header_fmt = f"{'ID':>8} | {'Status':^10} | {'Performance':>11} | {'Description'}"
            print(header_fmt[:display.cols])
            row += 1

            display.move_cursor(row, 1)
            print("-" * min(display.cols, len(header_fmt)))
            row += 1

            # Table rows
            for _, candidate in data['recent'].iterrows():
                if row >= display.rows - 1:  # Leave room for bottom
                    break

                display.move_cursor(row, 1)
                print(self._format_candidate_row(candidate, display.cols))
                row += 1

        # Ensure cursor is at bottom
        display.move_cursor(display.rows, 1)
        sys.stdout.flush()

    def check_input(self):
        """Check for keyboard input without blocking.

        Returns:
            True when 'q' (any case) was pressed, in which case
            ``self.running`` is also cleared; False otherwise.
        """
        # A zero timeout makes select() a pure poll.
        if select.select([sys.stdin], [], [], 0)[0]:
            char = sys.stdin.read(1)
            if char.lower() == 'q':
                self.running = False
                return True
        return False

    def run(self):
        """Main loop for auto-updating display.

        Redraws roughly once per second, polling the keyboard ten times per
        second in between. Terminal settings are always restored on exit.
        """
        # Save terminal settings
        stdin_fd = sys.stdin.fileno()
        old_settings = termios.tcgetattr(stdin_fd)

        try:
            # cbreak mode delivers keys immediately and without echo, but,
            # unlike raw mode, keeps signal generation enabled so Ctrl-C still
            # raises KeyboardInterrupt and newlines are still post-processed.
            tty.setcbreak(stdin_fd)

            self.display.hide_cursor()

            while self.running:
                self.render()

                # Check for input and wait
                for _ in range(10):  # Check 10 times per second
                    if self.check_input():
                        break
                    time.sleep(0.1)

        except KeyboardInterrupt:
            pass

        finally:
            # Restore terminal settings
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, old_settings)
            self.display.reset()
            self.display.clear_screen()
            self.display.move_cursor(1, 1)
            print("Exiting auto-status...")
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def main():
    """Main entry point: parse command-line options and run the display."""
    cli = argparse.ArgumentParser(
        description="Auto-updating status display for claude-evolve that fits to terminal size.",
        epilog="Press 'q' to quit while running.",
    )
    cli.add_argument(
        '--working-dir',
        help='Working directory containing claude-evolve.yaml config file',
    )
    options = cli.parse_args()

    # Build the status display for the requested directory and run it until
    # the user quits.
    AutoStatus(working_dir=options.working_dir).run()


if __name__ == '__main__':
    main()
|
package/bin/claude-evolve-worker
CHANGED
|
@@ -111,18 +111,22 @@ Important: Make meaningful changes that match the description. Don't just add co
|
|
|
111
111
|
|
|
112
112
|
# Run evaluation
|
|
113
113
|
echo "[WORKER-$$] Evaluating algorithm..."
|
|
114
|
-
local
|
|
114
|
+
local eval_output_file="/tmp/claude-evolve-eval-$$-$candidate_id.out"
|
|
115
115
|
local eval_start=$(date +%s)
|
|
116
116
|
|
|
117
117
|
# Prepare evaluation command
|
|
118
118
|
local eval_cmd=("$PYTHON_CMD" "$FULL_EVALUATOR_PATH" "$candidate_id")
|
|
119
119
|
[[ -n "$timeout_seconds" ]] && eval_cmd=(timeout "$timeout_seconds" "${eval_cmd[@]}")
|
|
120
120
|
|
|
121
|
-
# Run evaluation and capture output
|
|
122
|
-
|
|
121
|
+
# Run evaluation with tee to both display and capture output
|
|
122
|
+
# Use stdbuf to disable buffering for real-time output
|
|
123
|
+
if stdbuf -o0 -e0 "${eval_cmd[@]}" 2>&1 | tee "$eval_output_file" >&2; then
|
|
123
124
|
local eval_end=$(date +%s)
|
|
124
125
|
local eval_duration=$((eval_end - eval_start))
|
|
125
126
|
|
|
127
|
+
# Read captured output for parsing
|
|
128
|
+
eval_output=$(<"$eval_output_file")
|
|
129
|
+
|
|
126
130
|
# Extract performance score - support multiple formats
|
|
127
131
|
# Try to parse the output and extract score
|
|
128
132
|
local score_and_json=$("$PYTHON_CMD" -c "
|
|
@@ -224,10 +228,18 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
|
|
|
224
228
|
else
|
|
225
229
|
echo "[WORKER-$$] ERROR: No score found in evaluation output" >&2
|
|
226
230
|
echo "[WORKER-$$] Output: $eval_output" >&2
|
|
231
|
+
rm -f "$eval_output_file"
|
|
227
232
|
return 1
|
|
228
233
|
fi
|
|
234
|
+
|
|
235
|
+
# Clean up temp file
|
|
236
|
+
rm -f "$eval_output_file"
|
|
229
237
|
else
|
|
230
238
|
local exit_code=$?
|
|
239
|
+
# Read any output that was captured before failure
|
|
240
|
+
eval_output=$(<"$eval_output_file")
|
|
241
|
+
rm -f "$eval_output_file"
|
|
242
|
+
|
|
231
243
|
echo "[WORKER-$$] ERROR: Evaluation failed with exit code $exit_code" >&2
|
|
232
244
|
echo "[WORKER-$$] Output: $eval_output" >&2
|
|
233
245
|
|