claude-evolve 1.4.4 → 1.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -63,17 +63,35 @@ your-project/
63
63
 
64
64
  ## Evaluator Requirements
65
65
 
66
- Your `evaluator.py` must output a performance score to stdout:
66
+ Your `evaluator.py` must output a performance score to stdout. The system supports multiple output formats:
67
67
 
68
+ ### Format 1: Simple numeric value
68
69
  ```python
69
- # Simple: just print a number
70
+ # Just print a single number
70
71
  print(1.234)
72
+ ```
73
+
74
+ ### Format 2: JSON with performance/score field
75
+ ```python
76
+ # JSON with 'performance' field (recommended)
77
+ print('{"performance": 1.234, "accuracy": 0.95, "latency": 45.2}')
71
78
 
72
- # Advanced: JSON with metrics
73
- print('{"performance": 1.234, "accuracy": 0.95}')
79
+ # OR JSON with 'score' field
80
+ print('{"score": 1.234, "precision": 0.88, "recall": 0.92}')
81
+ ```
82
+
83
+ ### Format 3: SCORE: prefix (legacy)
84
+ ```python
85
+ # For backward compatibility
86
+ print("SCORE: 1.234")
74
87
  ```
75
88
 
76
- Higher scores = better performance. Score of 0 = failure.
89
+ **Important notes:**
90
+ - Higher scores = better performance
91
+ - When using JSON, all fields are saved to the CSV for analysis
92
+ - The `performance` or `score` field is required for evolution decisions
93
+ - A score of 0 doesn't mean failure - it's just a low score
94
+ - Actual failures should exit with a non-zero status code
77
95
 
78
96
  ## Configuration
79
97
 
@@ -123,14 +123,96 @@ Important: Make meaningful changes that match the description. Don't just add co
123
123
  local eval_end=$(date +%s)
124
124
  local eval_duration=$((eval_end - eval_start))
125
125
 
126
- # Extract performance score
127
- local score=$(echo "$eval_output" | grep -E "^SCORE:" | cut -d: -f2 | tr -d ' ')
126
+ # Extract performance score - support multiple formats
127
+ # Try to parse the output and extract score
128
+ local score_and_json=$("$PYTHON_CMD" -c "
129
+ import sys
130
+ import json
131
+ import re
132
+
133
+ output = '''$eval_output'''
134
+
135
+ # Try different formats
136
+ score = None
137
+ json_data = None
138
+
139
+ # Format 1: Simple numeric value (just a number on a line)
140
+ for line in output.strip().split('\n'):
141
+ line = line.strip()
142
+ if line and not line.startswith('{'):
143
+ try:
144
+ score = float(line)
145
+ break
146
+ except ValueError:
147
+ pass
148
+
149
+ # Format 2: JSON with 'performance' or 'score' field
150
+ if score is None:
151
+ for line in output.strip().split('\n'):
152
+ line = line.strip()
153
+ if line.startswith('{'):
154
+ try:
155
+ data = json.loads(line)
156
+ json_data = data
157
+ if 'performance' in data:
158
+ score = float(data['performance'])
159
+ elif 'score' in data:
160
+ score = float(data['score'])
161
+ break
162
+ except (json.JSONDecodeError, ValueError, KeyError):
163
+ pass
164
+
165
+ # Format 3: SCORE: prefix (backward compatibility)
166
+ if score is None:
167
+ match = re.search(r'^SCORE:\s*([+-]?\d*\.?\d+)', output, re.MULTILINE)
168
+ if match:
169
+ try:
170
+ score = float(match.group(1))
171
+ except ValueError:
172
+ pass
173
+
174
+ # Output results
175
+ if score is not None:
176
+ print(f'SCORE={score}')
177
+ if json_data:
178
+ print('JSON_DATA=' + json.dumps(json_data))
179
+ else:
180
+ print('SCORE=NONE')
181
+ ")
128
182
 
129
- if [[ -n "$score" ]]; then
183
+ # Parse the Python output
184
+ local score=""
185
+ local json_data=""
186
+ while IFS= read -r line; do
187
+ if [[ "$line" =~ ^SCORE=(.*)$ ]]; then
188
+ score="${BASH_REMATCH[1]}"
189
+ elif [[ "$line" =~ ^JSON_DATA=(.*)$ ]]; then
190
+ json_data="${BASH_REMATCH[1]}"
191
+ fi
192
+ done <<< "$score_and_json"
193
+
194
+ if [[ "$score" != "NONE" ]] && [[ -n "$score" ]]; then
130
195
  echo "[WORKER-$$] Evaluation complete: score=$score (${eval_duration}s)"
131
196
 
132
197
  # Update CSV with result
133
- "$PYTHON_CMD" -c "
198
+ if [[ -n "$json_data" ]]; then
199
+ # If we have JSON data, update all fields
200
+ "$PYTHON_CMD" -c "
201
+ import sys
202
+ import json
203
+ sys.path.insert(0, '$SCRIPT_DIR/..')
204
+ from lib.evolution_csv import EvolutionCSV
205
+
206
+ json_data = json.loads('$json_data')
207
+ with EvolutionCSV('$FULL_CSV_PATH') as csv:
208
+ csv.update_candidate_status('$candidate_id', 'complete')
209
+ # Update all fields from JSON
210
+ for key, value in json_data.items():
211
+ csv.update_candidate_field('$candidate_id', key, str(value))
212
+ "
213
+ else
214
+ # Simple score only
215
+ "$PYTHON_CMD" -c "
134
216
  import sys
135
217
  sys.path.insert(0, '$SCRIPT_DIR/..')
136
218
  from lib.evolution_csv import EvolutionCSV
@@ -138,6 +220,7 @@ with EvolutionCSV('$FULL_CSV_PATH') as csv:
138
220
  csv.update_candidate_status('$candidate_id', 'complete')
139
221
  csv.update_candidate_performance('$candidate_id', '$score')
140
222
  "
223
+ fi
141
224
  else
142
225
  echo "[WORKER-$$] ERROR: No score found in evaluation output" >&2
143
226
  echo "[WORKER-$$] Output: $eval_output" >&2
@@ -240,6 +240,66 @@ class EvolutionCSV:
240
240
 
241
241
  return updated
242
242
 
243
def update_candidate_field(self, candidate_id: str, field_name: str, value: str) -> bool:
    """Set one named field on a candidate row, adding a column if needed.

    The rows are obtained from ``self._read_csv()`` and, when a change is
    made, handed back to ``self._write_csv()``.

    Args:
        candidate_id: Id to look for; compared against the stripped first
            cell of each row.
        field_name: Column to update. Matching against header cells is
            case-insensitive and ignores surrounding whitespace. When the
            file has a header and no column matches, ``field_name`` is
            appended to the header as a new column.
        value: Text stored in the candidate's cell.

    Returns:
        True if a matching candidate row was found and written back.
        False if the CSV is empty, the candidate was not found, or the
        file has no header and ``field_name`` is not one of the five
        fixed columns (id, basedonid, description, performance, status).
    """
    rows = self._read_csv()
    if not rows:
        return False

    # A header is recognised by its first cell being "id". Strip before
    # comparing so padded cells ("id, performance") are still detected,
    # consistent with how candidate ids are compared below.
    has_header = bool(rows[0]) and rows[0][0].strip().lower() == 'id'
    wanted = field_name.strip().lower()
    header_row = rows[0] if has_header else None
    needs_new_column = False

    if has_header:
        field_index = next(
            (i for i, col in enumerate(header_row) if col.strip().lower() == wanted),
            None,
        )
        if field_index is None:
            # Unknown field: it will become a new trailing column, but the
            # header is only extended once the candidate is actually found.
            field_index = len(header_row)
            needs_new_column = True
    else:
        # No header - only the fixed, well-known positions can be addressed.
        field_map = {
            'id': 0,
            'basedonid': 1,
            'description': 2,
            'performance': 3,
            'status': 4,
        }
        field_index = field_map.get(wanted)
        if field_index is None:
            # Unknown field without header - can't update
            return False

    first_data_row = 1 if has_header else 0
    for row in rows[first_data_row:]:
        if self.is_valid_candidate_row(row) and row[0].strip() == candidate_id:
            if needs_new_column:
                header_row.append(field_name)
            # Pad short rows so the target index exists.
            row.extend([''] * (field_index + 1 - len(row)))
            row[field_index] = value
            self._write_csv(rows)
            return True

    return False
302
+
243
303
  def get_candidate_info(self, candidate_id: str) -> Optional[Dict[str, str]]:
244
304
  """Get information about a specific candidate."""
245
305
  rows = self._read_csv()
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-evolve",
3
- "version": "1.4.4",
3
+ "version": "1.4.6",
4
4
  "bin": {
5
5
  "claude-evolve": "./bin/claude-evolve",
6
6
  "claude-evolve-main": "./bin/claude-evolve-main",