claude-evolve 1.9.8 → 1.9.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/ai-cli.sh CHANGED
@@ -145,13 +145,13 @@ $prompt"
145
145
  ;;
146
146
  glm-openrouter)
147
147
  local ai_output
148
- ai_output=$(timeout -k 30 600 opencode -m openrouter/z-ai/glm-4.6 run "$prompt" 2>&1)
148
+ ai_output=$(timeout -k 30 600 opencode -m openrouter/z-ai/glm-4.7 run "$prompt" 2>&1)
149
149
  local ai_exit_code=$?
150
150
  ;;
151
151
  glm-zai)
152
152
  # GLM -- can be slow sometimes
153
153
  local ai_output
154
- ai_output=$(timeout -k 30 1800 opencode -m zai-coding-plan/glm-4.6 run "$prompt" 2>&1)
154
+ ai_output=$(timeout -k 30 1800 opencode -m zai-coding-plan/glm-4.7 run "$prompt" 2>&1)
155
155
  local ai_exit_code=$?
156
156
  ;;
157
157
  deepseek-openrouter)
@@ -50,6 +50,7 @@ class Config:
50
50
  memory_limit_mb: int = 0
51
51
  timeout_seconds: int = 600
52
52
  max_candidates: int = 5
53
+ max_validation_retries: int = 3 # Max attempts to fix validation errors (if validator.py exists)
53
54
  # Retry configuration with exponential backoff
54
55
  max_rounds: int = 10
55
56
  initial_wait: int = 60
@@ -202,6 +203,136 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
202
203
  except Exception:
203
204
  return False
204
205
 
206
def _find_validator(self) -> Optional[Path]:
    """
    Auto-detect validator.py in the evolution directory.

    No config required - if a validator.py file exists, we use it.

    Returns:
        Path to validator.py inside self.config.evolution_dir when it
        exists as a regular file, otherwise None.
    """
    validator_path = Path(self.config.evolution_dir) / "validator.py"
    # is_file() rather than exists(): a directory that happens to be named
    # validator.py cannot be run as a script and must not enable validation.
    if validator_path.is_file():
        return validator_path
    return None
215
+
216
def _run_validator(self, candidate_id: str) -> Tuple[bool, Dict[str, Any]]:
    """
    Run the validator (fast smoke test) before full evaluation.

    AIDEV-NOTE: Auto-detects validator.py in evolution directory.
    The validator process signals success with exit code 0 and failure with
    any non-zero exit code. Its output may be JSON, plain text, or nothing.

    Args:
        candidate_id: Passed to validator.py as its only command-line argument.

    Returns:
        Tuple of (success, error_info_dict)
        - success: True if validation passed, or if no validator was found
        - error_info: Dict with whatever info we could extract from output.
          Empty when no validator was found. Otherwise 'raw_output' always
          holds the captured stdout+stderr, and the text fields 'error',
          'error_type', 'suggestion' and 'traceback' are strings when present.

    This method does not raise for validator problems: a timeout or any other
    exception is logged and reported as a failed validation.
    """
    validator_path = self._find_validator()
    if not validator_path:
        return True, {}  # No validator found, skip

    timeout_seconds = 30  # Validator should be fast (~3 seconds)

    def as_text(value: Any) -> str:
        # Downstream code slices and measures these fields as strings.
        if isinstance(value, str):
            return value
        if isinstance(value, (list, tuple)):
            return "\n".join(str(item) for item in value)
        return str(value)

    cmd = [self.config.python_cmd, str(validator_path), candidate_id]
    log(f"Running validator: {' '.join(cmd)}")

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=timeout_seconds,
            cwd=self.config.evolution_dir,
        )

        # Combine stdout and stderr for full context
        stdout = result.stdout.strip() if result.stdout else ""
        stderr = result.stderr.strip() if result.stderr else ""
        combined_output = f"{stdout}\n{stderr}".strip()

        error_info: Dict[str, Any] = {}

        # Try to parse JSON from stdout (validator may output JSON)
        if stdout.startswith('{'):
            try:
                parsed = json.loads(stdout)
                if isinstance(parsed, dict):
                    error_info.update(parsed)
            except json.JSONDecodeError:
                pass  # Not valid JSON, that's fine

        # Set after merging so a JSON key named 'raw_output' cannot replace
        # the text that was actually captured.
        error_info['raw_output'] = combined_output

        # Normalize the text fields: drop JSON nulls and stringify anything
        # else, so callers can safely slice or concatenate them.
        for key in ('error', 'error_type', 'suggestion', 'traceback'):
            if key not in error_info:
                continue
            if error_info[key] is None:
                del error_info[key]
            else:
                error_info[key] = as_text(error_info[key])

        # If no usable structured error, use the raw output
        if not error_info.get('error') and combined_output:
            error_info['error'] = combined_output

        if result.returncode == 0:
            log("Validation passed")
            return True, error_info
        else:
            error_type = error_info.get('error_type', 'validation_failed')
            log_warn(f"Validation failed: {error_type}")
            return False, error_info

    except subprocess.TimeoutExpired:
        log_error("Validator timed out")
        return False, {
            'error': f'Validator timed out after {timeout_seconds} seconds',
            'error_type': 'timeout',
        }
    except Exception as e:
        # Deliberate catch-all: a broken validator must fail the candidate,
        # not crash the worker.
        log_error(f"Validator error: {e}")
        return False, {'error': str(e), 'error_type': 'exception'}
280
+
281
def _build_fix_prompt(self, candidate: Candidate, target_basename: str, error_info: Dict[str, Any]) -> str:
    """
    Build AI prompt to fix validation errors.

    AIDEV-NOTE: Resilient to any error_info structure - uses whatever is available.

    Args:
        candidate: Candidate being fixed; its description and id are read.
        target_basename: File name of the code the AI is asked to fix.
        error_info: Validator result dict. The keys 'error_type', 'error',
            'suggestion' and 'traceback' are included when truthy; if none
            of them is, 'raw_output' is shown instead.

    Returns:
        The complete prompt text.
    """
    def as_text(value: Any) -> str:
        # Validators may emit lists (e.g. traceback lines) or other JSON
        # values; length checks and slicing below need a string.
        if isinstance(value, str):
            return value
        if isinstance(value, (list, tuple)):
            return "\n".join(str(item) for item in value)
        return str(value)

    prompt = f"""{get_git_protection_warning()}

The code in {target_basename} failed validation. Please fix the errors and try again.

## Validator Output

"""
    # Include whatever structured fields we have
    if error_info.get('error_type'):
        prompt += f"**Error Type:** {error_info['error_type']}\n\n"

    if error_info.get('error'):
        prompt += f"**Error:**\n{error_info['error']}\n\n"

    if error_info.get('suggestion'):
        prompt += f"**Suggested Fix:**\n{error_info['suggestion']}\n\n"

    if error_info.get('traceback'):
        tb = as_text(error_info['traceback'])
        # Truncate if too long; keep the tail, where the failing frame is
        if len(tb) > 1500:
            tb = "..." + tb[-1500:]
        prompt += f"**Traceback:**\n```\n{tb}\n```\n\n"

    # If we only have raw output (no structured fields), show that
    if not any(error_info.get(k) for k in ('error', 'error_type', 'suggestion', 'traceback')):
        # `or` instead of a .get() default: 'raw_output' is normally present
        # but may be an empty string, which should show the placeholder.
        raw = as_text(error_info.get('raw_output') or 'No output captured')
        # Truncate if needed
        if len(raw) > 2000:
            raw = raw[:2000] + "\n... (truncated)"
        prompt += f"```\n{raw}\n```\n\n"

    # The debug command uses the candidate id, which is the argument the
    # worker itself passes to validator.py.
    prompt += f"""## Instructions

1. Read the file {target_basename} to understand the current code
2. Identify the issue based on the validator output above
3. Fix the code to resolve the validation error
4. The fix should still implement: {candidate.description}

**CRITICAL:** Make sure to actually fix the error. Do not just add comments or make cosmetic changes.

To help debug, you can run the validator yourself:
```
python validator.py {candidate.id}
```
"""

    return prompt
335
+
205
336
  def _run_evaluator(self, candidate_id: str, is_baseline: bool) -> Tuple[Optional[float], Dict[str, Any]]:
206
337
  """
207
338
  Run the evaluator.
@@ -347,6 +478,52 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
347
478
  csv.update_candidate_status(candidate.id, 'pending')
348
479
  return 0 # Will retry
349
480
 
481
+ # Run validator with retry loop
482
+ # AIDEV-NOTE: Validator catches structural errors before expensive full evaluation.
483
+ # If validation fails, we give the AI feedback and ask it to fix the code.
484
+ validation_passed = False
485
+ for validation_attempt in range(self.config.max_validation_retries + 1):
486
+ valid, error_info = self._run_validator(candidate.id)
487
+
488
+ if valid:
489
+ validation_passed = True
490
+ break
491
+
492
+ if validation_attempt >= self.config.max_validation_retries:
493
+ log_error(f"Validation failed after {self.config.max_validation_retries} fix attempts")
494
+ break
495
+
496
+ # Ask AI to fix the validation error
497
+ log(f"Validation failed (attempt {validation_attempt + 1}), asking AI to fix...")
498
+ fix_prompt = self._build_fix_prompt(candidate, target_file.name, error_info)
499
+ success, fix_model = self._call_ai_with_backoff(fix_prompt, target_file)
500
+
501
+ if not success:
502
+ log_error("AI failed to fix validation error")
503
+ break
504
+
505
+ # Record that we used an additional model call for fixing
506
+ if fix_model:
507
+ with EvolutionCSV(self.config.csv_path) as csv:
508
+ current_llm = csv.get_candidate_info(candidate.id).get('run-LLM', '')
509
+ new_llm = f"{current_llm}+{fix_model}" if current_llm else fix_model
510
+ csv.update_candidate_field(candidate.id, 'run-LLM', new_llm)
511
+
512
+ # Re-check syntax after fix
513
+ if not self._check_syntax(target_file):
514
+ log_error("Fix introduced syntax error")
515
+ # Don't break - try again if we have retries left
516
+
517
+ if not validation_passed:
518
+ # Validation failed after all retries
519
+ with EvolutionCSV(self.config.csv_path) as csv:
520
+ csv.update_candidate_status(candidate.id, 'failed-validation')
521
+ # Store the last error for debugging
522
+ if error_info:
523
+ error_summary = f"{error_info.get('error_type', 'unknown')}: {error_info.get('error', '')[:100]}"
524
+ csv.update_candidate_field(candidate.id, 'validation_error', error_summary)
525
+ return 1
526
+
350
527
  # Run evaluator
351
528
  log("Running evaluator...")
352
529
  score, json_data = self._run_evaluator(candidate.id, is_baseline)
@@ -470,6 +647,7 @@ def load_config_from_yaml(config_path: Optional[str] = None) -> Config:
470
647
  memory_limit_mb=data.get('memory_limit_mb', 0),
471
648
  timeout_seconds=data.get('timeout_seconds', 600),
472
649
  max_candidates=data.get('worker_max_candidates', 5),
650
+ max_validation_retries=data.get('max_validation_retries', 3),
473
651
  max_rounds=ideation.get('max_rounds', 10),
474
652
  initial_wait=ideation.get('initial_wait', 60),
475
653
  max_wait=ideation.get('max_wait', 600)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-evolve",
3
- "version": "1.9.8",
3
+ "version": "1.9.10",
4
4
  "bin": {
5
5
  "claude-evolve": "bin/claude-evolve",
6
6
  "claude-evolve-main": "bin/claude-evolve-main",