claude-evolve 1.9.8 → 1.9.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file (not shown)

package/lib/evolve_worker.py (CHANGED)

@@ -50,6 +50,7 @@ class Config:
     memory_limit_mb: int = 0
     timeout_seconds: int = 600
     max_candidates: int = 5
+    max_validation_retries: int = 3  # Max attempts to fix validation errors (if validator.py exists)
     # Retry configuration with exponential backoff
     max_rounds: int = 10
     initial_wait: int = 60
@@ -202,6 +203,136 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
         except Exception:
             return False
 
+    def _find_validator(self) -> Optional[Path]:
+        """
+        Auto-detect validator.py in the evolution directory.
+        No config required - if validator.py exists, we use it.
+        """
+        validator_path = Path(self.config.evolution_dir) / "validator.py"
+        if validator_path.exists():
+            return validator_path
+        return None
+
+    def _run_validator(self, candidate_id: str) -> Tuple[bool, Dict[str, Any]]:
+        """
+        Run the validator (fast smoke test) before full evaluation.
+
+        AIDEV-NOTE: Auto-detects validator.py in evolution directory.
+        Returns exit code 0 on success, non-zero on failure.
+        Resilient to any output format - handles JSON, plain text, or nothing.
+
+        Returns:
+            Tuple of (success, error_info_dict)
+            - success: True if validation passed
+            - error_info: Dict with whatever info we could extract from output
+        """
+        validator_path = self._find_validator()
+        if not validator_path:
+            return True, {}  # No validator found, skip
+
+        cmd = [self.config.python_cmd, str(validator_path), candidate_id]
+        log(f"Running validator: {' '.join(cmd)}")
+
+        try:
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True,
+                timeout=30,  # Validator should be fast (~3 seconds)
+                cwd=self.config.evolution_dir
+            )
+
+            # Combine stdout and stderr for full context
+            stdout = result.stdout.strip() if result.stdout else ""
+            stderr = result.stderr.strip() if result.stderr else ""
+            combined_output = f"{stdout}\n{stderr}".strip()
+
+            # Try to extract structured info, but be resilient to any format
+            error_info = {'raw_output': combined_output}
+
+            # Try to parse JSON from stdout (validator may output JSON)
+            if stdout.startswith('{'):
+                try:
+                    parsed = json.loads(stdout)
+                    if isinstance(parsed, dict):
+                        error_info.update(parsed)
+                except json.JSONDecodeError:
+                    pass  # Not valid JSON, that's fine
+
+            # If no structured error, use the raw output
+            if 'error' not in error_info and combined_output:
+                error_info['error'] = combined_output
+
+            if result.returncode == 0:
+                log("Validation passed")
+                return True, error_info
+            else:
+                error_type = error_info.get('error_type', 'validation_failed')
+                log_warn(f"Validation failed: {error_type}")
+                return False, error_info
+
+        except subprocess.TimeoutExpired:
+            log_error("Validator timed out")
+            return False, {'error': 'Validator timed out after 30 seconds', 'error_type': 'timeout'}
+        except Exception as e:
+            log_error(f"Validator error: {e}")
+            return False, {'error': str(e), 'error_type': 'exception'}
+
+    def _build_fix_prompt(self, candidate: Candidate, target_basename: str, error_info: Dict[str, Any]) -> str:
+        """
+        Build AI prompt to fix validation errors.
+
+        AIDEV-NOTE: Resilient to any error_info structure - uses whatever is available.
+        """
+        prompt = f"""{get_git_protection_warning()}
+
+The code in {target_basename} failed validation. Please fix the errors and try again.
+
+## Validator Output
+
+"""
+        # Include whatever structured fields we have
+        if error_info.get('error_type'):
+            prompt += f"**Error Type:** {error_info['error_type']}\n\n"
+
+        if error_info.get('error'):
+            prompt += f"**Error:**\n{error_info['error']}\n\n"
+
+        if error_info.get('suggestion'):
+            prompt += f"**Suggested Fix:**\n{error_info['suggestion']}\n\n"
+
+        if error_info.get('traceback'):
+            tb = error_info['traceback']
+            # Truncate if too long
+            if len(tb) > 1500:
+                tb = "..." + tb[-1500:]
+            prompt += f"**Traceback:**\n```\n{tb}\n```\n\n"
+
+        # If we only have raw output (no structured fields), show that
+        if not any(error_info.get(k) for k in ('error', 'error_type', 'suggestion', 'traceback')):
+            raw = error_info.get('raw_output', 'No output captured')
+            # Truncate if needed
+            if len(raw) > 2000:
+                raw = raw[:2000] + "\n... (truncated)"
+            prompt += f"```\n{raw}\n```\n\n"
+
+        prompt += f"""## Instructions
+
+1. Read the file {target_basename} to understand the current code
+2. Identify the issue based on the validator output above
+3. Fix the code to resolve the validation error
+4. The fix should still implement: {candidate.description}
+
+**CRITICAL:** Make sure to actually fix the error. Do not just add comments or make cosmetic changes.
+
+To help debug, you can run the validator yourself:
+```
+python validator.py {target_basename}
+```
+"""
+
+        return prompt
+
     def _run_evaluator(self, candidate_id: str, is_baseline: bool) -> Tuple[Optional[float], Dict[str, Any]]:
         """
         Run the evaluator.
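The contract implied by `_run_validator` above is small: the worker runs `python validator.py <candidate_id>` from the evolution directory with a 30-second timeout, treats exit code 0 as success, and optionally folds a JSON object printed to stdout (keys such as error, error_type, suggestion, traceback) into the fix prompt. Below is a minimal sketch of a validator that satisfies that contract; the per-candidate filename pattern and the run() entry-point check are illustrative assumptions, not claude-evolve APIs.

```python
#!/usr/bin/env python3
"""Minimal validator.py sketch matching the contract in _run_validator above.

Invoked as `python validator.py <candidate_id>` from the evolution directory;
must finish well inside the worker's 30-second timeout, exit 0 on success and
non-zero on failure. JSON on stdout is optional; if present, keys like
'error', 'error_type', 'suggestion' and 'traceback' feed the fix prompt.
"""
import importlib.util
import json
import sys
import traceback


def main() -> int:
    candidate_id = sys.argv[1] if len(sys.argv) > 1 else ""
    # Assumed layout for this sketch: one file per candidate, evolution_<id>.py
    candidate_file = f"evolution_{candidate_id}.py"
    try:
        spec = importlib.util.spec_from_file_location("candidate", candidate_file)
        if spec is None or spec.loader is None:
            raise FileNotFoundError(f"{candidate_file} not found or not importable")
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)  # import-time smoke test only
        if not hasattr(module, "run"):  # assumed entry point for this sketch
            print(json.dumps({
                "error": f"{candidate_file} defines no run() function",
                "error_type": "missing_entry_point",
                "suggestion": "Define a top-level run() function.",
            }))
            return 1
        return 0
    except Exception as exc:
        print(json.dumps({
            "error": str(exc),
            "error_type": type(exc).__name__,
            "traceback": traceback.format_exc(),
        }))
        return 1


if __name__ == "__main__":
    sys.exit(main())
```

With a validator like this in place, a candidate that fails to import comes back with an error_type and traceback, and the worker asks the AI for up to max_validation_retries fixes before marking the row failed-validation (see the next hunk).
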
@@ -347,6 +478,52 @@ CRITICAL: If you do not know how to implement what was asked for, or if the requ
                 csv.update_candidate_status(candidate.id, 'pending')
             return 0  # Will retry
 
+        # Run validator with retry loop
+        # AIDEV-NOTE: Validator catches structural errors before expensive full evaluation.
+        # If validation fails, we give the AI feedback and ask it to fix the code.
+        validation_passed = False
+        for validation_attempt in range(self.config.max_validation_retries + 1):
+            valid, error_info = self._run_validator(candidate.id)
+
+            if valid:
+                validation_passed = True
+                break
+
+            if validation_attempt >= self.config.max_validation_retries:
+                log_error(f"Validation failed after {self.config.max_validation_retries} fix attempts")
+                break
+
+            # Ask AI to fix the validation error
+            log(f"Validation failed (attempt {validation_attempt + 1}), asking AI to fix...")
+            fix_prompt = self._build_fix_prompt(candidate, target_file.name, error_info)
+            success, fix_model = self._call_ai_with_backoff(fix_prompt, target_file)
+
+            if not success:
+                log_error("AI failed to fix validation error")
+                break
+
+            # Record that we used an additional model call for fixing
+            if fix_model:
+                with EvolutionCSV(self.config.csv_path) as csv:
+                    current_llm = csv.get_candidate_info(candidate.id).get('run-LLM', '')
+                    new_llm = f"{current_llm}+{fix_model}" if current_llm else fix_model
+                    csv.update_candidate_field(candidate.id, 'run-LLM', new_llm)
+
+            # Re-check syntax after fix
+            if not self._check_syntax(target_file):
+                log_error("Fix introduced syntax error")
+                # Don't break - try again if we have retries left
+
+        if not validation_passed:
+            # Validation failed after all retries
+            with EvolutionCSV(self.config.csv_path) as csv:
+                csv.update_candidate_status(candidate.id, 'failed-validation')
+                # Store the last error for debugging
+                if error_info:
+                    error_summary = f"{error_info.get('error_type', 'unknown')}: {error_info.get('error', '')[:100]}"
+                    csv.update_candidate_field(candidate.id, 'validation_error', error_summary)
+            return 1
+
         # Run evaluator
         log("Running evaluator...")
         score, json_data = self._run_evaluator(candidate.id, is_baseline)
@@ -470,6 +647,7 @@ def load_config_from_yaml(config_path: Optional[str] = None) -> Config:
         memory_limit_mb=data.get('memory_limit_mb', 0),
         timeout_seconds=data.get('timeout_seconds', 600),
         max_candidates=data.get('worker_max_candidates', 5),
+        max_validation_retries=data.get('max_validation_retries', 3),
         max_rounds=ideation.get('max_rounds', 10),
         initial_wait=ideation.get('initial_wait', 60),
         max_wait=ideation.get('max_wait', 600)
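As the last hunk shows, the new knob is read from the top level of the evolution config, not from the ideation section. A hypothetical config excerpt, using only key names visible in load_config_from_yaml above (the surrounding file layout and the ideation section name are assumptions inferred from the ideation.get(...) calls):

```yaml
# Hypothetical excerpt of the evolution config read by load_config_from_yaml
memory_limit_mb: 0
timeout_seconds: 600
worker_max_candidates: 5
max_validation_retries: 3   # new in 1.9.9; 0 runs the validator once with no AI fix retries

ideation:                   # section name assumed from the ideation.get(...) calls
  max_rounds: 10
  initial_wait: 60
  max_wait: 600
```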