pdd-cli 0.0.42__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. pdd/__init__.py +4 -4
  2. pdd/agentic_common.py +863 -0
  3. pdd/agentic_crash.py +534 -0
  4. pdd/agentic_fix.py +1179 -0
  5. pdd/agentic_langtest.py +162 -0
  6. pdd/agentic_update.py +370 -0
  7. pdd/agentic_verify.py +183 -0
  8. pdd/auto_deps_main.py +15 -5
  9. pdd/auto_include.py +63 -5
  10. pdd/bug_main.py +3 -2
  11. pdd/bug_to_unit_test.py +2 -0
  12. pdd/change_main.py +11 -4
  13. pdd/cli.py +22 -1181
  14. pdd/cmd_test_main.py +80 -19
  15. pdd/code_generator.py +58 -18
  16. pdd/code_generator_main.py +672 -25
  17. pdd/commands/__init__.py +42 -0
  18. pdd/commands/analysis.py +248 -0
  19. pdd/commands/fix.py +140 -0
  20. pdd/commands/generate.py +257 -0
  21. pdd/commands/maintenance.py +174 -0
  22. pdd/commands/misc.py +79 -0
  23. pdd/commands/modify.py +230 -0
  24. pdd/commands/report.py +144 -0
  25. pdd/commands/templates.py +215 -0
  26. pdd/commands/utility.py +110 -0
  27. pdd/config_resolution.py +58 -0
  28. pdd/conflicts_main.py +8 -3
  29. pdd/construct_paths.py +281 -81
  30. pdd/context_generator.py +10 -2
  31. pdd/context_generator_main.py +113 -11
  32. pdd/continue_generation.py +47 -7
  33. pdd/core/__init__.py +0 -0
  34. pdd/core/cli.py +503 -0
  35. pdd/core/dump.py +554 -0
  36. pdd/core/errors.py +63 -0
  37. pdd/core/utils.py +90 -0
  38. pdd/crash_main.py +44 -11
  39. pdd/data/language_format.csv +71 -62
  40. pdd/data/llm_model.csv +20 -18
  41. pdd/detect_change_main.py +5 -4
  42. pdd/fix_code_loop.py +331 -77
  43. pdd/fix_error_loop.py +209 -60
  44. pdd/fix_errors_from_unit_tests.py +4 -3
  45. pdd/fix_main.py +75 -18
  46. pdd/fix_verification_errors.py +12 -100
  47. pdd/fix_verification_errors_loop.py +319 -272
  48. pdd/fix_verification_main.py +57 -17
  49. pdd/generate_output_paths.py +93 -10
  50. pdd/generate_test.py +16 -5
  51. pdd/get_jwt_token.py +48 -9
  52. pdd/get_run_command.py +73 -0
  53. pdd/get_test_command.py +68 -0
  54. pdd/git_update.py +70 -19
  55. pdd/increase_tests.py +7 -0
  56. pdd/incremental_code_generator.py +2 -2
  57. pdd/insert_includes.py +11 -3
  58. pdd/llm_invoke.py +1278 -110
  59. pdd/load_prompt_template.py +36 -10
  60. pdd/pdd_completion.fish +25 -2
  61. pdd/pdd_completion.sh +30 -4
  62. pdd/pdd_completion.zsh +79 -4
  63. pdd/postprocess.py +10 -3
  64. pdd/preprocess.py +228 -15
  65. pdd/preprocess_main.py +8 -5
  66. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  67. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  68. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  69. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  70. pdd/prompts/agentic_update_LLM.prompt +1071 -0
  71. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  72. pdd/prompts/auto_include_LLM.prompt +98 -101
  73. pdd/prompts/change_LLM.prompt +1 -3
  74. pdd/prompts/detect_change_LLM.prompt +562 -3
  75. pdd/prompts/example_generator_LLM.prompt +22 -1
  76. pdd/prompts/extract_code_LLM.prompt +5 -1
  77. pdd/prompts/extract_program_code_fix_LLM.prompt +14 -2
  78. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  79. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  80. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  81. pdd/prompts/fix_code_module_errors_LLM.prompt +16 -4
  82. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +6 -41
  83. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  84. pdd/prompts/generate_test_LLM.prompt +21 -6
  85. pdd/prompts/increase_tests_LLM.prompt +1 -2
  86. pdd/prompts/insert_includes_LLM.prompt +1181 -6
  87. pdd/prompts/split_LLM.prompt +1 -62
  88. pdd/prompts/trace_LLM.prompt +25 -22
  89. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  90. pdd/prompts/update_prompt_LLM.prompt +22 -1
  91. pdd/prompts/xml_convertor_LLM.prompt +3246 -7
  92. pdd/pytest_output.py +188 -21
  93. pdd/python_env_detector.py +151 -0
  94. pdd/render_mermaid.py +236 -0
  95. pdd/setup_tool.py +648 -0
  96. pdd/simple_math.py +2 -0
  97. pdd/split_main.py +3 -2
  98. pdd/summarize_directory.py +56 -7
  99. pdd/sync_determine_operation.py +918 -186
  100. pdd/sync_main.py +82 -32
  101. pdd/sync_orchestration.py +1456 -453
  102. pdd/sync_tui.py +848 -0
  103. pdd/template_registry.py +264 -0
  104. pdd/templates/architecture/architecture_json.prompt +242 -0
  105. pdd/templates/generic/generate_prompt.prompt +174 -0
  106. pdd/trace.py +168 -12
  107. pdd/trace_main.py +4 -3
  108. pdd/track_cost.py +151 -61
  109. pdd/unfinished_prompt.py +49 -3
  110. pdd/update_main.py +549 -67
  111. pdd/update_model_costs.py +2 -2
  112. pdd/update_prompt.py +19 -4
  113. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +20 -7
  114. pdd_cli-0.0.90.dist-info/RECORD +153 -0
  115. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
  116. pdd_cli-0.0.42.dist-info/RECORD +0 -115
  117. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
  118. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
  119. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
pdd/fix_error_loop.py CHANGED
@@ -5,13 +5,19 @@ import subprocess
  import shutil
  import json
  from datetime import datetime
+ from pathlib import Path

  from rich import print as rprint
  from rich.console import Console

  # Relative import from an internal module.
+ from .get_language import get_language
  from .fix_errors_from_unit_tests import fix_errors_from_unit_tests
- from . import DEFAULT_TIME # Import DEFAULT_TIME
+ from . import DEFAULT_TIME # Import DEFAULT_TIME
+ from .python_env_detector import detect_host_python_executable
+ from .agentic_fix import run_agentic_fix
+ from .agentic_langtest import default_verify_cmd_for
+

  console = Console()

@@ -19,42 +25,63 @@ def escape_brackets(text: str) -> str:
  """Escape square brackets so Rich doesn't misinterpret them."""
  return text.replace("[", "\\[").replace("]", "\\]")

+ # ---------- Normalize any agentic return shape to a 4-tuple ----------
+ def _normalize_agentic_result(result):
+ """
+ Normalize run_agentic_fix result into: (success: bool, msg: str, cost: float, model: str, changed_files: List[str])
+ Handles older 2/3/4-tuple shapes used by tests/monkeypatches.
+ """
+ if isinstance(result, tuple):
+ if len(result) == 5:
+ ok, msg, cost, model, changed_files = result
+ return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), list(changed_files or [])
+ if len(result) == 4:
+ ok, msg, cost, model = result
+ return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), []
+ if len(result) == 3:
+ ok, msg, cost = result
+ return bool(ok), str(msg), float(cost), "agentic-cli", []
+ if len(result) == 2:
+ ok, msg = result
+ return bool(ok), str(msg), 0.0, "agentic-cli", []
+ # Fallback (shouldn't happen)
+ return False, "Invalid agentic result shape", 0.0, "agentic-cli", []
+
+ def _safe_run_agentic_fix(*, prompt_file, code_file, unit_test_file, error_log_file, cwd=None):
+ """
+ Call (possibly monkeypatched) run_agentic_fix and normalize its return.
+ """
+ res = run_agentic_fix(
+ prompt_file=prompt_file,
+ code_file=code_file,
+ unit_test_file=unit_test_file,
+ error_log_file=error_log_file,
+ cwd=cwd,
+ )
+ return _normalize_agentic_result(res)
+ # ---------------------------------------------------------------------
+
+
  def run_pytest_on_file(test_file: str) -> tuple[int, int, int, str]:
  """
- Run pytest on the specified test file using subprocess.
+ Run pytest on the specified test file using the subprocess-based runner.
  Returns a tuple: (failures, errors, warnings, logs)
  """
- try:
- # Include "--json-only" to ensure only valid JSON is printed.
- cmd = [sys.executable, "-m", "pdd.pytest_output", "--json-only", test_file]
- result = subprocess.run(cmd, capture_output=True, text=True)
-
- # Parse the JSON output from stdout
- try:
- output = json.loads(result.stdout)
- test_results = output.get('test_results', [{}])[0]
-
- # Check pytest's return code first
- return_code = test_results.get('return_code', 1)
-
- failures = test_results.get('failures', 0)
- errors = test_results.get('errors', 0)
- warnings = test_results.get('warnings', 0)
-
- if return_code == 2:
- errors += 1
-
- # Combine stdout and stderr from the test results
- logs = test_results.get('standard_output', '') + '\n' + test_results.get('standard_error', '')
-
- return failures, errors, warnings, logs
-
- except json.JSONDecodeError:
- # If JSON parsing fails, return the raw output
- return 1, 1, 0, f"Failed to parse pytest output:\n{result.stdout}\n{result.stderr}"
-
- except Exception as e:
- return 1, 1, 0, f"Error running pytest: {str(e)}"
+ from .pytest_output import run_pytest_and_capture_output
+ # Use the subprocess-based runner to avoid module caching issues
+ output_data = run_pytest_and_capture_output(test_file)
+
+ # Extract results
+ results = output_data.get("test_results", [{}])[0]
+
+ failures = results.get("failures", 0)
+ errors = results.get("errors", 0)
+ warnings = results.get("warnings", 0)
+
+ # Combine stdout/stderr for the log
+ logs = (results.get("standard_output", "") or "") + "\n" + (results.get("standard_error", "") or "")
+
+ return failures, errors, warnings, logs

  def format_log_for_output(log_structure):
  """
@@ -74,6 +101,8 @@ def format_log_for_output(log_structure):
  # Fix attempt with XML tags
  if iteration.get("fix_attempt"):
  formatted_text += f"<fix_attempt iteration={iteration['number']}>\n"
+ if iteration.get("model_name"):
+ formatted_text += f"Model: {iteration['model_name']}\n"
  formatted_text += f"{iteration['fix_attempt']}\n"
  formatted_text += "</fix_attempt>\n\n"

@@ -98,6 +127,7 @@ def format_log_for_output(log_structure):

  def fix_error_loop(unit_test_file: str,
  code_file: str,
+ prompt_file: str,
  prompt: str,
  verification_program: str,
  strength: float,
@@ -106,7 +136,8 @@ def fix_error_loop(unit_test_file: str,
  budget: float,
  error_log_file: str = "error_log.txt",
  verbose: bool = False,
- time: float = DEFAULT_TIME):
+ time: float = DEFAULT_TIME,
+ agentic_fallback: bool = True):
  """
  Attempt to fix errors in a unit test and corresponding code using repeated iterations,
  counting only the number of times we actually call the LLM fix function.
@@ -127,7 +158,7 @@ def fix_error_loop(unit_test_file: str,
  error_log_file: Path to file to log errors (default: "error_log.txt").
  verbose: Enable verbose logging (default: False).
  time: Time parameter for the fix_errors_from_unit_tests call.
-
+ agentic_fallback: Whether to trigger cli agentic fallback when fix fails.
  Outputs:
  success: Boolean indicating if the overall process succeeded.
  final_unit_test: String contents of the final unit test file.
@@ -184,7 +215,24 @@
  iteration = 0
  # Run an initial test to determine starting state
  try:
- initial_fails, initial_errors, initial_warnings, pytest_output = run_pytest_on_file(unit_test_file)
+ is_python = str(code_file).lower().endswith(".py")
+ if is_python:
+ initial_fails, initial_errors, initial_warnings, pytest_output = run_pytest_on_file(unit_test_file)
+ else:
+ # For non-Python files, run the verification program to get an initial error state
+ rprint(f"[cyan]Non-Python target detected. Running verification program to get initial state...[/cyan]")
+ lang = get_language(os.path.splitext(code_file)[1])
+ verify_cmd = default_verify_cmd_for(lang, unit_test_file)
+ if not verify_cmd:
+ raise ValueError(f"No default verification command for language: {lang}")
+
+ verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, shell=True, stdin=subprocess.DEVNULL)
+ pytest_output = (verify_result.stdout or "") + "\n" + (verify_result.stderr or "")
+ if verify_result.returncode == 0:
+ initial_fails, initial_errors, initial_warnings = 0, 0, 0
+ else:
+ initial_fails, initial_errors, initial_warnings = 1, 0, 0 # Treat any failure as one "fail"
+
  # Store initial state for statistics
  stats = {
  "initial_fails": initial_fails,
@@ -197,14 +245,62 @@
  "iterations_info": []
  }
  except Exception as e:
- rprint(f"[red]Error running initial pytest:[/red] {e}")
+ rprint(f"[red]Error running initial test/verification:[/red] {e}")
  return False, "", "", fix_attempts, total_cost, model_name

+ # If target is not a Python file, trigger agentic fallback if tests fail
+ if not is_python:
+ if initial_fails > 0 or initial_errors > 0:
+ rprint("[cyan]Non-Python target failed initial verification. Triggering agentic fallback...[/cyan]")
+ error_log_path = Path(error_log_file)
+ error_log_path.parent.mkdir(parents=True, exist_ok=True)
+ with open(error_log_path, "w") as f:
+ f.write(pytest_output)
+
+ rprint(f"[cyan]Attempting agentic fix fallback (prompt_file={prompt_file!r})...[/cyan]")
+ success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_fix(
+ prompt_file=prompt_file,
+ code_file=code_file,
+ unit_test_file=unit_test_file,
+ error_log_file=error_log_file,
+ cwd=Path(prompt_file).parent if prompt_file else None,
+ )
+ if not success:
+ rprint(f"[bold red]Agentic fix fallback failed: {agent_msg}[/bold red]")
+ if agent_changed_files:
+ rprint(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+ for f in agent_changed_files:
+ rprint(f" • {f}")
+ final_unit_test = ""
+ final_code = ""
+ try:
+ with open(unit_test_file, "r") as f:
+ final_unit_test = f.read()
+ except Exception:
+ pass
+ try:
+ with open(code_file, "r") as f:
+ final_code = f.read()
+ except Exception:
+ pass
+ return success, final_unit_test, final_code, 1, agent_cost, agent_model
+ else:
+ # Non-python tests passed, so we are successful.
+ rprint("[green]Non-Python tests passed. No fix needed.[/green]")
+ try:
+ with open(unit_test_file, "r") as f:
+ final_unit_test = f.read()
+ with open(code_file, "r") as f:
+ final_code = f.read()
+ except Exception as e:
+ rprint(f"[yellow]Warning: Could not read final files: {e}[/yellow]")
+ return True, final_unit_test, final_code, 0, 0.0, "N/A"
+
  fails, errors, warnings = initial_fails, initial_errors, initial_warnings

  # Determine success state immediately
  success = (fails == 0 and errors == 0 and warnings == 0)
-
+
  # Track if tests were initially passing
  initially_passing = success

@@ -241,13 +337,23 @@

  # Update structured log
  log_structure["iterations"][-1]["post_test_output"] = pytest_output
-
+
  # Write formatted log to file
- with open(error_log_file, "w") as elog:
+ error_log_path = Path(error_log_file)
+ error_log_path.parent.mkdir(parents=True, exist_ok=True)
+ with open(error_log_path, "w") as elog:
  elog.write(format_log_for_output(log_structure))

  # Set success to True (already determined)
- # No need to read the files - keep empty strings for passing cases
+ # Read the actual fixed files to return the successful state
+ try:
+ with open(unit_test_file, "r") as f:
+ final_unit_test = f.read()
+ with open(code_file, "r") as f:
+ final_code = f.read()
+ except Exception as e:
+ rprint(f"[yellow]Warning: Could not read fixed files: {e}[/yellow]")
+ # Keep empty strings as fallback
  break

  iteration_header = f"=== Attempt iteration {iteration} ==="
@@ -325,7 +431,7 @@
  try:
  # Format the log for the LLM
  formatted_log = format_log_for_output(log_structure)
-
+
  updated_unit_test, updated_code, fixed_unit_test, fixed_code, analysis, cost, model_name = fix_errors_from_unit_tests(
  unit_test_contents,
  code_contents,
@@ -335,11 +441,12 @@
  strength,
  temperature,
  verbose=verbose,
- time=time # Pass time parameter
+ time=time # Pass time parameter
  )

  # Update the fix attempt in the structured log
  log_structure["iterations"][-1]["fix_attempt"] = analysis
+ log_structure["iterations"][-1]["model_name"] = model_name
  except Exception as e:
  rprint(f"[red]Error during fix_errors_from_unit_tests call:[/red] {e}")
  break
@@ -380,8 +487,8 @@

  # Run the verification:
  try:
- verify_cmd = [sys.executable, verification_program]
- verify_result = subprocess.run(verify_cmd, capture_output=True, text=True)
+ verify_cmd = [detect_host_python_executable(), verification_program]
+ verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, stdin=subprocess.DEVNULL)
  # Safely handle None for stdout or stderr:
  verify_stdout = verify_result.stdout or ""
  verify_stderr = verify_result.stderr or ""
@@ -411,9 +518,11 @@

  # Update post-test output in structured log
  log_structure["iterations"][-1]["post_test_output"] = pytest_output
-
+
  # Write updated structured log to file after each iteration
- with open(error_log_file, "w") as elog:
+ error_log_path = Path(error_log_file)
+ error_log_path.parent.mkdir(parents=True, exist_ok=True)
+ with open(error_log_path, "w") as elog:
  elog.write(format_log_for_output(log_structure))

  # Update iteration stats with post-fix results
@@ -477,8 +586,8 @@
  else:
  stats["best_iteration"] = "final"

- # Read final file contents, but only if tests weren't initially passing
- # For initially passing tests, keep empty strings as required by the test
+ # Read final file contents for non-initially-passing tests
+ # (Initially passing tests have files read at lines 344-348)
  try:
  if not initially_passing:
  with open(unit_test_file, "r") as f:
@@ -489,11 +598,6 @@
  rprint(f"[red]Error reading final files:[/red] {e}")
  final_unit_test, final_code = "", ""

- # Check if we broke out early because tests already passed
- if stats["best_iteration"] == 0 and fix_attempts == 0:
- # Still return at least 1 attempt to acknowledge the work done
- fix_attempts = 1
-
  # Print summary statistics
  rprint("\n[bold cyan]Summary Statistics:[/bold cyan]")
  rprint(f"Initial state: {initial_fails} fails, {initial_errors} errors, {initial_warnings} warnings")
@@ -503,17 +607,62 @@

  # Calculate improvements
  stats["improvement"] = {
- "fails_reduced": initial_fails - stats["final_fails"],
- "errors_reduced": initial_errors - stats["final_errors"],
- "warnings_reduced": initial_warnings - stats["final_warnings"],
- "percent_improvement": 100 if initial_fails + initial_errors + initial_warnings == 0 else
- (1 - (stats["final_fails"] + stats["final_errors"] + stats["final_warnings"]) /
+ "fails_reduced": initial_fails - stats['final_fails'],
+ "errors_reduced": initial_errors - stats['final_errors'],
+ "warnings_reduced": initial_warnings - stats['final_warnings'],
+ "percent_improvement": 100 if (initial_fails + initial_errors + initial_warnings) == 0 else
+ (1 - (stats['final_fails'] + stats['final_errors'] + stats['final_warnings']) /
  (initial_fails + initial_errors + initial_warnings)) * 100
  }

  rprint(f"Improvement: {stats['improvement']['fails_reduced']} fails, {stats['improvement']['errors_reduced']} errors, {stats['improvement']['warnings_reduced']} warnings")
  rprint(f"Overall improvement: {stats['improvement']['percent_improvement']:.2f}%")

+ # Agentic fallback at end adds cost & model (normalized)
+ if not success and agentic_fallback and total_cost < budget:
+ # Ensure error_log_file exists before calling agentic fix
+ # Write the current log structure if it hasn't been written yet
+ try:
+ if not os.path.exists(error_log_file) or os.path.getsize(error_log_file) == 0:
+ error_log_path = Path(error_log_file)
+ error_log_path.parent.mkdir(parents=True, exist_ok=True)
+ with open(error_log_path, "w") as elog:
+ if log_structure["iterations"]:
+ elog.write(format_log_for_output(log_structure))
+ else:
+ # No iterations ran, write initial state info
+ elog.write(f"Initial state: {initial_fails} fails, {initial_errors} errors, {initial_warnings} warnings\n")
+ if 'pytest_output' in locals():
+ elog.write(f"\n<pytest_output>\n{pytest_output}\n</pytest_output>\n")
+ except Exception as e:
+ rprint(f"[yellow]Warning: Could not write error log before agentic fallback: {e}[/yellow]")
+
+ rprint(f"[cyan]Attempting agentic fix fallback (prompt_file={prompt_file!r})...[/cyan]")
+ agent_success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_fix(
+ prompt_file=prompt_file,
+ code_file=code_file,
+ unit_test_file=unit_test_file,
+ error_log_file=error_log_file,
+ cwd=Path(prompt_file).parent if prompt_file else None,
+ )
+ total_cost += agent_cost
+ if not agent_success:
+ rprint(f"[bold red]Agentic fix fallback failed: {agent_msg}[/bold red]")
+ if agent_changed_files:
+ rprint(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+ for f in agent_changed_files:
+ rprint(f" • {f}")
+ if agent_success:
+ model_name = agent_model or model_name
+ try:
+ with open(unit_test_file, "r") as f:
+ final_unit_test = f.read()
+ with open(code_file, "r") as f:
+ final_code = f.read()
+ except Exception as e:
+ rprint(f"[yellow]Warning: Could not read files after successful agentic fix: {e}[/yellow]")
+ success = True
+
  return success, final_unit_test, final_code, fix_attempts, total_cost, model_name

  # If this module is run directly for testing purposes:
@@ -548,4 +697,4 @@ if __name__ == "__main__":
  rprint(f"Attempts: {attempts}")
  rprint(f"Total cost: ${total_cost:.6f}")
  rprint(f"Model used: {model_name}")
- rprint(f"Final unit test contents:\n{final_unit_test}")
+ rprint(f"Final unit test contents:\n{final_unit_test}")
pdd/fix_errors_from_unit_tests.py CHANGED
@@ -114,7 +114,8 @@ def fix_errors_from_unit_tests(
  Fix errors in unit tests using LLM models and log the process.

  Args:
- unit_test (str): The unit test code
+ unit_test (str): The unit test code, potentially multiple files concatenated
+ with <file name="filename.py">...</file> tags.
  code (str): The code under test
  prompt (str): The prompt that generated the code
  error (str): The error message
@@ -244,10 +245,10 @@ def fix_errors_from_unit_tests(
  if verbose:
  console.print(f"[bold red]{error_msg}[/bold red]")
  write_to_error_file(error_file, error_msg)
- return False, False, "", "", "", 0.0, ""
+ return False, False, "", "", "", 0.0, f"Error: ValidationError - {str(e)[:100]}"
  except Exception as e:
  error_msg = f"Error in fix_errors_from_unit_tests: {str(e)}"
  if verbose:
  console.print(f"[bold red]{error_msg}[/bold red]")
  write_to_error_file(error_file, error_msg)
- return False, False, "", "", "", 0.0, ""
+ return False, False, "", "", "", 0.0, f"Error: {type(e).__name__}"
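
The updated docstring describes a multi-file convention for the unit_test argument. A small sketch of how a caller might build such a string (the helper name and file paths are hypothetical):

    from pathlib import Path

    def concat_test_files(paths):
        # Wrap each test file in the <file name="...">...</file> tags the docstring describes.
        parts = []
        for p in map(Path, paths):
            parts.append(f'<file name="{p.name}">\n{p.read_text()}\n</file>')
        return "\n".join(parts)

    unit_test_blob = concat_test_files(["tests/test_api.py", "tests/test_cli.py"])
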
pdd/fix_main.py CHANGED
@@ -13,7 +13,7 @@ from .preprocess import preprocess

  from .construct_paths import construct_paths
  from .fix_errors_from_unit_tests import fix_errors_from_unit_tests
- from .fix_error_loop import fix_error_loop
+ from .fix_error_loop import fix_error_loop, run_pytest_on_file
  from .get_jwt_token import get_jwt_token
  from .get_language import get_language

@@ -33,7 +33,10 @@ def fix_main(
  verification_program: Optional[str],
  max_attempts: int,
  budget: float,
- auto_submit: bool
+ auto_submit: bool,
+ agentic_fallback: bool = True,
+ strength: Optional[float] = None,
+ temperature: Optional[float] = None,
  ) -> Tuple[bool, str, str, int, float, str]:
  """
  Main function to fix errors in code and unit tests.
@@ -52,7 +55,7 @@ def fix_main(
  max_attempts: Maximum number of fix attempts
  budget: Maximum cost allowed for fixing
  auto_submit: Whether to auto-submit example if tests pass
-
+ agentic_fallback: Whether the cli agent fallback is triggered
  Returns:
  Tuple containing:
  - Success status (bool)
@@ -69,13 +72,13 @@ def fix_main(
  # Initialize analysis_results to None to prevent reference errors
  analysis_results = None

+ # Input validation - let these propagate to caller for proper exit code
+ if not loop:
+ error_path = Path(error_file)
+ if not error_path.exists():
+ raise FileNotFoundError(f"Error file '{error_file}' does not exist.")
+
  try:
- # Verify error file exists if not in loop mode
- if not loop:
- error_path = Path(error_file)
- if not error_path.exists():
- raise FileNotFoundError(f"Error file '{error_file}' does not exist.")
-
  # Construct file paths
  input_file_paths = {
  "prompt_file": prompt_file,
@@ -97,12 +100,14 @@ def fix_main(
  quiet=ctx.obj.get('quiet', False),
  command="fix",
  command_options=command_options,
- create_error_file=loop # Only create error file if in loop mode
+ create_error_file=loop, # Only create error file if in loop mode
+ context_override=ctx.obj.get('context'),
+ confirm_callback=ctx.obj.get('confirm_callback')
  )

- # Get parameters from context
- strength = ctx.obj.get('strength', DEFAULT_STRENGTH)
- temperature = ctx.obj.get('temperature', 0)
+ # Get parameters from context (prefer passed parameters over ctx.obj)
+ strength = strength if strength is not None else ctx.obj.get('strength', DEFAULT_STRENGTH)
+ temperature = temperature if temperature is not None else ctx.obj.get('temperature', 0)
  verbose = ctx.obj.get('verbose', False)
  time = ctx.obj.get('time') # Get time from context

@@ -111,6 +116,7 @@ def fix_main(
  success, fixed_unit_test, fixed_code, attempts, total_cost, model_name = fix_error_loop(
  unit_test_file=unit_test_file,
  code_file=code_file,
+ prompt_file=prompt_file,
  prompt=input_strings["prompt_file"],
  verification_program=verification_program,
  strength=strength,
@@ -119,7 +125,8 @@ def fix_main(
  max_attempts=max_attempts,
  budget=budget,
  error_log_file=output_file_paths.get("output_results"),
- verbose=verbose
+ verbose=verbose,
+ agentic_fallback=agentic_fallback
  )
  else:
  # Use fix_errors_from_unit_tests for single-pass fixing
@@ -134,16 +141,62 @@ def fix_main(
  time=time, # Pass time to fix_errors_from_unit_tests
  verbose=verbose
  )
- success = update_unit_test or update_code
  attempts = 1

+ # Issue #158 fix: Validate the fix by running tests instead of
+ # trusting the LLM's suggestion flags (update_unit_test/update_code)
+ if update_unit_test or update_code:
+ # Write fixed files to temp location first, then run tests
+ import tempfile
+ import os as os_module
+
+ # Create temp files for testing
+ test_dir = tempfile.mkdtemp(prefix="pdd_fix_validate_")
+ temp_test_file = os_module.path.join(test_dir, "test_temp.py")
+ temp_code_file = os_module.path.join(test_dir, "code_temp.py")
+
+ try:
+ # Write the fixed content (or original if not changed)
+ test_content = fixed_unit_test if fixed_unit_test else input_strings["unit_test_file"]
+ code_content = fixed_code if fixed_code else input_strings["code_file"]
+
+ with open(temp_test_file, 'w') as f:
+ f.write(test_content)
+ with open(temp_code_file, 'w') as f:
+ f.write(code_content)
+
+ # Run pytest on the fixed test file to validate
+ fails, errors, warnings, test_output = run_pytest_on_file(temp_test_file)
+
+ # Success only if tests pass (no failures or errors)
+ success = (fails == 0 and errors == 0)
+
+ if verbose:
+ rprint(f"[cyan]Fix validation: {fails} failures, {errors} errors, {warnings} warnings[/cyan]")
+ if not success:
+ rprint("[yellow]Fix suggested by LLM did not pass tests[/yellow]")
+ finally:
+ # Cleanup temp files
+ import shutil
+ try:
+ shutil.rmtree(test_dir)
+ except Exception:
+ pass
+ else:
+ # No changes suggested by LLM
+ success = False
+
  # Save fixed files
  if fixed_unit_test:
- with open(output_file_paths["output_test"], 'w') as f:
+ output_test_path = Path(output_file_paths["output_test"])
+ output_test_path.parent.mkdir(parents=True, exist_ok=True)
+ with open(output_test_path, 'w') as f:
  f.write(fixed_unit_test)

  if fixed_code:
- with open(output_file_paths["output_code"], 'w') as f:
+ output_code_path = Path(output_file_paths["output_code"])
+ output_code_path.parent.mkdir(parents=True, exist_ok=True)
+ with open(output_code_path, 'w') as f:
  f.write(fixed_code)

  # Provide user feedback
@@ -286,6 +339,9 @@ def fix_main(

  return success, fixed_unit_test, fixed_code, attempts, total_cost, model_name

+ except click.Abort:
+ # User cancelled - re-raise to stop the sync loop
+ raise
  except Exception as e:
  if not ctx.obj.get('quiet', False):
  # Safely handle and print MarkupError
@@ -296,4 +352,5 @@ def fix_main(
  # Print other errors normally, escaping the error string
  from rich.markup import escape # Ensure escape is imported
  rprint(f"[bold red]Error:[/bold red] {escape(str(e))}")
- sys.exit(1)
+ # Return error result instead of sys.exit(1) to allow orchestrator to handle gracefully
+ return False, "", "", 0, 0.0, f"Error: {e}"