pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. pdd/__init__.py +40 -8
  2. pdd/agentic_bug.py +323 -0
  3. pdd/agentic_bug_orchestrator.py +497 -0
  4. pdd/agentic_change.py +231 -0
  5. pdd/agentic_change_orchestrator.py +526 -0
  6. pdd/agentic_common.py +598 -0
  7. pdd/agentic_crash.py +534 -0
  8. pdd/agentic_e2e_fix.py +319 -0
  9. pdd/agentic_e2e_fix_orchestrator.py +426 -0
  10. pdd/agentic_fix.py +1294 -0
  11. pdd/agentic_langtest.py +162 -0
  12. pdd/agentic_update.py +387 -0
  13. pdd/agentic_verify.py +183 -0
  14. pdd/architecture_sync.py +565 -0
  15. pdd/auth_service.py +210 -0
  16. pdd/auto_deps_main.py +71 -51
  17. pdd/auto_include.py +245 -5
  18. pdd/auto_update.py +125 -47
  19. pdd/bug_main.py +196 -23
  20. pdd/bug_to_unit_test.py +2 -0
  21. pdd/change_main.py +11 -4
  22. pdd/cli.py +22 -1181
  23. pdd/cmd_test_main.py +350 -150
  24. pdd/code_generator.py +60 -18
  25. pdd/code_generator_main.py +790 -57
  26. pdd/commands/__init__.py +48 -0
  27. pdd/commands/analysis.py +306 -0
  28. pdd/commands/auth.py +309 -0
  29. pdd/commands/connect.py +290 -0
  30. pdd/commands/fix.py +163 -0
  31. pdd/commands/generate.py +257 -0
  32. pdd/commands/maintenance.py +175 -0
  33. pdd/commands/misc.py +87 -0
  34. pdd/commands/modify.py +256 -0
  35. pdd/commands/report.py +144 -0
  36. pdd/commands/sessions.py +284 -0
  37. pdd/commands/templates.py +215 -0
  38. pdd/commands/utility.py +110 -0
  39. pdd/config_resolution.py +58 -0
  40. pdd/conflicts_main.py +8 -3
  41. pdd/construct_paths.py +589 -111
  42. pdd/context_generator.py +10 -2
  43. pdd/context_generator_main.py +175 -76
  44. pdd/continue_generation.py +53 -10
  45. pdd/core/__init__.py +33 -0
  46. pdd/core/cli.py +527 -0
  47. pdd/core/cloud.py +237 -0
  48. pdd/core/dump.py +554 -0
  49. pdd/core/errors.py +67 -0
  50. pdd/core/remote_session.py +61 -0
  51. pdd/core/utils.py +90 -0
  52. pdd/crash_main.py +262 -33
  53. pdd/data/language_format.csv +71 -63
  54. pdd/data/llm_model.csv +20 -18
  55. pdd/detect_change_main.py +5 -4
  56. pdd/docs/prompting_guide.md +864 -0
  57. pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
  58. pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
  59. pdd/fix_code_loop.py +523 -95
  60. pdd/fix_code_module_errors.py +6 -2
  61. pdd/fix_error_loop.py +491 -92
  62. pdd/fix_errors_from_unit_tests.py +4 -3
  63. pdd/fix_main.py +278 -21
  64. pdd/fix_verification_errors.py +12 -100
  65. pdd/fix_verification_errors_loop.py +529 -286
  66. pdd/fix_verification_main.py +294 -89
  67. pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
  68. pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
  69. pdd/frontend/dist/index.html +376 -0
  70. pdd/frontend/dist/logo.svg +33 -0
  71. pdd/generate_output_paths.py +139 -15
  72. pdd/generate_test.py +218 -146
  73. pdd/get_comment.py +19 -44
  74. pdd/get_extension.py +8 -9
  75. pdd/get_jwt_token.py +318 -22
  76. pdd/get_language.py +8 -7
  77. pdd/get_run_command.py +75 -0
  78. pdd/get_test_command.py +68 -0
  79. pdd/git_update.py +70 -19
  80. pdd/incremental_code_generator.py +2 -2
  81. pdd/insert_includes.py +13 -4
  82. pdd/llm_invoke.py +1711 -181
  83. pdd/load_prompt_template.py +19 -12
  84. pdd/path_resolution.py +140 -0
  85. pdd/pdd_completion.fish +25 -2
  86. pdd/pdd_completion.sh +30 -4
  87. pdd/pdd_completion.zsh +79 -4
  88. pdd/postprocess.py +14 -4
  89. pdd/preprocess.py +293 -24
  90. pdd/preprocess_main.py +41 -6
  91. pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
  92. pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
  93. pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
  94. pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
  95. pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
  96. pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
  97. pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
  98. pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
  99. pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
  100. pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
  101. pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
  102. pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
  103. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
  104. pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
  105. pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
  106. pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
  107. pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
  108. pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
  109. pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
  110. pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
  111. pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
  112. pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
  113. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  114. pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
  115. pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
  116. pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
  117. pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
  118. pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
  119. pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
  120. pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
  121. pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
  122. pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
  123. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  124. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  125. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  126. pdd/prompts/agentic_update_LLM.prompt +925 -0
  127. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  128. pdd/prompts/auto_include_LLM.prompt +122 -905
  129. pdd/prompts/change_LLM.prompt +3093 -1
  130. pdd/prompts/detect_change_LLM.prompt +686 -27
  131. pdd/prompts/example_generator_LLM.prompt +22 -1
  132. pdd/prompts/extract_code_LLM.prompt +5 -1
  133. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  134. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  135. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  136. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  137. pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
  138. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
  139. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  140. pdd/prompts/generate_test_LLM.prompt +41 -7
  141. pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
  142. pdd/prompts/increase_tests_LLM.prompt +1 -5
  143. pdd/prompts/insert_includes_LLM.prompt +316 -186
  144. pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
  145. pdd/prompts/prompt_diff_LLM.prompt +82 -0
  146. pdd/prompts/trace_LLM.prompt +25 -22
  147. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  148. pdd/prompts/update_prompt_LLM.prompt +22 -1
  149. pdd/pytest_output.py +127 -12
  150. pdd/remote_session.py +876 -0
  151. pdd/render_mermaid.py +236 -0
  152. pdd/server/__init__.py +52 -0
  153. pdd/server/app.py +335 -0
  154. pdd/server/click_executor.py +587 -0
  155. pdd/server/executor.py +338 -0
  156. pdd/server/jobs.py +661 -0
  157. pdd/server/models.py +241 -0
  158. pdd/server/routes/__init__.py +31 -0
  159. pdd/server/routes/architecture.py +451 -0
  160. pdd/server/routes/auth.py +364 -0
  161. pdd/server/routes/commands.py +929 -0
  162. pdd/server/routes/config.py +42 -0
  163. pdd/server/routes/files.py +603 -0
  164. pdd/server/routes/prompts.py +1322 -0
  165. pdd/server/routes/websocket.py +473 -0
  166. pdd/server/security.py +243 -0
  167. pdd/server/terminal_spawner.py +209 -0
  168. pdd/server/token_counter.py +222 -0
  169. pdd/setup_tool.py +648 -0
  170. pdd/simple_math.py +2 -0
  171. pdd/split_main.py +3 -2
  172. pdd/summarize_directory.py +237 -195
  173. pdd/sync_animation.py +8 -4
  174. pdd/sync_determine_operation.py +839 -112
  175. pdd/sync_main.py +351 -57
  176. pdd/sync_orchestration.py +1400 -756
  177. pdd/sync_tui.py +848 -0
  178. pdd/template_expander.py +161 -0
  179. pdd/template_registry.py +264 -0
  180. pdd/templates/architecture/architecture_json.prompt +237 -0
  181. pdd/templates/generic/generate_prompt.prompt +174 -0
  182. pdd/trace.py +168 -12
  183. pdd/trace_main.py +4 -3
  184. pdd/track_cost.py +140 -63
  185. pdd/unfinished_prompt.py +51 -4
  186. pdd/update_main.py +567 -67
  187. pdd/update_model_costs.py +2 -2
  188. pdd/update_prompt.py +19 -4
  189. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
  190. pdd_cli-0.0.118.dist-info/RECORD +227 -0
  191. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
  192. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  193. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
  194. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
  195. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
pdd/fix_errors_from_unit_tests.py CHANGED
@@ -114,7 +114,8 @@ def fix_errors_from_unit_tests(
     Fix errors in unit tests using LLM models and log the process.
 
     Args:
-        unit_test (str): The unit test code
+        unit_test (str): The unit test code, potentially multiple files concatenated
+            with <file name="filename.py">...</file> tags.
         code (str): The code under test
         prompt (str): The prompt that generated the code
         error (str): The error message
@@ -244,10 +245,10 @@ def fix_errors_from_unit_tests(
         if verbose:
             console.print(f"[bold red]{error_msg}[/bold red]")
         write_to_error_file(error_file, error_msg)
-        return False, False, "", "", "", 0.0, ""
+        return False, False, "", "", "", 0.0, f"Error: ValidationError - {str(e)[:100]}"
     except Exception as e:
         error_msg = f"Error in fix_errors_from_unit_tests: {str(e)}"
         if verbose:
             console.print(f"[bold red]{error_msg}[/bold red]")
         write_to_error_file(error_file, error_msg)
-        return False, False, "", "", "", 0.0, ""
+        return False, False, "", "", "", 0.0, f"Error: {type(e).__name__}"
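Note: both failure paths above previously returned an empty string in the last slot of the 7-tuple (the model_name position), which made an LLM failure indistinguishable from a blank model name. They now return an "Error: ..." marker there. A minimal caller-side sketch (the variable names are illustrative, not pdd's):

    # Shape matches the returns above: (update_unit_test, update_code,
    # fixed_unit_test, fixed_code, analysis, total_cost, model_name)
    result = (False, False, "", "", "", 0.0, "Error: ValidationError - 1 validation error")
    *_, total_cost, model_name = result
    if model_name.startswith("Error:"):
        # The call failed; model_name carries the error tag, not a model name.
        print(f"fix failed ({model_name}); cost so far ${total_cost:.4f}")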
pdd/fix_main.py CHANGED
@@ -1,8 +1,11 @@
 import sys
 from typing import Tuple, Optional
+import json
 import click
 from rich import print as rprint
 from rich.markup import MarkupError, escape
+from rich.console import Console
+from rich.panel import Panel
 
 import requests
 import asyncio
@@ -13,13 +16,27 @@ from .preprocess import preprocess
 
 from .construct_paths import construct_paths
 from .fix_errors_from_unit_tests import fix_errors_from_unit_tests
-from .fix_error_loop import fix_error_loop
+from .fix_error_loop import fix_error_loop, run_pytest_on_file
 from .get_jwt_token import get_jwt_token
 from .get_language import get_language
+from .core.cloud import CloudConfig
 
 # Import DEFAULT_STRENGTH from the package
 from . import DEFAULT_STRENGTH
 
+# Cloud request timeout
+CLOUD_REQUEST_TIMEOUT = 400  # seconds
+
+console = Console()
+
+
+def _env_flag_enabled(name: str) -> bool:
+    """Return True when an env var is set to a truthy value."""
+    value = os.environ.get(name)
+    if value is None:
+        return False
+    return str(value).strip().lower() in {"1", "true", "yes", "on"}
+
 def fix_main(
     ctx: click.Context,
     prompt_file: str,
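The new _env_flag_enabled helper accepts only "1", "true", "yes", and "on" (case-insensitive, whitespace-trimmed) as truthy, so "0", "false", an empty string, or an unset variable all leave cloud-only mode off. A standalone check of the same logic:

    import os

    def _env_flag_enabled(name: str) -> bool:
        value = os.environ.get(name)
        if value is None:
            return False
        return str(value).strip().lower() in {"1", "true", "yes", "on"}

    os.environ["PDD_CLOUD_ONLY"] = " TRUE "
    assert _env_flag_enabled("PDD_CLOUD_ONLY")       # trimmed and lowercased
    os.environ["PDD_CLOUD_ONLY"] = "0"
    assert not _env_flag_enabled("PDD_CLOUD_ONLY")   # "0" is not in the truthy set
    assert not _env_flag_enabled("PDD_UNSET_FLAG")   # missing variables are falsy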
@@ -33,7 +50,10 @@ def fix_main(
     verification_program: Optional[str],
     max_attempts: int,
     budget: float,
-    auto_submit: bool
+    auto_submit: bool,
+    agentic_fallback: bool = True,
+    strength: Optional[float] = None,
+    temperature: Optional[float] = None,
 ) -> Tuple[bool, str, str, int, float, str]:
     """
     Main function to fix errors in code and unit tests.
@@ -52,7 +72,7 @@ def fix_main(
         max_attempts: Maximum number of fix attempts
         budget: Maximum cost allowed for fixing
         auto_submit: Whether to auto-submit example if tests pass
-
+        agentic_fallback: Whether the cli agent fallback is triggered
     Returns:
         Tuple containing:
         - Success status (bool)
@@ -69,13 +89,13 @@ def fix_main(
     # Initialize analysis_results to None to prevent reference errors
     analysis_results = None
 
+    # Input validation - let these propagate to caller for proper exit code
+    if not loop:
+        error_path = Path(error_file)
+        if not error_path.exists():
+            raise FileNotFoundError(f"Error file '{error_file}' does not exist.")
+
     try:
-        # Verify error file exists if not in loop mode
-        if not loop:
-            error_path = Path(error_file)
-            if not error_path.exists():
-                raise FileNotFoundError(f"Error file '{error_file}' does not exist.")
-
         # Construct file paths
         input_file_paths = {
             "prompt_file": prompt_file,
@@ -97,20 +117,200 @@ def fix_main(
             quiet=ctx.obj.get('quiet', False),
             command="fix",
             command_options=command_options,
-            create_error_file=loop  # Only create error file if in loop mode
+            create_error_file=loop,  # Only create error file if in loop mode
+            context_override=ctx.obj.get('context'),
+            confirm_callback=ctx.obj.get('confirm_callback')
         )
 
-        # Get parameters from context
-        strength = ctx.obj.get('strength', DEFAULT_STRENGTH)
-        temperature = ctx.obj.get('temperature', 0)
+        # Get parameters from context (prefer passed parameters over ctx.obj)
+        strength = strength if strength is not None else ctx.obj.get('strength', DEFAULT_STRENGTH)
+        temperature = temperature if temperature is not None else ctx.obj.get('temperature', 0)
         verbose = ctx.obj.get('verbose', False)
         time = ctx.obj.get('time')  # Get time from context
 
+        # Determine cloud vs local execution preference
+        is_local_execution_preferred = ctx.obj.get('local', False)
+        cloud_only = _env_flag_enabled("PDD_CLOUD_ONLY") or _env_flag_enabled("PDD_NO_LOCAL_FALLBACK")
+        current_execution_is_local = is_local_execution_preferred and not cloud_only
+
+        # Cloud execution is only supported for single-pass mode (not loop mode)
+        # because loop mode requires running tests and verification programs locally
+        cloud_execution_attempted = False
+        cloud_execution_succeeded = False
+
+        if not loop and not current_execution_is_local:
+            if verbose:
+                console.print(Panel("Attempting cloud fix execution...", title="[blue]Mode[/blue]", expand=False))
+
+            jwt_token = CloudConfig.get_jwt_token(verbose=verbose)
+
+            if not jwt_token:
+                if cloud_only:
+                    console.print("[red]Cloud authentication failed.[/red]")
+                    raise click.UsageError("Cloud authentication failed")
+                console.print("[yellow]Cloud authentication failed. Falling back to local execution.[/yellow]")
+                current_execution_is_local = True
+
+            if jwt_token and not current_execution_is_local:
+                cloud_execution_attempted = True
+                # Build cloud payload
+                payload = {
+                    "unitTest": input_strings["unit_test_file"],
+                    "code": input_strings["code_file"],
+                    "prompt": input_strings["prompt_file"],
+                    "errors": input_strings.get("error_file", ""),
+                    "language": get_language(os.path.splitext(code_file)[1]),
+                    "strength": strength,
+                    "temperature": temperature,
+                    "time": time if time is not None else 0.25,
+                    "verbose": verbose,
+                }
+
+                headers = {
+                    "Authorization": f"Bearer {jwt_token}",
+                    "Content-Type": "application/json"
+                }
+                cloud_url = CloudConfig.get_endpoint_url("fixCode")
+
+                try:
+                    response = requests.post(
+                        cloud_url,
+                        json=payload,
+                        headers=headers,
+                        timeout=CLOUD_REQUEST_TIMEOUT
+                    )
+                    response.raise_for_status()
+
+                    response_data = response.json()
+                    fixed_unit_test = response_data.get("fixedUnitTest", "")
+                    fixed_code = response_data.get("fixedCode", "")
+                    analysis_results = response_data.get("analysis", "")
+                    total_cost = float(response_data.get("totalCost", 0.0))
+                    model_name = response_data.get("modelName", "cloud_model")
+                    success = response_data.get("success", False)
+                    update_unit_test = response_data.get("updateUnitTest", False)
+                    update_code = response_data.get("updateCode", False)
+
+                    if not (fixed_unit_test or fixed_code):
+                        if cloud_only:
+                            console.print("[red]Cloud execution returned no fixed code.[/red]")
+                            raise click.UsageError("Cloud execution returned no fixed code")
+                        console.print("[yellow]Cloud execution returned no fixed code. Falling back to local.[/yellow]")
+                        current_execution_is_local = True
+                    else:
+                        cloud_execution_succeeded = True
+                        attempts = 1
+
+                        # Validate the fix by running tests (same as local)
+                        if update_unit_test or update_code:
+                            import tempfile
+                            import shutil as shutil_module
+
+                            test_dir = tempfile.mkdtemp(prefix="pdd_fix_validate_")
+                            temp_test_file = os.path.join(test_dir, "test_temp.py")
+                            temp_code_file = os.path.join(test_dir, "code_temp.py")
+
+                            try:
+                                test_content = fixed_unit_test if fixed_unit_test else input_strings["unit_test_file"]
+                                code_content = fixed_code if fixed_code else input_strings["code_file"]
+
+                                with open(temp_test_file, 'w') as f:
+                                    f.write(test_content)
+                                with open(temp_code_file, 'w') as f:
+                                    f.write(code_content)
+
+                                fails, errors_count, warnings, test_output = run_pytest_on_file(temp_test_file)
+                                success = (fails == 0 and errors_count == 0)
+
+                                if verbose:
+                                    rprint(f"[cyan]Fix validation: {fails} failures, {errors_count} errors, {warnings} warnings[/cyan]")
+                                    if not success:
+                                        rprint("[yellow]Fix suggested by cloud did not pass tests[/yellow]")
+                            finally:
+                                try:
+                                    shutil_module.rmtree(test_dir)
+                                except Exception:
+                                    pass
+                        else:
+                            success = False
+
+                        if verbose:
+                            console.print(Panel(
+                                f"Cloud fix completed. Model: {model_name}, Cost: ${total_cost:.6f}",
+                                title="[green]Cloud Success[/green]",
+                                expand=False
+                            ))
+
+                except requests.exceptions.Timeout:
+                    if cloud_only:
+                        console.print(f"[red]Cloud execution timed out ({CLOUD_REQUEST_TIMEOUT}s).[/red]")
+                        raise click.UsageError("Cloud execution timed out")
+                    console.print(f"[yellow]Cloud execution timed out ({CLOUD_REQUEST_TIMEOUT}s). Falling back to local.[/yellow]")
+                    current_execution_is_local = True
+
+                except requests.exceptions.HTTPError as e:
+                    status_code = e.response.status_code if e.response else 0
+                    err_content = e.response.text[:200] if e.response else "No response content"
+
+                    # Non-recoverable errors: do NOT fall back to local
+                    if status_code == 402:  # Insufficient credits
+                        try:
+                            error_data = e.response.json()
+                            current_balance = error_data.get("currentBalance", "unknown")
+                            estimated_cost = error_data.get("estimatedCost", "unknown")
+                            console.print(f"[red]Insufficient credits. Current balance: {current_balance}, estimated cost: {estimated_cost}[/red]")
+                        except Exception:
+                            console.print(f"[red]Insufficient credits: {err_content}[/red]")
+                        raise click.UsageError("Insufficient credits for cloud fix")
+                    elif status_code == 401:  # Authentication error
+                        console.print(f"[red]Authentication failed: {err_content}[/red]")
+                        raise click.UsageError("Cloud authentication failed")
+                    elif status_code == 403:  # Authorization error (not approved)
+                        console.print(f"[red]Access denied: {err_content}[/red]")
+                        raise click.UsageError("Access denied - user not approved")
+                    elif status_code == 400:  # Validation error
+                        console.print(f"[red]Invalid request: {err_content}[/red]")
+                        raise click.UsageError(f"Invalid request: {err_content}")
+                    else:
+                        # Recoverable errors (5xx, unexpected errors): fall back to local
+                        if cloud_only:
+                            console.print(f"[red]Cloud HTTP error ({status_code}): {err_content}[/red]")
+                            raise click.UsageError(f"Cloud HTTP error ({status_code}): {err_content}")
+                        console.print(f"[yellow]Cloud HTTP error ({status_code}): {err_content}. Falling back to local.[/yellow]")
+                        current_execution_is_local = True
+
+                except requests.exceptions.RequestException as e:
+                    if cloud_only:
+                        console.print(f"[red]Cloud network error: {e}[/red]")
+                        raise click.UsageError(f"Cloud network error: {e}")
+                    console.print(f"[yellow]Cloud network error: {e}. Falling back to local.[/yellow]")
+                    current_execution_is_local = True
+
+                except json.JSONDecodeError:
+                    if cloud_only:
+                        console.print("[red]Cloud returned invalid JSON.[/red]")
+                        raise click.UsageError("Cloud returned invalid JSON")
+                    console.print("[yellow]Cloud returned invalid JSON. Falling back to local.[/yellow]")
+                    current_execution_is_local = True
+
+        # Local execution path (for loop mode or when cloud failed/skipped)
         if loop:
-            # Use fix_error_loop for iterative fixing
+            # Determine if loop should use cloud for LLM calls (hybrid mode)
+            # Local test execution stays local, but LLM fix calls can go to cloud
+            use_cloud_for_loop = not is_local_execution_preferred and not cloud_only
+
+            # If cloud_only is set but we're in loop mode, we still use hybrid approach
+            if cloud_only and not is_local_execution_preferred:
+                use_cloud_for_loop = True
+
+            if verbose:
+                mode_desc = "hybrid (local tests + cloud LLM)" if use_cloud_for_loop else "local"
+                console.print(Panel(f"Performing {mode_desc} fix loop...", title="[blue]Mode[/blue]", expand=False))
+
             success, fixed_unit_test, fixed_code, attempts, total_cost, model_name = fix_error_loop(
                 unit_test_file=unit_test_file,
                 code_file=code_file,
+                prompt_file=prompt_file,
                 prompt=input_strings["prompt_file"],
                 verification_program=verification_program,
                 strength=strength,
@@ -119,10 +319,14 @@ def fix_main(
                 max_attempts=max_attempts,
                 budget=budget,
                 error_log_file=output_file_paths.get("output_results"),
-                verbose=verbose
+                verbose=verbose,
+                agentic_fallback=agentic_fallback,
+                use_cloud=use_cloud_for_loop
             )
-        else:
-            # Use fix_errors_from_unit_tests for single-pass fixing
+        elif not cloud_execution_succeeded:
+            # Use fix_errors_from_unit_tests for single-pass fixing (local fallback)
+            if verbose:
+                console.print(Panel("Performing local fix...", title="[blue]Mode[/blue]", expand=False))
             update_unit_test, update_code, fixed_unit_test, fixed_code, analysis_results, total_cost, model_name = fix_errors_from_unit_tests(
                 unit_test=input_strings["unit_test_file"],
                 code=input_strings["code_file"],
@@ -134,16 +338,62 @@ def fix_main(
                 time=time,  # Pass time to fix_errors_from_unit_tests
                 verbose=verbose
             )
-            success = update_unit_test or update_code
             attempts = 1
 
+            # Issue #158 fix: Validate the fix by running tests instead of
+            # trusting the LLM's suggestion flags (update_unit_test/update_code)
+            if update_unit_test or update_code:
+                # Write fixed files to temp location first, then run tests
+                import tempfile
+                import os as os_module
+
+                # Create temp files for testing
+                test_dir = tempfile.mkdtemp(prefix="pdd_fix_validate_")
+                temp_test_file = os_module.path.join(test_dir, "test_temp.py")
+                temp_code_file = os_module.path.join(test_dir, "code_temp.py")
+
+                try:
+                    # Write the fixed content (or original if not changed)
+                    test_content = fixed_unit_test if fixed_unit_test else input_strings["unit_test_file"]
+                    code_content = fixed_code if fixed_code else input_strings["code_file"]
+
+                    with open(temp_test_file, 'w') as f:
+                        f.write(test_content)
+                    with open(temp_code_file, 'w') as f:
+                        f.write(code_content)
+
+                    # Run pytest on the fixed test file to validate
+                    fails, errors, warnings, test_output = run_pytest_on_file(temp_test_file)
+
+                    # Success only if tests pass (no failures or errors)
+                    success = (fails == 0 and errors == 0)
+
+                    if verbose:
+                        rprint(f"[cyan]Fix validation: {fails} failures, {errors} errors, {warnings} warnings[/cyan]")
+                        if not success:
+                            rprint("[yellow]Fix suggested by LLM did not pass tests[/yellow]")
+                finally:
+                    # Cleanup temp files
+                    import shutil
+                    try:
+                        shutil.rmtree(test_dir)
+                    except Exception:
+                        pass
+            else:
+                # No changes suggested by LLM
+                success = False
+
         # Save fixed files
         if fixed_unit_test:
-            with open(output_file_paths["output_test"], 'w') as f:
+            output_test_path = Path(output_file_paths["output_test"])
+            output_test_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(output_test_path, 'w') as f:
                 f.write(fixed_unit_test)
 
         if fixed_code:
-            with open(output_file_paths["output_code"], 'w') as f:
+            output_code_path = Path(output_file_paths["output_code"])
+            output_code_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(output_code_path, 'w') as f:
                 f.write(fixed_code)
 
         # Provide user feedback
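Both the cloud and local branches now validate a candidate fix the same way: write the proposed test and code into a throwaway directory, run pytest there, and count the fix as successful only when nothing fails or errors. A condensed sketch of that pattern; run_pytest_on_file is pdd's helper, and the plain pytest subprocess below is an assumed approximation of its pass/fail signal:

    import os, shutil, subprocess, sys, tempfile

    def validate_fix(test_src: str, code_src: str) -> bool:
        test_dir = tempfile.mkdtemp(prefix="pdd_fix_validate_")
        try:
            with open(os.path.join(test_dir, "code_temp.py"), "w") as f:
                f.write(code_src)
            test_file = os.path.join(test_dir, "test_temp.py")
            with open(test_file, "w") as f:
                f.write(test_src)
            # pytest exits 0 only when no test fails or errors
            result = subprocess.run(
                [sys.executable, "-m", "pytest", test_file],
                capture_output=True, text=True,
            )
            return result.returncode == 0
        finally:
            shutil.rmtree(test_dir, ignore_errors=True)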
@@ -286,6 +536,12 @@ def fix_main(
 
         return success, fixed_unit_test, fixed_code, attempts, total_cost, model_name
 
+    except click.Abort:
+        # User cancelled - re-raise to stop the sync loop
+        raise
+    except click.UsageError:
+        # Re-raise UsageError for proper CLI handling (e.g., cloud auth failures, insufficient credits)
+        raise
     except Exception as e:
         if not ctx.obj.get('quiet', False):
             # Safely handle and print MarkupError
@@ -296,4 +552,5 @@ def fix_main(
             # Print other errors normally, escaping the error string
             from rich.markup import escape  # Ensure escape is imported
             rprint(f"[bold red]Error:[/bold red] {escape(str(e))}")
-        sys.exit(1)
+        # Return error result instead of sys.exit(1) to allow orchestrator to handle gracefully
+        return False, "", "", 0, 0.0, f"Error: {e}"
pdd/fix_verification_errors.py CHANGED
@@ -1,4 +1,3 @@
-import re
 from typing import Dict, Any, Optional
 from rich import print as rprint
 from rich.markdown import Markdown
@@ -145,6 +144,7 @@ def fix_verification_errors(
     verification_result_obj = verification_response.get('result')
 
     if isinstance(verification_result_obj, VerificationOutput):
+        # llm_invoke handles all parsing when output_pydantic is specified
        verification_issues_count = verification_result_obj.issues_count
        verification_details = verification_result_obj.details
        if verbose:
@@ -162,66 +162,18 @@ def fix_verification_errors(
                 if verbose:
                     rprint(f"\n[yellow]Found {verification_issues_count} potential issues. Proceeding to fix step.[/yellow]")
             else:
-                rprint(f"[yellow]Warning:[/yellow] <issues_count> is {verification_issues_count}, but <details> field is empty or missing. Treating as no actionable issues found.")
+                rprint(f"[yellow]Warning:[/yellow] issues_count is {verification_issues_count}, but details field is empty or missing. Treating as no actionable issues found.")
                 verification_issues_count = 0
         else:
             if verbose:
                 rprint("\n[green]No issues found during verification based on structured output.[/green]")
-    elif isinstance(verification_result_obj, str):
-        try:
-            issues_match = re.search(r'<issues_count>(\d+)</issues_count>', verification_result_obj)
-            if issues_match:
-                parsed_issues_count = int(issues_match.group(1))
-                details_match = re.search(r'<details>(.*?)</details>', verification_result_obj, re.DOTALL)
-                parsed_verification_details = details_match.group(1).strip() if (details_match and details_match.group(1)) else None
-
-
-                if parsed_issues_count > 0:
-                    if parsed_verification_details:  # Check if details exist and are not empty
-                        issues_found = True
-                        verification_issues_count = parsed_issues_count
-                        verification_details = parsed_verification_details
-                        if verbose:
-                            rprint(f"\n[yellow]Found {verification_issues_count} potential issues in string response. Proceeding to fix step.[/yellow]")
-                    else:
-                        rprint(f"[yellow]Warning:[/yellow] <issues_count> is {parsed_issues_count} in string response, but <details> field is empty or missing. Treating as no actionable issues found.")
-                        verification_issues_count = 0
-                        issues_found = False
-                else:  # parsed_issues_count == 0
-                    verification_issues_count = 0
-                    issues_found = False
-                    if verbose:
-                        rprint("\n[green]No issues found in string verification based on <issues_count> being 0.[/green]")
-            else:  # issues_match is None (tag not found or content not digits)
-                rprint("[bold red]Error:[/bold red] Could not find or parse integer value from <issues_count> tag in string response.")
-                return {
-                    "explanation": None,
-                    "fixed_program": program,
-                    "fixed_code": code,
-                    "total_cost": total_cost,
-                    "model_name": model_name,
-                    "verification_issues_count": 0,
-                }
-        except ValueError:  # Should not be hit if regex is \d+, but as a safeguard
-            rprint("[bold red]Error:[/bold red] Invalid non-integer value in <issues_count> tag in string response.")
-            return {
-                "explanation": None,
-                "fixed_program": program,
-                "fixed_code": code,
-                "total_cost": total_cost,
-                "model_name": model_name,
-                "verification_issues_count": 0,
-            }
-    else:  # Not VerificationOutput and not a successfully parsed string
-        rprint("[bold red]Error:[/bold red] Verification LLM call did not return the expected structured output (e.g., parsing failed).")
-        rprint(f"  [dim]Expected type:[/dim] {VerificationOutput} or str")
+    else:
+        # llm_invoke should always return VerificationOutput when output_pydantic is specified
+        rprint("[bold red]Error:[/bold red] Verification LLM call did not return the expected structured output.")
+        rprint(f"  [dim]Expected type:[/dim] {VerificationOutput}")
         rprint(f"  [dim]Received type:[/dim] {type(verification_result_obj)}")
         content_str = str(verification_result_obj)
         rprint(f"  [dim]Received content:[/dim] {content_str[:500]}{'...' if len(content_str) > 500 else ''}")
-        raw_text = verification_response.get('result_text')
-        if raw_text:
-            raw_text_str = str(raw_text)
-            rprint(f"  [dim]Raw LLM text (if available from llm_invoke):[/dim] {raw_text_str[:500]}{'...' if len(raw_text_str) > 500 else ''}")
         return {
             "explanation": None,
             "fixed_program": program,
@@ -262,63 +214,23 @@ def fix_verification_errors(
         rprint(f"  [dim]Cost:[/dim] ${fix_response.get('cost', 0.0):.6f}")
 
     fix_result_obj = fix_response.get('result')
-    parsed_fix_successfully = False
 
     if isinstance(fix_result_obj, FixerOutput):
+        # llm_invoke handles all parsing and unescaping via _unescape_code_newlines
         fixed_program = fix_result_obj.fixed_program
         fixed_code = fix_result_obj.fixed_code
         fix_explanation = fix_result_obj.explanation
-
-        # Unescape literal \n strings to actual newlines
-        if fixed_program:
-            fixed_program = fixed_program.replace('\\n', '\n')
-        if fixed_code:
-            fixed_code = fixed_code.replace('\\n', '\n')
-
-        parsed_fix_successfully = True
+
         if verbose:
             rprint("[green]Successfully parsed structured output for fix.[/green]")
             rprint(Markdown(f"**Explanation from LLM:**\n{fix_explanation}"))
-    elif isinstance(fix_result_obj, str):
-        program_match = re.search(r'<fixed_program>(.*?)</fixed_program>', fix_result_obj, re.DOTALL)
-        code_match = re.search(r'<fixed_code>(.*?)</fixed_code>', fix_result_obj, re.DOTALL)
-        explanation_match = re.search(r'<explanation>(.*?)</explanation>', fix_result_obj, re.DOTALL)
-
-        if program_match or code_match or explanation_match:  # If any tag is found, attempt to parse
-            fixed_program_candidate = program_match.group(1).strip() if (program_match and program_match.group(1)) else None
-            fixed_code_candidate = code_match.group(1).strip() if (code_match and code_match.group(1)) else None
-            fix_explanation_candidate = explanation_match.group(1).strip() if (explanation_match and explanation_match.group(1)) else None
-
-            # Unescape literal \n strings to actual newlines
-            if fixed_program_candidate:
-                fixed_program_candidate = fixed_program_candidate.replace('\\n', '\n')
-            if fixed_code_candidate:
-                fixed_code_candidate = fixed_code_candidate.replace('\\n', '\n')
-
-            fixed_program = fixed_program_candidate if fixed_program_candidate else program
-            fixed_code = fixed_code_candidate if fixed_code_candidate else code
-            fix_explanation = fix_explanation_candidate if fix_explanation_candidate else "[Fix explanation not provided by LLM]"
-            parsed_fix_successfully = True
-
-            if verbose:
-                if not program_match or not fixed_program_candidate:
-                    rprint("[yellow]Warning:[/yellow] Could not find or parse <fixed_program> tag in fix result string. Using original program.")
-                if not code_match or not fixed_code_candidate:
-                    rprint("[yellow]Warning:[/yellow] Could not find or parse <fixed_code> tag in fix result string. Using original code module.")
-                if not explanation_match or not fix_explanation_candidate:
-                    rprint("[yellow]Warning:[/yellow] Could not find or parse <explanation> tag in fix result string. Using default explanation.")
-        # else: string, but no relevant tags. Will fall to parsed_fix_successfully = False below
-
-    if not parsed_fix_successfully:
-        rprint(f"[bold red]Error:[/bold red] Fix generation LLM call did not return the expected structured output (e.g., parsing failed).")
-        rprint(f"  [dim]Expected type:[/dim] {FixerOutput} or str (with XML tags)")
+    else:
+        # llm_invoke should always return FixerOutput when output_pydantic is specified
+        rprint(f"[bold red]Error:[/bold red] Fix generation LLM call did not return the expected structured output.")
+        rprint(f"  [dim]Expected type:[/dim] {FixerOutput}")
         rprint(f"  [dim]Received type:[/dim] {type(fix_result_obj)}")
         content_str = str(fix_result_obj)
         rprint(f"  [dim]Received content:[/dim] {content_str[:500]}{'...' if len(content_str) > 500 else ''}")
-        raw_text = fix_response.get('result_text')
-        if raw_text:
-            raw_text_str = str(raw_text)
-            rprint(f"  [dim]Raw LLM text (if available from llm_invoke):[/dim] {raw_text_str[:500]}{'...' if len(raw_text_str) > 500 else ''}")
         fix_explanation = "[Error: Failed to parse structured output from LLM for fix explanation]"
         # fixed_program and fixed_code remain original (already initialized)