pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdd/__init__.py +40 -8
- pdd/agentic_bug.py +323 -0
- pdd/agentic_bug_orchestrator.py +497 -0
- pdd/agentic_change.py +231 -0
- pdd/agentic_change_orchestrator.py +526 -0
- pdd/agentic_common.py +598 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_e2e_fix.py +319 -0
- pdd/agentic_e2e_fix_orchestrator.py +426 -0
- pdd/agentic_fix.py +1294 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +387 -0
- pdd/agentic_verify.py +183 -0
- pdd/architecture_sync.py +565 -0
- pdd/auth_service.py +210 -0
- pdd/auto_deps_main.py +71 -51
- pdd/auto_include.py +245 -5
- pdd/auto_update.py +125 -47
- pdd/bug_main.py +196 -23
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +350 -150
- pdd/code_generator.py +60 -18
- pdd/code_generator_main.py +790 -57
- pdd/commands/__init__.py +48 -0
- pdd/commands/analysis.py +306 -0
- pdd/commands/auth.py +309 -0
- pdd/commands/connect.py +290 -0
- pdd/commands/fix.py +163 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +175 -0
- pdd/commands/misc.py +87 -0
- pdd/commands/modify.py +256 -0
- pdd/commands/report.py +144 -0
- pdd/commands/sessions.py +284 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +589 -111
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +175 -76
- pdd/continue_generation.py +53 -10
- pdd/core/__init__.py +33 -0
- pdd/core/cli.py +527 -0
- pdd/core/cloud.py +237 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +67 -0
- pdd/core/remote_session.py +61 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +262 -33
- pdd/data/language_format.csv +71 -63
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/docs/prompting_guide.md +864 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
- pdd/fix_code_loop.py +523 -95
- pdd/fix_code_module_errors.py +6 -2
- pdd/fix_error_loop.py +491 -92
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +278 -21
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +529 -286
- pdd/fix_verification_main.py +294 -89
- pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
- pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
- pdd/frontend/dist/index.html +376 -0
- pdd/frontend/dist/logo.svg +33 -0
- pdd/generate_output_paths.py +139 -15
- pdd/generate_test.py +218 -146
- pdd/get_comment.py +19 -44
- pdd/get_extension.py +8 -9
- pdd/get_jwt_token.py +318 -22
- pdd/get_language.py +8 -7
- pdd/get_run_command.py +75 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +13 -4
- pdd/llm_invoke.py +1711 -181
- pdd/load_prompt_template.py +19 -12
- pdd/path_resolution.py +140 -0
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +14 -4
- pdd/preprocess.py +293 -24
- pdd/preprocess_main.py +41 -6
- pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
- pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
- pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
- pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
- pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
- pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
- pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
- pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
- pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
- pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
- pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
- pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
- pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
- pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
- pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
- pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
- pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
- pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
- pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
- pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
- pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
- pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
- pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
- pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
- pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
- pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +925 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +122 -905
- pdd/prompts/change_LLM.prompt +3093 -1
- pdd/prompts/detect_change_LLM.prompt +686 -27
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +41 -7
- pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
- pdd/prompts/increase_tests_LLM.prompt +1 -5
- pdd/prompts/insert_includes_LLM.prompt +316 -186
- pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
- pdd/prompts/prompt_diff_LLM.prompt +82 -0
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/pytest_output.py +127 -12
- pdd/remote_session.py +876 -0
- pdd/render_mermaid.py +236 -0
- pdd/server/__init__.py +52 -0
- pdd/server/app.py +335 -0
- pdd/server/click_executor.py +587 -0
- pdd/server/executor.py +338 -0
- pdd/server/jobs.py +661 -0
- pdd/server/models.py +241 -0
- pdd/server/routes/__init__.py +31 -0
- pdd/server/routes/architecture.py +451 -0
- pdd/server/routes/auth.py +364 -0
- pdd/server/routes/commands.py +929 -0
- pdd/server/routes/config.py +42 -0
- pdd/server/routes/files.py +603 -0
- pdd/server/routes/prompts.py +1322 -0
- pdd/server/routes/websocket.py +473 -0
- pdd/server/security.py +243 -0
- pdd/server/terminal_spawner.py +209 -0
- pdd/server/token_counter.py +222 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +237 -195
- pdd/sync_animation.py +8 -4
- pdd/sync_determine_operation.py +839 -112
- pdd/sync_main.py +351 -57
- pdd/sync_orchestration.py +1400 -756
- pdd/sync_tui.py +848 -0
- pdd/template_expander.py +161 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +237 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +140 -63
- pdd/unfinished_prompt.py +51 -4
- pdd/update_main.py +567 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
- pdd_cli-0.0.118.dist-info/RECORD +227 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.45.dist-info/RECORD +0 -116
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
@@ -114,7 +114,8 @@ def fix_errors_from_unit_tests(
     Fix errors in unit tests using LLM models and log the process.
 
     Args:
-        unit_test (str): The unit test code
+        unit_test (str): The unit test code, potentially multiple files concatenated
+            with <file name="filename.py">...</file> tags.
         code (str): The code under test
         prompt (str): The prompt that generated the code
         error (str): The error message
@@ -244,10 +245,10 @@ def fix_errors_from_unit_tests(
         if verbose:
             console.print(f"[bold red]{error_msg}[/bold red]")
         write_to_error_file(error_file, error_msg)
-        return False, False, "", "", "", 0.0, ""
+        return False, False, "", "", "", 0.0, f"Error: ValidationError - {str(e)[:100]}"
     except Exception as e:
         error_msg = f"Error in fix_errors_from_unit_tests: {str(e)}"
         if verbose:
             console.print(f"[bold red]{error_msg}[/bold red]")
         write_to_error_file(error_file, error_msg)
-        return False, False, "", "", "", 0.0, ""
+        return False, False, "", "", "", 0.0, f"Error: {type(e).__name__}"
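The hunk above changes the failure path of fix_errors_from_unit_tests: the last element of the returned 7-tuple, normally the model name, now carries a short "Error: ..." description instead of an empty string. A minimal caller-side sketch of how that can be used follows; the helper name handle_fix_result and its variable names are illustrative, not part of the package.

# Hypothetical caller sketch; assumes the 7-tuple layout visible in the hunk above.
def handle_fix_result(result):
    update_test, update_code, fixed_test, fixed_code, analysis, cost, model_or_error = result
    if model_or_error.startswith("Error:"):
        # New behavior: the last slot names the failure instead of being "".
        print(f"fix_errors_from_unit_tests failed: {model_or_error}")
        return False
    print(f"Fix proposed by {model_or_error} (cost ${cost:.4f})")
    return update_test or update_code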
pdd/fix_main.py
CHANGED
@@ -1,8 +1,11 @@
 import sys
 from typing import Tuple, Optional
+import json
 import click
 from rich import print as rprint
 from rich.markup import MarkupError, escape
+from rich.console import Console
+from rich.panel import Panel
 
 import requests
 import asyncio
@@ -13,13 +16,27 @@ from .preprocess import preprocess
 
 from .construct_paths import construct_paths
 from .fix_errors_from_unit_tests import fix_errors_from_unit_tests
-from .fix_error_loop import fix_error_loop
+from .fix_error_loop import fix_error_loop, run_pytest_on_file
 from .get_jwt_token import get_jwt_token
 from .get_language import get_language
+from .core.cloud import CloudConfig
 
 # Import DEFAULT_STRENGTH from the package
 from . import DEFAULT_STRENGTH
 
+# Cloud request timeout
+CLOUD_REQUEST_TIMEOUT = 400  # seconds
+
+console = Console()
+
+
+def _env_flag_enabled(name: str) -> bool:
+    """Return True when an env var is set to a truthy value."""
+    value = os.environ.get(name)
+    if value is None:
+        return False
+    return str(value).strip().lower() in {"1", "true", "yes", "on"}
+
 def fix_main(
     ctx: click.Context,
     prompt_file: str,
@@ -33,7 +50,10 @@ def fix_main(
     verification_program: Optional[str],
     max_attempts: int,
     budget: float,
-    auto_submit: bool
+    auto_submit: bool,
+    agentic_fallback: bool = True,
+    strength: Optional[float] = None,
+    temperature: Optional[float] = None,
 ) -> Tuple[bool, str, str, int, float, str]:
     """
     Main function to fix errors in code and unit tests.
@@ -52,7 +72,7 @@ def fix_main(
         max_attempts: Maximum number of fix attempts
         budget: Maximum cost allowed for fixing
         auto_submit: Whether to auto-submit example if tests pass
-
+        agentic_fallback: Whether the cli agent fallback is triggered
     Returns:
         Tuple containing:
         - Success status (bool)
@@ -69,13 +89,13 @@ def fix_main(
     # Initialize analysis_results to None to prevent reference errors
     analysis_results = None
 
+    # Input validation - let these propagate to caller for proper exit code
+    if not loop:
+        error_path = Path(error_file)
+        if not error_path.exists():
+            raise FileNotFoundError(f"Error file '{error_file}' does not exist.")
+
     try:
-        # Verify error file exists if not in loop mode
-        if not loop:
-            error_path = Path(error_file)
-            if not error_path.exists():
-                raise FileNotFoundError(f"Error file '{error_file}' does not exist.")
-
         # Construct file paths
         input_file_paths = {
             "prompt_file": prompt_file,
@@ -97,20 +117,200 @@ def fix_main(
             quiet=ctx.obj.get('quiet', False),
             command="fix",
             command_options=command_options,
-            create_error_file=loop  # Only create error file if in loop mode
+            create_error_file=loop,  # Only create error file if in loop mode
+            context_override=ctx.obj.get('context'),
+            confirm_callback=ctx.obj.get('confirm_callback')
         )
 
-        # Get parameters from context
-        strength = ctx.obj.get('strength', DEFAULT_STRENGTH)
-        temperature = ctx.obj.get('temperature', 0)
+        # Get parameters from context (prefer passed parameters over ctx.obj)
+        strength = strength if strength is not None else ctx.obj.get('strength', DEFAULT_STRENGTH)
+        temperature = temperature if temperature is not None else ctx.obj.get('temperature', 0)
         verbose = ctx.obj.get('verbose', False)
         time = ctx.obj.get('time')  # Get time from context
 
+        # Determine cloud vs local execution preference
+        is_local_execution_preferred = ctx.obj.get('local', False)
+        cloud_only = _env_flag_enabled("PDD_CLOUD_ONLY") or _env_flag_enabled("PDD_NO_LOCAL_FALLBACK")
+        current_execution_is_local = is_local_execution_preferred and not cloud_only
+
+        # Cloud execution is only supported for single-pass mode (not loop mode)
+        # because loop mode requires running tests and verification programs locally
+        cloud_execution_attempted = False
+        cloud_execution_succeeded = False
+
+        if not loop and not current_execution_is_local:
+            if verbose:
+                console.print(Panel("Attempting cloud fix execution...", title="[blue]Mode[/blue]", expand=False))
+
+            jwt_token = CloudConfig.get_jwt_token(verbose=verbose)
+
+            if not jwt_token:
+                if cloud_only:
+                    console.print("[red]Cloud authentication failed.[/red]")
+                    raise click.UsageError("Cloud authentication failed")
+                console.print("[yellow]Cloud authentication failed. Falling back to local execution.[/yellow]")
+                current_execution_is_local = True
+
+            if jwt_token and not current_execution_is_local:
+                cloud_execution_attempted = True
+                # Build cloud payload
+                payload = {
+                    "unitTest": input_strings["unit_test_file"],
+                    "code": input_strings["code_file"],
+                    "prompt": input_strings["prompt_file"],
+                    "errors": input_strings.get("error_file", ""),
+                    "language": get_language(os.path.splitext(code_file)[1]),
+                    "strength": strength,
+                    "temperature": temperature,
+                    "time": time if time is not None else 0.25,
+                    "verbose": verbose,
+                }
+
+                headers = {
+                    "Authorization": f"Bearer {jwt_token}",
+                    "Content-Type": "application/json"
+                }
+                cloud_url = CloudConfig.get_endpoint_url("fixCode")
+
+                try:
+                    response = requests.post(
+                        cloud_url,
+                        json=payload,
+                        headers=headers,
+                        timeout=CLOUD_REQUEST_TIMEOUT
+                    )
+                    response.raise_for_status()
+
+                    response_data = response.json()
+                    fixed_unit_test = response_data.get("fixedUnitTest", "")
+                    fixed_code = response_data.get("fixedCode", "")
+                    analysis_results = response_data.get("analysis", "")
+                    total_cost = float(response_data.get("totalCost", 0.0))
+                    model_name = response_data.get("modelName", "cloud_model")
+                    success = response_data.get("success", False)
+                    update_unit_test = response_data.get("updateUnitTest", False)
+                    update_code = response_data.get("updateCode", False)
+
+                    if not (fixed_unit_test or fixed_code):
+                        if cloud_only:
+                            console.print("[red]Cloud execution returned no fixed code.[/red]")
+                            raise click.UsageError("Cloud execution returned no fixed code")
+                        console.print("[yellow]Cloud execution returned no fixed code. Falling back to local.[/yellow]")
+                        current_execution_is_local = True
+                    else:
+                        cloud_execution_succeeded = True
+                        attempts = 1
+
+                        # Validate the fix by running tests (same as local)
+                        if update_unit_test or update_code:
+                            import tempfile
+                            import shutil as shutil_module
+
+                            test_dir = tempfile.mkdtemp(prefix="pdd_fix_validate_")
+                            temp_test_file = os.path.join(test_dir, "test_temp.py")
+                            temp_code_file = os.path.join(test_dir, "code_temp.py")
+
+                            try:
+                                test_content = fixed_unit_test if fixed_unit_test else input_strings["unit_test_file"]
+                                code_content = fixed_code if fixed_code else input_strings["code_file"]
+
+                                with open(temp_test_file, 'w') as f:
+                                    f.write(test_content)
+                                with open(temp_code_file, 'w') as f:
+                                    f.write(code_content)
+
+                                fails, errors_count, warnings, test_output = run_pytest_on_file(temp_test_file)
+                                success = (fails == 0 and errors_count == 0)
+
+                                if verbose:
+                                    rprint(f"[cyan]Fix validation: {fails} failures, {errors_count} errors, {warnings} warnings[/cyan]")
+                                    if not success:
+                                        rprint("[yellow]Fix suggested by cloud did not pass tests[/yellow]")
+                            finally:
+                                try:
+                                    shutil_module.rmtree(test_dir)
+                                except Exception:
+                                    pass
+                        else:
+                            success = False
+
+                        if verbose:
+                            console.print(Panel(
+                                f"Cloud fix completed. Model: {model_name}, Cost: ${total_cost:.6f}",
+                                title="[green]Cloud Success[/green]",
+                                expand=False
+                            ))
+
+                except requests.exceptions.Timeout:
+                    if cloud_only:
+                        console.print(f"[red]Cloud execution timed out ({CLOUD_REQUEST_TIMEOUT}s).[/red]")
+                        raise click.UsageError("Cloud execution timed out")
+                    console.print(f"[yellow]Cloud execution timed out ({CLOUD_REQUEST_TIMEOUT}s). Falling back to local.[/yellow]")
+                    current_execution_is_local = True
+
+                except requests.exceptions.HTTPError as e:
+                    status_code = e.response.status_code if e.response else 0
+                    err_content = e.response.text[:200] if e.response else "No response content"
+
+                    # Non-recoverable errors: do NOT fall back to local
+                    if status_code == 402:  # Insufficient credits
+                        try:
+                            error_data = e.response.json()
+                            current_balance = error_data.get("currentBalance", "unknown")
+                            estimated_cost = error_data.get("estimatedCost", "unknown")
+                            console.print(f"[red]Insufficient credits. Current balance: {current_balance}, estimated cost: {estimated_cost}[/red]")
+                        except Exception:
+                            console.print(f"[red]Insufficient credits: {err_content}[/red]")
+                        raise click.UsageError("Insufficient credits for cloud fix")
+                    elif status_code == 401:  # Authentication error
+                        console.print(f"[red]Authentication failed: {err_content}[/red]")
+                        raise click.UsageError("Cloud authentication failed")
+                    elif status_code == 403:  # Authorization error (not approved)
+                        console.print(f"[red]Access denied: {err_content}[/red]")
+                        raise click.UsageError("Access denied - user not approved")
+                    elif status_code == 400:  # Validation error
+                        console.print(f"[red]Invalid request: {err_content}[/red]")
+                        raise click.UsageError(f"Invalid request: {err_content}")
+                    else:
+                        # Recoverable errors (5xx, unexpected errors): fall back to local
+                        if cloud_only:
+                            console.print(f"[red]Cloud HTTP error ({status_code}): {err_content}[/red]")
+                            raise click.UsageError(f"Cloud HTTP error ({status_code}): {err_content}")
+                        console.print(f"[yellow]Cloud HTTP error ({status_code}): {err_content}. Falling back to local.[/yellow]")
+                        current_execution_is_local = True
+
+                except requests.exceptions.RequestException as e:
+                    if cloud_only:
+                        console.print(f"[red]Cloud network error: {e}[/red]")
+                        raise click.UsageError(f"Cloud network error: {e}")
+                    console.print(f"[yellow]Cloud network error: {e}. Falling back to local.[/yellow]")
+                    current_execution_is_local = True
+
+                except json.JSONDecodeError:
+                    if cloud_only:
+                        console.print("[red]Cloud returned invalid JSON.[/red]")
+                        raise click.UsageError("Cloud returned invalid JSON")
+                    console.print("[yellow]Cloud returned invalid JSON. Falling back to local.[/yellow]")
+                    current_execution_is_local = True
+
+        # Local execution path (for loop mode or when cloud failed/skipped)
         if loop:
-            #
+            # Determine if loop should use cloud for LLM calls (hybrid mode)
+            # Local test execution stays local, but LLM fix calls can go to cloud
+            use_cloud_for_loop = not is_local_execution_preferred and not cloud_only
+
+            # If cloud_only is set but we're in loop mode, we still use hybrid approach
+            if cloud_only and not is_local_execution_preferred:
+                use_cloud_for_loop = True
+
+            if verbose:
+                mode_desc = "hybrid (local tests + cloud LLM)" if use_cloud_for_loop else "local"
+                console.print(Panel(f"Performing {mode_desc} fix loop...", title="[blue]Mode[/blue]", expand=False))
+
             success, fixed_unit_test, fixed_code, attempts, total_cost, model_name = fix_error_loop(
                 unit_test_file=unit_test_file,
                 code_file=code_file,
+                prompt_file=prompt_file,
                 prompt=input_strings["prompt_file"],
                 verification_program=verification_program,
                 strength=strength,
@@ -119,10 +319,14 @@ def fix_main(
                 max_attempts=max_attempts,
                 budget=budget,
                 error_log_file=output_file_paths.get("output_results"),
-                verbose=verbose
+                verbose=verbose,
+                agentic_fallback=agentic_fallback,
+                use_cloud=use_cloud_for_loop
             )
-
-        # Use fix_errors_from_unit_tests for single-pass fixing
+        elif not cloud_execution_succeeded:
+            # Use fix_errors_from_unit_tests for single-pass fixing (local fallback)
+            if verbose:
+                console.print(Panel("Performing local fix...", title="[blue]Mode[/blue]", expand=False))
             update_unit_test, update_code, fixed_unit_test, fixed_code, analysis_results, total_cost, model_name = fix_errors_from_unit_tests(
                 unit_test=input_strings["unit_test_file"],
                 code=input_strings["code_file"],
@@ -134,16 +338,62 @@ def fix_main(
                 time=time,  # Pass time to fix_errors_from_unit_tests
                 verbose=verbose
             )
-            success = update_unit_test or update_code
             attempts = 1
 
+            # Issue #158 fix: Validate the fix by running tests instead of
+            # trusting the LLM's suggestion flags (update_unit_test/update_code)
+            if update_unit_test or update_code:
+                # Write fixed files to temp location first, then run tests
+                import tempfile
+                import os as os_module
+
+                # Create temp files for testing
+                test_dir = tempfile.mkdtemp(prefix="pdd_fix_validate_")
+                temp_test_file = os_module.path.join(test_dir, "test_temp.py")
+                temp_code_file = os_module.path.join(test_dir, "code_temp.py")
+
+                try:
+                    # Write the fixed content (or original if not changed)
+                    test_content = fixed_unit_test if fixed_unit_test else input_strings["unit_test_file"]
+                    code_content = fixed_code if fixed_code else input_strings["code_file"]
+
+                    with open(temp_test_file, 'w') as f:
+                        f.write(test_content)
+                    with open(temp_code_file, 'w') as f:
+                        f.write(code_content)
+
+                    # Run pytest on the fixed test file to validate
+                    fails, errors, warnings, test_output = run_pytest_on_file(temp_test_file)
+
+                    # Success only if tests pass (no failures or errors)
+                    success = (fails == 0 and errors == 0)
+
+                    if verbose:
+                        rprint(f"[cyan]Fix validation: {fails} failures, {errors} errors, {warnings} warnings[/cyan]")
+                        if not success:
+                            rprint("[yellow]Fix suggested by LLM did not pass tests[/yellow]")
+                finally:
+                    # Cleanup temp files
+                    import shutil
+                    try:
+                        shutil.rmtree(test_dir)
+                    except Exception:
+                        pass
+            else:
+                # No changes suggested by LLM
+                success = False
+
         # Save fixed files
         if fixed_unit_test:
-
+            output_test_path = Path(output_file_paths["output_test"])
+            output_test_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(output_test_path, 'w') as f:
                 f.write(fixed_unit_test)
 
         if fixed_code:
-
+            output_code_path = Path(output_file_paths["output_code"])
+            output_code_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(output_code_path, 'w') as f:
                 f.write(fixed_code)
 
         # Provide user feedback
@@ -286,6 +536,12 @@ def fix_main(
 
         return success, fixed_unit_test, fixed_code, attempts, total_cost, model_name
 
+    except click.Abort:
+        # User cancelled - re-raise to stop the sync loop
+        raise
+    except click.UsageError:
+        # Re-raise UsageError for proper CLI handling (e.g., cloud auth failures, insufficient credits)
+        raise
     except Exception as e:
         if not ctx.obj.get('quiet', False):
             # Safely handle and print MarkupError
@@ -296,4 +552,5 @@ def fix_main(
             # Print other errors normally, escaping the error string
             from rich.markup import escape  # Ensure escape is imported
             rprint(f"[bold red]Error:[/bold red] {escape(str(e))}")
-        sys.exit(1)
+        # Return error result instead of sys.exit(1) to allow orchestrator to handle gracefully
+        return False, "", "", 0, 0.0, f"Error: {e}"
pdd/fix_verification_errors.py
CHANGED
@@ -1,4 +1,3 @@
-import re
 from typing import Dict, Any, Optional
 from rich import print as rprint
 from rich.markdown import Markdown
@@ -145,6 +144,7 @@ def fix_verification_errors(
     verification_result_obj = verification_response.get('result')
 
     if isinstance(verification_result_obj, VerificationOutput):
+        # llm_invoke handles all parsing when output_pydantic is specified
         verification_issues_count = verification_result_obj.issues_count
         verification_details = verification_result_obj.details
         if verbose:
@@ -162,66 +162,18 @@ def fix_verification_errors(
                 if verbose:
                     rprint(f"\n[yellow]Found {verification_issues_count} potential issues. Proceeding to fix step.[/yellow]")
             else:
-                rprint(f"[yellow]Warning:[/yellow]
+                rprint(f"[yellow]Warning:[/yellow] issues_count is {verification_issues_count}, but details field is empty or missing. Treating as no actionable issues found.")
                 verification_issues_count = 0
         else:
             if verbose:
                 rprint("\n[green]No issues found during verification based on structured output.[/green]")
-
-
-
-
-            parsed_issues_count = int(issues_match.group(1))
-            details_match = re.search(r'<details>(.*?)</details>', verification_result_obj, re.DOTALL)
-            parsed_verification_details = details_match.group(1).strip() if (details_match and details_match.group(1)) else None
-
-
-            if parsed_issues_count > 0:
-                if parsed_verification_details:  # Check if details exist and are not empty
-                    issues_found = True
-                    verification_issues_count = parsed_issues_count
-                    verification_details = parsed_verification_details
-                    if verbose:
-                        rprint(f"\n[yellow]Found {verification_issues_count} potential issues in string response. Proceeding to fix step.[/yellow]")
-                else:
-                    rprint(f"[yellow]Warning:[/yellow] <issues_count> is {parsed_issues_count} in string response, but <details> field is empty or missing. Treating as no actionable issues found.")
-                    verification_issues_count = 0
-                    issues_found = False
-            else:  # parsed_issues_count == 0
-                verification_issues_count = 0
-                issues_found = False
-                if verbose:
-                    rprint("\n[green]No issues found in string verification based on <issues_count> being 0.[/green]")
-        else:  # issues_match is None (tag not found or content not digits)
-            rprint("[bold red]Error:[/bold red] Could not find or parse integer value from <issues_count> tag in string response.")
-            return {
-                "explanation": None,
-                "fixed_program": program,
-                "fixed_code": code,
-                "total_cost": total_cost,
-                "model_name": model_name,
-                "verification_issues_count": 0,
-            }
-        except ValueError:  # Should not be hit if regex is \d+, but as a safeguard
-            rprint("[bold red]Error:[/bold red] Invalid non-integer value in <issues_count> tag in string response.")
-            return {
-                "explanation": None,
-                "fixed_program": program,
-                "fixed_code": code,
-                "total_cost": total_cost,
-                "model_name": model_name,
-                "verification_issues_count": 0,
-            }
-    else:  # Not VerificationOutput and not a successfully parsed string
-        rprint("[bold red]Error:[/bold red] Verification LLM call did not return the expected structured output (e.g., parsing failed).")
-        rprint(f"  [dim]Expected type:[/dim] {VerificationOutput} or str")
+    else:
+        # llm_invoke should always return VerificationOutput when output_pydantic is specified
+        rprint("[bold red]Error:[/bold red] Verification LLM call did not return the expected structured output.")
+        rprint(f"  [dim]Expected type:[/dim] {VerificationOutput}")
         rprint(f"  [dim]Received type:[/dim] {type(verification_result_obj)}")
         content_str = str(verification_result_obj)
         rprint(f"  [dim]Received content:[/dim] {content_str[:500]}{'...' if len(content_str) > 500 else ''}")
-        raw_text = verification_response.get('result_text')
-        if raw_text:
-            raw_text_str = str(raw_text)
-            rprint(f"  [dim]Raw LLM text (if available from llm_invoke):[/dim] {raw_text_str[:500]}{'...' if len(raw_text_str) > 500 else ''}")
         return {
             "explanation": None,
             "fixed_program": program,
@@ -262,63 +214,23 @@ def fix_verification_errors(
         rprint(f"  [dim]Cost:[/dim] ${fix_response.get('cost', 0.0):.6f}")
 
     fix_result_obj = fix_response.get('result')
-    parsed_fix_successfully = False
 
     if isinstance(fix_result_obj, FixerOutput):
+        # llm_invoke handles all parsing and unescaping via _unescape_code_newlines
        fixed_program = fix_result_obj.fixed_program
        fixed_code = fix_result_obj.fixed_code
        fix_explanation = fix_result_obj.explanation
-
-        # Unescape literal \n strings to actual newlines
-        if fixed_program:
-            fixed_program = fixed_program.replace('\\n', '\n')
-        if fixed_code:
-            fixed_code = fixed_code.replace('\\n', '\n')
-
-        parsed_fix_successfully = True
+
        if verbose:
            rprint("[green]Successfully parsed structured output for fix.[/green]")
            rprint(Markdown(f"**Explanation from LLM:**\n{fix_explanation}"))
-
-
-
-
-
-        if program_match or code_match or explanation_match:  # If any tag is found, attempt to parse
-            fixed_program_candidate = program_match.group(1).strip() if (program_match and program_match.group(1)) else None
-            fixed_code_candidate = code_match.group(1).strip() if (code_match and code_match.group(1)) else None
-            fix_explanation_candidate = explanation_match.group(1).strip() if (explanation_match and explanation_match.group(1)) else None
-
-            # Unescape literal \n strings to actual newlines
-            if fixed_program_candidate:
-                fixed_program_candidate = fixed_program_candidate.replace('\\n', '\n')
-            if fixed_code_candidate:
-                fixed_code_candidate = fixed_code_candidate.replace('\\n', '\n')
-
-            fixed_program = fixed_program_candidate if fixed_program_candidate else program
-            fixed_code = fixed_code_candidate if fixed_code_candidate else code
-            fix_explanation = fix_explanation_candidate if fix_explanation_candidate else "[Fix explanation not provided by LLM]"
-            parsed_fix_successfully = True
-
-            if verbose:
-                if not program_match or not fixed_program_candidate:
-                    rprint("[yellow]Warning:[/yellow] Could not find or parse <fixed_program> tag in fix result string. Using original program.")
-                if not code_match or not fixed_code_candidate:
-                    rprint("[yellow]Warning:[/yellow] Could not find or parse <fixed_code> tag in fix result string. Using original code module.")
-                if not explanation_match or not fix_explanation_candidate:
-                    rprint("[yellow]Warning:[/yellow] Could not find or parse <explanation> tag in fix result string. Using default explanation.")
-        # else: string, but no relevant tags. Will fall to parsed_fix_successfully = False below
-
-    if not parsed_fix_successfully:
-        rprint(f"[bold red]Error:[/bold red] Fix generation LLM call did not return the expected structured output (e.g., parsing failed).")
-        rprint(f"  [dim]Expected type:[/dim] {FixerOutput} or str (with XML tags)")
+    else:
+        # llm_invoke should always return FixerOutput when output_pydantic is specified
+        rprint(f"[bold red]Error:[/bold red] Fix generation LLM call did not return the expected structured output.")
+        rprint(f"  [dim]Expected type:[/dim] {FixerOutput}")
         rprint(f"  [dim]Received type:[/dim] {type(fix_result_obj)}")
         content_str = str(fix_result_obj)
         rprint(f"  [dim]Received content:[/dim] {content_str[:500]}{'...' if len(content_str) > 500 else ''}")
-        raw_text = fix_response.get('result_text')
-        if raw_text:
-            raw_text_str = str(raw_text)
-            rprint(f"  [dim]Raw LLM text (if available from llm_invoke):[/dim] {raw_text_str[:500]}{'...' if len(raw_text_str) > 500 else ''}")
         fix_explanation = "[Error: Failed to parse structured output from LLM for fix explanation]"
         # fixed_program and fixed_code remain original (already initialized)
 
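The fix_verification_errors diff drops the regex fallback for string responses and relies entirely on llm_invoke returning structured VerificationOutput / FixerOutput objects when output_pydantic is specified. A minimal sketch of that contract follows; the field names come from the diff, while the types, defaults, and the consume_verification helper are assumptions.

# Sketch only: not the package's actual model definitions.
from typing import Optional
from pydantic import BaseModel

class VerificationOutput(BaseModel):
    issues_count: int
    details: Optional[str] = None

class FixerOutput(BaseModel):
    explanation: str
    fixed_program: str
    fixed_code: str

def consume_verification(result: object) -> int:
    # The refactor trusts the structured result directly; no <issues_count>/<details> regex parsing.
    if isinstance(result, VerificationOutput):
        return result.issues_count if result.details else 0
    raise TypeError(f"expected VerificationOutput, got {type(result).__name__}")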