pdd-cli 0.0.42__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff shows the changes between these two published package versions as they appear in their public registry. It is provided for informational purposes only.
- pdd/__init__.py +4 -4
- pdd/agentic_common.py +863 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_fix.py +1179 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +370 -0
- pdd/agentic_verify.py +183 -0
- pdd/auto_deps_main.py +15 -5
- pdd/auto_include.py +63 -5
- pdd/bug_main.py +3 -2
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +80 -19
- pdd/code_generator.py +58 -18
- pdd/code_generator_main.py +672 -25
- pdd/commands/__init__.py +42 -0
- pdd/commands/analysis.py +248 -0
- pdd/commands/fix.py +140 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +174 -0
- pdd/commands/misc.py +79 -0
- pdd/commands/modify.py +230 -0
- pdd/commands/report.py +144 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +281 -81
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +113 -11
- pdd/continue_generation.py +47 -7
- pdd/core/__init__.py +0 -0
- pdd/core/cli.py +503 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +63 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +44 -11
- pdd/data/language_format.csv +71 -62
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/fix_code_loop.py +331 -77
- pdd/fix_error_loop.py +209 -60
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +75 -18
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +319 -272
- pdd/fix_verification_main.py +57 -17
- pdd/generate_output_paths.py +93 -10
- pdd/generate_test.py +16 -5
- pdd/get_jwt_token.py +48 -9
- pdd/get_run_command.py +73 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/increase_tests.py +7 -0
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +11 -3
- pdd/llm_invoke.py +1278 -110
- pdd/load_prompt_template.py +36 -10
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +10 -3
- pdd/preprocess.py +228 -15
- pdd/preprocess_main.py +8 -5
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +1071 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +98 -101
- pdd/prompts/change_LLM.prompt +1 -3
- pdd/prompts/detect_change_LLM.prompt +562 -3
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +14 -2
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +16 -4
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +6 -41
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +21 -6
- pdd/prompts/increase_tests_LLM.prompt +1 -2
- pdd/prompts/insert_includes_LLM.prompt +1181 -6
- pdd/prompts/split_LLM.prompt +1 -62
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/prompts/xml_convertor_LLM.prompt +3246 -7
- pdd/pytest_output.py +188 -21
- pdd/python_env_detector.py +151 -0
- pdd/render_mermaid.py +236 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +56 -7
- pdd/sync_determine_operation.py +918 -186
- pdd/sync_main.py +82 -32
- pdd/sync_orchestration.py +1456 -453
- pdd/sync_tui.py +848 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +242 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +151 -61
- pdd/unfinished_prompt.py +49 -3
- pdd/update_main.py +549 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +20 -7
- pdd_cli-0.0.90.dist-info/RECORD +153 -0
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.42.dist-info/RECORD +0 -115
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
pdd/fix_error_loop.py
CHANGED
@@ -5,13 +5,19 @@ import subprocess
 import shutil
 import json
 from datetime import datetime
+from pathlib import Path

 from rich import print as rprint
 from rich.console import Console

 # Relative import from an internal module.
+from .get_language import get_language
 from .fix_errors_from_unit_tests import fix_errors_from_unit_tests
-from . import DEFAULT_TIME
+from . import DEFAULT_TIME # Import DEFAULT_TIME
+from .python_env_detector import detect_host_python_executable
+from .agentic_fix import run_agentic_fix
+from .agentic_langtest import default_verify_cmd_for
+

 console = Console()

@@ -19,42 +25,63 @@ def escape_brackets(text: str) -> str:
     """Escape square brackets so Rich doesn't misinterpret them."""
     return text.replace("[", "\\[").replace("]", "\\]")

+# ---------- Normalize any agentic return shape to a 4-tuple ----------
+def _normalize_agentic_result(result):
+    """
+    Normalize run_agentic_fix result into: (success: bool, msg: str, cost: float, model: str, changed_files: List[str])
+    Handles older 2/3/4-tuple shapes used by tests/monkeypatches.
+    """
+    if isinstance(result, tuple):
+        if len(result) == 5:
+            ok, msg, cost, model, changed_files = result
+            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), list(changed_files or [])
+        if len(result) == 4:
+            ok, msg, cost, model = result
+            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), []
+        if len(result) == 3:
+            ok, msg, cost = result
+            return bool(ok), str(msg), float(cost), "agentic-cli", []
+        if len(result) == 2:
+            ok, msg = result
+            return bool(ok), str(msg), 0.0, "agentic-cli", []
+    # Fallback (shouldn't happen)
+    return False, "Invalid agentic result shape", 0.0, "agentic-cli", []
+
+def _safe_run_agentic_fix(*, prompt_file, code_file, unit_test_file, error_log_file, cwd=None):
+    """
+    Call (possibly monkeypatched) run_agentic_fix and normalize its return.
+    """
+    res = run_agentic_fix(
+        prompt_file=prompt_file,
+        code_file=code_file,
+        unit_test_file=unit_test_file,
+        error_log_file=error_log_file,
+        cwd=cwd,
+    )
+    return _normalize_agentic_result(res)
+# ---------------------------------------------------------------------
+
+
 def run_pytest_on_file(test_file: str) -> tuple[int, int, int, str]:
     """
-    Run pytest on the specified test file using subprocess.
+    Run pytest on the specified test file using the subprocess-based runner.
     Returns a tuple: (failures, errors, warnings, logs)
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        warnings = test_results.get('warnings', 0)
-
-        if return_code == 2:
-            errors += 1
-
-        # Combine stdout and stderr from the test results
-        logs = test_results.get('standard_output', '') + '\n' + test_results.get('standard_error', '')
-
-        return failures, errors, warnings, logs
-
-    except json.JSONDecodeError:
-        # If JSON parsing fails, return the raw output
-        return 1, 1, 0, f"Failed to parse pytest output:\n{result.stdout}\n{result.stderr}"
-
-    except Exception as e:
-        return 1, 1, 0, f"Error running pytest: {str(e)}"
+    from .pytest_output import run_pytest_and_capture_output
+    # Use the subprocess-based runner to avoid module caching issues
+    output_data = run_pytest_and_capture_output(test_file)
+
+    # Extract results
+    results = output_data.get("test_results", [{}])[0]
+
+    failures = results.get("failures", 0)
+    errors = results.get("errors", 0)
+    warnings = results.get("warnings", 0)
+
+    # Combine stdout/stderr for the log
+    logs = (results.get("standard_output", "") or "") + "\n" + (results.get("standard_error", "") or "")
+
+    return failures, errors, warnings, logs

 def format_log_for_output(log_structure):
     """
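The new `_normalize_agentic_result` helper widens whatever `run_agentic_fix` (or a test monkeypatch) returns into a single five-element shape. Below is a minimal standalone sketch of that mapping, with the helper inlined (and renamed `normalize`) only so the example runs without installing pdd:

```python
# Standalone illustration of the tuple-shape normalization added in this release.
# The mapping mirrors _normalize_agentic_result from pdd/fix_error_loop.py; it is
# inlined here purely for the example.

def normalize(result):
    if isinstance(result, tuple):
        if len(result) == 5:
            ok, msg, cost, model, changed = result
            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), list(changed or [])
        if len(result) == 4:
            ok, msg, cost, model = result
            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), []
        if len(result) == 3:
            ok, msg, cost = result
            return bool(ok), str(msg), float(cost), "agentic-cli", []
        if len(result) == 2:
            ok, msg = result
            return bool(ok), str(msg), 0.0, "agentic-cli", []
    return False, "Invalid agentic result shape", 0.0, "agentic-cli", []

# Older monkeypatched shapes all widen to the same 5-tuple:
assert normalize((True, "fixed")) == (True, "fixed", 0.0, "agentic-cli", [])
assert normalize((True, "fixed", 0.02)) == (True, "fixed", 0.02, "agentic-cli", [])
assert normalize((True, "fixed", 0.02, None)) == (True, "fixed", 0.02, "agentic-cli", [])
assert normalize((True, "fixed", 0.02, "claude", ["a.py"])) == (True, "fixed", 0.02, "claude", ["a.py"])
assert normalize(None)[0] is False  # anything unexpected falls back to a failure tuple
```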
@@ -74,6 +101,8 @@ def format_log_for_output(log_structure):
         # Fix attempt with XML tags
         if iteration.get("fix_attempt"):
             formatted_text += f"<fix_attempt iteration={iteration['number']}>\n"
+            if iteration.get("model_name"):
+                formatted_text += f"Model: {iteration['model_name']}\n"
             formatted_text += f"{iteration['fix_attempt']}\n"
             formatted_text += "</fix_attempt>\n\n"

@@ -98,6 +127,7 @@ def format_log_for_output(log_structure):

 def fix_error_loop(unit_test_file: str,
                    code_file: str,
+                   prompt_file: str,
                    prompt: str,
                    verification_program: str,
                    strength: float,
@@ -106,7 +136,8 @@ def fix_error_loop(unit_test_file: str,
                    budget: float,
                    error_log_file: str = "error_log.txt",
                    verbose: bool = False,
-                   time: float = DEFAULT_TIME
+                   time: float = DEFAULT_TIME,
+                   agentic_fallback: bool = True):
     """
     Attempt to fix errors in a unit test and corresponding code using repeated iterations,
     counting only the number of times we actually call the LLM fix function.
@@ -127,7 +158,7 @@ def fix_error_loop(unit_test_file: str,
         error_log_file: Path to file to log errors (default: "error_log.txt").
         verbose: Enable verbose logging (default: False).
         time: Time parameter for the fix_errors_from_unit_tests call.
-
+        agentic_fallback: Whether to trigger cli agentic fallback when fix fails.
     Outputs:
         success: Boolean indicating if the overall process succeeded.
         final_unit_test: String contents of the final unit test file.
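`fix_error_loop` now takes the prompt file path and an `agentic_fallback` flag alongside the existing arguments. A hedged call sketch follows; the file paths and tuning values are placeholders, and the `temperature`/`max_attempts` keywords are assumed to belong to the unchanged part of the signature:

```python
# Hypothetical invocation of the updated fix_error_loop signature.
# All paths and numeric values below are illustrative placeholders.
from pathlib import Path
from pdd.fix_error_loop import fix_error_loop

prompt_path = "prompts/calculator_python.prompt"

success, final_unit_test, final_code, attempts, total_cost, model_name = fix_error_loop(
    unit_test_file="tests/test_calculator.py",       # test to repair
    code_file="src/calculator.py",                    # code under test
    prompt_file=prompt_path,                          # new in 0.0.90: path to the generating prompt
    prompt=Path(prompt_path).read_text(),             # prompt text, as before
    verification_program="examples/run_calculator.py",
    strength=0.8,
    temperature=0.0,                                  # assumed existing parameter
    max_attempts=3,                                   # assumed existing parameter
    budget=1.0,
    error_log_file="output/error_log.txt",
    verbose=True,
    agentic_fallback=True,                            # new in 0.0.90: allow the CLI-agent fallback
)
```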
@@ -184,7 +215,24 @@ def fix_error_loop(unit_test_file: str,
     iteration = 0
     # Run an initial test to determine starting state
     try:
-
+        is_python = str(code_file).lower().endswith(".py")
+        if is_python:
+            initial_fails, initial_errors, initial_warnings, pytest_output = run_pytest_on_file(unit_test_file)
+        else:
+            # For non-Python files, run the verification program to get an initial error state
+            rprint(f"[cyan]Non-Python target detected. Running verification program to get initial state...[/cyan]")
+            lang = get_language(os.path.splitext(code_file)[1])
+            verify_cmd = default_verify_cmd_for(lang, unit_test_file)
+            if not verify_cmd:
+                raise ValueError(f"No default verification command for language: {lang}")
+
+            verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, shell=True, stdin=subprocess.DEVNULL)
+            pytest_output = (verify_result.stdout or "") + "\n" + (verify_result.stderr or "")
+            if verify_result.returncode == 0:
+                initial_fails, initial_errors, initial_warnings = 0, 0, 0
+            else:
+                initial_fails, initial_errors, initial_warnings = 1, 0, 0  # Treat any failure as one "fail"
+
         # Store initial state for statistics
         stats = {
             "initial_fails": initial_fails,
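For non-Python targets the initial state is no longer measured with pytest: the language's default verification command is run through the shell and any non-zero exit collapses into a single counted failure. A minimal sketch of that collapse, assuming only what the hunk above shows (the command string is a placeholder; in pdd it comes from `default_verify_cmd_for(lang, unit_test_file)`):

```python
# Sketch of the pass/fail collapse used for non-Python targets: any non-zero
# exit code counts as one failure, zero errors, zero warnings.
import subprocess

def initial_state_from_command(verify_cmd: str) -> tuple[int, int, int, str]:
    result = subprocess.run(
        verify_cmd, capture_output=True, text=True, shell=True, stdin=subprocess.DEVNULL
    )
    output = (result.stdout or "") + "\n" + (result.stderr or "")
    if result.returncode == 0:
        return 0, 0, 0, output   # clean run: nothing to fix
    return 1, 0, 0, output       # any failure is treated as one "fail"

# Placeholder command; a real one would come from default_verify_cmd_for.
fails, errors, warnings, log = initial_state_from_command("node tests/test_app.js")
print(fails, errors, warnings)
```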
@@ -197,14 +245,62 @@ def fix_error_loop(unit_test_file: str,
             "iterations_info": []
         }
     except Exception as e:
-        rprint(f"[red]Error running initial
+        rprint(f"[red]Error running initial test/verification:[/red] {e}")
         return False, "", "", fix_attempts, total_cost, model_name

+    # If target is not a Python file, trigger agentic fallback if tests fail
+    if not is_python:
+        if initial_fails > 0 or initial_errors > 0:
+            rprint("[cyan]Non-Python target failed initial verification. Triggering agentic fallback...[/cyan]")
+            error_log_path = Path(error_log_file)
+            error_log_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(error_log_path, "w") as f:
+                f.write(pytest_output)
+
+            rprint(f"[cyan]Attempting agentic fix fallback (prompt_file={prompt_file!r})...[/cyan]")
+            success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_fix(
+                prompt_file=prompt_file,
+                code_file=code_file,
+                unit_test_file=unit_test_file,
+                error_log_file=error_log_file,
+                cwd=Path(prompt_file).parent if prompt_file else None,
+            )
+            if not success:
+                rprint(f"[bold red]Agentic fix fallback failed: {agent_msg}[/bold red]")
+            if agent_changed_files:
+                rprint(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+                for f in agent_changed_files:
+                    rprint(f"  • {f}")
+            final_unit_test = ""
+            final_code = ""
+            try:
+                with open(unit_test_file, "r") as f:
+                    final_unit_test = f.read()
+            except Exception:
+                pass
+            try:
+                with open(code_file, "r") as f:
+                    final_code = f.read()
+            except Exception:
+                pass
+            return success, final_unit_test, final_code, 1, agent_cost, agent_model
+        else:
+            # Non-python tests passed, so we are successful.
+            rprint("[green]Non-Python tests passed. No fix needed.[/green]")
+            try:
+                with open(unit_test_file, "r") as f:
+                    final_unit_test = f.read()
+                with open(code_file, "r") as f:
+                    final_code = f.read()
+            except Exception as e:
+                rprint(f"[yellow]Warning: Could not read final files: {e}[/yellow]")
+            return True, final_unit_test, final_code, 0, 0.0, "N/A"
+
     fails, errors, warnings = initial_fails, initial_errors, initial_warnings

     # Determine success state immediately
     success = (fails == 0 and errors == 0 and warnings == 0)
-
+
     # Track if tests were initially passing
     initially_passing = success

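When the non-Python path hands off to the agentic fallback, the function returns immediately with `attempts` fixed at 1 and the agent's cost and model; if the initial verification already passes it returns 0 attempts, zero cost, and `"N/A"` as the model. A hedged sketch of how a caller might distinguish those outcomes (the tuple values are made up, and `"agentic-cli"` is only the default model label supplied by the normalizer):

```python
# Illustrative interpretation of the 6-tuple returned by fix_error_loop for a
# non-Python target. The concrete values below are invented for the example.
success, final_unit_test, final_code, attempts, total_cost, model_name = (
    True, "// fixed test ...", "// fixed code ...", 1, 0.12, "agentic-cli"
)

if attempts == 0 and model_name == "N/A":
    print("Initial verification already passed; nothing was changed.")
elif attempts == 1 and model_name == "agentic-cli":
    # "agentic-cli" is the default label when the agent reports no model name.
    print(f"Agentic fallback handled the fix (cost ${total_cost:.2f}).")
else:
    print(f"LLM fix loop ran {attempts} attempt(s) with {model_name}.")
```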
@@ -241,13 +337,23 @@ def fix_error_loop(unit_test_file: str,

             # Update structured log
             log_structure["iterations"][-1]["post_test_output"] = pytest_output
-
+
             # Write formatted log to file
-
+            error_log_path = Path(error_log_file)
+            error_log_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(error_log_path, "w") as elog:
                 elog.write(format_log_for_output(log_structure))

             # Set success to True (already determined)
-            #
+            # Read the actual fixed files to return the successful state
+            try:
+                with open(unit_test_file, "r") as f:
+                    final_unit_test = f.read()
+                with open(code_file, "r") as f:
+                    final_code = f.read()
+            except Exception as e:
+                rprint(f"[yellow]Warning: Could not read fixed files: {e}[/yellow]")
+                # Keep empty strings as fallback
             break

         iteration_header = f"=== Attempt iteration {iteration} ==="
@@ -325,7 +431,7 @@ def fix_error_loop(unit_test_file: str,
         try:
             # Format the log for the LLM
             formatted_log = format_log_for_output(log_structure)
-
+
             updated_unit_test, updated_code, fixed_unit_test, fixed_code, analysis, cost, model_name = fix_errors_from_unit_tests(
                 unit_test_contents,
                 code_contents,
@@ -335,11 +441,12 @@ def fix_error_loop(unit_test_file: str,
                 strength,
                 temperature,
                 verbose=verbose,
-                time=time
+                time=time # Pass time parameter
             )

             # Update the fix attempt in the structured log
             log_structure["iterations"][-1]["fix_attempt"] = analysis
+            log_structure["iterations"][-1]["model_name"] = model_name
         except Exception as e:
             rprint(f"[red]Error during fix_errors_from_unit_tests call:[/red] {e}")
             break
@@ -380,8 +487,8 @@ def fix_error_loop(unit_test_file: str,

         # Run the verification:
         try:
-            verify_cmd = [
-            verify_result = subprocess.run(verify_cmd, capture_output=True, text=True)
+            verify_cmd = [detect_host_python_executable(), verification_program]
+            verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, stdin=subprocess.DEVNULL)
             # Safely handle None for stdout or stderr:
             verify_stdout = verify_result.stdout or ""
             verify_stderr = verify_result.stderr or ""
@@ -411,9 +518,11 @@ def fix_error_loop(unit_test_file: str,

         # Update post-test output in structured log
         log_structure["iterations"][-1]["post_test_output"] = pytest_output
-
+
         # Write updated structured log to file after each iteration
-
+        error_log_path = Path(error_log_file)
+        error_log_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(error_log_path, "w") as elog:
             elog.write(format_log_for_output(log_structure))

         # Update iteration stats with post-fix results
@@ -477,8 +586,8 @@ def fix_error_loop(unit_test_file: str,
     else:
         stats["best_iteration"] = "final"

-    # Read final file contents
-    #
+    # Read final file contents for non-initially-passing tests
+    # (Initially passing tests have files read at lines 344-348)
     try:
         if not initially_passing:
             with open(unit_test_file, "r") as f:
@@ -489,11 +598,6 @@ def fix_error_loop(unit_test_file: str,
         rprint(f"[red]Error reading final files:[/red] {e}")
         final_unit_test, final_code = "", ""

-    # Check if we broke out early because tests already passed
-    if stats["best_iteration"] == 0 and fix_attempts == 0:
-        # Still return at least 1 attempt to acknowledge the work done
-        fix_attempts = 1
-
     # Print summary statistics
     rprint("\n[bold cyan]Summary Statistics:[/bold cyan]")
     rprint(f"Initial state: {initial_fails} fails, {initial_errors} errors, {initial_warnings} warnings")
@@ -503,17 +607,62 @@ def fix_error_loop(unit_test_file: str,

     # Calculate improvements
     stats["improvement"] = {
-        "fails_reduced": initial_fails - stats[
-        "errors_reduced": initial_errors - stats[
-        "warnings_reduced": initial_warnings - stats[
-        "percent_improvement": 100 if initial_fails + initial_errors + initial_warnings == 0 else
-                               (1 - (stats[
+        "fails_reduced": initial_fails - stats['final_fails'],
+        "errors_reduced": initial_errors - stats['final_errors'],
+        "warnings_reduced": initial_warnings - stats['final_warnings'],
+        "percent_improvement": 100 if (initial_fails + initial_errors + initial_warnings) == 0 else
+                               (1 - (stats['final_fails'] + stats['final_errors'] + stats['final_warnings']) /
                                (initial_fails + initial_errors + initial_warnings)) * 100
     }

     rprint(f"Improvement: {stats['improvement']['fails_reduced']} fails, {stats['improvement']['errors_reduced']} errors, {stats['improvement']['warnings_reduced']} warnings")
     rprint(f"Overall improvement: {stats['improvement']['percent_improvement']:.2f}%")

+    # Agentic fallback at end adds cost & model (normalized)
+    if not success and agentic_fallback and total_cost < budget:
+        # Ensure error_log_file exists before calling agentic fix
+        # Write the current log structure if it hasn't been written yet
+        try:
+            if not os.path.exists(error_log_file) or os.path.getsize(error_log_file) == 0:
+                error_log_path = Path(error_log_file)
+                error_log_path.parent.mkdir(parents=True, exist_ok=True)
+                with open(error_log_path, "w") as elog:
+                    if log_structure["iterations"]:
+                        elog.write(format_log_for_output(log_structure))
+                    else:
+                        # No iterations ran, write initial state info
+                        elog.write(f"Initial state: {initial_fails} fails, {initial_errors} errors, {initial_warnings} warnings\n")
+                        if 'pytest_output' in locals():
+                            elog.write(f"\n<pytest_output>\n{pytest_output}\n</pytest_output>\n")
+        except Exception as e:
+            rprint(f"[yellow]Warning: Could not write error log before agentic fallback: {e}[/yellow]")
+
+        rprint(f"[cyan]Attempting agentic fix fallback (prompt_file={prompt_file!r})...[/cyan]")
+        agent_success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_fix(
+            prompt_file=prompt_file,
+            code_file=code_file,
+            unit_test_file=unit_test_file,
+            error_log_file=error_log_file,
+            cwd=Path(prompt_file).parent if prompt_file else None,
+        )
+        total_cost += agent_cost
+        if not agent_success:
+            rprint(f"[bold red]Agentic fix fallback failed: {agent_msg}[/bold red]")
+        if agent_changed_files:
+            rprint(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+            for f in agent_changed_files:
+                rprint(f"  • {f}")
+        if agent_success:
+            model_name = agent_model or model_name
+            try:
+                with open(unit_test_file, "r") as f:
+                    final_unit_test = f.read()
+                with open(code_file, "r") as f:
+                    final_code = f.read()
+            except Exception as e:
+                rprint(f"[yellow]Warning: Could not read files after successful agentic fix: {e}[/yellow]")
+            success = True
+
     return success, final_unit_test, final_code, fix_attempts, total_cost, model_name

 # If this module is run directly for testing purposes:
@@ -548,4 +697,4 @@ if __name__ == "__main__":
         rprint(f"Attempts: {attempts}")
         rprint(f"Total cost: ${total_cost:.6f}")
         rprint(f"Model used: {model_name}")
-        rprint(f"Final unit test contents:\n{final_unit_test}")
+        rprint(f"Final unit test contents:\n{final_unit_test}")
pdd/fix_errors_from_unit_tests.py
CHANGED
@@ -114,7 +114,8 @@ def fix_errors_from_unit_tests(
     Fix errors in unit tests using LLM models and log the process.

     Args:
-        unit_test (str): The unit test code
+        unit_test (str): The unit test code, potentially multiple files concatenated
+            with <file name="filename.py">...</file> tags.
         code (str): The code under test
         prompt (str): The prompt that generated the code
         error (str): The error message
@@ -244,10 +245,10 @@ def fix_errors_from_unit_tests(
         if verbose:
             console.print(f"[bold red]{error_msg}[/bold red]")
         write_to_error_file(error_file, error_msg)
-        return False, False, "", "", "", 0.0, ""
+        return False, False, "", "", "", 0.0, f"Error: ValidationError - {str(e)[:100]}"
     except Exception as e:
         error_msg = f"Error in fix_errors_from_unit_tests: {str(e)}"
         if verbose:
             console.print(f"[bold red]{error_msg}[/bold red]")
         write_to_error_file(error_file, error_msg)
-        return False, False, "", "", "", 0.0, ""
+        return False, False, "", "", "", 0.0, f"Error: {type(e).__name__}"
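The error paths of `fix_errors_from_unit_tests` used to put an empty string in the model-name slot; they now return a short diagnostic such as `Error: ValidationError - ...` or `Error: KeyError`, so callers can tell an internal failure from an ordinary empty result. An illustrative check of the seven-element return (the tuple literal stands in for a real call):

```python
# Illustrative handling of the 7-tuple returned by fix_errors_from_unit_tests.
# The tuple below is a stand-in for a real call; the point shown is the new
# "Error: ..." convention in the final (model name) slot.
update_unit_test, update_code, fixed_unit_test, fixed_code, analysis, cost, model_name = (
    False, False, "", "", "", 0.0, "Error: ValidationError - ..."
)

if model_name.startswith("Error:"):
    # As of 0.0.90 the error paths encode the failure type here instead of "".
    print(f"fix step failed internally: {model_name}")
elif update_unit_test or update_code:
    print(f"fix suggested by {model_name} (cost ${cost:.4f})")
```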
pdd/fix_main.py
CHANGED
@@ -13,7 +13,7 @@ from .preprocess import preprocess

 from .construct_paths import construct_paths
 from .fix_errors_from_unit_tests import fix_errors_from_unit_tests
-from .fix_error_loop import fix_error_loop
+from .fix_error_loop import fix_error_loop, run_pytest_on_file
 from .get_jwt_token import get_jwt_token
 from .get_language import get_language

@@ -33,7 +33,10 @@ def fix_main(
     verification_program: Optional[str],
     max_attempts: int,
     budget: float,
-    auto_submit: bool
+    auto_submit: bool,
+    agentic_fallback: bool = True,
+    strength: Optional[float] = None,
+    temperature: Optional[float] = None,
 ) -> Tuple[bool, str, str, int, float, str]:
     """
     Main function to fix errors in code and unit tests.
@@ -52,7 +55,7 @@ def fix_main(
         max_attempts: Maximum number of fix attempts
         budget: Maximum cost allowed for fixing
         auto_submit: Whether to auto-submit example if tests pass
-
+        agentic_fallback: Whether the cli agent fallback is triggered
     Returns:
         Tuple containing:
         - Success status (bool)
@@ -69,13 +72,13 @@ def fix_main(
     # Initialize analysis_results to None to prevent reference errors
     analysis_results = None

+    # Input validation - let these propagate to caller for proper exit code
+    if not loop:
+        error_path = Path(error_file)
+        if not error_path.exists():
+            raise FileNotFoundError(f"Error file '{error_file}' does not exist.")
+
     try:
-        # Verify error file exists if not in loop mode
-        if not loop:
-            error_path = Path(error_file)
-            if not error_path.exists():
-                raise FileNotFoundError(f"Error file '{error_file}' does not exist.")
-
         # Construct file paths
         input_file_paths = {
             "prompt_file": prompt_file,
@@ -97,12 +100,14 @@ def fix_main(
             quiet=ctx.obj.get('quiet', False),
             command="fix",
             command_options=command_options,
-            create_error_file=loop  # Only create error file if in loop mode
+            create_error_file=loop,  # Only create error file if in loop mode
+            context_override=ctx.obj.get('context'),
+            confirm_callback=ctx.obj.get('confirm_callback')
         )

-        # Get parameters from context
-        strength = ctx.obj.get('strength', DEFAULT_STRENGTH)
-        temperature = ctx.obj.get('temperature', 0)
+        # Get parameters from context (prefer passed parameters over ctx.obj)
+        strength = strength if strength is not None else ctx.obj.get('strength', DEFAULT_STRENGTH)
+        temperature = temperature if temperature is not None else ctx.obj.get('temperature', 0)
         verbose = ctx.obj.get('verbose', False)
         time = ctx.obj.get('time') # Get time from context

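`fix_main` now accepts explicit `strength` and `temperature` arguments and only falls back to the Click context when they are `None`. A standalone sketch of that resolution rule; `DEFAULT_STRENGTH` and the context dict are stubs for the example:

```python
# Standalone sketch of the "explicit argument wins over ctx.obj" resolution
# added in 0.0.90. DEFAULT_STRENGTH and the ctx_obj dict are stand-ins.
DEFAULT_STRENGTH = 0.5

def resolve_params(ctx_obj: dict, strength=None, temperature=None):
    strength = strength if strength is not None else ctx_obj.get('strength', DEFAULT_STRENGTH)
    temperature = temperature if temperature is not None else ctx_obj.get('temperature', 0)
    return strength, temperature

assert resolve_params({'strength': 0.7, 'temperature': 0.2}) == (0.7, 0.2)   # taken from context
assert resolve_params({'strength': 0.7}, strength=0.9) == (0.9, 0)           # explicit override wins
assert resolve_params({}) == (DEFAULT_STRENGTH, 0)                           # defaults
```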
@@ -111,6 +116,7 @@ def fix_main(
             success, fixed_unit_test, fixed_code, attempts, total_cost, model_name = fix_error_loop(
                 unit_test_file=unit_test_file,
                 code_file=code_file,
+                prompt_file=prompt_file,
                 prompt=input_strings["prompt_file"],
                 verification_program=verification_program,
                 strength=strength,
@@ -119,7 +125,8 @@ def fix_main(
                 max_attempts=max_attempts,
                 budget=budget,
                 error_log_file=output_file_paths.get("output_results"),
-                verbose=verbose
+                verbose=verbose,
+                agentic_fallback=agentic_fallback
             )
         else:
             # Use fix_errors_from_unit_tests for single-pass fixing
@@ -134,16 +141,62 @@ def fix_main(
                 time=time, # Pass time to fix_errors_from_unit_tests
                 verbose=verbose
             )
-            success = update_unit_test or update_code
             attempts = 1

+            # Issue #158 fix: Validate the fix by running tests instead of
+            # trusting the LLM's suggestion flags (update_unit_test/update_code)
+            if update_unit_test or update_code:
+                # Write fixed files to temp location first, then run tests
+                import tempfile
+                import os as os_module
+
+                # Create temp files for testing
+                test_dir = tempfile.mkdtemp(prefix="pdd_fix_validate_")
+                temp_test_file = os_module.path.join(test_dir, "test_temp.py")
+                temp_code_file = os_module.path.join(test_dir, "code_temp.py")
+
+                try:
+                    # Write the fixed content (or original if not changed)
+                    test_content = fixed_unit_test if fixed_unit_test else input_strings["unit_test_file"]
+                    code_content = fixed_code if fixed_code else input_strings["code_file"]
+
+                    with open(temp_test_file, 'w') as f:
+                        f.write(test_content)
+                    with open(temp_code_file, 'w') as f:
+                        f.write(code_content)
+
+                    # Run pytest on the fixed test file to validate
+                    fails, errors, warnings, test_output = run_pytest_on_file(temp_test_file)
+
+                    # Success only if tests pass (no failures or errors)
+                    success = (fails == 0 and errors == 0)
+
+                    if verbose:
+                        rprint(f"[cyan]Fix validation: {fails} failures, {errors} errors, {warnings} warnings[/cyan]")
+                    if not success:
+                        rprint("[yellow]Fix suggested by LLM did not pass tests[/yellow]")
+                finally:
+                    # Cleanup temp files
+                    import shutil
+                    try:
+                        shutil.rmtree(test_dir)
+                    except Exception:
+                        pass
+            else:
+                # No changes suggested by LLM
+                success = False
+
             # Save fixed files
             if fixed_unit_test:
-
+                output_test_path = Path(output_file_paths["output_test"])
+                output_test_path.parent.mkdir(parents=True, exist_ok=True)
+                with open(output_test_path, 'w') as f:
                     f.write(fixed_unit_test)

             if fixed_code:
-
+                output_code_path = Path(output_file_paths["output_code"])
+                output_code_path.parent.mkdir(parents=True, exist_ok=True)
+                with open(output_code_path, 'w') as f:
                     f.write(fixed_code)

         # Provide user feedback
@@ -286,6 +339,9 @@ def fix_main(

         return success, fixed_unit_test, fixed_code, attempts, total_cost, model_name

+    except click.Abort:
+        # User cancelled - re-raise to stop the sync loop
+        raise
     except Exception as e:
         if not ctx.obj.get('quiet', False):
             # Safely handle and print MarkupError
@@ -296,4 +352,5 @@ def fix_main(
             # Print other errors normally, escaping the error string
             from rich.markup import escape # Ensure escape is imported
             rprint(f"[bold red]Error:[/bold red] {escape(str(e))}")
-        sys.exit(1)
+        # Return error result instead of sys.exit(1) to allow orchestrator to handle gracefully
+        return False, "", "", 0, 0.0, f"Error: {e}"