pdd-cli 0.0.45__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdd/__init__.py +4 -4
- pdd/agentic_common.py +863 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_fix.py +1179 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +370 -0
- pdd/agentic_verify.py +183 -0
- pdd/auto_deps_main.py +15 -5
- pdd/auto_include.py +63 -5
- pdd/bug_main.py +3 -2
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +73 -21
- pdd/code_generator.py +58 -18
- pdd/code_generator_main.py +672 -25
- pdd/commands/__init__.py +42 -0
- pdd/commands/analysis.py +248 -0
- pdd/commands/fix.py +140 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +174 -0
- pdd/commands/misc.py +79 -0
- pdd/commands/modify.py +230 -0
- pdd/commands/report.py +144 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +258 -82
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +113 -11
- pdd/continue_generation.py +47 -7
- pdd/core/__init__.py +0 -0
- pdd/core/cli.py +503 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +63 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +44 -11
- pdd/data/language_format.csv +71 -63
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/fix_code_loop.py +330 -76
- pdd/fix_error_loop.py +207 -61
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +75 -18
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +306 -272
- pdd/fix_verification_main.py +28 -9
- pdd/generate_output_paths.py +93 -10
- pdd/generate_test.py +16 -5
- pdd/get_jwt_token.py +9 -2
- pdd/get_run_command.py +73 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +11 -3
- pdd/llm_invoke.py +1269 -103
- pdd/load_prompt_template.py +36 -10
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +10 -3
- pdd/preprocess.py +228 -15
- pdd/preprocess_main.py +8 -5
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +1071 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +100 -905
- pdd/prompts/detect_change_LLM.prompt +122 -20
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +4 -2
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +8 -0
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +21 -6
- pdd/prompts/increase_tests_LLM.prompt +1 -5
- pdd/prompts/insert_includes_LLM.prompt +228 -108
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/pytest_output.py +127 -12
- pdd/render_mermaid.py +236 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +49 -6
- pdd/sync_determine_operation.py +543 -98
- pdd/sync_main.py +81 -31
- pdd/sync_orchestration.py +1334 -751
- pdd/sync_tui.py +848 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +242 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +151 -61
- pdd/unfinished_prompt.py +49 -3
- pdd/update_main.py +549 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +19 -6
- pdd_cli-0.0.90.dist-info/RECORD +153 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.45.dist-info/RECORD +0 -116
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
pdd/fix_error_loop.py
CHANGED
```diff
@@ -5,14 +5,19 @@ import subprocess
 import shutil
 import json
 from datetime import datetime
+from pathlib import Path
 
 from rich import print as rprint
 from rich.console import Console
 
 # Relative import from an internal module.
+from .get_language import get_language
 from .fix_errors_from_unit_tests import fix_errors_from_unit_tests
-from . import DEFAULT_TIME
+from . import DEFAULT_TIME # Import DEFAULT_TIME
 from .python_env_detector import detect_host_python_executable
+from .agentic_fix import run_agentic_fix
+from .agentic_langtest import default_verify_cmd_for
+
 
 console = Console()
 
```
```diff
@@ -20,44 +25,63 @@ def escape_brackets(text: str) -> str:
     """Escape square brackets so Rich doesn't misinterpret them."""
     return text.replace("[", "\\[").replace("]", "\\]")
 
+# ---------- Normalize any agentic return shape to a 4-tuple ----------
+def _normalize_agentic_result(result):
+    """
+    Normalize run_agentic_fix result into: (success: bool, msg: str, cost: float, model: str, changed_files: List[str])
+    Handles older 2/3/4-tuple shapes used by tests/monkeypatches.
+    """
+    if isinstance(result, tuple):
+        if len(result) == 5:
+            ok, msg, cost, model, changed_files = result
+            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), list(changed_files or [])
+        if len(result) == 4:
+            ok, msg, cost, model = result
+            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), []
+        if len(result) == 3:
+            ok, msg, cost = result
+            return bool(ok), str(msg), float(cost), "agentic-cli", []
+        if len(result) == 2:
+            ok, msg = result
+            return bool(ok), str(msg), 0.0, "agentic-cli", []
+    # Fallback (shouldn't happen)
+    return False, "Invalid agentic result shape", 0.0, "agentic-cli", []
+
+def _safe_run_agentic_fix(*, prompt_file, code_file, unit_test_file, error_log_file, cwd=None):
+    """
+    Call (possibly monkeypatched) run_agentic_fix and normalize its return.
+    """
+    res = run_agentic_fix(
+        prompt_file=prompt_file,
+        code_file=code_file,
+        unit_test_file=unit_test_file,
+        error_log_file=error_log_file,
+        cwd=cwd,
+    )
+    return _normalize_agentic_result(res)
+# ---------------------------------------------------------------------
+
+
 def run_pytest_on_file(test_file: str) -> tuple[int, int, int, str]:
     """
-    Run pytest on the specified test file using subprocess.
+    Run pytest on the specified test file using the subprocess-based runner.
     Returns a tuple: (failures, errors, warnings, logs)
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        failures = test_results.get('failures', 0)
-        errors = test_results.get('errors', 0)
-        warnings = test_results.get('warnings', 0)
-
-        if return_code == 2:
-            errors += 1
-
-        # Combine stdout and stderr from the test results
-        logs = test_results.get('standard_output', '') + '\n' + test_results.get('standard_error', '')
-
-        return failures, errors, warnings, logs
-
-    except json.JSONDecodeError:
-        # If JSON parsing fails, return the raw output
-        return 1, 1, 0, f"Failed to parse pytest output:\n{result.stdout}\n{result.stderr}"
-
-    except Exception as e:
-        return 1, 1, 0, f"Error running pytest: {str(e)}"
+    from .pytest_output import run_pytest_and_capture_output
+    # Use the subprocess-based runner to avoid module caching issues
+    output_data = run_pytest_and_capture_output(test_file)
+
+    # Extract results
+    results = output_data.get("test_results", [{}])[0]
+
+    failures = results.get("failures", 0)
+    errors = results.get("errors", 0)
+    warnings = results.get("warnings", 0)
+
+    # Combine stdout/stderr for the log
+    logs = (results.get("standard_output", "") or "") + "\n" + (results.get("standard_error", "") or "")
+
+    return failures, errors, warnings, logs
 
 def format_log_for_output(log_structure):
     """
```
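The new `_normalize_agentic_result` helper lets the loop accept older `run_agentic_fix` return shapes. A minimal sketch of how the different tuple lengths normalize (the model name and file path below are illustrative only):

```python
# Illustrative only: demonstrates the padding behavior of _normalize_agentic_result.
from pdd.fix_error_loop import _normalize_agentic_result

# A full 5-tuple passes through with type coercion.
print(_normalize_agentic_result((True, "fixed", 0.12, "some-model", ["src/app.py"])))
# (True, 'fixed', 0.12, 'some-model', ['src/app.py'])

# An older 2-tuple gains a zero cost, the default model label, and an empty file list.
print(_normalize_agentic_result((True, "fixed")))
# (True, 'fixed', 0.0, 'agentic-cli', [])

# Anything that is not a recognized tuple falls back to a failure result.
print(_normalize_agentic_result(None))
# (False, 'Invalid agentic result shape', 0.0, 'agentic-cli', [])
```

The rewritten `run_pytest_on_file` delegates to `run_pytest_and_capture_output` from `pdd.pytest_output`. The payload format is not shown in this diff; judging only from the accessors above, the assumed shape is roughly:

```python
# Assumed payload shape, inferred from the new run_pytest_on_file accessors (not from pdd docs).
output_data = {
    "test_results": [
        {
            "failures": 1,
            "errors": 0,
            "warnings": 2,
            "standard_output": "1 failed, 3 passed, 2 warnings ...",
            "standard_error": "",
        }
    ],
}
results = output_data.get("test_results", [{}])[0]
print(results.get("failures", 0), results.get("errors", 0), results.get("warnings", 0))  # 1 0 2
```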
```diff
@@ -77,6 +101,8 @@ def format_log_for_output(log_structure):
         # Fix attempt with XML tags
         if iteration.get("fix_attempt"):
             formatted_text += f"<fix_attempt iteration={iteration['number']}>\n"
+            if iteration.get("model_name"):
+                formatted_text += f"Model: {iteration['model_name']}\n"
             formatted_text += f"{iteration['fix_attempt']}\n"
             formatted_text += "</fix_attempt>\n\n"
 
@@ -101,6 +127,7 @@ def format_log_for_output(log_structure):
 
 def fix_error_loop(unit_test_file: str,
                    code_file: str,
+                   prompt_file: str,
                    prompt: str,
                    verification_program: str,
                    strength: float,
@@ -109,7 +136,8 @@ def fix_error_loop(unit_test_file: str,
                    budget: float,
                    error_log_file: str = "error_log.txt",
                    verbose: bool = False,
-                   time: float = DEFAULT_TIME
+                   time: float = DEFAULT_TIME,
+                   agentic_fallback: bool = True):
     """
     Attempt to fix errors in a unit test and corresponding code using repeated iterations,
     counting only the number of times we actually call the LLM fix function.
@@ -130,7 +158,7 @@ def fix_error_loop(unit_test_file: str,
         error_log_file: Path to file to log errors (default: "error_log.txt").
         verbose: Enable verbose logging (default: False).
         time: Time parameter for the fix_errors_from_unit_tests call.
-
+        agentic_fallback: Whether to trigger cli agentic fallback when fix fails.
     Outputs:
         success: Boolean indicating if the overall process succeeded.
         final_unit_test: String contents of the final unit test file.
```
```diff
@@ -187,7 +215,24 @@ def fix_error_loop(unit_test_file: str,
     iteration = 0
     # Run an initial test to determine starting state
     try:
-
+        is_python = str(code_file).lower().endswith(".py")
+        if is_python:
+            initial_fails, initial_errors, initial_warnings, pytest_output = run_pytest_on_file(unit_test_file)
+        else:
+            # For non-Python files, run the verification program to get an initial error state
+            rprint(f"[cyan]Non-Python target detected. Running verification program to get initial state...[/cyan]")
+            lang = get_language(os.path.splitext(code_file)[1])
+            verify_cmd = default_verify_cmd_for(lang, unit_test_file)
+            if not verify_cmd:
+                raise ValueError(f"No default verification command for language: {lang}")
+
+            verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, shell=True, stdin=subprocess.DEVNULL)
+            pytest_output = (verify_result.stdout or "") + "\n" + (verify_result.stderr or "")
+            if verify_result.returncode == 0:
+                initial_fails, initial_errors, initial_warnings = 0, 0, 0
+            else:
+                initial_fails, initial_errors, initial_warnings = 1, 0, 0 # Treat any failure as one "fail"
+
         # Store initial state for statistics
         stats = {
             "initial_fails": initial_fails,
@@ -200,14 +245,62 @@ def fix_error_loop(unit_test_file: str,
             "iterations_info": []
         }
     except Exception as e:
-        rprint(f"[red]Error running initial
+        rprint(f"[red]Error running initial test/verification:[/red] {e}")
         return False, "", "", fix_attempts, total_cost, model_name
 
+    # If target is not a Python file, trigger agentic fallback if tests fail
+    if not is_python:
+        if initial_fails > 0 or initial_errors > 0:
+            rprint("[cyan]Non-Python target failed initial verification. Triggering agentic fallback...[/cyan]")
+            error_log_path = Path(error_log_file)
+            error_log_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(error_log_path, "w") as f:
+                f.write(pytest_output)
+
+            rprint(f"[cyan]Attempting agentic fix fallback (prompt_file={prompt_file!r})...[/cyan]")
+            success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_fix(
+                prompt_file=prompt_file,
+                code_file=code_file,
+                unit_test_file=unit_test_file,
+                error_log_file=error_log_file,
+                cwd=Path(prompt_file).parent if prompt_file else None,
+            )
+            if not success:
+                rprint(f"[bold red]Agentic fix fallback failed: {agent_msg}[/bold red]")
+            if agent_changed_files:
+                rprint(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+                for f in agent_changed_files:
+                    rprint(f" • {f}")
+            final_unit_test = ""
+            final_code = ""
+            try:
+                with open(unit_test_file, "r") as f:
+                    final_unit_test = f.read()
+            except Exception:
+                pass
+            try:
+                with open(code_file, "r") as f:
+                    final_code = f.read()
+            except Exception:
+                pass
+            return success, final_unit_test, final_code, 1, agent_cost, agent_model
+        else:
+            # Non-python tests passed, so we are successful.
+            rprint("[green]Non-Python tests passed. No fix needed.[/green]")
+            try:
+                with open(unit_test_file, "r") as f:
+                    final_unit_test = f.read()
+                with open(code_file, "r") as f:
+                    final_code = f.read()
+            except Exception as e:
+                rprint(f"[yellow]Warning: Could not read final files: {e}[/yellow]")
+            return True, final_unit_test, final_code, 0, 0.0, "N/A"
+
     fails, errors, warnings = initial_fails, initial_errors, initial_warnings
 
     # Determine success state immediately
     success = (fails == 0 and errors == 0 and warnings == 0)
-
+
     # Track if tests were initially passing
     initially_passing = success
 
```
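For non-Python targets, the loop now asks `default_verify_cmd_for` from `pdd.agentic_langtest` for a command to exercise the tests and treats any non-zero exit as a single failure. The real language table is not part of this diff; a hypothetical stand-in showing the assumed contract (a shell command string, or `None` for unsupported languages):

```python
# Hypothetical stand-in; the real default_verify_cmd_for lives in pdd/agentic_langtest.py
# and its language-to-command mapping is not shown in this diff.
import subprocess

def default_verify_cmd_for_sketch(language: str, unit_test_file: str) -> str | None:
    commands = {
        "go": f"go test {unit_test_file}",        # assumed example
        "javascript": f"node {unit_test_file}",   # assumed example
    }
    return commands.get(language.lower())

cmd = default_verify_cmd_for_sketch("go", "calculator_test.go")
if cmd:
    # Mirrors the call in the diff: shell=True, stdin closed, output captured.
    result = subprocess.run(cmd, capture_output=True, text=True, shell=True,
                            stdin=subprocess.DEVNULL)
    initial_fails = 0 if result.returncode == 0 else 1
```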
```diff
@@ -244,13 +337,23 @@ def fix_error_loop(unit_test_file: str,
 
             # Update structured log
             log_structure["iterations"][-1]["post_test_output"] = pytest_output
-
+
             # Write formatted log to file
-
+            error_log_path = Path(error_log_file)
+            error_log_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(error_log_path, "w") as elog:
                 elog.write(format_log_for_output(log_structure))
 
             # Set success to True (already determined)
-            #
+            # Read the actual fixed files to return the successful state
+            try:
+                with open(unit_test_file, "r") as f:
+                    final_unit_test = f.read()
+                with open(code_file, "r") as f:
+                    final_code = f.read()
+            except Exception as e:
+                rprint(f"[yellow]Warning: Could not read fixed files: {e}[/yellow]")
+                # Keep empty strings as fallback
             break
 
         iteration_header = f"=== Attempt iteration {iteration} ==="
@@ -328,7 +431,7 @@ def fix_error_loop(unit_test_file: str,
         try:
             # Format the log for the LLM
             formatted_log = format_log_for_output(log_structure)
-
+
             updated_unit_test, updated_code, fixed_unit_test, fixed_code, analysis, cost, model_name = fix_errors_from_unit_tests(
                 unit_test_contents,
                 code_contents,
@@ -338,11 +441,12 @@ def fix_error_loop(unit_test_file: str,
                 strength,
                 temperature,
                 verbose=verbose,
-                time=time
+                time=time # Pass time parameter
             )
 
             # Update the fix attempt in the structured log
             log_structure["iterations"][-1]["fix_attempt"] = analysis
+            log_structure["iterations"][-1]["model_name"] = model_name
         except Exception as e:
             rprint(f"[red]Error during fix_errors_from_unit_tests call:[/red] {e}")
             break
@@ -384,7 +488,7 @@ def fix_error_loop(unit_test_file: str,
         # Run the verification:
         try:
             verify_cmd = [detect_host_python_executable(), verification_program]
-            verify_result = subprocess.run(verify_cmd, capture_output=True, text=True)
+            verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, stdin=subprocess.DEVNULL)
             # Safely handle None for stdout or stderr:
             verify_stdout = verify_result.stdout or ""
             verify_stderr = verify_result.stderr or ""
@@ -414,9 +518,11 @@ def fix_error_loop(unit_test_file: str,
 
         # Update post-test output in structured log
         log_structure["iterations"][-1]["post_test_output"] = pytest_output
-
+
         # Write updated structured log to file after each iteration
-
+        error_log_path = Path(error_log_file)
+        error_log_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(error_log_path, "w") as elog:
             elog.write(format_log_for_output(log_structure))
 
         # Update iteration stats with post-fix results
@@ -480,8 +586,8 @@ def fix_error_loop(unit_test_file: str,
     else:
         stats["best_iteration"] = "final"
 
-    # Read final file contents
-    #
+    # Read final file contents for non-initially-passing tests
+    # (Initially passing tests have files read at lines 344-348)
     try:
         if not initially_passing:
             with open(unit_test_file, "r") as f:
@@ -492,11 +598,6 @@ def fix_error_loop(unit_test_file: str,
         rprint(f"[red]Error reading final files:[/red] {e}")
         final_unit_test, final_code = "", ""
 
-    # Check if we broke out early because tests already passed
-    if stats["best_iteration"] == 0 and fix_attempts == 0:
-        # Still return at least 1 attempt to acknowledge the work done
-        fix_attempts = 1
-
     # Print summary statistics
     rprint("\n[bold cyan]Summary Statistics:[/bold cyan]")
     rprint(f"Initial state: {initial_fails} fails, {initial_errors} errors, {initial_warnings} warnings")
```
```diff
@@ -506,17 +607,62 @@ def fix_error_loop(unit_test_file: str,
 
     # Calculate improvements
     stats["improvement"] = {
-        "fails_reduced": initial_fails - stats[
-        "errors_reduced": initial_errors - stats[
-        "warnings_reduced": initial_warnings - stats[
-        "percent_improvement": 100 if initial_fails + initial_errors + initial_warnings == 0 else
-            (1 - (stats[
+        "fails_reduced": initial_fails - stats['final_fails'],
+        "errors_reduced": initial_errors - stats['final_errors'],
+        "warnings_reduced": initial_warnings - stats['final_warnings'],
+        "percent_improvement": 100 if (initial_fails + initial_errors + initial_warnings) == 0 else
+            (1 - (stats['final_fails'] + stats['final_errors'] + stats['final_warnings']) /
             (initial_fails + initial_errors + initial_warnings)) * 100
     }
 
     rprint(f"Improvement: {stats['improvement']['fails_reduced']} fails, {stats['improvement']['errors_reduced']} errors, {stats['improvement']['warnings_reduced']} warnings")
     rprint(f"Overall improvement: {stats['improvement']['percent_improvement']:.2f}%")
 
+    # Agentic fallback at end adds cost & model (normalized)
+    if not success and agentic_fallback and total_cost < budget:
+        # Ensure error_log_file exists before calling agentic fix
+        # Write the current log structure if it hasn't been written yet
+        try:
+            if not os.path.exists(error_log_file) or os.path.getsize(error_log_file) == 0:
+                error_log_path = Path(error_log_file)
+                error_log_path.parent.mkdir(parents=True, exist_ok=True)
+                with open(error_log_path, "w") as elog:
+                    if log_structure["iterations"]:
+                        elog.write(format_log_for_output(log_structure))
+                    else:
+                        # No iterations ran, write initial state info
+                        elog.write(f"Initial state: {initial_fails} fails, {initial_errors} errors, {initial_warnings} warnings\n")
+                        if 'pytest_output' in locals():
+                            elog.write(f"\n<pytest_output>\n{pytest_output}\n</pytest_output>\n")
+        except Exception as e:
+            rprint(f"[yellow]Warning: Could not write error log before agentic fallback: {e}[/yellow]")
+
+        rprint(f"[cyan]Attempting agentic fix fallback (prompt_file={prompt_file!r})...[/cyan]")
+        agent_success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_fix(
+            prompt_file=prompt_file,
+            code_file=code_file,
+            unit_test_file=unit_test_file,
+            error_log_file=error_log_file,
+            cwd=Path(prompt_file).parent if prompt_file else None,
+        )
+        total_cost += agent_cost
+        if not agent_success:
+            rprint(f"[bold red]Agentic fix fallback failed: {agent_msg}[/bold red]")
+        if agent_changed_files:
+            rprint(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+            for f in agent_changed_files:
+                rprint(f" • {f}")
+        if agent_success:
+            model_name = agent_model or model_name
+            try:
+                with open(unit_test_file, "r") as f:
+                    final_unit_test = f.read()
+                with open(code_file, "r") as f:
+                    final_code = f.read()
+            except Exception as e:
+                rprint(f"[yellow]Warning: Could not read files after successful agentic fix: {e}[/yellow]")
+            success = True
+
     return success, final_unit_test, final_code, fix_attempts, total_cost, model_name
 
 # If this module is run directly for testing purposes:
```
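A quick worked example of the `percent_improvement` formula above, with illustrative counts:

```python
# Illustrative numbers: 3 fails + 1 error + 0 warnings initially, 1 fail remaining at the end.
initial_fails, initial_errors, initial_warnings = 3, 1, 0
final_fails, final_errors, final_warnings = 1, 0, 0

initial_total = initial_fails + initial_errors + initial_warnings
final_total = final_fails + final_errors + final_warnings

percent_improvement = 100 if initial_total == 0 else (1 - final_total / initial_total) * 100
print(f"{percent_improvement:.2f}%")  # 75.00%
```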
```diff
@@ -551,4 +697,4 @@ if __name__ == "__main__":
     rprint(f"Attempts: {attempts}")
     rprint(f"Total cost: ${total_cost:.6f}")
     rprint(f"Model used: {model_name}")
-    rprint(f"Final unit test contents:\n{final_unit_test}")
+    rprint(f"Final unit test contents:\n{final_unit_test}")
```
pdd/fix_errors_from_unit_tests.py
CHANGED
```diff
@@ -114,7 +114,8 @@ def fix_errors_from_unit_tests(
     Fix errors in unit tests using LLM models and log the process.
 
     Args:
-        unit_test (str): The unit test code
+        unit_test (str): The unit test code, potentially multiple files concatenated
+            with <file name="filename.py">...</file> tags.
         code (str): The code under test
         prompt (str): The prompt that generated the code
         error (str): The error message
@@ -244,10 +245,10 @@ def fix_errors_from_unit_tests(
         if verbose:
             console.print(f"[bold red]{error_msg}[/bold red]")
         write_to_error_file(error_file, error_msg)
-        return False, False, "", "", "", 0.0, ""
+        return False, False, "", "", "", 0.0, f"Error: ValidationError - {str(e)[:100]}"
     except Exception as e:
         error_msg = f"Error in fix_errors_from_unit_tests: {str(e)}"
         if verbose:
             console.print(f"[bold red]{error_msg}[/bold red]")
         write_to_error_file(error_file, error_msg)
-        return False, False, "", "", "", 0.0, ""
+        return False, False, "", "", "", 0.0, f"Error: {type(e).__name__}"
```
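The updated docstring says `unit_test` may be several test files concatenated with `<file>` tags. A minimal example of that input format (the tag syntax comes from the docstring; file names and contents are illustrative):

```python
# Illustrative multi-file payload in the format described by the docstring.
unit_test = (
    '<file name="test_math.py">\n'
    "def test_add():\n"
    "    assert 1 + 1 == 2\n"
    "</file>\n"
    '<file name="test_strings.py">\n'
    "def test_upper():\n"
    "    assert 'a'.upper() == 'A'\n"
    "</file>\n"
)
print(unit_test)
```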
pdd/fix_main.py
CHANGED
```diff
@@ -13,7 +13,7 @@ from .preprocess import preprocess
 
 from .construct_paths import construct_paths
 from .fix_errors_from_unit_tests import fix_errors_from_unit_tests
-from .fix_error_loop import fix_error_loop
+from .fix_error_loop import fix_error_loop, run_pytest_on_file
 from .get_jwt_token import get_jwt_token
 from .get_language import get_language
 
@@ -33,7 +33,10 @@ def fix_main(
     verification_program: Optional[str],
     max_attempts: int,
     budget: float,
-    auto_submit: bool
+    auto_submit: bool,
+    agentic_fallback: bool = True,
+    strength: Optional[float] = None,
+    temperature: Optional[float] = None,
 ) -> Tuple[bool, str, str, int, float, str]:
     """
     Main function to fix errors in code and unit tests.
@@ -52,7 +55,7 @@ def fix_main(
         max_attempts: Maximum number of fix attempts
         budget: Maximum cost allowed for fixing
         auto_submit: Whether to auto-submit example if tests pass
-
+        agentic_fallback: Whether the cli agent fallback is triggered
     Returns:
         Tuple containing:
         - Success status (bool)
```
```diff
@@ -69,13 +72,13 @@ def fix_main(
     # Initialize analysis_results to None to prevent reference errors
    analysis_results = None
 
+    # Input validation - let these propagate to caller for proper exit code
+    if not loop:
+        error_path = Path(error_file)
+        if not error_path.exists():
+            raise FileNotFoundError(f"Error file '{error_file}' does not exist.")
+
     try:
-        # Verify error file exists if not in loop mode
-        if not loop:
-            error_path = Path(error_file)
-            if not error_path.exists():
-                raise FileNotFoundError(f"Error file '{error_file}' does not exist.")
-
         # Construct file paths
         input_file_paths = {
             "prompt_file": prompt_file,
@@ -97,12 +100,14 @@ def fix_main(
             quiet=ctx.obj.get('quiet', False),
             command="fix",
             command_options=command_options,
-            create_error_file=loop  # Only create error file if in loop mode
+            create_error_file=loop, # Only create error file if in loop mode
+            context_override=ctx.obj.get('context'),
+            confirm_callback=ctx.obj.get('confirm_callback')
         )
 
-        # Get parameters from context
-        strength = ctx.obj.get('strength', DEFAULT_STRENGTH)
-        temperature = ctx.obj.get('temperature', 0)
+        # Get parameters from context (prefer passed parameters over ctx.obj)
+        strength = strength if strength is not None else ctx.obj.get('strength', DEFAULT_STRENGTH)
+        temperature = temperature if temperature is not None else ctx.obj.get('temperature', 0)
         verbose = ctx.obj.get('verbose', False)
         time = ctx.obj.get('time') # Get time from context
 
```
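The error-file check is now hoisted above the `try` block, and (per the last hunk of this file) the final handler returns an error tuple instead of calling `sys.exit(1)`. A minimal sketch of the resulting control flow, assuming a caller such as the sync orchestrator that inspects the returned tuple; the names here are illustrative, not pdd's API:

```python
from pathlib import Path

def fix_like_flow(error_file: str, loop: bool):
    # Validation runs before the try block, so a missing error file still raises
    # and surfaces as a proper CLI failure.
    if not loop and not Path(error_file).exists():
        raise FileNotFoundError(f"Error file '{error_file}' does not exist.")
    try:
        ...  # the actual fixing work would happen here
        return True, "fixed test", "fixed code", 1, 0.02, "some-model"
    except Exception as e:
        # Runtime failures are reported in the result tuple; the caller decides what to do.
        return False, "", "", 0, 0.0, f"Error: {e}"
```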
```diff
@@ -111,6 +116,7 @@ def fix_main(
             success, fixed_unit_test, fixed_code, attempts, total_cost, model_name = fix_error_loop(
                 unit_test_file=unit_test_file,
                 code_file=code_file,
+                prompt_file=prompt_file,
                 prompt=input_strings["prompt_file"],
                 verification_program=verification_program,
                 strength=strength,
@@ -119,7 +125,8 @@ def fix_main(
                 max_attempts=max_attempts,
                 budget=budget,
                 error_log_file=output_file_paths.get("output_results"),
-                verbose=verbose
+                verbose=verbose,
+                agentic_fallback=agentic_fallback
             )
         else:
             # Use fix_errors_from_unit_tests for single-pass fixing
```
```diff
@@ -134,16 +141,62 @@ def fix_main(
                 time=time, # Pass time to fix_errors_from_unit_tests
                 verbose=verbose
             )
-            success = update_unit_test or update_code
             attempts = 1
 
+            # Issue #158 fix: Validate the fix by running tests instead of
+            # trusting the LLM's suggestion flags (update_unit_test/update_code)
+            if update_unit_test or update_code:
+                # Write fixed files to temp location first, then run tests
+                import tempfile
+                import os as os_module
+
+                # Create temp files for testing
+                test_dir = tempfile.mkdtemp(prefix="pdd_fix_validate_")
+                temp_test_file = os_module.path.join(test_dir, "test_temp.py")
+                temp_code_file = os_module.path.join(test_dir, "code_temp.py")
+
+                try:
+                    # Write the fixed content (or original if not changed)
+                    test_content = fixed_unit_test if fixed_unit_test else input_strings["unit_test_file"]
+                    code_content = fixed_code if fixed_code else input_strings["code_file"]
+
+                    with open(temp_test_file, 'w') as f:
+                        f.write(test_content)
+                    with open(temp_code_file, 'w') as f:
+                        f.write(code_content)
+
+                    # Run pytest on the fixed test file to validate
+                    fails, errors, warnings, test_output = run_pytest_on_file(temp_test_file)
+
+                    # Success only if tests pass (no failures or errors)
+                    success = (fails == 0 and errors == 0)
+
+                    if verbose:
+                        rprint(f"[cyan]Fix validation: {fails} failures, {errors} errors, {warnings} warnings[/cyan]")
+                        if not success:
+                            rprint("[yellow]Fix suggested by LLM did not pass tests[/yellow]")
+                finally:
+                    # Cleanup temp files
+                    import shutil
+                    try:
+                        shutil.rmtree(test_dir)
+                    except Exception:
+                        pass
+            else:
+                # No changes suggested by LLM
+                success = False
+
         # Save fixed files
         if fixed_unit_test:
-
+            output_test_path = Path(output_file_paths["output_test"])
+            output_test_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(output_test_path, 'w') as f:
                 f.write(fixed_unit_test)
 
         if fixed_code:
-
+            output_code_path = Path(output_file_paths["output_code"])
+            output_code_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(output_code_path, 'w') as f:
                 f.write(fixed_code)
 
         # Provide user feedback
```
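The single-pass path now validates an LLM-proposed fix by writing it to a temporary directory and running pytest there, rather than trusting the model's `update_*` flags. A standalone sketch of that pattern (the candidate content is illustrative; it assumes pytest is installed):

```python
import subprocess
import sys
import tempfile
from pathlib import Path

def validate_candidate_fix(test_source: str) -> bool:
    """Write a candidate test file to a temp dir, run pytest, and accept only if it passes."""
    with tempfile.TemporaryDirectory(prefix="fix_validate_") as tmp:
        test_file = Path(tmp) / "test_candidate.py"
        test_file.write_text(test_source)
        result = subprocess.run(
            [sys.executable, "-m", "pytest", str(test_file), "-q"],
            capture_output=True, text=True,
        )
        return result.returncode == 0

candidate = "def test_add():\n    assert 1 + 1 == 2\n"
print(validate_candidate_fix(candidate))
```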
```diff
@@ -286,6 +339,9 @@ def fix_main(
 
         return success, fixed_unit_test, fixed_code, attempts, total_cost, model_name
 
+    except click.Abort:
+        # User cancelled - re-raise to stop the sync loop
+        raise
     except Exception as e:
         if not ctx.obj.get('quiet', False):
             # Safely handle and print MarkupError
@@ -296,4 +352,5 @@ def fix_main(
             # Print other errors normally, escaping the error string
             from rich.markup import escape # Ensure escape is imported
             rprint(f"[bold red]Error:[/bold red] {escape(str(e))}")
-        sys.exit(1)
+        # Return error result instead of sys.exit(1) to allow orchestrator to handle gracefully
+        return False, "", "", 0, 0.0, f"Error: {e}"
```