pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl
This diff compares two publicly released versions of this package as they appear in their respective public registries. It is provided for informational purposes only.
- pdd/__init__.py +40 -8
- pdd/agentic_bug.py +323 -0
- pdd/agentic_bug_orchestrator.py +497 -0
- pdd/agentic_change.py +231 -0
- pdd/agentic_change_orchestrator.py +526 -0
- pdd/agentic_common.py +598 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_e2e_fix.py +319 -0
- pdd/agentic_e2e_fix_orchestrator.py +426 -0
- pdd/agentic_fix.py +1294 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +387 -0
- pdd/agentic_verify.py +183 -0
- pdd/architecture_sync.py +565 -0
- pdd/auth_service.py +210 -0
- pdd/auto_deps_main.py +71 -51
- pdd/auto_include.py +245 -5
- pdd/auto_update.py +125 -47
- pdd/bug_main.py +196 -23
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +350 -150
- pdd/code_generator.py +60 -18
- pdd/code_generator_main.py +790 -57
- pdd/commands/__init__.py +48 -0
- pdd/commands/analysis.py +306 -0
- pdd/commands/auth.py +309 -0
- pdd/commands/connect.py +290 -0
- pdd/commands/fix.py +163 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +175 -0
- pdd/commands/misc.py +87 -0
- pdd/commands/modify.py +256 -0
- pdd/commands/report.py +144 -0
- pdd/commands/sessions.py +284 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +589 -111
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +175 -76
- pdd/continue_generation.py +53 -10
- pdd/core/__init__.py +33 -0
- pdd/core/cli.py +527 -0
- pdd/core/cloud.py +237 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +67 -0
- pdd/core/remote_session.py +61 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +262 -33
- pdd/data/language_format.csv +71 -63
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/docs/prompting_guide.md +864 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
- pdd/fix_code_loop.py +523 -95
- pdd/fix_code_module_errors.py +6 -2
- pdd/fix_error_loop.py +491 -92
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +278 -21
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +529 -286
- pdd/fix_verification_main.py +294 -89
- pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
- pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
- pdd/frontend/dist/index.html +376 -0
- pdd/frontend/dist/logo.svg +33 -0
- pdd/generate_output_paths.py +139 -15
- pdd/generate_test.py +218 -146
- pdd/get_comment.py +19 -44
- pdd/get_extension.py +8 -9
- pdd/get_jwt_token.py +318 -22
- pdd/get_language.py +8 -7
- pdd/get_run_command.py +75 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +13 -4
- pdd/llm_invoke.py +1711 -181
- pdd/load_prompt_template.py +19 -12
- pdd/path_resolution.py +140 -0
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +14 -4
- pdd/preprocess.py +293 -24
- pdd/preprocess_main.py +41 -6
- pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
- pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
- pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
- pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
- pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
- pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
- pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
- pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
- pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
- pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
- pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
- pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
- pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
- pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
- pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
- pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
- pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
- pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
- pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
- pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
- pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
- pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
- pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
- pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
- pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
- pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +925 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +122 -905
- pdd/prompts/change_LLM.prompt +3093 -1
- pdd/prompts/detect_change_LLM.prompt +686 -27
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +41 -7
- pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
- pdd/prompts/increase_tests_LLM.prompt +1 -5
- pdd/prompts/insert_includes_LLM.prompt +316 -186
- pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
- pdd/prompts/prompt_diff_LLM.prompt +82 -0
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/pytest_output.py +127 -12
- pdd/remote_session.py +876 -0
- pdd/render_mermaid.py +236 -0
- pdd/server/__init__.py +52 -0
- pdd/server/app.py +335 -0
- pdd/server/click_executor.py +587 -0
- pdd/server/executor.py +338 -0
- pdd/server/jobs.py +661 -0
- pdd/server/models.py +241 -0
- pdd/server/routes/__init__.py +31 -0
- pdd/server/routes/architecture.py +451 -0
- pdd/server/routes/auth.py +364 -0
- pdd/server/routes/commands.py +929 -0
- pdd/server/routes/config.py +42 -0
- pdd/server/routes/files.py +603 -0
- pdd/server/routes/prompts.py +1322 -0
- pdd/server/routes/websocket.py +473 -0
- pdd/server/security.py +243 -0
- pdd/server/terminal_spawner.py +209 -0
- pdd/server/token_counter.py +222 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +237 -195
- pdd/sync_animation.py +8 -4
- pdd/sync_determine_operation.py +839 -112
- pdd/sync_main.py +351 -57
- pdd/sync_orchestration.py +1400 -756
- pdd/sync_tui.py +848 -0
- pdd/template_expander.py +161 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +237 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +140 -63
- pdd/unfinished_prompt.py +51 -4
- pdd/update_main.py +567 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
- pdd_cli-0.0.118.dist-info/RECORD +227 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.45.dist-info/RECORD +0 -116
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
pdd/fix_error_loop.py
CHANGED

```diff
@@ -5,14 +5,25 @@ import subprocess
 import shutil
 import json
 from datetime import datetime
+from pathlib import Path
+from typing import Tuple, Optional
 
+import requests
 from rich import print as rprint
 from rich.console import Console
+from rich.panel import Panel
 
 # Relative import from an internal module.
+from .get_language import get_language
 from .fix_errors_from_unit_tests import fix_errors_from_unit_tests
-from . import DEFAULT_TIME
+from . import DEFAULT_TIME  # Import DEFAULT_TIME
 from .python_env_detector import detect_host_python_executable
+from .agentic_fix import run_agentic_fix
+from .agentic_langtest import default_verify_cmd_for
+from .core.cloud import CloudConfig
+
+# Cloud request timeout for LLM calls
+CLOUD_FIX_TIMEOUT = 400  # seconds
 
 console = Console()
 
@@ -20,44 +31,190 @@ def escape_brackets(text: str) -> str:
     """Escape square brackets so Rich doesn't misinterpret them."""
     return text.replace("[", "\\[").replace("]", "\\]")
 
-
+
+def cloud_fix_errors(
+    unit_test: str,
+    code: str,
+    prompt: str,
+    error: str,
+    error_file: str,
+    strength: float,
+    temperature: float,
+    verbose: bool = False,
+    time: float = DEFAULT_TIME,
+    code_file_ext: str = ".py"
+) -> Tuple[bool, bool, str, str, str, float, str]:
     """
-
-
+    Call the cloud fixCode endpoint to fix errors in code and unit tests.
+
+    This function has the same interface as fix_errors_from_unit_tests to allow
+    seamless switching between local and cloud execution in the fix loop.
+
+    Args:
+        unit_test: Unit test code string
+        code: Source code string
+        prompt: Prompt that generated the code
+        error: Error messages/logs from test failures
+        error_file: Path to write error analysis (not used in cloud, but kept for interface compatibility)
+        strength: Model strength parameter [0,1]
+        temperature: Model temperature parameter [0,1]
+        verbose: Enable verbose logging
+        time: Time budget for thinking effort
+        code_file_ext: File extension to determine language (e.g., ".py", ".java")
+
+    Returns:
+        Tuple of:
+        - update_unit_test: Whether unit test was updated
+        - update_code: Whether code was updated
+        - fixed_unit_test: Fixed unit test code
+        - fixed_code: Fixed source code
+        - analysis: Analysis/explanation of fixes
+        - total_cost: Cost of the operation
+        - model_name: Name of model used
+
+    Raises:
+        RuntimeError: When cloud execution fails with non-recoverable error
     """
+    jwt_token = CloudConfig.get_jwt_token(verbose=verbose)
+
+    if not jwt_token:
+        raise RuntimeError("Cloud authentication failed - no JWT token available")
+
+    # Build cloud payload
+    payload = {
+        "unitTest": unit_test,
+        "code": code,
+        "prompt": prompt,
+        "errors": error,
+        "language": get_language(code_file_ext),
+        "strength": strength,
+        "temperature": temperature,
+        "time": time if time is not None else 0.25,
+        "verbose": verbose,
+    }
+
+    headers = {
+        "Authorization": f"Bearer {jwt_token}",
+        "Content-Type": "application/json"
+    }
+    cloud_url = CloudConfig.get_endpoint_url("fixCode")
+
+    if verbose:
+        console.print(Panel(f"Calling cloud fix at {cloud_url}", title="[blue]Cloud LLM[/blue]", expand=False))
+
     try:
-
-
-
-
-
-
-
-        try:
-            output = json.loads(result.stdout)
-            test_results = output.get('test_results', [{}])[0]
-
-            # Check pytest's return code first
-            return_code = test_results.get('return_code', 1)
-
-            failures = test_results.get('failures', 0)
-            errors = test_results.get('errors', 0)
-            warnings = test_results.get('warnings', 0)
+        response = requests.post(
+            cloud_url,
+            json=payload,
+            headers=headers,
+            timeout=CLOUD_FIX_TIMEOUT
+        )
+        response.raise_for_status()
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        response_data = response.json()
+        fixed_unit_test = response_data.get("fixedUnitTest", "")
+        fixed_code = response_data.get("fixedCode", "")
+        analysis = response_data.get("analysis", "")
+        total_cost = float(response_data.get("totalCost", 0.0))
+        model_name = response_data.get("modelName", "cloud_model")
+        update_unit_test = response_data.get("updateUnitTest", False)
+        update_code = response_data.get("updateCode", False)
+
+        if verbose:
+            console.print(f"[cyan]Cloud fix completed. Model: {model_name}, Cost: ${total_cost:.6f}[/cyan]")
+
+        return update_unit_test, update_code, fixed_unit_test, fixed_code, analysis, total_cost, model_name
+
+    except requests.exceptions.Timeout:
+        raise RuntimeError(f"Cloud fix timed out after {CLOUD_FIX_TIMEOUT}s")
+
+    except requests.exceptions.HTTPError as e:
+        status_code = e.response.status_code if e.response else 0
+        err_content = e.response.text[:200] if e.response else "No response content"
+
+        # Non-recoverable errors
+        if status_code == 402:
+            try:
+                error_data = e.response.json()
+                current_balance = error_data.get("currentBalance", "unknown")
+                estimated_cost = error_data.get("estimatedCost", "unknown")
+                raise RuntimeError(f"Insufficient credits. Balance: {current_balance}, estimated cost: {estimated_cost}")
+            except json.JSONDecodeError:
+                raise RuntimeError(f"Insufficient credits: {err_content}")
+        elif status_code == 401:
+            raise RuntimeError(f"Authentication failed: {err_content}")
+        elif status_code == 403:
+            raise RuntimeError(f"Access denied: {err_content}")
+        elif status_code == 400:
+            raise RuntimeError(f"Invalid request: {err_content}")
+        else:
+            # 5xx or other errors - raise for caller to handle
+            raise RuntimeError(f"Cloud HTTP error ({status_code}): {err_content}")
+
+    except requests.exceptions.RequestException as e:
+        raise RuntimeError(f"Cloud network error: {e}")
+
+    except json.JSONDecodeError:
+        raise RuntimeError("Cloud returned invalid JSON response")
+
+
+# ---------- Normalize any agentic return shape to a 4-tuple ----------
+def _normalize_agentic_result(result):
+    """
+    Normalize run_agentic_fix result into: (success: bool, msg: str, cost: float, model: str, changed_files: List[str])
+    Handles older 2/3/4-tuple shapes used by tests/monkeypatches.
+    """
+    if isinstance(result, tuple):
+        if len(result) == 5:
+            ok, msg, cost, model, changed_files = result
+            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), list(changed_files or [])
+        if len(result) == 4:
+            ok, msg, cost, model = result
+            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), []
+        if len(result) == 3:
+            ok, msg, cost = result
+            return bool(ok), str(msg), float(cost), "agentic-cli", []
+        if len(result) == 2:
+            ok, msg = result
+            return bool(ok), str(msg), 0.0, "agentic-cli", []
+    # Fallback (shouldn't happen)
+    return False, "Invalid agentic result shape", 0.0, "agentic-cli", []
+
+def _safe_run_agentic_fix(*, prompt_file, code_file, unit_test_file, error_log_file, cwd=None):
+    """
+    Call (possibly monkeypatched) run_agentic_fix and normalize its return.
+    """
+    res = run_agentic_fix(
+        prompt_file=prompt_file,
+        code_file=code_file,
+        unit_test_file=unit_test_file,
+        error_log_file=error_log_file,
+        cwd=cwd,
+    )
+    return _normalize_agentic_result(res)
+# ---------------------------------------------------------------------
+
+
+def run_pytest_on_file(test_file: str) -> tuple[int, int, int, str]:
+    """
+    Run pytest on the specified test file using the subprocess-based runner.
+    Returns a tuple: (failures, errors, warnings, logs)
+    """
+    from .pytest_output import run_pytest_and_capture_output
+    # Use the subprocess-based runner to avoid module caching issues
+    output_data = run_pytest_and_capture_output(test_file)
+
+    # Extract results
+    results = output_data.get("test_results", [{}])[0]
+
+    failures = results.get("failures", 0)
+    errors = results.get("errors", 0)
+    warnings = results.get("warnings", 0)
+
+    # Combine stdout/stderr for the log
+    logs = (results.get("standard_output", "") or "") + "\n" + (results.get("standard_error", "") or "")
+
+    return failures, errors, warnings, logs
 
 def format_log_for_output(log_structure):
     """
```
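The `_normalize_agentic_result` helper added above widens the accepted return shapes of `run_agentic_fix` so that older 2/3/4-tuple results from tests and monkeypatches still work. A minimal sketch of that contract, assuming pdd-cli 0.0.118 is installed; the tuples below are made-up stand-ins for agent results, not real outputs:

```python
# Sketch of the _normalize_agentic_result contract (pdd-cli 0.0.118 assumed installed).
from pdd.fix_error_loop import _normalize_agentic_result

# A legacy 2-tuple (ok, msg) gains default cost, model, and changed-files fields.
assert _normalize_agentic_result((True, "fixed")) == (True, "fixed", 0.0, "agentic-cli", [])

# A modern 5-tuple passes through with types coerced to bool/str/float/list.
ok, msg, cost, model, files = _normalize_agentic_result((1, "done", "0.02", None, ("a.py",)))
assert (ok, cost, model, files) == (True, 0.02, "agentic-cli", ["a.py"])
```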
```diff
@@ -77,6 +234,8 @@ def format_log_for_output(log_structure):
         # Fix attempt with XML tags
         if iteration.get("fix_attempt"):
             formatted_text += f"<fix_attempt iteration={iteration['number']}>\n"
+            if iteration.get("model_name"):
+                formatted_text += f"Model: {iteration['model_name']}\n"
             formatted_text += f"{iteration['fix_attempt']}\n"
             formatted_text += "</fix_attempt>\n\n"
 
@@ -101,6 +260,7 @@ def format_log_for_output(log_structure):
 
 def fix_error_loop(unit_test_file: str,
                    code_file: str,
+                   prompt_file: str,
                    prompt: str,
                    verification_program: str,
                    strength: float,
@@ -109,15 +269,22 @@ def fix_error_loop(unit_test_file: str,
                    budget: float,
                    error_log_file: str = "error_log.txt",
                    verbose: bool = False,
-                   time: float = DEFAULT_TIME):
+                   time: float = DEFAULT_TIME,
+                   agentic_fallback: bool = True,
+                   use_cloud: bool = False):
     """
-    Attempt to fix errors in a unit test and corresponding code using repeated iterations,
-    counting only the number of times we actually call the LLM fix function.
+    Attempt to fix errors in a unit test and corresponding code using repeated iterations,
+    counting only the number of times we actually call the LLM fix function.
     The tests are re-run in the same iteration after a fix to see if we've succeeded,
     so that 'attempts' matches the number of fix attempts (not the total test runs).
 
     This updated version uses structured logging to avoid redundant entries.
 
+    Hybrid Cloud Support:
+    When use_cloud=True, the LLM fix calls are routed to the cloud fixCode endpoint
+    while local test execution (pytest, verification programs) stays local. This allows
+    the loop to pass local test results to the cloud for analysis and fixes.
+
     Inputs:
         unit_test_file: Path to the file containing unit tests.
         code_file: Path to the file containing the code under test.
@@ -130,7 +297,8 @@ def fix_error_loop(unit_test_file: str,
         error_log_file: Path to file to log errors (default: "error_log.txt").
         verbose: Enable verbose logging (default: False).
         time: Time parameter for the fix_errors_from_unit_tests call.
-
+        agentic_fallback: Whether to trigger cli agentic fallback when fix fails.
+        use_cloud: If True, use cloud LLM for fix calls while keeping test execution local.
     Outputs:
         success: Boolean indicating if the overall process succeeded.
         final_unit_test: String contents of the final unit test file.
@@ -185,9 +353,63 @@ def fix_error_loop(unit_test_file: str,
 
     # We do up to max_attempts fix attempts or until budget is exceeded
    iteration = 0
+    # Determine if target is Python (moved before try block for use in exception handler)
+    is_python = str(code_file).lower().endswith(".py")
    # Run an initial test to determine starting state
    try:
-
+        if is_python:
+            initial_fails, initial_errors, initial_warnings, pytest_output = run_pytest_on_file(unit_test_file)
+        else:
+            # For non-Python files, run the verification program to get an initial error state
+            rprint(f"[cyan]Non-Python target detected. Running verification program to get initial state...[/cyan]")
+            lang = get_language(os.path.splitext(code_file)[1])
+            verify_cmd = default_verify_cmd_for(lang, unit_test_file)
+            if not verify_cmd:
+                # No verify command available (e.g., Java without maven/gradle).
+                # Trigger agentic fallback directly.
+                rprint(f"[cyan]No verification command for {lang}. Triggering agentic fallback directly...[/cyan]")
+                error_log_path = Path(error_log_file)
+                error_log_path.parent.mkdir(parents=True, exist_ok=True)
+                if not error_log_path.exists() or error_log_path.stat().st_size == 0:
+                    with open(error_log_path, "w") as f:
+                        f.write(f"No verification command available for language: {lang}\n")
+                        f.write("Agentic fix will attempt to resolve the issue.\n")
+
+                rprint(f"[cyan]Attempting agentic fix fallback (prompt_file={prompt_file!r})...[/cyan]")
+                success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_fix(
+                    prompt_file=prompt_file,
+                    code_file=code_file,
+                    unit_test_file=unit_test_file,
+                    error_log_file=error_log_file,
+                    cwd=None,  # Use project root (cwd), not prompt file's parent
+                )
+                if not success:
+                    rprint(f"[bold red]Agentic fix fallback failed: {agent_msg}[/bold red]")
+                if agent_changed_files:
+                    rprint(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+                    for f in agent_changed_files:
+                        rprint(f"  • {f}")
+                final_unit_test = ""
+                final_code = ""
+                try:
+                    with open(unit_test_file, "r") as f:
+                        final_unit_test = f.read()
+                except Exception:
+                    pass
+                try:
+                    with open(code_file, "r") as f:
+                        final_code = f.read()
+                except Exception:
+                    pass
+                return success, final_unit_test, final_code, 1, agent_cost, agent_model
+
+            verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, shell=True, stdin=subprocess.DEVNULL)
+            pytest_output = (verify_result.stdout or "") + "\n" + (verify_result.stderr or "")
+            if verify_result.returncode == 0:
+                initial_fails, initial_errors, initial_warnings = 0, 0, 0
+            else:
+                initial_fails, initial_errors, initial_warnings = 1, 0, 0  # Treat any failure as one "fail"
+
        # Store initial state for statistics
        stats = {
            "initial_fails": initial_fails,
@@ -200,14 +422,98 @@ def fix_error_loop(unit_test_file: str,
            "iterations_info": []
        }
    except Exception as e:
-        rprint(f"[red]Error running initial
-
+        rprint(f"[red]Error running initial test/verification:[/red] {e}")
+        # Instead of returning early, trigger agentic fallback if enabled (Issue #266)
+        if agentic_fallback:
+            rprint("[cyan]Initial test failed with exception. Triggering agentic fallback...[/cyan]")
+            error_log_path = Path(error_log_file)
+            error_log_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(error_log_path, "w") as f:
+                f.write(f"Initial test/verification failed with exception:\n{e}\n")
+
+            success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_fix(
+                prompt_file=prompt_file,
+                code_file=code_file,
+                unit_test_file=unit_test_file,
+                error_log_file=error_log_file,
+                cwd=None,
+            )
+            if not success:
+                rprint(f"[bold red]Agentic fix fallback failed: {agent_msg}[/bold red]")
+            if agent_changed_files:
+                rprint(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+                for f in agent_changed_files:
+                    rprint(f"  • {f}")
+            final_unit_test = ""
+            final_code = ""
+            try:
+                with open(unit_test_file, "r") as f:
+                    final_unit_test = f.read()
+            except Exception:
+                pass
+            try:
+                with open(code_file, "r") as f:
+                    final_code = f.read()
+            except Exception:
+                pass
+            return success, final_unit_test, final_code, 1, agent_cost, agent_model
+        else:
+            # Agentic fallback disabled, return failure
+            return False, "", "", fix_attempts, total_cost, model_name
+
+    # If target is not a Python file, trigger agentic fallback if tests fail
+    if not is_python:
+        if initial_fails > 0 or initial_errors > 0:
+            rprint("[cyan]Non-Python target failed initial verification. Triggering agentic fallback...[/cyan]")
+            error_log_path = Path(error_log_file)
+            error_log_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(error_log_path, "w") as f:
+                f.write(pytest_output)
+
+            rprint(f"[cyan]Attempting agentic fix fallback (prompt_file={prompt_file!r})...[/cyan]")
+            success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_fix(
+                prompt_file=prompt_file,
+                code_file=code_file,
+                unit_test_file=unit_test_file,
+                error_log_file=error_log_file,
+                cwd=None,  # Use project root (cwd), not prompt file's parent
+            )
+            if not success:
+                rprint(f"[bold red]Agentic fix fallback failed: {agent_msg}[/bold red]")
+            if agent_changed_files:
+                rprint(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+                for f in agent_changed_files:
+                    rprint(f"  • {f}")
+            final_unit_test = ""
+            final_code = ""
+            try:
+                with open(unit_test_file, "r") as f:
+                    final_unit_test = f.read()
+            except Exception:
+                pass
+            try:
+                with open(code_file, "r") as f:
+                    final_code = f.read()
+            except Exception:
+                pass
+            return success, final_unit_test, final_code, 1, agent_cost, agent_model
+        else:
+            # Non-python tests passed, so we are successful.
+            rprint("[green]Non-Python tests passed. No fix needed.[/green]")
+            try:
+                with open(unit_test_file, "r") as f:
+                    final_unit_test = f.read()
+                with open(code_file, "r") as f:
+                    final_code = f.read()
+            except Exception as e:
+                rprint(f"[yellow]Warning: Could not read final files: {e}[/yellow]")
+            return True, final_unit_test, final_code, 0, 0.0, "N/A"
 
    fails, errors, warnings = initial_fails, initial_errors, initial_warnings
 
    # Determine success state immediately
    success = (fails == 0 and errors == 0 and warnings == 0)
-
+
    # Track if tests were initially passing
    initially_passing = success
 
@@ -244,13 +550,23 @@ def fix_error_loop(unit_test_file: str,
 
            # Update structured log
            log_structure["iterations"][-1]["post_test_output"] = pytest_output
-
+
            # Write formatted log to file
-
+            error_log_path = Path(error_log_file)
+            error_log_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(error_log_path, "w") as elog:
                elog.write(format_log_for_output(log_structure))
 
            # Set success to True (already determined)
-            #
+            # Read the actual fixed files to return the successful state
+            try:
+                with open(unit_test_file, "r") as f:
+                    final_unit_test = f.read()
+                with open(code_file, "r") as f:
+                    final_code = f.read()
+            except Exception as e:
+                rprint(f"[yellow]Warning: Could not read fixed files: {e}[/yellow]")
+                # Keep empty strings as fallback
            break
 
        iteration_header = f"=== Attempt iteration {iteration} ==="
@@ -280,17 +596,18 @@ def fix_error_loop(unit_test_file: str,
            break
 
        # We only attempt to fix if test is failing or has warnings:
-        # Let's create backups
-
-
-
-
-
-
-
-
-        )
+        # Let's create backups in .pdd/backups/ to avoid polluting code/test directories
+        code_name = os.path.basename(code_file)
+        code_basename = os.path.splitext(code_name)[0]
+        unit_test_name = os.path.basename(unit_test_file)
+        unit_test_ext = os.path.splitext(unit_test_name)[1]
+        code_ext = os.path.splitext(code_name)[1]
+
+        backup_dir = Path.cwd() / '.pdd' / 'backups' / code_basename / timestamp
+        backup_dir.mkdir(parents=True, exist_ok=True)
+
+        unit_test_backup = str(backup_dir / f"test_{iteration}_{errors}_{fails}_{warnings}{unit_test_ext}")
+        code_backup = str(backup_dir / f"code_{iteration}_{errors}_{fails}_{warnings}{code_ext}")
        try:
            shutil.copy(unit_test_file, unit_test_backup)
            shutil.copy(code_file, code_backup)
@@ -299,7 +616,8 @@ def fix_error_loop(unit_test_file: str,
            rprint(f"[green]Created backup for code file:[/green] {code_backup}")
        except Exception as e:
            rprint(f"[red]Error creating backup files:[/red] {e}")
-
+            success = False
+            break  # Exit loop but continue to agentic fallback (Issue #266)
 
        # Update best iteration if needed:
        if (errors < best_iteration_info["errors"] or
@@ -322,29 +640,67 @@ def fix_error_loop(unit_test_file: str,
                code_contents = f.read()
        except Exception as e:
            rprint(f"[red]Error reading input files:[/red] {e}")
-
+            success = False
+            break  # Exit loop but continue to agentic fallback (Issue #266)
 
-        # Call fix:
+        # Call fix (cloud or local based on use_cloud parameter):
        try:
-            # Format the log for the LLM
+            # Format the log for the LLM - includes local test results
            formatted_log = format_log_for_output(log_structure)
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+            if use_cloud:
+                # Use cloud LLM for fix - local test results passed via formatted_log
+                try:
+                    updated_unit_test, updated_code, fixed_unit_test, fixed_code, analysis, cost, model_name = cloud_fix_errors(
+                        unit_test=unit_test_contents,
+                        code=code_contents,
+                        prompt=prompt,
+                        error=formatted_log,  # Pass local test results to cloud
+                        error_file=error_log_file,
+                        strength=strength,
+                        temperature=temperature,
+                        verbose=verbose,
+                        time=time,
+                        code_file_ext=os.path.splitext(code_file)[1]
+                    )
+                except RuntimeError as cloud_err:
+                    # Cloud failed - fall back to local if it's a recoverable error
+                    if "Insufficient credits" in str(cloud_err) or "Authentication failed" in str(cloud_err) or "Access denied" in str(cloud_err):
+                        # Non-recoverable errors - stop the loop
+                        rprint(f"[red]Cloud fix error (non-recoverable):[/red] {cloud_err}")
+                        break
+                    # Recoverable errors - fall back to local
+                    rprint(f"[yellow]Cloud fix failed, falling back to local:[/yellow] {cloud_err}")
+                    updated_unit_test, updated_code, fixed_unit_test, fixed_code, analysis, cost, model_name = fix_errors_from_unit_tests(
+                        unit_test_contents,
+                        code_contents,
+                        prompt,
+                        formatted_log,
+                        error_log_file,
+                        strength,
+                        temperature,
+                        verbose=verbose,
+                        time=time
+                    )
+            else:
+                # Use local LLM for fix
+                updated_unit_test, updated_code, fixed_unit_test, fixed_code, analysis, cost, model_name = fix_errors_from_unit_tests(
+                    unit_test_contents,
+                    code_contents,
+                    prompt,
+                    formatted_log,  # Use formatted log instead of reading the file
+                    error_log_file,
+                    strength,
+                    temperature,
+                    verbose=verbose,
+                    time=time  # Pass time parameter
+                )
+
            # Update the fix attempt in the structured log
            log_structure["iterations"][-1]["fix_attempt"] = analysis
+            log_structure["iterations"][-1]["model_name"] = model_name
        except Exception as e:
-            rprint(f"[red]Error during
+            rprint(f"[red]Error during fix call:[/red] {e}")
            break
 
        fix_attempts += 1  # We used one fix attempt
```
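The `use_cloud` branch in the hunk above is a cloud-first call with a local fallback, keyed off the error message: credit, auth, and access errors abort the loop, while anything else (timeouts, network errors, 5xx) falls back to the local fixer. A self-contained sketch of that control flow; `remote_fix` and `local_fix` are illustrative stand-ins, not part of the pdd API:

```python
# Distilled sketch of the cloud-first/local-fallback pattern used above.
# remote_fix and local_fix are hypothetical stand-ins, not pdd functions.
NON_RECOVERABLE = ("Insufficient credits", "Authentication failed", "Access denied")

def remote_fix() -> str:
    raise RuntimeError("Cloud network error: connection reset")  # recoverable

def local_fix() -> str:
    return "fixed locally"

def fix_with_fallback() -> str:
    try:
        return remote_fix()
    except RuntimeError as err:
        if any(marker in str(err) for marker in NON_RECOVERABLE):
            raise  # non-recoverable: surface the error instead of retrying
        return local_fix()  # recoverable: fall back to the local path

print(fix_with_fallback())  # -> "fixed locally"
```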
```diff
@@ -384,7 +740,7 @@ def fix_error_loop(unit_test_file: str,
        # Run the verification:
        try:
            verify_cmd = [detect_host_python_executable(), verification_program]
-            verify_result = subprocess.run(verify_cmd, capture_output=True, text=True)
+            verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, stdin=subprocess.DEVNULL)
            # Safely handle None for stdout or stderr:
            verify_stdout = verify_result.stdout or ""
            verify_stderr = verify_result.stderr or ""
@@ -414,9 +770,11 @@ def fix_error_loop(unit_test_file: str,
 
        # Update post-test output in structured log
        log_structure["iterations"][-1]["post_test_output"] = pytest_output
-
+
        # Write updated structured log to file after each iteration
-
+        error_log_path = Path(error_log_file)
+        error_log_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(error_log_path, "w") as elog:
            elog.write(format_log_for_output(log_structure))
 
        # Update iteration stats with post-fix results
@@ -438,7 +796,8 @@ def fix_error_loop(unit_test_file: str,
            stats["final_warnings"] = warnings
        except Exception as e:
            rprint(f"[red]Error running pytest for next iteration:[/red] {e}")
-
+            success = False
+            break  # Exit loop but continue to agentic fallback (Issue #266)
 
    # Possibly restore best iteration if the final run is not as good:
    if best_iteration_info["attempt"] is not None and not success:
@@ -480,8 +839,8 @@ def fix_error_loop(unit_test_file: str,
    else:
        stats["best_iteration"] = "final"
 
-    # Read final file contents
-    #
+    # Read final file contents for non-initially-passing tests
+    # (Initially passing tests have files read at lines 344-348)
    try:
        if not initially_passing:
            with open(unit_test_file, "r") as f:
@@ -492,11 +851,6 @@ def fix_error_loop(unit_test_file: str,
        rprint(f"[red]Error reading final files:[/red] {e}")
        final_unit_test, final_code = "", ""
 
-    # Check if we broke out early because tests already passed
-    if stats["best_iteration"] == 0 and fix_attempts == 0:
-        # Still return at least 1 attempt to acknowledge the work done
-        fix_attempts = 1
-
    # Print summary statistics
    rprint("\n[bold cyan]Summary Statistics:[/bold cyan]")
    rprint(f"Initial state: {initial_fails} fails, {initial_errors} errors, {initial_warnings} warnings")
@@ -506,17 +860,62 @@ def fix_error_loop(unit_test_file: str,
 
    # Calculate improvements
    stats["improvement"] = {
-        "fails_reduced": initial_fails - stats[
-        "errors_reduced": initial_errors - stats[
-        "warnings_reduced": initial_warnings - stats[
-        "percent_improvement": 100 if initial_fails + initial_errors + initial_warnings == 0 else
-            (1 - (stats[
+        "fails_reduced": initial_fails - stats['final_fails'],
+        "errors_reduced": initial_errors - stats['final_errors'],
+        "warnings_reduced": initial_warnings - stats['final_warnings'],
+        "percent_improvement": 100 if (initial_fails + initial_errors + initial_warnings) == 0 else
+            (1 - (stats['final_fails'] + stats['final_errors'] + stats['final_warnings']) /
            (initial_fails + initial_errors + initial_warnings)) * 100
    }
 
    rprint(f"Improvement: {stats['improvement']['fails_reduced']} fails, {stats['improvement']['errors_reduced']} errors, {stats['improvement']['warnings_reduced']} warnings")
    rprint(f"Overall improvement: {stats['improvement']['percent_improvement']:.2f}%")
 
+    # Agentic fallback at end adds cost & model (normalized)
+    if not success and agentic_fallback and total_cost < budget:
+        # Ensure error_log_file exists before calling agentic fix
+        # Write the current log structure if it hasn't been written yet
+        try:
+            if not os.path.exists(error_log_file) or os.path.getsize(error_log_file) == 0:
+                error_log_path = Path(error_log_file)
+                error_log_path.parent.mkdir(parents=True, exist_ok=True)
+                with open(error_log_path, "w") as elog:
+                    if log_structure["iterations"]:
+                        elog.write(format_log_for_output(log_structure))
+                    else:
+                        # No iterations ran, write initial state info
+                        elog.write(f"Initial state: {initial_fails} fails, {initial_errors} errors, {initial_warnings} warnings\n")
+                        if 'pytest_output' in locals():
+                            elog.write(f"\n<pytest_output>\n{pytest_output}\n</pytest_output>\n")
+        except Exception as e:
+            rprint(f"[yellow]Warning: Could not write error log before agentic fallback: {e}[/yellow]")
+
+        rprint(f"[cyan]Attempting agentic fix fallback (prompt_file={prompt_file!r})...[/cyan]")
+        agent_success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_fix(
+            prompt_file=prompt_file,
+            code_file=code_file,
+            unit_test_file=unit_test_file,
+            error_log_file=error_log_file,
+            cwd=None,  # Use project root (cwd), not prompt file's parent
+        )
+        total_cost += agent_cost
+        if not agent_success:
+            rprint(f"[bold red]Agentic fix fallback failed: {agent_msg}[/bold red]")
+        if agent_changed_files:
+            rprint(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+            for f in agent_changed_files:
+                rprint(f"  • {f}")
+        if agent_success:
+            model_name = agent_model or model_name
+            try:
+                with open(unit_test_file, "r") as f:
+                    final_unit_test = f.read()
+                with open(code_file, "r") as f:
+                    final_code = f.read()
+            except Exception as e:
+                rprint(f"[yellow]Warning: Could not read files after successful agentic fix: {e}[/yellow]")
+            success = True
+
    return success, final_unit_test, final_code, fix_attempts, total_cost, model_name
 
 # If this module is run directly for testing purposes:
@@ -551,4 +950,4 @@ if __name__ == "__main__":
    rprint(f"Attempts: {attempts}")
    rprint(f"Total cost: ${total_cost:.6f}")
    rprint(f"Model used: {model_name}")
-    rprint(f"Final unit test contents:\n{final_unit_test}")
+    rprint(f"Final unit test contents:\n{final_unit_test}")
```
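For orientation, the new `run_pytest_on_file` helper added in this version returns the `(failures, errors, warnings, logs)` tuple the loop uses, and the loop counts a run as successful only when all three counters are zero. A minimal driver, assuming pdd-cli 0.0.118 is installed; `tests/test_example.py` is a placeholder path, not a file shipped with the package:

```python
# Minimal driver for run_pytest_on_file (pdd-cli 0.0.118 assumed installed).
# "tests/test_example.py" is a placeholder test file path.
from pdd.fix_error_loop import run_pytest_on_file

failures, errors, warnings, logs = run_pytest_on_file("tests/test_example.py")

# The fix loop above treats a run as successful only when all counters are zero.
success = (failures == 0 and errors == 0 and warnings == 0)
print(f"success={success}\n{logs}")
```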