pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdd/__init__.py +40 -8
- pdd/agentic_bug.py +323 -0
- pdd/agentic_bug_orchestrator.py +497 -0
- pdd/agentic_change.py +231 -0
- pdd/agentic_change_orchestrator.py +526 -0
- pdd/agentic_common.py +598 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_e2e_fix.py +319 -0
- pdd/agentic_e2e_fix_orchestrator.py +426 -0
- pdd/agentic_fix.py +1294 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +387 -0
- pdd/agentic_verify.py +183 -0
- pdd/architecture_sync.py +565 -0
- pdd/auth_service.py +210 -0
- pdd/auto_deps_main.py +71 -51
- pdd/auto_include.py +245 -5
- pdd/auto_update.py +125 -47
- pdd/bug_main.py +196 -23
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +350 -150
- pdd/code_generator.py +60 -18
- pdd/code_generator_main.py +790 -57
- pdd/commands/__init__.py +48 -0
- pdd/commands/analysis.py +306 -0
- pdd/commands/auth.py +309 -0
- pdd/commands/connect.py +290 -0
- pdd/commands/fix.py +163 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +175 -0
- pdd/commands/misc.py +87 -0
- pdd/commands/modify.py +256 -0
- pdd/commands/report.py +144 -0
- pdd/commands/sessions.py +284 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +589 -111
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +175 -76
- pdd/continue_generation.py +53 -10
- pdd/core/__init__.py +33 -0
- pdd/core/cli.py +527 -0
- pdd/core/cloud.py +237 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +67 -0
- pdd/core/remote_session.py +61 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +262 -33
- pdd/data/language_format.csv +71 -63
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/docs/prompting_guide.md +864 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
- pdd/fix_code_loop.py +523 -95
- pdd/fix_code_module_errors.py +6 -2
- pdd/fix_error_loop.py +491 -92
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +278 -21
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +529 -286
- pdd/fix_verification_main.py +294 -89
- pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
- pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
- pdd/frontend/dist/index.html +376 -0
- pdd/frontend/dist/logo.svg +33 -0
- pdd/generate_output_paths.py +139 -15
- pdd/generate_test.py +218 -146
- pdd/get_comment.py +19 -44
- pdd/get_extension.py +8 -9
- pdd/get_jwt_token.py +318 -22
- pdd/get_language.py +8 -7
- pdd/get_run_command.py +75 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +13 -4
- pdd/llm_invoke.py +1711 -181
- pdd/load_prompt_template.py +19 -12
- pdd/path_resolution.py +140 -0
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +14 -4
- pdd/preprocess.py +293 -24
- pdd/preprocess_main.py +41 -6
- pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
- pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
- pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
- pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
- pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
- pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
- pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
- pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
- pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
- pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
- pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
- pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
- pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
- pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
- pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
- pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
- pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
- pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
- pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
- pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
- pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
- pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
- pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
- pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
- pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
- pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +925 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +122 -905
- pdd/prompts/change_LLM.prompt +3093 -1
- pdd/prompts/detect_change_LLM.prompt +686 -27
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +41 -7
- pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
- pdd/prompts/increase_tests_LLM.prompt +1 -5
- pdd/prompts/insert_includes_LLM.prompt +316 -186
- pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
- pdd/prompts/prompt_diff_LLM.prompt +82 -0
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/pytest_output.py +127 -12
- pdd/remote_session.py +876 -0
- pdd/render_mermaid.py +236 -0
- pdd/server/__init__.py +52 -0
- pdd/server/app.py +335 -0
- pdd/server/click_executor.py +587 -0
- pdd/server/executor.py +338 -0
- pdd/server/jobs.py +661 -0
- pdd/server/models.py +241 -0
- pdd/server/routes/__init__.py +31 -0
- pdd/server/routes/architecture.py +451 -0
- pdd/server/routes/auth.py +364 -0
- pdd/server/routes/commands.py +929 -0
- pdd/server/routes/config.py +42 -0
- pdd/server/routes/files.py +603 -0
- pdd/server/routes/prompts.py +1322 -0
- pdd/server/routes/websocket.py +473 -0
- pdd/server/security.py +243 -0
- pdd/server/terminal_spawner.py +209 -0
- pdd/server/token_counter.py +222 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +237 -195
- pdd/sync_animation.py +8 -4
- pdd/sync_determine_operation.py +839 -112
- pdd/sync_main.py +351 -57
- pdd/sync_orchestration.py +1400 -756
- pdd/sync_tui.py +848 -0
- pdd/template_expander.py +161 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +237 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +140 -63
- pdd/unfinished_prompt.py +51 -4
- pdd/update_main.py +567 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
- pdd_cli-0.0.118.dist-info/RECORD +227 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.45.dist-info/RECORD +0 -116
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
pdd/fix_code_loop.py
CHANGED
|
@@ -1,30 +1,289 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import json
|
|
1
3
|
import os
|
|
2
4
|
import shutil
|
|
3
5
|
import subprocess
|
|
4
6
|
import sys
|
|
7
|
+
import threading
|
|
5
8
|
from pathlib import Path
|
|
6
|
-
from typing import Tuple, Optional, Union
|
|
7
|
-
|
|
9
|
+
from typing import Tuple, Optional, Union, List
|
|
10
|
+
|
|
11
|
+
import requests
|
|
12
|
+
|
|
13
|
+
# Try to import DEFAULT_TIME, with fallback
|
|
14
|
+
try:
|
|
15
|
+
from . import DEFAULT_TIME
|
|
16
|
+
except ImportError:
|
|
17
|
+
DEFAULT_TIME = 0.5
|
|
18
|
+
|
|
19
|
+
# Try to import agentic modules, with fallbacks
|
|
20
|
+
try:
|
|
21
|
+
from .agentic_crash import run_agentic_crash
|
|
22
|
+
except ImportError:
|
|
23
|
+
def run_agentic_crash(**kwargs):
|
|
24
|
+
return (False, "Agentic crash handler not available", 0.0, "N/A", [])
|
|
25
|
+
|
|
26
|
+
try:
|
|
27
|
+
from .get_language import get_language
|
|
28
|
+
except ImportError:
|
|
29
|
+
def get_language(ext):
|
|
30
|
+
return "unknown"
|
|
31
|
+
|
|
32
|
+
try:
|
|
33
|
+
from .agentic_langtest import default_verify_cmd_for
|
|
34
|
+
except ImportError:
|
|
35
|
+
def default_verify_cmd_for(lang, verification_program):
|
|
36
|
+
return None
|
|
37
|
+
|
|
38
|
+
def _normalize_agentic_result(result):
|
|
39
|
+
"""
|
|
40
|
+
Normalize run_agentic_crash result into: (success: bool, msg: str, cost: float, model: str, changed_files: List[str])
|
|
41
|
+
Handles older 2/3/4-tuple shapes used by tests/monkeypatches.
|
|
42
|
+
"""
|
|
43
|
+
if isinstance(result, tuple):
|
|
44
|
+
if len(result) == 5:
|
|
45
|
+
ok, msg, cost, model, changed_files = result
|
|
46
|
+
return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), list(changed_files or [])
|
|
47
|
+
if len(result) == 4:
|
|
48
|
+
ok, msg, cost, model = result
|
|
49
|
+
return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), []
|
|
50
|
+
if len(result) == 3:
|
|
51
|
+
ok, msg, cost = result
|
|
52
|
+
return bool(ok), str(msg), float(cost), "agentic-cli", []
|
|
53
|
+
if len(result) == 2:
|
|
54
|
+
ok, msg = result
|
|
55
|
+
return bool(ok), str(msg), 0.0, "agentic-cli", []
|
|
56
|
+
# Fallback (shouldn't happen)
|
|
57
|
+
return False, "Invalid agentic result shape", 0.0, "agentic-cli", []
|
|
58
|
+
|
|
59
|
+
def _safe_run_agentic_crash(*, prompt_file, code_file, program_file, crash_log_file, cwd=None):
|
|
60
|
+
"""
|
|
61
|
+
Call (possibly monkeypatched) run_agentic_crash and normalize its return.
|
|
62
|
+
Maps arguments to the expected signature of run_agentic_crash.
|
|
63
|
+
|
|
64
|
+
Note: cwd parameter is accepted for compatibility but not passed to run_agentic_crash
|
|
65
|
+
as it determines the working directory from prompt_file.parent internally.
|
|
66
|
+
"""
|
|
67
|
+
if not prompt_file:
|
|
68
|
+
return False, "Agentic fix requires a valid prompt file.", 0.0, "agentic-cli", []
|
|
69
|
+
|
|
70
|
+
try:
|
|
71
|
+
# Ensure inputs are Path objects as expected by run_agentic_crash
|
|
72
|
+
call_args = {
|
|
73
|
+
"prompt_file": Path(prompt_file),
|
|
74
|
+
"code_file": Path(code_file),
|
|
75
|
+
"program_file": Path(program_file),
|
|
76
|
+
"crash_log_file": Path(crash_log_file),
|
|
77
|
+
"verbose": True,
|
|
78
|
+
"quiet": False,
|
|
79
|
+
}
|
|
80
|
+
# Note: cwd is not passed - run_agentic_crash uses prompt_file.parent as project root
|
|
81
|
+
|
|
82
|
+
res = run_agentic_crash(**call_args)
|
|
83
|
+
return _normalize_agentic_result(res)
|
|
84
|
+
except Exception as e:
|
|
85
|
+
return False, f"Agentic crash handler failed: {e}", 0.0, "agentic-cli", []
|
|
8
86
|
|
|
9
87
|
# Use Rich for pretty printing to the console
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
console = Console(record=True)
|
|
13
|
-
rprint = console.print
|
|
88
|
+
try:
|
|
89
|
+
from rich.console import Console
|
|
90
|
+
console = Console(record=True)
|
|
91
|
+
rprint = console.print
|
|
92
|
+
except ImportError:
|
|
93
|
+
# Fallback if Rich is not available
|
|
94
|
+
def rprint(*args, **kwargs):
|
|
95
|
+
print(*args)
|
|
96
|
+
|
|
97
|
+
# Cloud configuration
|
|
98
|
+
try:
|
|
99
|
+
from .core.cloud import CloudConfig
|
|
100
|
+
CLOUD_AVAILABLE = True
|
|
101
|
+
except ImportError:
|
|
102
|
+
CLOUD_AVAILABLE = False
|
|
103
|
+
CloudConfig = None
|
|
104
|
+
|
|
105
|
+
# Cloud request timeout for crash fix
|
|
106
|
+
CLOUD_REQUEST_TIMEOUT = 400 # seconds
|
|
107
|
+
|
|
108
|
+
def cloud_crash_fix(
    program: str,
    prompt: str,
    code: str,
    errors: str,
    strength: float,
    temperature: float,
    time: float,
    verbose: bool,
    program_path: str = "",
    code_path: str = "",
    language: str = "python",
) -> Tuple[bool, bool, str, str, str, float, Optional[str]]:
    """
    Call the cloud crashCode endpoint for an LLM crash fix.

    Args:
        program: Content of the program that exercises the code.
        prompt: Content of the prompt that generated the code.
        code: Content of the code module to fix.
        errors: Error output to send for analysis.
        strength: Strength value sent to the endpoint.
        temperature: Temperature value sent to the endpoint.
        time: Time value sent to the endpoint; 0.25 is sent when None.
        verbose: Sent to the endpoint and used when fetching the JWT token.
        program_path: Path of the program file, sent as "programPath".
        code_path: Path of the code file, sent as "codePath".
        language: Language name sent to the endpoint.

    Returns:
        Tuple of (update_program, update_code, fixed_program, fixed_code,
        analysis, cost, model_name). Fields that are missing or null in the
        response fall back to False / "" / 0.0 / "cloud_model".

    Raises:
        RuntimeError: If cloud configuration is unavailable or no JWT token
            could be obtained.
        requests.RequestException: On network failure, timeout, or a
            non-2xx response (via raise_for_status).
    """
    if not CLOUD_AVAILABLE or CloudConfig is None:
        raise RuntimeError("Cloud configuration not available")

    jwt_token = CloudConfig.get_jwt_token(verbose=verbose)
    if not jwt_token:
        raise RuntimeError("Cloud authentication failed - no JWT token")

    payload = {
        "programContent": program,
        "promptContent": prompt,
        "codeContent": code,
        "errorContent": errors,
        "language": language,
        "strength": strength,
        "temperature": temperature,
        "time": time if time is not None else 0.25,
        "verbose": verbose,
        "programPath": program_path,
        "codePath": code_path,
    }

    headers = {
        "Authorization": f"Bearer {jwt_token}",
        "Content-Type": "application/json",
    }
    cloud_url = CloudConfig.get_endpoint_url("crashCode")

    response = requests.post(
        cloud_url,
        json=payload,
        headers=headers,
        timeout=CLOUD_REQUEST_TIMEOUT,
    )
    response.raise_for_status()

    response_data = response.json()

    # dict.get only applies its default when the key is absent; a JSON null
    # would otherwise come through as None and break float() or leak None
    # into values typed as str. "or <default>" covers both cases.
    fixed_code = response_data.get("fixedCode") or ""
    fixed_program = response_data.get("fixedProgram") or ""
    update_code = bool(response_data.get("updateCode"))
    update_program = bool(response_data.get("updateProgram"))
    analysis = response_data.get("analysis") or ""
    cost = float(response_data.get("totalCost") or 0.0)
    model_name = response_data.get("modelName") or "cloud_model"

    return update_program, update_code, fixed_program, fixed_code, analysis, cost, model_name
|
|
172
|
+
|
|
14
173
|
|
|
15
174
|
# Use relative import for internal modules
|
|
16
175
|
try:
|
|
17
|
-
# Attempt relative import for package context
|
|
18
176
|
from .fix_code_module_errors import fix_code_module_errors
|
|
19
177
|
except ImportError:
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
178
|
+
try:
|
|
179
|
+
from fix_code_module_errors import fix_code_module_errors
|
|
180
|
+
except ImportError:
|
|
181
|
+
# Provide a stub that will fail gracefully
|
|
182
|
+
def fix_code_module_errors(**kwargs):
|
|
183
|
+
return (False, False, "", "", "Module not available", 0.0, None)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class ProcessResult:
    """
    Plain holder for the outcome of one verification-program run.

    Exposes the three attributes read by the fix loop: returncode, stdout
    and stderr.
    """

    def __init__(self, returncode, stdout, stderr):
        self.returncode = returncode  # exit status; -1 is used for failures to run
        self.stdout = stdout          # captured standard output
        self.stderr = stderr          # captured standard error

    def __repr__(self):
        # Handy when a result shows up in logs or a debugger.
        return (
            f"{type(self).__name__}(returncode={self.returncode!r}, "
            f"stdout={self.stdout!r}, stderr={self.stderr!r})"
        )
|
|
191
|
+
|
|
192
|
+
def run_process_with_output(cmd_args, timeout=300):
    """
    Run a command to completion and capture its stdout and stderr.

    The child's stdin is subprocess.DEVNULL, so it cannot read input from
    the user. Output is collected by two reader threads and returned as
    text; it is not echoed to the console.

    Uses start_new_session=True to create a new process group, allowing us to
    kill all child processes if the main process times out.

    Args:
        cmd_args: Argument list passed to subprocess.Popen.
        timeout: Seconds to wait for the process before killing it.

    Returns:
        Tuple of (returncode, stdout, stderr). returncode is -1 when the
        process could not be started (stderr then holds the exception text)
        or when no exit status was obtained. On timeout "[Timeout]" is
        appended to stderr.
    """
    import signal

    try:
        proc = subprocess.Popen(
            cmd_args,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            bufsize=0,
            start_new_session=True,  # Create new process group for clean termination
        )
    except Exception as e:
        return -1, "", str(e)

    captured_stdout = []
    captured_stderr = []

    def drain_pipe(pipe, capture_list):
        # With bufsize=0 the pipe is unbuffered, so read(n) returns as soon
        # as any data is available (at most n bytes). Reading in large
        # chunks avoids one system call per byte of output.
        while True:
            try:
                chunk = pipe.read(65536)
            except (ValueError, OSError):
                # Raised when the pipe is closed underneath the reader.
                break
            if not chunk:
                break
            capture_list.append(chunk)

    t_out = threading.Thread(target=drain_pipe, args=(proc.stdout, captured_stdout), daemon=True)
    t_err = threading.Thread(target=drain_pipe, args=(proc.stderr, captured_stderr), daemon=True)

    t_out.start()
    t_err.start()

    timed_out = False
    try:
        proc.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        timed_out = True
        captured_stderr.append(b"\n[Timeout]\n")

    # Kill process and entire process group if needed
    if timed_out or proc.returncode is None:
        # os.killpg / signal.SIGKILL are not available on every platform;
        # without this guard the AttributeError would escape the handler
        # below and abort the caller.
        if hasattr(os, "killpg"):
            try:
                # Kill entire process group to handle forked children
                os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
            except (ProcessLookupError, OSError):
                # Process group may already be dead
                pass
        try:
            proc.kill()
            proc.wait(timeout=5)
        except Exception:
            pass

    # Wait for threads to finish reading with timeout
    # For normal completion, threads will exit when they read EOF from the pipe
    # For timeout/kill cases, we may need to close pipes to unblock them
    THREAD_JOIN_TIMEOUT = 5  # seconds - enough time to drain normal output buffers

    t_out.join(timeout=THREAD_JOIN_TIMEOUT)
    t_err.join(timeout=THREAD_JOIN_TIMEOUT)

    # If threads are still alive after first timeout, close pipes to unblock them
    # This handles cases where child processes keep pipes open
    if t_out.is_alive() or t_err.is_alive():
        try:
            proc.stdout.close()
        except Exception:
            pass
        try:
            proc.stderr.close()
        except Exception:
            pass
        # Give threads a bit more time after closing pipes
        t_out.join(timeout=2)
        t_err.join(timeout=2)

    # If threads are still alive after all attempts, log it
    if t_out.is_alive() or t_err.is_alive():
        captured_stderr.append(b"\n[Thread join timeout - some output may be lost]\n")

    stdout_str = b"".join(captured_stdout).decode('utf-8', errors='replace')
    stderr_str = b"".join(captured_stderr).decode('utf-8', errors='replace')

    return proc.returncode if proc.returncode is not None else -1, stdout_str, stderr_str
|
|
28
287
|
|
|
29
288
|
|
|
30
289
|
def fix_code_loop(
|
|
@@ -38,10 +297,18 @@ def fix_code_loop(
|
|
|
38
297
|
error_log_file: str,
|
|
39
298
|
verbose: bool = False,
|
|
40
299
|
time: float = DEFAULT_TIME,
|
|
300
|
+
prompt_file: str = "",
|
|
301
|
+
agentic_fallback: bool = True,
|
|
302
|
+
use_cloud: bool = False,
|
|
41
303
|
) -> Tuple[bool, str, str, int, float, Optional[str]]:
|
|
42
304
|
"""
|
|
43
305
|
Attempts to fix errors in a code module through multiple iterations.
|
|
44
306
|
|
|
307
|
+
Hybrid Cloud Support:
|
|
308
|
+
When use_cloud=True, the LLM fix calls are routed to the cloud crashCode endpoint
|
|
309
|
+
while local verification program execution stays local. This allows the loop to
|
|
310
|
+
pass local verification results to the cloud for analysis and fixes.
|
|
311
|
+
|
|
45
312
|
Args:
|
|
46
313
|
code_file: Path to the code file being tested.
|
|
47
314
|
prompt: The prompt that generated the code under test.
|
|
@@ -53,6 +320,9 @@ def fix_code_loop(
|
|
|
53
320
|
error_log_file: Path to the error log file.
|
|
54
321
|
verbose: Enable detailed logging (default: False).
|
|
55
322
|
time: Time limit for the LLM calls (default: DEFAULT_TIME).
|
|
323
|
+
prompt_file: Path to the prompt file.
|
|
324
|
+
agentic_fallback: Enable agentic fallback if the primary fix mechanism fails.
|
|
325
|
+
use_cloud: If True, use cloud LLM for fix calls while keeping verification execution local.
|
|
56
326
|
|
|
57
327
|
Returns:
|
|
58
328
|
Tuple containing the following in order:
|
|
@@ -63,15 +333,98 @@ def fix_code_loop(
|
|
|
63
333
|
- total_cost (float): Total cost of all fix attempts.
|
|
64
334
|
- model_name (str | None): Name of the LLM model used (or None if no LLM calls were made).
|
|
65
335
|
"""
|
|
66
|
-
#
|
|
336
|
+
# Handle default time if passed as None (though signature defaults to DEFAULT_TIME)
|
|
337
|
+
if time is None:
|
|
338
|
+
time = DEFAULT_TIME
|
|
339
|
+
|
|
340
|
+
# --- Start: File Checks ---
|
|
67
341
|
if not Path(code_file).is_file():
|
|
68
|
-
# Raising error for code file is acceptable as it's fundamental
|
|
69
342
|
raise FileNotFoundError(f"Code file not found: {code_file}")
|
|
70
343
|
if not Path(verification_program).is_file():
|
|
71
|
-
# Handle missing verification program gracefully as per test expectation
|
|
72
344
|
rprint(f"[bold red]Error: Verification program not found: {verification_program}[/bold red]")
|
|
73
345
|
return False, "", "", 0, 0.0, None
|
|
74
|
-
# --- End:
|
|
346
|
+
# --- End: File Checks ---
|
|
347
|
+
|
|
348
|
+
is_python = str(code_file).lower().endswith(".py")
|
|
349
|
+
if not is_python:
|
|
350
|
+
# For non-Python files, run the verification program to get an initial error state
|
|
351
|
+
rprint(f"[cyan]Non-Python target detected. Running verification program to get initial state...[/cyan]")
|
|
352
|
+
lang = get_language(os.path.splitext(code_file)[1])
|
|
353
|
+
verify_cmd = default_verify_cmd_for(lang, verification_program)
|
|
354
|
+
if not verify_cmd:
|
|
355
|
+
# No verify command available (e.g., Java without maven/gradle).
|
|
356
|
+
# Trigger agentic fallback directly using any existing error log.
|
|
357
|
+
rprint(f"[cyan]No verification command for {lang}. Triggering agentic fallback directly...[/cyan]")
|
|
358
|
+
error_log_path = Path(error_log_file)
|
|
359
|
+
error_log_path.parent.mkdir(parents=True, exist_ok=True)
|
|
360
|
+
# Read existing error content or create minimal log
|
|
361
|
+
if not error_log_path.exists() or error_log_path.stat().st_size == 0:
|
|
362
|
+
with open(error_log_path, "w") as f:
|
|
363
|
+
f.write(f"No verification command available for language: {lang}\n")
|
|
364
|
+
f.write("Agentic fix will attempt to resolve the issue.\n")
|
|
365
|
+
|
|
366
|
+
success, _msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_crash(
|
|
367
|
+
prompt_file=prompt_file,
|
|
368
|
+
code_file=code_file,
|
|
369
|
+
program_file=verification_program,
|
|
370
|
+
crash_log_file=error_log_file,
|
|
371
|
+
cwd=Path(prompt_file).parent if prompt_file else None
|
|
372
|
+
)
|
|
373
|
+
final_program = ""
|
|
374
|
+
final_code = ""
|
|
375
|
+
try:
|
|
376
|
+
with open(verification_program, "r") as f:
|
|
377
|
+
final_program = f.read()
|
|
378
|
+
except Exception:
|
|
379
|
+
pass
|
|
380
|
+
try:
|
|
381
|
+
with open(code_file, "r") as f:
|
|
382
|
+
final_code = f.read()
|
|
383
|
+
except Exception:
|
|
384
|
+
pass
|
|
385
|
+
return success, final_program, final_code, 1, agent_cost, agent_model
|
|
386
|
+
|
|
387
|
+
verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, shell=True)
|
|
388
|
+
pytest_output = (verify_result.stdout or "") + "\n" + (verify_result.stderr or "")
|
|
389
|
+
if verify_result.returncode != 0:
|
|
390
|
+
rprint("[cyan]Non-Python target failed initial verification. Triggering agentic fallback...[/cyan]")
|
|
391
|
+
error_log_path = Path(error_log_file)
|
|
392
|
+
error_log_path.parent.mkdir(parents=True, exist_ok=True)
|
|
393
|
+
with open(error_log_path, "w") as f:
|
|
394
|
+
f.write(pytest_output)
|
|
395
|
+
|
|
396
|
+
success, _msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_crash(
|
|
397
|
+
prompt_file=prompt_file,
|
|
398
|
+
code_file=code_file,
|
|
399
|
+
program_file=verification_program,
|
|
400
|
+
crash_log_file=error_log_file,
|
|
401
|
+
cwd=Path(prompt_file).parent if prompt_file else None
|
|
402
|
+
)
|
|
403
|
+
final_program = ""
|
|
404
|
+
final_code = ""
|
|
405
|
+
try:
|
|
406
|
+
with open(verification_program, "r") as f:
|
|
407
|
+
final_program = f.read()
|
|
408
|
+
except Exception:
|
|
409
|
+
pass
|
|
410
|
+
try:
|
|
411
|
+
with open(code_file, "r") as f:
|
|
412
|
+
final_code = f.read()
|
|
413
|
+
except Exception:
|
|
414
|
+
pass
|
|
415
|
+
return success, final_program, final_code, 1, agent_cost, agent_model
|
|
416
|
+
else:
|
|
417
|
+
rprint("[green]Non-Python tests passed. No fix needed.[/green]")
|
|
418
|
+
try:
|
|
419
|
+
final_program = ""
|
|
420
|
+
final_code = ""
|
|
421
|
+
with open(verification_program, "r") as f:
|
|
422
|
+
final_program = f.read()
|
|
423
|
+
with open(code_file, "r") as f:
|
|
424
|
+
final_code = f.read()
|
|
425
|
+
except Exception as e:
|
|
426
|
+
rprint(f"[yellow]Warning: Could not read final files: {e}[/yellow]")
|
|
427
|
+
return True, final_program, final_code, 0, 0.0, "N/A"
|
|
75
428
|
|
|
76
429
|
# Step 1: Remove existing error log file
|
|
77
430
|
try:
|
|
@@ -83,20 +436,26 @@ def fix_code_loop(
|
|
|
83
436
|
rprint(f"Error log file not found, no need to remove: {error_log_file}")
|
|
84
437
|
except OSError as e:
|
|
85
438
|
rprint(f"[bold red]Error removing log file {error_log_file}: {e}[/bold red]")
|
|
86
|
-
# Decide if this is fatal or not; for now, we continue
|
|
87
439
|
|
|
88
440
|
# Step 2: Initialize variables
|
|
89
441
|
attempts = 0
|
|
90
442
|
total_cost = 0.0
|
|
91
443
|
success = False
|
|
92
444
|
model_name = None
|
|
93
|
-
history_log = "<history>\n"
|
|
445
|
+
history_log = "<history>\n"
|
|
94
446
|
|
|
95
447
|
# Create initial backups before any modifications
|
|
448
|
+
# Store in .pdd/backups/ to avoid polluting code/test directories
|
|
449
|
+
from datetime import datetime
|
|
450
|
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
96
451
|
code_file_path = Path(code_file)
|
|
97
452
|
verification_program_path = Path(verification_program)
|
|
98
|
-
|
|
99
|
-
|
|
453
|
+
|
|
454
|
+
backup_dir = Path.cwd() / '.pdd' / 'backups' / code_file_path.stem / timestamp
|
|
455
|
+
backup_dir.mkdir(parents=True, exist_ok=True)
|
|
456
|
+
|
|
457
|
+
original_code_backup = str(backup_dir / f"code_original{code_file_path.suffix}")
|
|
458
|
+
original_program_backup = str(backup_dir / f"program_original{verification_program_path.suffix}")
|
|
100
459
|
|
|
101
460
|
try:
|
|
102
461
|
shutil.copy2(code_file, original_code_backup)
|
|
@@ -105,33 +464,41 @@ def fix_code_loop(
|
|
|
105
464
|
rprint(f"Created initial backups: {original_code_backup}, {original_program_backup}")
|
|
106
465
|
except Exception as e:
|
|
107
466
|
rprint(f"[bold red]Error creating initial backups: {e}[/bold red]")
|
|
108
|
-
# If backups fail, we cannot guarantee restoration. Return failure.
|
|
109
467
|
return False, "", "", 0, 0.0, None
|
|
110
468
|
|
|
469
|
+
# Initialize process for scope
|
|
470
|
+
process = None
|
|
111
471
|
|
|
112
472
|
# Step 3: Enter the fixing loop
|
|
113
473
|
while attempts < max_attempts and total_cost <= budget:
|
|
114
|
-
current_attempt
|
|
115
|
-
|
|
116
|
-
|
|
474
|
+
# current_attempt is used for logging the current iteration number
|
|
475
|
+
current_iteration_number = attempts + 1
|
|
476
|
+
rprint(f"\n[bold cyan]Attempt {current_iteration_number}/{max_attempts}...[/bold cyan]")
|
|
477
|
+
attempt_log_entry = f' <attempt number="{current_iteration_number}">\n'
|
|
117
478
|
|
|
118
479
|
# b. Run the verification program
|
|
119
480
|
if verbose:
|
|
120
481
|
rprint(f"Running verification: {sys.executable} {verification_program}")
|
|
121
482
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
483
|
+
try:
|
|
484
|
+
returncode, stdout, stderr = run_process_with_output(
|
|
485
|
+
[sys.executable, verification_program],
|
|
486
|
+
timeout=300
|
|
487
|
+
)
|
|
488
|
+
process = ProcessResult(returncode, stdout, stderr)
|
|
489
|
+
|
|
490
|
+
verification_status = f"Success (Return Code: {process.returncode})" if process.returncode == 0 else f"Failure (Return Code: {process.returncode})"
|
|
491
|
+
verification_output = process.stdout or "[No standard output]"
|
|
492
|
+
verification_error = process.stderr or "[No standard error]"
|
|
493
|
+
except Exception as e:
|
|
494
|
+
verification_status = f"Failure (Exception: {e})"
|
|
495
|
+
verification_output = "[Exception occurred]"
|
|
496
|
+
verification_error = str(e)
|
|
497
|
+
process = ProcessResult(-1, "", str(e))
|
|
128
498
|
|
|
129
|
-
verification_status = f"Success (Return Code: {process.returncode})" if process.returncode == 0 else f"Failure (Return Code: {process.returncode})"
|
|
130
|
-
verification_output = process.stdout or "[No standard output]"
|
|
131
|
-
verification_error = process.stderr or "[No standard error]"
|
|
132
499
|
|
|
133
500
|
# Add verification results to the attempt log entry
|
|
134
|
-
attempt_log_entry += f"""
|
|
501
|
+
attempt_log_entry += f"""
|
|
135
502
|
<verification>
|
|
136
503
|
<status>{verification_status}</status>
|
|
137
504
|
<output><![CDATA[
|
|
@@ -155,7 +522,7 @@ def fix_code_loop(
|
|
|
155
522
|
current_error_message = verification_error # Use stderr as the primary error source
|
|
156
523
|
|
|
157
524
|
# Add current error to the attempt log entry
|
|
158
|
-
attempt_log_entry += f"""
|
|
525
|
+
attempt_log_entry += f"""
|
|
159
526
|
<current_error><![CDATA[
|
|
160
527
|
{current_error_message}
|
|
161
528
|
]]></current_error>
|
|
@@ -164,30 +531,29 @@ def fix_code_loop(
|
|
|
164
531
|
# Check budget *before* making the potentially expensive LLM call for the next attempt
|
|
165
532
|
# (Only check if cost > 0 to avoid breaking before first attempt if budget is 0)
|
|
166
533
|
if total_cost > budget and attempts > 0: # Check after first attempt cost is added
|
|
167
|
-
rprint(f"[bold yellow]Budget exceeded (${total_cost:.4f} > ${budget:.4f}) before attempt {
|
|
534
|
+
rprint(f"[bold yellow]Budget exceeded (${total_cost:.4f} > ${budget:.4f}) before attempt {current_iteration_number}. Stopping.[/bold yellow]")
|
|
168
535
|
history_log += attempt_log_entry + " <error>Budget exceeded before LLM call</error>\n </attempt>\n"
|
|
169
536
|
break
|
|
170
537
|
|
|
171
538
|
# Check max attempts *before* the LLM call for this attempt
|
|
172
539
|
if attempts >= max_attempts:
|
|
173
|
-
rprint(f"[bold red]Maximum attempts ({max_attempts}) reached before attempt {
|
|
540
|
+
rprint(f"[bold red]Maximum attempts ({max_attempts}) reached before attempt {current_iteration_number}. Stopping.[/bold red]")
|
|
174
541
|
# No need to add to history here, loop condition handles it
|
|
175
542
|
break
|
|
176
543
|
|
|
177
544
|
|
|
178
545
|
# Create backup copies for this iteration BEFORE calling LLM
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
program_backup_path = f"{program_base}_{current_attempt}{program_ext}"
|
|
546
|
+
# Store in .pdd/backups/ (backup_dir already created above)
|
|
547
|
+
code_backup_path = str(backup_dir / f"code_{current_iteration_number}{code_file_path.suffix}")
|
|
548
|
+
program_backup_path = str(backup_dir / f"program_{current_iteration_number}{verification_program_path.suffix}")
|
|
183
549
|
|
|
184
550
|
try:
|
|
185
551
|
shutil.copy2(code_file, code_backup_path)
|
|
186
552
|
shutil.copy2(verification_program, program_backup_path)
|
|
187
553
|
if verbose:
|
|
188
|
-
rprint(f"Created backups for attempt {
|
|
554
|
+
rprint(f"Created backups for attempt {current_iteration_number}: {code_backup_path}, {program_backup_path}")
|
|
189
555
|
except Exception as e:
|
|
190
|
-
rprint(f"[bold red]Error creating backups for attempt {
|
|
556
|
+
rprint(f"[bold red]Error creating backups for attempt {current_iteration_number}: {e}[/bold red]")
|
|
191
557
|
history_log += attempt_log_entry + f" <error>Failed to create backups: {e}</error>\n </attempt>\n"
|
|
192
558
|
break # Cannot proceed reliably without backups
|
|
193
559
|
|
|
@@ -204,7 +570,7 @@ def fix_code_loop(
|
|
|
204
570
|
# Temporarily close the XML structure for the LLM call
|
|
205
571
|
error_context_for_llm = history_log + attempt_log_entry + " </attempt>\n</history>\n"
|
|
206
572
|
|
|
207
|
-
# Call
|
|
573
|
+
# Call fix (cloud or local based on use_cloud parameter)
|
|
208
574
|
rprint("Attempting to fix errors using LLM...")
|
|
209
575
|
update_program, update_code, fixed_program, fixed_code = False, False, "", ""
|
|
210
576
|
program_code_fix, cost, model_name_iter = "", 0.0, None
|
|
@@ -214,36 +580,79 @@ def fix_code_loop(
|
|
|
214
580
|
# For simplicity, we assume fix_code_module_errors prints directly using `rprint`
|
|
215
581
|
|
|
216
582
|
try:
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
583
|
+
if use_cloud:
|
|
584
|
+
# Use cloud LLM for fix - local verification results passed via error_context_for_llm
|
|
585
|
+
try:
|
|
586
|
+
(update_program, update_code, fixed_program, fixed_code,
|
|
587
|
+
program_code_fix, cost, model_name_iter) = cloud_crash_fix(
|
|
588
|
+
program=current_program,
|
|
589
|
+
prompt=prompt,
|
|
590
|
+
code=current_code,
|
|
591
|
+
errors=error_context_for_llm,
|
|
592
|
+
strength=strength,
|
|
593
|
+
temperature=temperature,
|
|
594
|
+
time=time,
|
|
595
|
+
verbose=verbose,
|
|
596
|
+
program_path=verification_program,
|
|
597
|
+
code_path=code_file,
|
|
598
|
+
language="python" if is_python else get_language(os.path.splitext(code_file)[1]),
|
|
599
|
+
)
|
|
600
|
+
if model_name_iter:
|
|
601
|
+
model_name = model_name_iter
|
|
602
|
+
if verbose:
|
|
603
|
+
rprint(f"[cyan]Cloud crash fix completed. Cost: ${cost:.4f}[/cyan]")
|
|
604
|
+
except (requests.exceptions.RequestException, RuntimeError) as cloud_err:
|
|
605
|
+
# Cloud failed - fall back to local
|
|
606
|
+
rprint(f"[yellow]Cloud crash fix failed: {cloud_err}. Falling back to local.[/yellow]")
|
|
607
|
+
(update_program, update_code, fixed_program, fixed_code,
|
|
608
|
+
program_code_fix, cost, model_name_iter) = fix_code_module_errors(
|
|
609
|
+
program=current_program,
|
|
610
|
+
prompt=prompt,
|
|
611
|
+
code=current_code,
|
|
612
|
+
errors=error_context_for_llm,
|
|
613
|
+
strength=strength,
|
|
614
|
+
temperature=temperature,
|
|
615
|
+
time=time,
|
|
616
|
+
verbose=verbose,
|
|
617
|
+
program_path=verification_program,
|
|
618
|
+
code_path=code_file,
|
|
619
|
+
)
|
|
620
|
+
if model_name_iter:
|
|
621
|
+
model_name = model_name_iter
|
|
622
|
+
else:
|
|
623
|
+
# Local LLM fix
|
|
624
|
+
# Note: The example signature for fix_code_module_errors returns 7 values
|
|
625
|
+
(update_program, update_code, fixed_program, fixed_code,
|
|
626
|
+
program_code_fix, cost, model_name_iter) = fix_code_module_errors(
|
|
627
|
+
program=current_program,
|
|
628
|
+
prompt=prompt,
|
|
629
|
+
code=current_code,
|
|
630
|
+
errors=error_context_for_llm, # Pass the structured history
|
|
631
|
+
strength=strength,
|
|
632
|
+
temperature=temperature,
|
|
633
|
+
time=time, # Pass time
|
|
634
|
+
verbose=verbose,
|
|
635
|
+
program_path=verification_program, # Pass file path for LLM context
|
|
636
|
+
code_path=code_file, # Pass file path for LLM context
|
|
637
|
+
)
|
|
638
|
+
if model_name_iter:
|
|
639
|
+
model_name = model_name_iter # Update model name if returned
|
|
231
640
|
|
|
232
641
|
except Exception as e:
|
|
233
642
|
rprint(f"[bold red]Error calling fix_code_module_errors: {e}[/bold red]")
|
|
234
643
|
cost = 0.0 # Assume no cost if the call failed
|
|
235
644
|
# Add error to the attempt log entry
|
|
236
|
-
attempt_log_entry += f"""
|
|
645
|
+
attempt_log_entry += f"""
|
|
237
646
|
<fixing>
|
|
238
647
|
<error>LLM call failed: {e}</error>
|
|
239
648
|
</fixing>
|
|
240
649
|
"""
|
|
241
|
-
# Continue to the next attempt or break if limits reached? Let's break.
|
|
242
650
|
history_log += attempt_log_entry + " </attempt>\n" # Log the attempt with the LLM error
|
|
651
|
+
attempts += 1 # Increment attempts even if LLM call failed
|
|
243
652
|
break # Stop if the fixing mechanism itself fails
|
|
244
653
|
|
|
245
654
|
# Add fixing results to the attempt log entry
|
|
246
|
-
attempt_log_entry += f"""
|
|
655
|
+
attempt_log_entry += f"""
|
|
247
656
|
<fixing>
|
|
248
657
|
<llm_analysis><![CDATA[
|
|
249
658
|
{program_code_fix or "[No analysis provided]"}
|
|
@@ -269,11 +678,13 @@ def fix_code_loop(
|
|
|
269
678
|
rprint(f"[bold red]Error writing to log file {error_log_file}: {e}[/bold red]")
|
|
270
679
|
|
|
271
680
|
|
|
272
|
-
# Add cost and
|
|
681
|
+
# Add cost and increment attempt counter (as per fix report) *before* checking budget
|
|
273
682
|
total_cost += cost
|
|
683
|
+
attempts += 1 # Moved this line here as per fix report
|
|
274
684
|
rprint(f"Attempt Cost: ${cost:.4f}, Total Cost: ${total_cost:.4f}, Budget: ${budget:.4f}")
|
|
685
|
+
|
|
275
686
|
if total_cost > budget:
|
|
276
|
-
rprint(f"[bold yellow]Budget exceeded (${total_cost:.4f} > ${budget:.4f}) after attempt {
|
|
687
|
+
rprint(f"[bold yellow]Budget exceeded (${total_cost:.4f} > ${budget:.4f}) after attempt {attempts}. Stopping.[/bold yellow]")
|
|
277
688
|
break # Stop loop
|
|
278
689
|
|
|
279
690
|
# If LLM suggested no changes but verification failed, stop to prevent loops
|
|
@@ -295,8 +706,7 @@ def fix_code_loop(
|
|
|
295
706
|
success = False # Mark as failed if we can't write updates
|
|
296
707
|
break # Stop if we cannot apply fixes
|
|
297
708
|
|
|
298
|
-
#
|
|
299
|
-
attempts += 1
|
|
709
|
+
# The original 'attempts += 1' was here. It has been moved earlier.
|
|
300
710
|
|
|
301
711
|
# Check if max attempts reached after incrementing (for the next loop iteration check)
|
|
302
712
|
if attempts >= max_attempts:
|
|
@@ -358,39 +768,57 @@ def fix_code_loop(
|
|
|
358
768
|
rprint(f"[bold red]Final write to log file {error_log_file} failed: {e}[/bold red]")
|
|
359
769
|
|
|
360
770
|
# Determine final number of attempts for reporting
|
|
361
|
-
#
|
|
362
|
-
# If loop finished by failure (budget, max_attempts, no_change_needed, error),
|
|
363
|
-
# the number of attempts *initiated* is 'attempts + 1' unless max_attempts was exactly hit.
|
|
364
|
-
# The tests seem to expect the number of attempts *initiated*.
|
|
365
|
-
# Let's refine the calculation slightly for clarity.
|
|
366
|
-
# 'attempts' holds the count of *completed* loops (0-indexed).
|
|
367
|
-
# 'current_attempt' holds the user-facing number (1-indexed) of the loop *currently running or just finished*.
|
|
771
|
+
# The 'attempts' variable correctly counts the number of LLM fix cycles that were initiated.
|
|
368
772
|
final_attempts_reported = attempts
|
|
369
|
-
if not success:
|
|
370
|
-
# If failure occurred, it happened *during* or *after* the 'current_attempt' was initiated.
|
|
371
|
-
# If loop broke due to budget/no_change/error, current_attempt reflects the attempt number where failure occurred.
|
|
372
|
-
# If loop broke because attempts >= max_attempts, the last valid value for current_attempt was max_attempts.
|
|
373
|
-
# The number of attempts *tried* is current_attempt.
|
|
374
|
-
# However, the tests seem aligned with the previous logic. Let's stick to it unless further tests fail.
|
|
375
|
-
final_attempts_reported = attempts if success else (attempts + 1 if attempts < max_attempts and process.returncode != 0 else attempts)
|
|
376
|
-
# Re-evaluating the test logic:
|
|
377
|
-
# - Budget test: attempts=1 when loop breaks, expects 2. (attempts+1) -> 2. Correct.
|
|
378
|
-
# - Max attempts test: attempts=0 when loop breaks (no change), max_attempts=2, expects <=2. (attempts+1) -> 1. Correct.
|
|
379
|
-
# - If max_attempts=2 was reached *normally* (failed attempt 1, failed attempt 2), attempts would be 2.
|
|
380
|
-
# The logic `attempts + 1 if attempts < max_attempts else attempts` would return 2. Correct.
|
|
381
|
-
# Let's simplify the return calculation based on 'attempts' which counts completed loops.
|
|
382
|
-
final_attempts_reported = attempts # Number of fully completed fix cycles
|
|
383
|
-
if not success and process and process.returncode != 0: # If we failed after at least one verification run
|
|
384
|
-
# Count the final failed attempt unless success was achieved on the very last possible attempt
|
|
385
|
-
if attempts < max_attempts:
|
|
386
|
-
final_attempts_reported += 1
|
|
387
773
|
|
|
774
|
+
if not success and agentic_fallback:
|
|
775
|
+
# Ensure error_log_file exists before calling agentic fix
|
|
776
|
+
try:
|
|
777
|
+
if not os.path.exists(error_log_file) or os.path.getsize(error_log_file) == 0:
|
|
778
|
+
# Write minimal error log for agentic fix
|
|
779
|
+
error_log_path = Path(error_log_file)
|
|
780
|
+
error_log_path.parent.mkdir(parents=True, exist_ok=True)
|
|
781
|
+
with open(error_log_path, "w") as elog:
|
|
782
|
+
if process:
|
|
783
|
+
elog.write(f"Verification failed with return code: {process.returncode}\n")
|
|
784
|
+
if process.stdout:
|
|
785
|
+
elog.write(f"\nStdout:\n{process.stdout}\n")
|
|
786
|
+
if process.stderr:
|
|
787
|
+
elog.write(f"\nStderr:\n{process.stderr}\n")
|
|
788
|
+
else:
|
|
789
|
+
elog.write("No error information available\n")
|
|
790
|
+
except Exception as e:
|
|
791
|
+
rprint(f"[yellow]Warning: Could not write error log before agentic fallback: {e}[/yellow]")
|
|
792
|
+
|
|
793
|
+
rprint(f"[cyan]Attempting agentic fallback (prompt_file={prompt_file!r})...[/cyan]")
|
|
794
|
+
agent_success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_crash(
|
|
795
|
+
prompt_file=prompt_file,
|
|
796
|
+
code_file=code_file,
|
|
797
|
+
program_file=verification_program,
|
|
798
|
+
crash_log_file=error_log_file,
|
|
799
|
+
cwd=Path(prompt_file).parent if prompt_file else None
|
|
800
|
+
)
|
|
801
|
+
total_cost += agent_cost
|
|
802
|
+
if not agent_success:
|
|
803
|
+
rprint(f"[bold red]Agentic fallback failed: {agent_msg}[/bold red]")
|
|
804
|
+
if agent_changed_files:
|
|
805
|
+
rprint(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
|
|
806
|
+
for f in agent_changed_files:
|
|
807
|
+
rprint(f" • {f}")
|
|
808
|
+
if agent_success:
|
|
809
|
+
model_name = agent_model or model_name
|
|
810
|
+
try:
|
|
811
|
+
final_code_content = Path(code_file).read_text(encoding='utf-8')
|
|
812
|
+
final_program_content = Path(verification_program).read_text(encoding='utf-8')
|
|
813
|
+
except Exception as e:
|
|
814
|
+
rprint(f"[yellow]Warning: Could not read files after successful agentic fix: {e}[/yellow]")
|
|
815
|
+
success = True
|
|
388
816
|
|
|
389
817
|
return (
|
|
390
818
|
success,
|
|
391
819
|
final_program_content,
|
|
392
820
|
final_code_content,
|
|
393
|
-
final_attempts_reported,
|
|
821
|
+
final_attempts_reported,
|
|
394
822
|
total_cost,
|
|
395
823
|
model_name,
|
|
396
824
|
)
|
|
@@ -519,4 +947,4 @@ sys.exit(0) # Exit with zero code for success
|
|
|
519
947
|
# for f in Path(".").glob("dummy_verify_*.py"): # Remove attempt backups like dummy_verify_1.py
|
|
520
948
|
# if "_original_backup" not in f.name: os.remove(f)
|
|
521
949
|
# except OSError as e:
|
|
522
|
-
# print(f"Error cleaning up dummy files: {e}")
|
|
950
|
+
# print(f"Error cleaning up dummy files: {e}")
|