pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to their public registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
- pdd/__init__.py +40 -8
- pdd/agentic_bug.py +323 -0
- pdd/agentic_bug_orchestrator.py +497 -0
- pdd/agentic_change.py +231 -0
- pdd/agentic_change_orchestrator.py +526 -0
- pdd/agentic_common.py +598 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_e2e_fix.py +319 -0
- pdd/agentic_e2e_fix_orchestrator.py +426 -0
- pdd/agentic_fix.py +1294 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +387 -0
- pdd/agentic_verify.py +183 -0
- pdd/architecture_sync.py +565 -0
- pdd/auth_service.py +210 -0
- pdd/auto_deps_main.py +71 -51
- pdd/auto_include.py +245 -5
- pdd/auto_update.py +125 -47
- pdd/bug_main.py +196 -23
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +350 -150
- pdd/code_generator.py +60 -18
- pdd/code_generator_main.py +790 -57
- pdd/commands/__init__.py +48 -0
- pdd/commands/analysis.py +306 -0
- pdd/commands/auth.py +309 -0
- pdd/commands/connect.py +290 -0
- pdd/commands/fix.py +163 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +175 -0
- pdd/commands/misc.py +87 -0
- pdd/commands/modify.py +256 -0
- pdd/commands/report.py +144 -0
- pdd/commands/sessions.py +284 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +589 -111
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +175 -76
- pdd/continue_generation.py +53 -10
- pdd/core/__init__.py +33 -0
- pdd/core/cli.py +527 -0
- pdd/core/cloud.py +237 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +67 -0
- pdd/core/remote_session.py +61 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +262 -33
- pdd/data/language_format.csv +71 -63
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/docs/prompting_guide.md +864 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
- pdd/fix_code_loop.py +523 -95
- pdd/fix_code_module_errors.py +6 -2
- pdd/fix_error_loop.py +491 -92
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +278 -21
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +529 -286
- pdd/fix_verification_main.py +294 -89
- pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
- pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
- pdd/frontend/dist/index.html +376 -0
- pdd/frontend/dist/logo.svg +33 -0
- pdd/generate_output_paths.py +139 -15
- pdd/generate_test.py +218 -146
- pdd/get_comment.py +19 -44
- pdd/get_extension.py +8 -9
- pdd/get_jwt_token.py +318 -22
- pdd/get_language.py +8 -7
- pdd/get_run_command.py +75 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +13 -4
- pdd/llm_invoke.py +1711 -181
- pdd/load_prompt_template.py +19 -12
- pdd/path_resolution.py +140 -0
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +14 -4
- pdd/preprocess.py +293 -24
- pdd/preprocess_main.py +41 -6
- pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
- pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
- pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
- pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
- pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
- pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
- pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
- pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
- pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
- pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
- pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
- pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
- pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
- pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
- pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
- pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
- pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
- pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
- pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
- pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
- pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
- pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
- pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
- pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
- pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
- pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +925 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +122 -905
- pdd/prompts/change_LLM.prompt +3093 -1
- pdd/prompts/detect_change_LLM.prompt +686 -27
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +41 -7
- pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
- pdd/prompts/increase_tests_LLM.prompt +1 -5
- pdd/prompts/insert_includes_LLM.prompt +316 -186
- pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
- pdd/prompts/prompt_diff_LLM.prompt +82 -0
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/pytest_output.py +127 -12
- pdd/remote_session.py +876 -0
- pdd/render_mermaid.py +236 -0
- pdd/server/__init__.py +52 -0
- pdd/server/app.py +335 -0
- pdd/server/click_executor.py +587 -0
- pdd/server/executor.py +338 -0
- pdd/server/jobs.py +661 -0
- pdd/server/models.py +241 -0
- pdd/server/routes/__init__.py +31 -0
- pdd/server/routes/architecture.py +451 -0
- pdd/server/routes/auth.py +364 -0
- pdd/server/routes/commands.py +929 -0
- pdd/server/routes/config.py +42 -0
- pdd/server/routes/files.py +603 -0
- pdd/server/routes/prompts.py +1322 -0
- pdd/server/routes/websocket.py +473 -0
- pdd/server/security.py +243 -0
- pdd/server/terminal_spawner.py +209 -0
- pdd/server/token_counter.py +222 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +237 -195
- pdd/sync_animation.py +8 -4
- pdd/sync_determine_operation.py +839 -112
- pdd/sync_main.py +351 -57
- pdd/sync_orchestration.py +1400 -756
- pdd/sync_tui.py +848 -0
- pdd/template_expander.py +161 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +237 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +140 -63
- pdd/unfinished_prompt.py +51 -4
- pdd/update_main.py +567 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
- pdd_cli-0.0.118.dist-info/RECORD +227 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.45.dist-info/RECORD +0 -116
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,4 @@
+import json
 import os
 import shutil
 import subprocess
@@ -8,6 +9,8 @@ from typing import Dict, Tuple, Any, Optional
 from xml.sax.saxutils import escape
 import time
 
+import requests
+
 from rich.console import Console
 
 # Use relative import assuming fix_verification_errors is in the same package
@@ -27,6 +30,126 @@ except ImportError:
 
 from . import DEFAULT_TIME  # Import DEFAULT_TIME
 from .python_env_detector import detect_host_python_executable
+from .get_language import get_language
+from .agentic_langtest import default_verify_cmd_for
+from .agentic_verify import run_agentic_verify
+
+# Cloud configuration
+try:
+    from .core.cloud import CloudConfig
+    CLOUD_AVAILABLE = True
+except ImportError:
+    CLOUD_AVAILABLE = False
+    CloudConfig = None
+
+# Cloud request timeout for verify fix
+CLOUD_REQUEST_TIMEOUT = 400  # seconds
+
+
+def cloud_verify_fix(
+    program: str,
+    prompt: str,
+    code: str,
+    output: str,
+    strength: float,
+    temperature: float,
+    time_param: float,
+    verbose: bool,
+    language: str = "python",
+) -> Dict[str, Any]:
+    """
+    Call cloud verifyCode endpoint for LLM verification fix.
+
+    Returns:
+        Dict with keys: fixed_code, fixed_program, explanation, verification_issues_count, total_cost, model_name
+    """
+    if not CLOUD_AVAILABLE or CloudConfig is None:
+        raise RuntimeError("Cloud configuration not available")
+
+    jwt_token = CloudConfig.get_jwt_token(verbose=verbose)
+    if not jwt_token:
+        raise RuntimeError("Cloud authentication failed - no JWT token")
+
+    payload = {
+        "programContent": program,
+        "promptContent": prompt,
+        "codeContent": code,
+        "outputContent": output,
+        "language": language,
+        "strength": strength,
+        "temperature": temperature,
+        "time": time_param if time_param is not None else 0.25,
+        "verbose": verbose,
+    }
+
+    headers = {
+        "Authorization": f"Bearer {jwt_token}",
+        "Content-Type": "application/json"
+    }
+    cloud_url = CloudConfig.get_endpoint_url("verifyCode")
+
+    response = requests.post(
+        cloud_url,
+        json=payload,
+        headers=headers,
+        timeout=CLOUD_REQUEST_TIMEOUT
+    )
+    response.raise_for_status()
+
+    response_data = response.json()
+    return {
+        "fixed_code": response_data.get("fixedCode", code),
+        "fixed_program": response_data.get("fixedProgram", program),
+        "explanation": response_data.get("explanation", ""),
+        "verification_issues_count": response_data.get("issuesCount", 0),
+        "total_cost": float(response_data.get("totalCost", 0.0)),
+        "model_name": response_data.get("modelName", "cloud_model"),
+    }
+
+def _normalize_agentic_result(result):
+    """
+    Normalize run_agentic_verify result into: (success: bool, msg: str, cost: float, model: str, changed_files: List[str])
+    Handles older 2/3/4-tuple shapes used by tests/monkeypatches.
+    """
+    if isinstance(result, tuple):
+        if len(result) == 5:
+            ok, msg, cost, model, changed_files = result
+            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), list(changed_files or [])
+        if len(result) == 4:
+            ok, msg, cost, model = result
+            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), []
+        if len(result) == 3:
+            ok, msg, cost = result
+            return bool(ok), str(msg), float(cost), "agentic-cli", []
+        if len(result) == 2:
+            ok, msg = result
+            return bool(ok), str(msg), 0.0, "agentic-cli", []
+    # Fallback (shouldn't happen)
+    return False, "Invalid agentic result shape", 0.0, "agentic-cli", []
+
+def _safe_run_agentic_verify(*, prompt_file, code_file, program_file, verification_log_file, verbose=False, cwd=None):
+    """
+    Call (possibly monkeypatched) run_agentic_verify and normalize its return.
+
+    Note: cwd parameter is accepted for compatibility but not passed to run_agentic_verify
+    as it determines the working directory from prompt_file.parent internally.
+    """
+    if not prompt_file:
+        return False, "Agentic verify requires a valid prompt file.", 0.0, "agentic-cli", []
+
+    try:
+        res = run_agentic_verify(
+            prompt_file=Path(prompt_file),
+            code_file=Path(code_file),
+            program_file=Path(program_file),
+            verification_log_file=Path(verification_log_file),
+            verbose=verbose,
+            quiet=not verbose,
+            # Note: cwd is not passed - run_agentic_verify uses prompt_file.parent as project root
+        )
+        return _normalize_agentic_result(res)
+    except Exception as e:
+        return False, f"Agentic verify failed: {e}", 0.0, "agentic-cli", []
 
 # Initialize Rich Console for pretty printing
 console = Console()
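
Note on the tuple normalizer above: it collapses the legacy 2/3/4-tuple shapes into the 5-tuple form. A minimal sketch of the observable behavior (the import path is assumed from this wheel's layout):

from pdd.fix_verification_errors_loop import _normalize_agentic_result

# 2-tuple: cost defaults to 0.0, model to "agentic-cli", changed files to []
assert _normalize_agentic_result((True, "done")) == (True, "done", 0.0, "agentic-cli", [])
# 5-tuple with a falsy model: the model still falls back to "agentic-cli"
assert _normalize_agentic_result((False, "failed", 0.02, None, ["src/app.py"])) == (
    False, "failed", 0.02, "agentic-cli", ["src/app.py"]
)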
@@ -56,7 +179,7 @@ def _run_program(
         command.extend(args)
 
     try:
-        # Run from staging root directory instead of examples/
+        # Run from staging root directory instead of examples/
         # This allows imports from both pdd/ and examples/ subdirectories
         staging_root = program_path.parent.parent  # Go up from examples/ to staging root
 
@@ -95,6 +218,7 @@ def fix_verification_errors_loop(
     program_file: str,
     code_file: str,
     prompt: str,
+    prompt_file: str,
     verification_program: str,
     strength: float,
     temperature: float,
@@ -105,16 +229,24 @@ def fix_verification_errors_loop(
     output_program_path: Optional[str] = None,
     verbose: bool = False,
     program_args: Optional[list[str]] = None,
-    llm_time: float = DEFAULT_TIME  # Add time parameter
+    llm_time: float = DEFAULT_TIME,  # Add time parameter
+    agentic_fallback: bool = True,
+    use_cloud: bool = False,
 ) -> Dict[str, Any]:
     """
     Attempts to fix errors in a code file based on program execution output
     against the prompt's intent, iterating multiple times with secondary verification.
 
+    Hybrid Cloud Support:
+    When use_cloud=True, the LLM fix calls are routed to the cloud verifyCode endpoint
+    while local program execution stays local. This allows the loop to pass local
+    verification results to the cloud for analysis and fixes.
+
     Args:
         program_file: Path to the Python program exercising the code.
         code_file: Path to the code file being tested/verified.
         prompt: The prompt defining the intended behavior.
+        prompt_file: Path to the prompt file.
         verification_program: Path to a secondary program to verify code changes.
         strength: LLM model strength (0.0 to 1.0).
         temperature: LLM temperature (0.0 to 1.0).
@@ -126,6 +258,8 @@ def fix_verification_errors_loop(
         verbose: Enable verbose logging (default: False).
         program_args: Optional list of command-line arguments for the program_file.
         llm_time: Time parameter for fix_verification_errors calls (default: DEFAULT_TIME).
+        agentic_fallback: Enable agentic fallback if the primary fix mechanism fails.
+        use_cloud: If True, use cloud LLM for fix calls while keeping verification execution local.
 
     Returns:
         A dictionary containing:
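
The signature change above adds prompt_file, agentic_fallback, and use_cloud. A hedged usage sketch (file paths are hypothetical; per the validation comment later in the diff, max_attempts=0 skips the LLM loop and goes straight to the agentic fallback):

from pdd.fix_verification_errors_loop import fix_verification_errors_loop

result = fix_verification_errors_loop(
    program_file="examples/run_module.py",          # hypothetical paths
    code_file="src/my_module.py",
    prompt="Implement process(x) returning x * 2.",
    prompt_file="prompts/my_module_python.prompt",  # new: needed for the agentic fallback
    verification_program="examples/verify_module.py",
    strength=0.5,
    temperature=0.0,
    max_attempts=3,
    budget=0.50,
    verification_log_file="verify.log",
    use_cloud=True,         # new: route LLM fix calls to the cloud verifyCode endpoint
    agentic_fallback=True,  # new: fall back to the agentic CLI if the loop fails
)
print(result["success"], result["total_cost"], result["model_name"])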
@@ -137,6 +271,108 @@ def fix_verification_errors_loop(
         'model_name': str | None - Name of the LLM model used.
         'statistics': dict - Detailed statistics about the process.
     """
+    is_python = str(code_file).lower().endswith(".py")
+    if not is_python:
+        # For non-Python files, run the verification program to get an initial error state
+        console.print(f"[cyan]Non-Python target detected. Running verification program to get initial state...[/cyan]")
+        lang = get_language(os.path.splitext(code_file)[1])
+        verify_cmd = default_verify_cmd_for(lang, verification_program)
+        if not verify_cmd:
+            # No verify command available (e.g., Java without maven/gradle).
+            # Trigger agentic fallback directly.
+            console.print(f"[cyan]No verification command for {lang}. Triggering agentic fallback directly...[/cyan]")
+            verification_log_path = Path(verification_log_file)
+            verification_log_path.parent.mkdir(parents=True, exist_ok=True)
+            # Create minimal error log if it doesn't exist
+            if not verification_log_path.exists() or verification_log_path.stat().st_size == 0:
+                with open(verification_log_path, "w") as f:
+                    f.write(f"No verification command available for language: {lang}\n")
+                    f.write("Agentic fix will attempt to resolve the issue.\n")
+
+            agent_cwd = Path(prompt_file).parent if prompt_file else None
+            console.print(f"[cyan]Attempting agentic verify fallback (prompt_file={prompt_file!r})...[/cyan]")
+            success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_verify(
+                prompt_file=prompt_file,
+                code_file=code_file,
+                program_file=verification_program,
+                verification_log_file=verification_log_file,
+                verbose=verbose,
+                cwd=agent_cwd,
+            )
+            if not success:
+                console.print(f"[bold red]Agentic verify fallback failed: {agent_msg}[/bold red]")
+            if agent_changed_files:
+                console.print(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+                for f in agent_changed_files:
+                    console.print(f"  • {f}")
+            final_program = ""
+            final_code = ""
+            try:
+                with open(verification_program, "r") as f:
+                    final_program = f.read()
+            except Exception:
+                pass
+            try:
+                with open(code_file, "r") as f:
+                    final_code = f.read()
+            except Exception:
+                pass
+            return {
+                "success": success,
+                "final_program": final_program,
+                "final_code": final_code,
+                "total_attempts": 1,
+                "total_cost": agent_cost,
+                "model_name": agent_model,
+                "statistics": {},
+            }
+
+        verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, shell=True)
+        pytest_output = (verify_result.stdout or "") + "\n" + (verify_result.stderr or "")
+        console.print("[cyan]Non-Python target detected. Triggering agentic fallback...[/cyan]")
+        verification_log_path = Path(verification_log_file)
+        verification_log_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(verification_log_path, "w") as f:
+            f.write(pytest_output)
+
+        agent_cwd = Path(prompt_file).parent if prompt_file else None
+        console.print(f"[cyan]Attempting agentic verify fallback (prompt_file={prompt_file!r})...[/cyan]")
+        success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_verify(
+            prompt_file=prompt_file,
+            code_file=code_file,
+            program_file=verification_program,
+            verification_log_file=verification_log_file,
+            verbose=verbose,
+            cwd=agent_cwd,
+        )
+        if not success:
+            console.print(f"[bold red]Agentic verify fallback failed: {agent_msg}[/bold red]")
+        if agent_changed_files:
+            console.print(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+            for f in agent_changed_files:
+                console.print(f"  • {f}")
+        final_program = ""
+        final_code = ""
+        try:
+            with open(verification_program, "r") as f:
+                final_program = f.read()
+        except Exception:
+            pass
+        try:
+            with open(code_file, "r") as f:
+                final_code = f.read()
+        except Exception:
+            pass
+        return {
+            "success": success,
+            "final_program": final_program,
+            "final_code": final_code,
+            "total_attempts": 1,
+            "total_cost": agent_cost,
+            "model_name": agent_model,
+            "statistics": {},
+        }
+
     program_path = Path(program_file).resolve()
     code_path = Path(code_file).resolve()
     verification_program_path = Path(verification_program).resolve()
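
The non-Python branch above shells out to a language-appropriate verify command and feeds the combined output to the agentic fallback. A standalone sketch of that capture step (the vitest command is a hypothetical stand-in for whatever default_verify_cmd_for returns):

import subprocess

verify_result = subprocess.run("npx vitest run", capture_output=True, text=True, shell=True)
log_text = (verify_result.stdout or "") + "\n" + (verify_result.stderr or "")
print(log_text)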
@@ -158,9 +394,9 @@ def fix_verification_errors_loop(
     if not 0.0 <= temperature <= 1.0:
         console.print(f"[bold red]Error: Temperature must be between 0.0 and 1.0.[/bold red]")
         return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
-    #
-    if max_attempts
-        console.print(f"[bold red]Error: Max attempts must be
+    # max_attempts must be non-negative (0 is valid - skips LLM loop, goes straight to agentic mode)
+    if max_attempts < 0:
+        console.print(f"[bold red]Error: Max attempts must be non-negative.[/bold red]")
         return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
     if budget < 0:
         console.print(f"[bold red]Error: Budget cannot be negative.[/bold red]")
@@ -182,6 +418,7 @@ def fix_verification_errors_loop(
     total_cost = 0.0
     model_name: Optional[str] = None
     overall_success = False
+    any_verification_passed = False  # Track if ANY iteration passed secondary verification
     best_iteration = {
         'attempt': -1,  # 0 represents initial state
         'program_backup': None,
@@ -202,6 +439,11 @@ def fix_verification_errors_loop(
     program_contents = ""  # Keep track of current contents
     code_contents = ""  # Keep track of current contents
 
+    # Create backup directory in .pdd/backups/ to avoid polluting code/test directories
+    backup_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    backup_dir = Path.cwd() / '.pdd' / 'backups' / code_path.stem / backup_timestamp
+    backup_dir.mkdir(parents=True, exist_ok=True)
+
     # --- Step 3: Determine Initial State ---
     if verbose:
         console.print("[bold cyan]Step 3: Determining Initial State...[/bold cyan]")
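
The backup directory above keys on the code file's stem plus a timestamp. A small sketch of the resulting layout (the file name is hypothetical):

import datetime
from pathlib import Path

code_path = Path("src/calculator.py")
stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
backup_dir = Path.cwd() / ".pdd" / "backups" / code_path.stem / stamp
backup_dir.mkdir(parents=True, exist_ok=True)
# Per-attempt copies then land at e.g. .pdd/backups/calculator/20250101_120000/code_1.py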
@@ -216,6 +458,21 @@ def fix_verification_errors_loop(
         stats['status_message'] = f'Error reading initial files: {e}'  # Add status message
         return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": stats}
 
+    # 3a-pre: Validate code file is not empty (prevents infinite loops with empty content)
+    if not initial_code_content or len(initial_code_content.strip()) == 0:
+        error_msg = f"Code file is empty or contains only whitespace: {code_path}"
+        console.print(f"[bold red]Error: {error_msg}[/bold red]")
+        stats['status_message'] = f'Error: Code file is empty - cannot verify'
+        return {
+            "success": False,
+            "final_program": initial_program_content,
+            "final_code": "",
+            "total_attempts": 0,
+            "total_cost": 0.0,
+            "model_name": None,
+            "statistics": stats
+        }
+
     # 3a: Run initial program with args
     initial_return_code, initial_output = _run_program(program_path, args=program_args)
     if verbose:
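
The emptiness guard above exists because, per its inline comment, empty content previously led to unproductive fix loops. An equivalent standalone check (path hypothetical):

from pathlib import Path

content = Path("src/my_module.py").read_text(encoding="utf-8")
if not content.strip():
    raise SystemExit("Code file is empty or contains only whitespace - nothing to verify")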
@@ -232,128 +489,185 @@ def fix_verification_errors_loop(
     initial_log_entry += '</InitialState>'
     _write_log_entry(log_path, initial_log_entry)
 
+    # 3c: Check if skipping LLM assessment (max_attempts=0 means skip to agentic fallback)
+    skip_llm = (max_attempts == 0)
+
     # 3d: Call fix_verification_errors for initial assessment
     try:
-        if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        total_cost += initial_cost
-        model_name = initial_fix_result.get('model_name')  # Capture model name early
-        if verbose:
-            console.print(f"Initial assessment cost: ${initial_cost:.6f}, Total cost: ${total_cost:.6f}")
-
-        # 3f: Extract initial issues
-        initial_issues_count = initial_fix_result.get('verification_issues_count', -1)
-        stats['initial_issues'] = initial_issues_count
-        if verbose:
-            console.print(f"Initial verification issues found: {initial_issues_count}")
-            if initial_fix_result.get('explanation'):
-                console.print("Initial assessment explanation:")
-                console.print(initial_fix_result['explanation'])
-
-        # FIX: Add check for initial assessment error *before* checking success/budget
-        # Check if the fixer function returned its specific error state (None explanation/model)
-        if initial_fix_result.get('explanation') is None and initial_fix_result.get('model_name') is None:
-            error_msg = "Error: Fixer returned invalid/error state during initial assessment"
-            console.print(f"[bold red]{error_msg}. Aborting.[/bold red]")
-            stats['status_message'] = error_msg
-            stats['final_issues'] = -1  # Indicate unknown/error state
-            # Write final action log for error on initial check
-            final_log_entry = "<FinalActions>\n"
-            final_log_entry += f' <Error>{escape(error_msg)}</Error>\n'
-            final_log_entry += "</FinalActions>"
-            _write_log_entry(log_path, final_log_entry)
-            # Return failure state
-            return {
-                "success": False,
-                "final_program": initial_program_content,
-                "final_code": initial_code_content,
-                "total_attempts": 0,
-                "total_cost": total_cost,  # May be non-zero if error occurred after some cost
-                "model_name": model_name,  # May have been set before error
-                "statistics": stats,
-            }
-
-        # 3g: Initialize best iteration tracker
-        # Store original paths as the 'backup' for iteration 0
-        best_iteration = {
-            'attempt': 0,  # Use 0 for initial state
-            'program_backup': str(program_path),  # Path to original
-            'code_backup': str(code_path),  # Path to original
-            'issues': initial_issues_count if initial_issues_count != -1 else float('inf')
-        }
-        stats['best_iteration_num'] = 0
-        stats['best_iteration_issues'] = best_iteration['issues']
-
-        # 3h: Check for immediate success or budget exceeded
-        if initial_issues_count == 0:
-            console.print("[bold green]Initial check found 0 verification issues. No fixing loop needed.[/bold green]")
-            overall_success = True
-            stats['final_issues'] = 0
-            stats['status_message'] = 'Success on initial check'
-            stats['improvement_issues'] = 0
-            stats['improvement_percent'] = 100.0  # Reached target of 0 issues
-
-            # Write final action log for successful initial check
+        if skip_llm:
+            # Skip initial LLM assessment when max_attempts=0
+            console.print("[bold cyan]max_attempts=0: Skipping LLM assessment, proceeding to agentic fallback.[/bold cyan]")
+            # Set up state for skipping the LLM loop
+            stats['initial_issues'] = -1  # Unknown since we skipped assessment
+            stats['final_issues'] = -1
+            stats['best_iteration_num'] = -1
+            stats['best_iteration_issues'] = float('inf')
+            stats['status_message'] = 'Skipped LLM (max_attempts=0)'
+            stats['improvement_issues'] = 'N/A'
+            stats['improvement_percent'] = 'N/A'
+            overall_success = False  # Trigger agentic fallback
+            final_program_content = initial_program_content
+            final_code_content = initial_code_content
+            # Write log entry for skipped LLM
             final_log_entry = "<FinalActions>\n"
-            final_log_entry += f' <Action>
+            final_log_entry += f' <Action>Skipped LLM assessment and loop (max_attempts=0), proceeding to agentic fallback.</Action>\n'
             final_log_entry += "</FinalActions>"
             _write_log_entry(log_path, final_log_entry)
+            # Skip to final stats (the while loop below will also be skipped since 0 < 0 is False)
+            initial_issues_count = -1  # Sentinel: unknown/not applicable when LLM assessment is skipped; kept numeric for downstream comparisons
+        else:
+            if verbose:
+                console.print("Running initial assessment with fix_verification_errors...")
+            # Use actual strength/temp for realistic initial assessment
+            # Use cloud or local based on use_cloud parameter
+            if use_cloud:
+                try:
+                    initial_fix_result = cloud_verify_fix(
+                        program=initial_program_content,
+                        prompt=prompt,
+                        code=initial_code_content,
+                        output=initial_output,
+                        strength=strength,
+                        temperature=temperature,
+                        time_param=llm_time,
+                        verbose=verbose,
+                        language="python" if is_python else get_language(os.path.splitext(code_file)[1]),
+                    )
+                    if verbose:
+                        console.print(f"[cyan]Cloud verify fix completed.[/cyan]")
+                except (requests.exceptions.RequestException, RuntimeError) as cloud_err:
+                    # Cloud failed - fall back to local
+                    console.print(f"[yellow]Cloud verify fix failed: {cloud_err}. Falling back to local.[/yellow]")
+                    initial_fix_result = fix_verification_errors(
+                        program=initial_program_content,
+                        prompt=prompt,
+                        code=initial_code_content,
+                        output=initial_output,
+                        strength=strength,
+                        temperature=temperature,
+                        verbose=verbose,
+                        time=llm_time
+                    )
+            else:
+                initial_fix_result = fix_verification_errors(
+                    program=initial_program_content,
+                    prompt=prompt,
+                    code=initial_code_content,
+                    output=initial_output,
+                    strength=strength,
+                    temperature=temperature,
+                    verbose=verbose,
+                    time=llm_time  # Pass time
+                )
+            # 3e: Add cost
+            initial_cost = initial_fix_result.get('total_cost', 0.0)
+            total_cost += initial_cost
+            model_name = initial_fix_result.get('model_name')  # Capture model name early
+            if verbose:
+                console.print(f"Initial assessment cost: ${initial_cost:.6f}, Total cost: ${total_cost:.6f}")
 
-        #
-
-
-
-
-
-
-
-
-
-        #
-
-
-        "
-        "
-
-
-
-        "
+            # 3f: Extract initial issues
+            initial_issues_count = initial_fix_result.get('verification_issues_count', -1)
+            stats['initial_issues'] = initial_issues_count
+            if verbose:
+                console.print(f"Initial verification issues found: {initial_issues_count}")
+                if initial_fix_result.get('explanation'):
+                    console.print("Initial assessment explanation:")
+                    console.print(initial_fix_result['explanation'])
+
+        # The following checks only apply when we ran the LLM assessment (not skipped)
+        if not skip_llm:
+            # FIX: Add check for initial assessment error *before* checking success/budget
+            # Check if the fixer function returned its specific error state (None explanation/model)
+            if initial_fix_result.get('explanation') is None and initial_fix_result.get('model_name') is None:
+                error_msg = "Error: Fixer returned invalid/error state during initial assessment"
+                console.print(f"[bold red]{error_msg}. Aborting.[/bold red]")
+                stats['status_message'] = error_msg
+                stats['final_issues'] = -1  # Indicate unknown/error state
+                # Write final action log for error on initial check
+                final_log_entry = "<FinalActions>\n"
+                final_log_entry += f' <Error>{escape(error_msg)}</Error>\n'
+                final_log_entry += "</FinalActions>"
+                _write_log_entry(log_path, final_log_entry)
+                # Return failure state
+                return {
+                    "success": False,
+                    "final_program": initial_program_content,
+                    "final_code": initial_code_content,
+                    "total_attempts": 0,
+                    "total_cost": total_cost,  # May be non-zero if error occurred after some cost
+                    "model_name": model_name,  # May have been set before error
+                    "statistics": stats,
+                }
+
+            # 3g: Initialize best iteration tracker
+            # Store original paths as the 'backup' for iteration 0
+            best_iteration = {
+                'attempt': 0,  # Use 0 for initial state
+                'program_backup': str(program_path),  # Path to original
+                'code_backup': str(code_path),  # Path to original
+                'issues': initial_issues_count if initial_issues_count != -1 else float('inf')
             }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            stats['best_iteration_num'] = 0
+            stats['best_iteration_issues'] = best_iteration['issues']
+
+            # 3h: Check for immediate success or budget exceeded
+            if initial_issues_count == 0:
+                console.print("[bold green]Initial check found 0 verification issues. No fixing loop needed.[/bold green]")
+                overall_success = True
+                stats['final_issues'] = 0
+                stats['status_message'] = 'Success on initial check'
+                stats['improvement_issues'] = 0
+                stats['improvement_percent'] = 100.0  # Reached target of 0 issues
+
+                # Write final action log for successful initial check
+                final_log_entry = "<FinalActions>\n"
+                final_log_entry += f' <Action>Process finished successfully on initial check.</Action>\n'
+                final_log_entry += "</FinalActions>"
+                _write_log_entry(log_path, final_log_entry)
+
+                # Step 7 (early exit): Print stats
+                console.print("\n[bold]--- Final Statistics ---[/bold]")
+                console.print(f"Initial Issues: {stats['initial_issues']}")
+                console.print(f"Final Issues: {stats['final_issues']}")
+                console.print(f"Best Iteration: {stats['best_iteration_num']} (Issues: {stats['best_iteration_issues']})")
+                console.print(f"Improvement (Issues Reduced): {stats['improvement_issues']}")
+                console.print(f"Improvement (Percent Towards 0 Issues): {stats['improvement_percent']:.2f}%")
+                console.print(f"Overall Status: {stats['status_message']}")
+                console.print(f"Total Attempts Made: {attempts}")  # attempts is 0 here
+                console.print(f"Total Cost: ${total_cost:.6f}")
+                console.print(f"Model Used: {model_name or 'N/A'}")
+                # Step 8 (early exit): Return
+                return {
+                    "success": overall_success,
+                    "final_program": initial_program_content,
+                    "final_code": initial_code_content,
+                    "total_attempts": attempts,  # attempts is 0
+                    "total_cost": total_cost,
+                    "model_name": model_name,
+                    "statistics": stats,
+                }
+            elif total_cost >= budget:
+                console.print(f"[bold yellow]Budget ${budget:.4f} exceeded during initial assessment (Cost: ${total_cost:.4f}). Aborting.[/bold yellow]")
+                stats['status_message'] = 'Budget exceeded on initial check'
+                stats['final_issues'] = stats['initial_issues']  # Final issues same as initial
+
+                # Write final action log for budget exceeded on initial check
+                final_log_entry = "<FinalActions>\n"
+                final_log_entry += f' <Action>Budget exceeded on initial check.</Action>\n'
+                final_log_entry += "</FinalActions>"
+                _write_log_entry(log_path, final_log_entry)
+
+                # No changes made, return initial state
+                return {
+                    "success": False,
+                    "final_program": initial_program_content,
+                    "final_code": initial_code_content,
+                    "total_attempts": 0,
+                    "total_cost": total_cost,
+                    "model_name": model_name,
+                    "statistics": stats,
+                }
 
     except Exception as e:
         console.print(f"[bold red]Error during initial assessment with fix_verification_errors: {e}[/bold red]")
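
The initial-assessment dispatch above, and the identical per-iteration dispatch later in the loop body, follow one pattern: try the cloud endpoint, fall back to the local fixer on network or auth errors. A distilled sketch with the call sites abstracted into callables:

import requests

def fix_with_fallback(use_cloud, cloud_call, local_call):
    # cloud_call / local_call are zero-argument callables returning the fixer's result dict
    if use_cloud:
        try:
            return cloud_call()
        except (requests.exceptions.RequestException, RuntimeError) as err:
            print(f"Cloud verify fix failed: {err}. Falling back to local.")
    return local_call()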
@@ -404,9 +718,9 @@ def fix_verification_errors_loop(
         # code_contents = code_path.read_text(encoding="utf-8")
         # except IOError as e: ...
 
-        # 4d: Create backups
-        program_backup_path =
-        code_backup_path =
+        # 4d: Create backups in .pdd/backups/ (backup_dir already created above)
+        program_backup_path = backup_dir / f"program_{current_attempt}{program_path.suffix}"
+        code_backup_path = backup_dir / f"code_{current_attempt}{code_path.suffix}"
         try:
             # Copy from the *current* state before this iteration's fix
             program_path.write_text(program_contents, encoding="utf-8")  # Ensure file matches memory state
@@ -426,7 +740,7 @@ def fix_verification_errors_loop(
             stats['status_message'] = f'Error creating backups on attempt {current_attempt}'
             break  # Don't proceed without backups
 
-        # 4e: Call fix_verification_errors
+        # 4e: Call fix_verification_errors (cloud or local based on use_cloud parameter)
         iteration_log_xml += f' <InputsToFixer>\n'
         iteration_log_xml += f' <Program>{escape(program_contents)}</Program>\n'
         iteration_log_xml += f' <Code>{escape(code_contents)}</Code>\n'
@@ -438,16 +752,46 @@ def fix_verification_errors_loop(
         try:
             if verbose:
                 console.print("Calling fix_verification_errors...")
-
-
-
-
-
-
-
-
-
-
+            # Use cloud or local based on use_cloud parameter
+            if use_cloud:
+                try:
+                    fix_result = cloud_verify_fix(
+                        program=program_contents,
+                        prompt=prompt,
+                        code=code_contents,
+                        output=program_output,
+                        strength=strength,
+                        temperature=temperature,
+                        time_param=llm_time,
+                        verbose=verbose,
+                        language="python" if is_python else get_language(os.path.splitext(code_file)[1]),
+                    )
+                    if verbose:
+                        console.print(f"[cyan]Cloud verify fix completed.[/cyan]")
+                except (requests.exceptions.RequestException, RuntimeError) as cloud_err:
+                    # Cloud failed - fall back to local
+                    console.print(f"[yellow]Cloud verify fix failed: {cloud_err}. Falling back to local.[/yellow]")
+                    fix_result = fix_verification_errors(
+                        program=program_contents,
+                        prompt=prompt,
+                        code=code_contents,
+                        output=program_output,
+                        strength=strength,
+                        temperature=temperature,
+                        verbose=verbose,
+                        time=llm_time
+                    )
+            else:
+                fix_result = fix_verification_errors(
+                    program=program_contents,
+                    prompt=prompt,
+                    code=code_contents,
+                    output=program_output,
+                    strength=strength,
+                    temperature=temperature,
+                    verbose=verbose,
+                    time=llm_time  # Pass time
+                )
 
             # 4f: Add cost
             attempt_cost = fix_result.get('total_cost', 0.0)
@@ -593,6 +937,9 @@ def fix_verification_errors_loop(
 
             # Now, decide outcome based on issue count and verification status
             if secondary_verification_passed:
+                # Only track as "verification passed" if code was actually changed and verified
+                if code_updated:
+                    any_verification_passed = True  # Track that at least one verification passed
                 # Update best iteration if current attempt is better
                 if current_issues_count != -1 and current_issues_count < best_iteration['issues']:
                     if verbose:
@@ -735,8 +1082,14 @@ def fix_verification_errors_loop(
             if verbose:
                 console.print(f"Restored {program_path} from {best_program_path}")
                 console.print(f"Restored {code_path} from {best_code_path}")
-            #
-
+            # Only mark as success if verification actually passed
+            # (best_iteration is only updated when secondary verification passes,
+            # but we double-check with any_verification_passed for safety)
+            if any_verification_passed:
+                stats['final_issues'] = 0
+                overall_success = True
+            else:
+                stats['final_issues'] = best_iteration['issues']
         else:
             console.print(f"[bold red]Error: Backup files for best iteration {best_iteration['attempt']} not found! Cannot restore.[/bold red]")
             final_log_entry += f' <Error>Backup files for best iteration {best_iteration["attempt"]} not found.</Error>\n'
@@ -750,6 +1103,15 @@ def fix_verification_errors_loop(
            stats['status_message'] += f' - Error restoring best iteration: {e}'
            stats['final_issues'] = -1  # Indicate uncertainty
 
+    # If verification passed (even if issue count didn't decrease), consider it success
+    elif any_verification_passed:
+        console.print("[green]Verification passed. Keeping current state.[/green]")
+        final_log_entry += f' <Action>Verification passed; keeping current state.</Action>\n'
+        # Verification passed = code works, so final issues is effectively 0
+        stats['final_issues'] = 0
+        stats['status_message'] = 'Success - verification passed'
+        overall_success = True
+
     # If no improvement was made or recorded (best is still initial state or worse)
     elif best_iteration['attempt'] <= 0 or best_iteration['issues'] >= initial_issues_val:
         console.print("[yellow]No improvement recorded over the initial state. Restoring original files.[/yellow]")
@@ -864,6 +1226,36 @@ def fix_verification_errors_loop(
     if final_known and stats['final_issues'] != 0:
         overall_success = False
 
+    if not overall_success and agentic_fallback:
+        console.print(f"[bold yellow]Initiating agentic fallback (prompt_file={prompt_file!r})...[/bold yellow]")
+        agent_cwd = Path(prompt_file).parent if prompt_file else None
+        agent_success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_verify(
+            prompt_file=prompt_file,
+            code_file=code_file,
+            program_file=verification_program,
+            verification_log_file=verification_log_file,
+            verbose=verbose,
+            cwd=agent_cwd,
+        )
+        total_cost += agent_cost
+        if not agent_success:
+            console.print(f"[bold red]Agentic verify fallback failed: {agent_msg}[/bold red]")
+        if agent_changed_files:
+            console.print(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+            for f in agent_changed_files:
+                console.print(f"  • {f}")
+        if agent_success:
+            console.print("[bold green]Agentic fallback successful.[/bold green]")
+            overall_success = True
+            model_name = agent_model or model_name
+            try:
+                final_code_content = Path(code_file).read_text(encoding="utf-8")
+                final_program_content = Path(program_file).read_text(encoding="utf-8")
+            except Exception as e:
+                console.print(f"[yellow]Warning: Could not read files after successful agentic fix: {e}[/yellow]")
+        else:
+            console.print("[bold red]Agentic fallback failed.[/bold red]")
+
     return {
         "success": overall_success,
         "final_program": final_program_content,
@@ -872,153 +1264,4 @@ def fix_verification_errors_loop(
         "total_cost": total_cost,
         "model_name": model_name,
         "statistics": stats,
-    }
-
-# Example usage (requires setting up dummy files and potentially mocking fix_verification_errors)
-if __name__ == "__main__":
-    # Create dummy files for demonstration
-    # In a real scenario, these files would exist and contain actual code/programs.
-    console.print("[yellow]Setting up dummy files for demonstration...[/yellow]")
-    temp_dir = Path("./temp_fix_verification_loop")
-    temp_dir.mkdir(exist_ok=True)
-
-    program_file = temp_dir / "my_program.py"
-    code_file = temp_dir / "my_code_module.py"
-    verification_program_file = temp_dir / "verify_syntax.py"
-
-    program_file.write_text("""
-import my_code_module
-import sys
-# Simulate using the module and checking output
-val = int(sys.argv[1]) if len(sys.argv) > 1 else 5
-result = my_code_module.process(val)
-expected = val * 2
-print(f"Input: {val}")
-print(f"Result: {result}")
-print(f"Expected: {expected}")
-if result == expected:
-    print("VERIFICATION_SUCCESS")
-else:
-    print(f"VERIFICATION_FAILURE: Expected {expected}, got {result}")
-""", encoding="utf-8")
-
-    # Initial code with a bug
-    code_file.write_text("""
-# my_code_module.py
-def process(x):
-    # Bug: should be x * 2
-    return x + 2
-""", encoding="utf-8")
-
-    # Simple verification program (e.g., syntax check)
-    verification_program_file.write_text("""
-import sys
-import py_compile
-import os
-# Check syntax of the code file (passed as argument, but we'll hardcode for simplicity here)
-code_to_check = os.environ.get("CODE_FILE_TO_CHECK", "temp_fix_verification_loop/my_code_module.py")
-print(f"Checking syntax of: {code_to_check}")
-try:
-    py_compile.compile(code_to_check, doraise=True)
-    print("Syntax OK.")
-    sys.exit(0)  # Success
-except py_compile.PyCompileError as e:
-    print(f"Syntax Error: {e}")
-    sys.exit(1)  # Failure
-except Exception as e:
-    print(f"Verification Error: {e}")
-    sys.exit(1)  # Failure
-""", encoding="utf-8")
-    # Set environment variable for the verification script
-    os.environ["CODE_FILE_TO_CHECK"] = str(code_file.resolve())
-
-
-    # --- Mock fix_verification_errors ---
-    # This is crucial for testing without actual LLM calls / costs
-    # In a real test suite, use unittest.mock
-    _original_fix_verification_errors = fix_verification_errors
-    _call_count = 0
-
-    def mock_fix_verification_errors(program, prompt, code, output, strength, temperature, verbose):
-        global _call_count
-        _call_count += 1
-        cost = 0.001 * _call_count  # Simulate increasing cost
-        model = "mock_model_v1"
-        explanation = ["Detected deviation: Output shows 'Result: 7', 'Expected: 10'.", "Issue seems to be in the `process` function calculation."]
-        issues_count = 1  # Assume 1 issue initially
-
-        fixed_program = program  # Assume program doesn't need fixing
-        fixed_code = code
-
-        # Simulate fixing the code on the first *real* attempt (call_count == 2, as first is initial)
-        if "VERIFICATION_FAILURE" in output and _call_count >= 2:
-            explanation = ["Identified incorrect addition `x + 2`.", "Corrected to multiplication `x * 2` based on prompt intent and output mismatch."]
-            fixed_code = """
-# my_code_module.py
-def process(x):
-    # Fixed: should be x * 2
-    return x * 2
-"""
-            issues_count = 0  # Fixed!
-        elif "VERIFICATION_SUCCESS" in output:
-            explanation = ["Output indicates VERIFICATION_SUCCESS."]
-            issues_count = 0  # Already correct
-
-        return {
-            'explanation': explanation,
-            'fixed_program': fixed_program,
-            'fixed_code': fixed_code,
-            'total_cost': cost,
-            'model_name': model,
-            'verification_issues_count': issues_count,
-        }
-
-    # Replace the real function with the mock
-    # In package context, you might need to patch differently
-    # For this script execution:
-    # Note: This direct replacement might not work if the function is imported
-    # using `from .fix_verification_errors import fix_verification_errors`.
-    # A proper mock framework (`unittest.mock.patch`) is better.
-    # Let's assume for this example run, we can modify the global scope *before* the loop calls it.
-    # This is fragile. A better approach involves dependency injection or mocking frameworks.
-    # HACK: Re-assigning the imported name in the global scope of this script
-    globals()['fix_verification_errors'] = mock_fix_verification_errors
-
-
-    console.print("\n[bold blue]--- Running fix_verification_errors_loop (with mock) ---[/bold blue]")
-
-    # Example program_args: Pass input value 10 and another arg 5
-    # Note: The example program only uses the first arg sys.argv[1]
-    example_args = ["10", "another_arg"]
-
-    results = fix_verification_errors_loop(
-        program_file=str(program_file),
-        code_file=str(code_file),
-        prompt="Create a module 'my_code_module.py' with a function 'process(x)' that returns the input multiplied by 2.",
-        verification_program=str(verification_program_file),
-        strength=0.5,
-        temperature=0.1,
-        max_attempts=3,
-        budget=0.10,  # Set a budget
-        verification_log_file=str(temp_dir / "test_verification.log"),
-        verbose=True,
-        program_args=example_args
-    )
-
-    console.print("\n[bold blue]--- Loop Finished ---[/bold blue]")
-    console.print(f"Success: {results['success']}")
-    console.print(f"Total Attempts: {results['total_attempts']}")
-    console.print(f"Total Cost: ${results['total_cost']:.6f}")
-    console.print(f"Model Name: {results['model_name']}")
-    # console.print(f"Final Program:\n{results['final_program']}")  # Can be long
-    console.print(f"Final Code:\n{results['final_code']}")
-    console.print(f"Statistics:\n{results['statistics']}")
-
-    # Restore original function if needed elsewhere
-    globals()['fix_verification_errors'] = _original_fix_verification_errors
-
-    # Clean up dummy files
-    # console.print("\n[yellow]Cleaning up dummy files...[/yellow]")
-    # shutil.rmtree(temp_dir)
-    console.print(f"\n[yellow]Dummy files and logs are in: {temp_dir}[/yellow]")
-    console.print("[yellow]Please review the log file 'test_verification.log' inside that directory.[/yellow]")
+    }
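
The removed __main__ demo monkeypatched fix_verification_errors by reassigning globals(), and its own comments recommended unittest.mock instead. A sketch of that recommended form (the patch target assumes the module path used in this wheel):

from unittest.mock import patch

fake_result = {
    "explanation": ["Output matches the prompt intent."],
    "fixed_program": "...",
    "fixed_code": "...",
    "total_cost": 0.001,
    "model_name": "mock_model_v1",
    "verification_issues_count": 0,
}
with patch("pdd.fix_verification_errors_loop.fix_verification_errors", return_value=fake_result):
    ...  # call fix_verification_errors_loop(...) as in the usage sketch above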