pdd-cli 0.0.45__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdd/__init__.py +4 -4
- pdd/agentic_common.py +863 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_fix.py +1179 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +370 -0
- pdd/agentic_verify.py +183 -0
- pdd/auto_deps_main.py +15 -5
- pdd/auto_include.py +63 -5
- pdd/bug_main.py +3 -2
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +73 -21
- pdd/code_generator.py +58 -18
- pdd/code_generator_main.py +672 -25
- pdd/commands/__init__.py +42 -0
- pdd/commands/analysis.py +248 -0
- pdd/commands/fix.py +140 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +174 -0
- pdd/commands/misc.py +79 -0
- pdd/commands/modify.py +230 -0
- pdd/commands/report.py +144 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +258 -82
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +113 -11
- pdd/continue_generation.py +47 -7
- pdd/core/__init__.py +0 -0
- pdd/core/cli.py +503 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +63 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +44 -11
- pdd/data/language_format.csv +71 -63
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/fix_code_loop.py +330 -76
- pdd/fix_error_loop.py +207 -61
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +75 -18
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +306 -272
- pdd/fix_verification_main.py +28 -9
- pdd/generate_output_paths.py +93 -10
- pdd/generate_test.py +16 -5
- pdd/get_jwt_token.py +9 -2
- pdd/get_run_command.py +73 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +11 -3
- pdd/llm_invoke.py +1269 -103
- pdd/load_prompt_template.py +36 -10
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +10 -3
- pdd/preprocess.py +228 -15
- pdd/preprocess_main.py +8 -5
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +1071 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +100 -905
- pdd/prompts/detect_change_LLM.prompt +122 -20
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +4 -2
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +8 -0
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +21 -6
- pdd/prompts/increase_tests_LLM.prompt +1 -5
- pdd/prompts/insert_includes_LLM.prompt +228 -108
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/pytest_output.py +127 -12
- pdd/render_mermaid.py +236 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +49 -6
- pdd/sync_determine_operation.py +543 -98
- pdd/sync_main.py +81 -31
- pdd/sync_orchestration.py +1334 -751
- pdd/sync_tui.py +848 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +242 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +151 -61
- pdd/unfinished_prompt.py +49 -3
- pdd/update_main.py +549 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +19 -6
- pdd_cli-0.0.90.dist-info/RECORD +153 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.45.dist-info/RECORD +0 -116
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
pdd/fix_verification_errors_loop.py

@@ -27,6 +27,54 @@ except ImportError:
 
 from . import DEFAULT_TIME  # Import DEFAULT_TIME
 from .python_env_detector import detect_host_python_executable
+from .get_language import get_language
+from .agentic_langtest import default_verify_cmd_for
+from .agentic_verify import run_agentic_verify
+
+def _normalize_agentic_result(result):
+    """
+    Normalize run_agentic_verify result into: (success: bool, msg: str, cost: float, model: str, changed_files: List[str])
+    Handles older 2/3/4-tuple shapes used by tests/monkeypatches.
+    """
+    if isinstance(result, tuple):
+        if len(result) == 5:
+            ok, msg, cost, model, changed_files = result
+            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), list(changed_files or [])
+        if len(result) == 4:
+            ok, msg, cost, model = result
+            return bool(ok), str(msg), float(cost), str(model or "agentic-cli"), []
+        if len(result) == 3:
+            ok, msg, cost = result
+            return bool(ok), str(msg), float(cost), "agentic-cli", []
+        if len(result) == 2:
+            ok, msg = result
+            return bool(ok), str(msg), 0.0, "agentic-cli", []
+    # Fallback (shouldn't happen)
+    return False, "Invalid agentic result shape", 0.0, "agentic-cli", []
+
+def _safe_run_agentic_verify(*, prompt_file, code_file, program_file, verification_log_file, verbose=False, cwd=None):
+    """
+    Call (possibly monkeypatched) run_agentic_verify and normalize its return.
+
+    Note: cwd parameter is accepted for compatibility but not passed to run_agentic_verify,
+    as it determines the working directory from prompt_file.parent internally.
+    """
+    if not prompt_file:
+        return False, "Agentic verify requires a valid prompt file.", 0.0, "agentic-cli", []
+
+    try:
+        res = run_agentic_verify(
+            prompt_file=Path(prompt_file),
+            code_file=Path(code_file),
+            program_file=Path(program_file),
+            verification_log_file=Path(verification_log_file),
+            verbose=verbose,
+            quiet=not verbose,
+            # Note: cwd is not passed - run_agentic_verify uses prompt_file.parent as project root
+        )
+        return _normalize_agentic_result(res)
+    except Exception as e:
+        return False, f"Agentic verify failed: {e}", 0.0, "agentic-cli", []
 
 # Initialize Rich Console for pretty printing
 console = Console()
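
The helper pair above normalizes whatever shape run_agentic_verify returns into a single five-field tuple. A minimal sketch of the padding rules, with hypothetical inputs that are not part of the package:

    # Hypothetical calls; outputs follow the padding rules in _normalize_agentic_result.
    _normalize_agentic_result((True, "ok"))
    # -> (True, "ok", 0.0, "agentic-cli", [])
    _normalize_agentic_result((True, "ok", 0.25, None, ["pdd/foo.py"]))
    # -> (True, "ok", 0.25, "agentic-cli", ["pdd/foo.py"])
    _normalize_agentic_result("unexpected")
    # -> (False, "Invalid agentic result shape", 0.0, "agentic-cli", [])
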
@@ -56,7 +104,7 @@ def _run_program(
     command.extend(args)
 
     try:
-        # Run from staging root directory instead of examples/
+        # Run from staging root directory instead of examples/
         # This allows imports from both pdd/ and examples/ subdirectories
         staging_root = program_path.parent.parent  # Go up from examples/ to staging root
 
@@ -95,6 +143,7 @@ def fix_verification_errors_loop(
     program_file: str,
     code_file: str,
     prompt: str,
+    prompt_file: str,
     verification_program: str,
     strength: float,
     temperature: float,
@@ -105,7 +154,8 @@ def fix_verification_errors_loop(
     output_program_path: Optional[str] = None,
     verbose: bool = False,
     program_args: Optional[list[str]] = None,
-    llm_time: float = DEFAULT_TIME  # Add time parameter
+    llm_time: float = DEFAULT_TIME,  # Add time parameter
+    agentic_fallback: bool = True,
 ) -> Dict[str, Any]:
     """
     Attempts to fix errors in a code file based on program execution output
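
The 0.0.90 signature adds a prompt_file argument and an agentic_fallback switch. A hedged sketch of a call against the new keyword surface; all paths and values here are placeholders, not files from the package:

    # Placeholder paths; illustrates the updated signature shown above.
    prompt_text = "Implement process(x) returning x * 2."  # illustrative prompt
    result = fix_verification_errors_loop(
        program_file="examples/run_module.py",
        code_file="src/module.py",
        prompt=prompt_text,
        prompt_file="prompts/module_python.prompt",   # new in 0.0.90
        verification_program="examples/verify_module.py",
        strength=0.5,
        temperature=0.0,
        max_attempts=3,
        budget=1.0,
        verification_log_file="logs/verify.log",
        agentic_fallback=True,                        # new flag; defaults to True
    )
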
@@ -115,6 +165,7 @@ def fix_verification_errors_loop(
         program_file: Path to the Python program exercising the code.
         code_file: Path to the code file being tested/verified.
         prompt: The prompt defining the intended behavior.
+        prompt_file: Path to the prompt file.
         verification_program: Path to a secondary program to verify code changes.
         strength: LLM model strength (0.0 to 1.0).
         temperature: LLM temperature (0.0 to 1.0).
@@ -126,6 +177,7 @@ def fix_verification_errors_loop(
         verbose: Enable verbose logging (default: False).
         program_args: Optional list of command-line arguments for the program_file.
         llm_time: Time parameter for fix_verification_errors calls (default: DEFAULT_TIME).
+        agentic_fallback: Enable agentic fallback if the primary fix mechanism fails.
 
     Returns:
         A dictionary containing:
@@ -137,6 +189,61 @@ def fix_verification_errors_loop(
             'model_name': str | None - Name of the LLM model used.
             'statistics': dict - Detailed statistics about the process.
     """
+    is_python = str(code_file).lower().endswith(".py")
+    if not is_python:
+        # For non-Python files, run the verification program to get an initial error state
+        console.print(f"[cyan]Non-Python target detected. Running verification program to get initial state...[/cyan]")
+        lang = get_language(os.path.splitext(code_file)[1])
+        verify_cmd = default_verify_cmd_for(lang, verification_program)
+        if not verify_cmd:
+            raise ValueError(f"No default verification command for language: {lang}")
+
+        verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, shell=True)
+        pytest_output = (verify_result.stdout or "") + "\n" + (verify_result.stderr or "")
+        console.print("[cyan]Non-Python target detected. Triggering agentic fallback...[/cyan]")
+        verification_log_path = Path(verification_log_file)
+        verification_log_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(verification_log_path, "w") as f:
+            f.write(pytest_output)
+
+        agent_cwd = Path(prompt_file).parent if prompt_file else None
+        console.print(f"[cyan]Attempting agentic verify fallback (prompt_file={prompt_file!r})...[/cyan]")
+        success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_verify(
+            prompt_file=prompt_file,
+            code_file=code_file,
+            program_file=verification_program,
+            verification_log_file=verification_log_file,
+            verbose=verbose,
+            cwd=agent_cwd,
+        )
+        if not success:
+            console.print(f"[bold red]Agentic verify fallback failed: {agent_msg}[/bold red]")
+        if agent_changed_files:
+            console.print(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+            for f in agent_changed_files:
+                console.print(f" • {f}")
+        final_program = ""
+        final_code = ""
+        try:
+            with open(verification_program, "r") as f:
+                final_program = f.read()
+        except Exception:
+            pass
+        try:
+            with open(code_file, "r") as f:
+                final_code = f.read()
+        except Exception:
+            pass
+        return {
+            "success": success,
+            "final_program": final_program,
+            "final_code": final_code,
+            "total_attempts": 1,
+            "total_cost": agent_cost,
+            "model_name": agent_model,
+            "statistics": {},
+        }
+
     program_path = Path(program_file).resolve()
     code_path = Path(code_file).resolve()
     verification_program_path = Path(verification_program).resolve()
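
This early path returns after a single agentic pass instead of entering the LLM loop. A sketch of the dict it hands back, with illustrative values (the cost and model come from the agent run; statistics stays empty on this path):

    # Illustrative early-return shape for a non-Python code_file.
    result = {
        "success": True,
        "final_program": "<verification_program contents>",
        "final_code": "<code_file contents>",
        "total_attempts": 1,          # a single agentic pass
        "total_cost": 0.03,           # agent_cost (placeholder value)
        "model_name": "agentic-cli",  # default label when the agent reports none
        "statistics": {},
    }
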
@@ -158,9 +265,9 @@ def fix_verification_errors_loop(
     if not 0.0 <= temperature <= 1.0:
         console.print(f"[bold red]Error: Temperature must be between 0.0 and 1.0.[/bold red]")
         return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
-    #
-    if max_attempts
-        console.print(f"[bold red]Error: Max attempts must be
+    # max_attempts must be non-negative (0 is valid - skips LLM loop, goes straight to agentic mode)
+    if max_attempts < 0:
+        console.print(f"[bold red]Error: Max attempts must be non-negative.[/bold red]")
         return {"success": False, "final_program": "", "final_code": "", "total_attempts": 0, "total_cost": 0.0, "model_name": None, "statistics": {}}
     if budget < 0:
         console.print(f"[bold red]Error: Budget cannot be negative.[/bold red]")
@@ -182,6 +289,7 @@ def fix_verification_errors_loop(
     total_cost = 0.0
     model_name: Optional[str] = None
     overall_success = False
+    any_verification_passed = False  # Track if ANY iteration passed secondary verification
     best_iteration = {
         'attempt': -1,  # 0 represents initial state
         'program_backup': None,
@@ -232,128 +340,155 @@ def fix_verification_errors_loop(
     initial_log_entry += '</InitialState>'
     _write_log_entry(log_path, initial_log_entry)
 
+    # 3c: Check if skipping LLM assessment (max_attempts=0 means skip to agentic fallback)
+    skip_llm = (max_attempts == 0)
+
     # 3d: Call fix_verification_errors for initial assessment
     try:
-        if verbose:
-            console.print("Running initial assessment with fix_verification_errors...")
-        # Use actual strength/temp for realistic initial assessment
-        initial_fix_result = fix_verification_errors(
-            program=initial_program_content,
-            prompt=prompt,
-            code=initial_code_content,
-            output=initial_output,
-            strength=strength,
-            temperature=temperature,
-            verbose=verbose,
-            time=llm_time # Pass time
-        )
-        # 3e: Add cost
-        initial_cost = initial_fix_result.get('total_cost', 0.0)
-        total_cost += initial_cost
-        model_name = initial_fix_result.get('model_name') # Capture model name early
-        if verbose:
-            console.print(f"Initial assessment cost: ${initial_cost:.6f}, Total cost: ${total_cost:.6f}")
-
-        # 3f: Extract initial issues
-        initial_issues_count = initial_fix_result.get('verification_issues_count', -1)
-        stats['initial_issues'] = initial_issues_count
-        if verbose:
-            console.print(f"Initial verification issues found: {initial_issues_count}")
-            if initial_fix_result.get('explanation'):
-                console.print("Initial assessment explanation:")
-                console.print(initial_fix_result['explanation'])
-
-        # FIX: Add check for initial assessment error *before* checking success/budget
-        # Check if the fixer function returned its specific error state (None explanation/model)
-        if initial_fix_result.get('explanation') is None and initial_fix_result.get('model_name') is None:
-            error_msg = "Error: Fixer returned invalid/error state during initial assessment"
-            console.print(f"[bold red]{error_msg}. Aborting.[/bold red]")
-            stats['status_message'] = error_msg
-            stats['final_issues'] = -1 # Indicate unknown/error state
-            # Write final action log for error on initial check
-            final_log_entry = "<FinalActions>\n"
-            final_log_entry += f' <Error>{escape(error_msg)}</Error>\n'
-            final_log_entry += "</FinalActions>"
-            _write_log_entry(log_path, final_log_entry)
-            # Return failure state
-            return {
-                "success": False,
-                "final_program": initial_program_content,
-                "final_code": initial_code_content,
-                "total_attempts": 0,
-                "total_cost": total_cost, # May be non-zero if error occurred after some cost
-                "model_name": model_name, # May have been set before error
-                "statistics": stats,
-            }
-
-        # 3g: Initialize best iteration tracker
-        # Store original paths as the 'backup' for iteration 0
-        best_iteration = {
-            'attempt': 0, # Use 0 for initial state
-            'program_backup': str(program_path), # Path to original
-            'code_backup': str(code_path), # Path to original
-            'issues': initial_issues_count if initial_issues_count != -1 else float('inf')
-        }
-        stats['best_iteration_num'] = 0
-        stats['best_iteration_issues'] = best_iteration['issues']
-
-        # 3h: Check for immediate success or budget exceeded
-        if initial_issues_count == 0:
-            console.print("[bold green]Initial check found 0 verification issues. No fixing loop needed.[/bold green]")
-            overall_success = True
-            stats['final_issues'] = 0
-            stats['status_message'] = 'Success on initial check'
-            stats['improvement_issues'] = 0
-            stats['improvement_percent'] = 100.0 # Reached target of 0 issues
-
-            # Write final action log for successful initial check
+        if skip_llm:
+            # Skip initial LLM assessment when max_attempts=0
+            console.print("[bold cyan]max_attempts=0: Skipping LLM assessment, proceeding to agentic fallback.[/bold cyan]")
+            # Set up state for skipping the LLM loop
+            stats['initial_issues'] = -1 # Unknown since we skipped assessment
+            stats['final_issues'] = -1
+            stats['best_iteration_num'] = -1
+            stats['best_iteration_issues'] = float('inf')
+            stats['status_message'] = 'Skipped LLM (max_attempts=0)'
+            stats['improvement_issues'] = 'N/A'
+            stats['improvement_percent'] = 'N/A'
+            overall_success = False # Trigger agentic fallback
+            final_program_content = initial_program_content
+            final_code_content = initial_code_content
+            # Write log entry for skipped LLM
             final_log_entry = "<FinalActions>\n"
-            final_log_entry += f' <Action>Process finished successfully on initial check.</Action>\n'
+            final_log_entry += f' <Action>Skipped LLM assessment and loop (max_attempts=0), proceeding to agentic fallback.</Action>\n'
             final_log_entry += "</FinalActions>"
             _write_log_entry(log_path, final_log_entry)
+            # Skip to final stats (the while loop below will also be skipped since 0 < 0 is False)
+            initial_issues_count = -1 # Sentinel: unknown/not applicable when LLM assessment is skipped; kept numeric for downstream comparisons
+        else:
+            if verbose:
+                console.print("Running initial assessment with fix_verification_errors...")
+            # Use actual strength/temp for realistic initial assessment
+            initial_fix_result = fix_verification_errors(
+                program=initial_program_content,
+                prompt=prompt,
+                code=initial_code_content,
+                output=initial_output,
+                strength=strength,
+                temperature=temperature,
+                verbose=verbose,
+                time=llm_time # Pass time
+            )
+            # 3e: Add cost
+            initial_cost = initial_fix_result.get('total_cost', 0.0)
+            total_cost += initial_cost
+            model_name = initial_fix_result.get('model_name') # Capture model name early
+            if verbose:
+                console.print(f"Initial assessment cost: ${initial_cost:.6f}, Total cost: ${total_cost:.6f}")
 
-            # Step 7 (early exit): Print stats
-            console.print("\n[bold]--- Final Statistics ---[/bold]")
-            console.print(f"Initial Issues: {stats['initial_issues']}")
-            console.print(f"Final Issues: {stats['final_issues']}")
-            console.print(f"Best Iteration: {stats['best_iteration_num']} (Issues: {stats['best_iteration_issues']})")
-            console.print(f"Improvement (Issues Reduced): {stats['improvement_issues']}")
-            console.print(f"Improvement (Percent Towards 0 Issues): {stats['improvement_percent']:.2f}%")
-            console.print(f"Overall Status: {stats['status_message']}")
-            console.print(f"Total Attempts Made: {attempts}") # attempts is 0 here
-            console.print(f"Total Cost: ${total_cost:.6f}")
-            console.print(f"Model Used: {model_name or 'N/A'}")
-            # Step 8 (early exit): Return
-            return {
-                "success": overall_success,
-                "final_program": initial_program_content,
-                "final_code": initial_code_content,
-                "total_attempts": attempts, # attempts is 0
-                "total_cost": total_cost,
-                "model_name": model_name,
-                "statistics": stats,
+            # 3f: Extract initial issues
+            initial_issues_count = initial_fix_result.get('verification_issues_count', -1)
+            stats['initial_issues'] = initial_issues_count
+            if verbose:
+                console.print(f"Initial verification issues found: {initial_issues_count}")
+                if initial_fix_result.get('explanation'):
+                    console.print("Initial assessment explanation:")
+                    console.print(initial_fix_result['explanation'])
+
+        # The following checks only apply when we ran the LLM assessment (not skipped)
+        if not skip_llm:
+            # FIX: Add check for initial assessment error *before* checking success/budget
+            # Check if the fixer function returned its specific error state (None explanation/model)
+            if initial_fix_result.get('explanation') is None and initial_fix_result.get('model_name') is None:
+                error_msg = "Error: Fixer returned invalid/error state during initial assessment"
+                console.print(f"[bold red]{error_msg}. Aborting.[/bold red]")
+                stats['status_message'] = error_msg
+                stats['final_issues'] = -1 # Indicate unknown/error state
+                # Write final action log for error on initial check
+                final_log_entry = "<FinalActions>\n"
+                final_log_entry += f' <Error>{escape(error_msg)}</Error>\n'
+                final_log_entry += "</FinalActions>"
+                _write_log_entry(log_path, final_log_entry)
+                # Return failure state
+                return {
+                    "success": False,
+                    "final_program": initial_program_content,
+                    "final_code": initial_code_content,
+                    "total_attempts": 0,
+                    "total_cost": total_cost, # May be non-zero if error occurred after some cost
+                    "model_name": model_name, # May have been set before error
+                    "statistics": stats,
+                }
+
+            # 3g: Initialize best iteration tracker
+            # Store original paths as the 'backup' for iteration 0
+            best_iteration = {
+                'attempt': 0, # Use 0 for initial state
+                'program_backup': str(program_path), # Path to original
+                'code_backup': str(code_path), # Path to original
+                'issues': initial_issues_count if initial_issues_count != -1 else float('inf')
             }
-
-        elif total_cost >= budget:
-            console.print(f"[bold yellow]Budget ${budget:.4f} exceeded during initial assessment (Cost: ${total_cost:.4f}). Aborting.[/bold yellow]")
-            stats['status_message'] = 'Budget exceeded on initial check'
-            stats['final_issues'] = stats['initial_issues'] # Final issues same as initial
-
-            # Write final action log for budget exceeded on initial check
-            final_log_entry = "<FinalActions>\n"
-            final_log_entry += f' <Action>Budget exceeded on initial check.</Action>\n'
-            final_log_entry += "</FinalActions>"
-            _write_log_entry(log_path, final_log_entry)
-
-            # No changes made, return initial state
-            return {
-                "success": False,
-                "final_program": initial_program_content,
-                "final_code": initial_code_content,
-                "total_attempts": 0,
-                "total_cost": total_cost,
-                "model_name": model_name,
-                "statistics": stats,
-            }
+            stats['best_iteration_num'] = 0
+            stats['best_iteration_issues'] = best_iteration['issues']
+
+            # 3h: Check for immediate success or budget exceeded
+            if initial_issues_count == 0:
+                console.print("[bold green]Initial check found 0 verification issues. No fixing loop needed.[/bold green]")
+                overall_success = True
+                stats['final_issues'] = 0
+                stats['status_message'] = 'Success on initial check'
+                stats['improvement_issues'] = 0
+                stats['improvement_percent'] = 100.0 # Reached target of 0 issues
+
+                # Write final action log for successful initial check
+                final_log_entry = "<FinalActions>\n"
+                final_log_entry += f' <Action>Process finished successfully on initial check.</Action>\n'
+                final_log_entry += "</FinalActions>"
+                _write_log_entry(log_path, final_log_entry)
+
+                # Step 7 (early exit): Print stats
+                console.print("\n[bold]--- Final Statistics ---[/bold]")
+                console.print(f"Initial Issues: {stats['initial_issues']}")
+                console.print(f"Final Issues: {stats['final_issues']}")
+                console.print(f"Best Iteration: {stats['best_iteration_num']} (Issues: {stats['best_iteration_issues']})")
+                console.print(f"Improvement (Issues Reduced): {stats['improvement_issues']}")
+                console.print(f"Improvement (Percent Towards 0 Issues): {stats['improvement_percent']:.2f}%")
+                console.print(f"Overall Status: {stats['status_message']}")
+                console.print(f"Total Attempts Made: {attempts}") # attempts is 0 here
+                console.print(f"Total Cost: ${total_cost:.6f}")
+                console.print(f"Model Used: {model_name or 'N/A'}")
+                # Step 8 (early exit): Return
+                return {
+                    "success": overall_success,
+                    "final_program": initial_program_content,
+                    "final_code": initial_code_content,
+                    "total_attempts": attempts, # attempts is 0
+                    "total_cost": total_cost,
+                    "model_name": model_name,
+                    "statistics": stats,
+                }
+            elif total_cost >= budget:
+                console.print(f"[bold yellow]Budget ${budget:.4f} exceeded during initial assessment (Cost: ${total_cost:.4f}). Aborting.[/bold yellow]")
+                stats['status_message'] = 'Budget exceeded on initial check'
+                stats['final_issues'] = stats['initial_issues'] # Final issues same as initial
+
+                # Write final action log for budget exceeded on initial check
+                final_log_entry = "<FinalActions>\n"
+                final_log_entry += f' <Action>Budget exceeded on initial check.</Action>\n'
+                final_log_entry += "</FinalActions>"
+                _write_log_entry(log_path, final_log_entry)
+
+                # No changes made, return initial state
+                return {
+                    "success": False,
+                    "final_program": initial_program_content,
+                    "final_code": initial_code_content,
+                    "total_attempts": 0,
+                    "total_cost": total_cost,
+                    "model_name": model_name,
+                    "statistics": stats,
+                }
 
     except Exception as e:
         console.print(f"[bold red]Error during initial assessment with fix_verification_errors: {e}[/bold red]")
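
The skip_llm flag short-circuits both the initial assessment and the retry loop. A brief, standalone sketch of the control flow this enables (values illustrative):

    # With max_attempts=0, the gate below is True: no initial LLM assessment runs,
    # `while attempts < max_attempts:` never iterates (0 < 0 is False),
    # and the run falls through to the agentic fallback at the end.
    max_attempts = 0
    skip_llm = (max_attempts == 0)
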
@@ -593,6 +728,9 @@ def fix_verification_errors_loop(
 
         # Now, decide outcome based on issue count and verification status
         if secondary_verification_passed:
+            # Only track as "verification passed" if code was actually changed and verified
+            if code_updated:
+                any_verification_passed = True # Track that at least one verification passed
             # Update best iteration if current attempt is better
             if current_issues_count != -1 and current_issues_count < best_iteration['issues']:
                 if verbose:
@@ -735,8 +873,14 @@ def fix_verification_errors_loop(
                 if verbose:
                     console.print(f"Restored {program_path} from {best_program_path}")
                     console.print(f"Restored {code_path} from {best_code_path}")
-                #
-
+                # Only mark as success if verification actually passed
+                # (best_iteration is only updated when secondary verification passes,
+                # but we double-check with any_verification_passed for safety)
+                if any_verification_passed:
+                    stats['final_issues'] = 0
+                    overall_success = True
+                else:
+                    stats['final_issues'] = best_iteration['issues']
             else:
                 console.print(f"[bold red]Error: Backup files for best iteration {best_iteration['attempt']} not found! Cannot restore.[/bold red]")
                 final_log_entry += f' <Error>Backup files for best iteration {best_iteration["attempt"]} not found.</Error>\n'
@@ -750,6 +894,15 @@ def fix_verification_errors_loop(
             stats['status_message'] += f' - Error restoring best iteration: {e}'
             stats['final_issues'] = -1 # Indicate uncertainty
 
+    # If verification passed (even if issue count didn't decrease), consider it success
+    elif any_verification_passed:
+        console.print("[green]Verification passed. Keeping current state.[/green]")
+        final_log_entry += f' <Action>Verification passed; keeping current state.</Action>\n'
+        # Verification passed = code works, so final issues is effectively 0
+        stats['final_issues'] = 0
+        stats['status_message'] = 'Success - verification passed'
+        overall_success = True
+
     # If no improvement was made or recorded (best is still initial state or worse)
     elif best_iteration['attempt'] <= 0 or best_iteration['issues'] >= initial_issues_val:
         console.print("[yellow]No improvement recorded over the initial state. Restoring original files.[/yellow]")
@@ -864,6 +1017,36 @@ def fix_verification_errors_loop(
     if final_known and stats['final_issues'] != 0:
         overall_success = False
 
+    if not overall_success and agentic_fallback:
+        console.print(f"[bold yellow]Initiating agentic fallback (prompt_file={prompt_file!r})...[/bold yellow]")
+        agent_cwd = Path(prompt_file).parent if prompt_file else None
+        agent_success, agent_msg, agent_cost, agent_model, agent_changed_files = _safe_run_agentic_verify(
+            prompt_file=prompt_file,
+            code_file=code_file,
+            program_file=verification_program,
+            verification_log_file=verification_log_file,
+            verbose=verbose,
+            cwd=agent_cwd,
+        )
+        total_cost += agent_cost
+        if not agent_success:
+            console.print(f"[bold red]Agentic verify fallback failed: {agent_msg}[/bold red]")
+        if agent_changed_files:
+            console.print(f"[cyan]Agent modified {len(agent_changed_files)} file(s):[/cyan]")
+            for f in agent_changed_files:
+                console.print(f" • {f}")
+        if agent_success:
+            console.print("[bold green]Agentic fallback successful.[/bold green]")
+            overall_success = True
+            model_name = agent_model or model_name
+            try:
+                final_code_content = Path(code_file).read_text(encoding="utf-8")
+                final_program_content = Path(program_file).read_text(encoding="utf-8")
+            except Exception as e:
+                console.print(f"[yellow]Warning: Could not read files after successful agentic fix: {e}[/yellow]")
+        else:
+            console.print("[bold red]Agentic fallback failed.[/bold red]")
+
     return {
         "success": overall_success,
         "final_program": final_program_content,
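
The fallback consumes the normalized five-tuple from _safe_run_agentic_verify (the private helper added in the first hunk) and folds the agent's cost into the loop's budget accounting. A hedged, standalone sketch; the import of a private helper and all paths here are illustrative only:

    from pdd.fix_verification_errors_loop import _safe_run_agentic_verify

    total_cost = 0.0
    ok, msg, cost, model, changed = _safe_run_agentic_verify(
        prompt_file="prompts/module_python.prompt",
        code_file="src/module.py",
        program_file="examples/verify_module.py",
        verification_log_file="logs/verify.log",
        verbose=False,
        cwd=None,  # accepted for compatibility; the helper ignores it
    )
    total_cost += cost  # agent cost counts against the same budget
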
@@ -872,153 +1055,4 @@ def fix_verification_errors_loop(
         "total_cost": total_cost,
         "model_name": model_name,
         "statistics": stats,
-    }
-
-# Example usage (requires setting up dummy files and potentially mocking fix_verification_errors)
-if __name__ == "__main__":
-    # Create dummy files for demonstration
-    # In a real scenario, these files would exist and contain actual code/programs.
-    console.print("[yellow]Setting up dummy files for demonstration...[/yellow]")
-    temp_dir = Path("./temp_fix_verification_loop")
-    temp_dir.mkdir(exist_ok=True)
-
-    program_file = temp_dir / "my_program.py"
-    code_file = temp_dir / "my_code_module.py"
-    verification_program_file = temp_dir / "verify_syntax.py"
-
-    program_file.write_text("""
-import my_code_module
-import sys
-# Simulate using the module and checking output
-val = int(sys.argv[1]) if len(sys.argv) > 1 else 5
-result = my_code_module.process(val)
-expected = val * 2
-print(f"Input: {val}")
-print(f"Result: {result}")
-print(f"Expected: {expected}")
-if result == expected:
-    print("VERIFICATION_SUCCESS")
-else:
-    print(f"VERIFICATION_FAILURE: Expected {expected}, got {result}")
-""", encoding="utf-8")
-
-    # Initial code with a bug
-    code_file.write_text("""
-# my_code_module.py
-def process(x):
-    # Bug: should be x * 2
-    return x + 2
-""", encoding="utf-8")
-
-    # Simple verification program (e.g., syntax check)
-    verification_program_file.write_text("""
-import sys
-import py_compile
-import os
-# Check syntax of the code file (passed as argument, but we'll hardcode for simplicity here)
-code_to_check = os.environ.get("CODE_FILE_TO_CHECK", "temp_fix_verification_loop/my_code_module.py")
-print(f"Checking syntax of: {code_to_check}")
-try:
-    py_compile.compile(code_to_check, doraise=True)
-    print("Syntax OK.")
-    sys.exit(0) # Success
-except py_compile.PyCompileError as e:
-    print(f"Syntax Error: {e}")
-    sys.exit(1) # Failure
-except Exception as e:
-    print(f"Verification Error: {e}")
-    sys.exit(1) # Failure
-""", encoding="utf-8")
-    # Set environment variable for the verification script
-    os.environ["CODE_FILE_TO_CHECK"] = str(code_file.resolve())
-
-
-    # --- Mock fix_verification_errors ---
-    # This is crucial for testing without actual LLM calls / costs
-    # In a real test suite, use unittest.mock
-    _original_fix_verification_errors = fix_verification_errors
-    _call_count = 0
-
-    def mock_fix_verification_errors(program, prompt, code, output, strength, temperature, verbose):
-        global _call_count
-        _call_count += 1
-        cost = 0.001 * _call_count # Simulate increasing cost
-        model = "mock_model_v1"
-        explanation = ["Detected deviation: Output shows 'Result: 7', 'Expected: 10'.", "Issue seems to be in the `process` function calculation."]
-        issues_count = 1 # Assume 1 issue initially
-
-        fixed_program = program # Assume program doesn't need fixing
-        fixed_code = code
-
-        # Simulate fixing the code on the first *real* attempt (call_count == 2, as first is initial)
-        if "VERIFICATION_FAILURE" in output and _call_count >= 2:
-            explanation = ["Identified incorrect addition `x + 2`.", "Corrected to multiplication `x * 2` based on prompt intent and output mismatch."]
-            fixed_code = """
-# my_code_module.py
-def process(x):
-    # Fixed: should be x * 2
-    return x * 2
-"""
-            issues_count = 0 # Fixed!
-        elif "VERIFICATION_SUCCESS" in output:
-            explanation = ["Output indicates VERIFICATION_SUCCESS."]
-            issues_count = 0 # Already correct
-
-        return {
-            'explanation': explanation,
-            'fixed_program': fixed_program,
-            'fixed_code': fixed_code,
-            'total_cost': cost,
-            'model_name': model,
-            'verification_issues_count': issues_count,
-        }
-
-    # Replace the real function with the mock
-    # In package context, you might need to patch differently
-    # For this script execution:
-    # Note: This direct replacement might not work if the function is imported
-    # using `from .fix_verification_errors import fix_verification_errors`.
-    # A proper mock framework (`unittest.mock.patch`) is better.
-    # Let's assume for this example run, we can modify the global scope *before* the loop calls it.
-    # This is fragile. A better approach involves dependency injection or mocking frameworks.
-    # HACK: Re-assigning the imported name in the global scope of this script
-    globals()['fix_verification_errors'] = mock_fix_verification_errors
-
-
-    console.print("\n[bold blue]--- Running fix_verification_errors_loop (with mock) ---[/bold blue]")
-
-    # Example program_args: Pass input value 10 and another arg 5
-    # Note: The example program only uses the first arg sys.argv[1]
-    example_args = ["10", "another_arg"]
-
-    results = fix_verification_errors_loop(
-        program_file=str(program_file),
-        code_file=str(code_file),
-        prompt="Create a module 'my_code_module.py' with a function 'process(x)' that returns the input multiplied by 2.",
-        verification_program=str(verification_program_file),
-        strength=0.5,
-        temperature=0.1,
-        max_attempts=3,
-        budget=0.10, # Set a budget
-        verification_log_file=str(temp_dir / "test_verification.log"),
-        verbose=True,
-        program_args=example_args
-    )
-
-    console.print("\n[bold blue]--- Loop Finished ---[/bold blue]")
-    console.print(f"Success: {results['success']}")
-    console.print(f"Total Attempts: {results['total_attempts']}")
-    console.print(f"Total Cost: ${results['total_cost']:.6f}")
-    console.print(f"Model Name: {results['model_name']}")
-    # console.print(f"Final Program:\n{results['final_program']}") # Can be long
-    console.print(f"Final Code:\n{results['final_code']}")
-    console.print(f"Statistics:\n{results['statistics']}")
-
-    # Restore original function if needed elsewhere
-    globals()['fix_verification_errors'] = _original_fix_verification_errors
-
-    # Clean up dummy files
-    # console.print("\n[yellow]Cleaning up dummy files...[/yellow]")
-    # shutil.rmtree(temp_dir)
-    console.print(f"\n[yellow]Dummy files and logs are in: {temp_dir}[/yellow]")
-    console.print("[yellow]Please review the log file 'test_verification.log' inside that directory.[/yellow]")
+    }