pdd-cli 0.0.25-py3-none-any.whl → 0.0.27-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pdd-cli might be problematic.
- pdd/__init__.py +10 -3
- pdd/bug_to_unit_test.py +1 -1
- pdd/cli.py +12 -3
- pdd/cli_1_0_2_0_20250510_000314.py +1054 -0
- pdd/cli_2_0_1_0_20250510_000314.py +1054 -0
- pdd/cli_3_0_1_0_20250510_000314.py +1054 -0
- pdd/cli_4_0_1_0_20250510_000314.py +1054 -0
- pdd/continue_generation.py +3 -1
- pdd/data/llm_model.csv +18 -17
- pdd/fix_main.py +3 -2
- pdd/fix_verification_errors.py +154 -109
- pdd/fix_verification_errors_loop.py +5 -1
- pdd/fix_verification_main.py +21 -1
- pdd/generate_output_paths.py +43 -2
- pdd/llm_invoke.py +1198 -353
- pdd/prompts/bug_to_unit_test_LLM.prompt +11 -11
- pdd/prompts/find_verification_errors_LLM.prompt +31 -18
- pdd/prompts/fix_verification_errors_LLM.prompt +25 -6
- pdd/prompts/trim_results_start_LLM.prompt +1 -1
- pdd/update_model_costs.py +446 -0
- {pdd_cli-0.0.25.dist-info → pdd_cli-0.0.27.dist-info}/METADATA +8 -16
- {pdd_cli-0.0.25.dist-info → pdd_cli-0.0.27.dist-info}/RECORD +26 -21
- {pdd_cli-0.0.25.dist-info → pdd_cli-0.0.27.dist-info}/WHEEL +1 -1
- {pdd_cli-0.0.25.dist-info → pdd_cli-0.0.27.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.25.dist-info → pdd_cli-0.0.27.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.25.dist-info → pdd_cli-0.0.27.dist-info}/top_level.txt +0 -0
pdd/continue_generation.py
CHANGED

@@ -11,9 +11,11 @@ from . import EXTRACTION_STRENGTH
 console = Console()
 
 class TrimResultsStartOutput(BaseModel):
+    explanation: str = Field(description="The explanation of how you determined what to cut out")
     code_block: str = Field(description="The trimmed code block from the start")
 
 class TrimResultsOutput(BaseModel):
+    explanation: str = Field(description="The explanation of the code block")
     trimmed_continued_generation: str = Field(description="The trimmed continuation of the generation")
 
 def continue_generation(
@@ -70,7 +72,7 @@ def continue_generation(
         trim_start_response = llm_invoke(
             prompt=processed_prompts['trim_start'],
             input_json={"LLM_OUTPUT": llm_output},
-            strength=0.
+            strength=0.75,
             temperature=0,
             output_pydantic=TrimResultsStartOutput,
             verbose=verbose
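Note: the hunk above adds an explanation field to both trim models and sets the trim-start strength to 0.75. A minimal sketch of what such a structured call looks like, assuming only the argument names and result keys visible in this diff (llm_invoke is PDD-internal, so this is an illustration, not the package's actual code):

```python
# Sketch only: mirrors the call shape shown in the hunk above. llm_invoke is
# passed in rather than imported, since its real module layout isn't shown here.
from pydantic import BaseModel, Field

class TrimResultsStartOutput(BaseModel):
    explanation: str = Field(description="How the start of the output was trimmed")
    code_block: str = Field(description="The trimmed code block from the start")

def trim_start(llm_invoke, processed_prompts, llm_output, verbose=False):
    response = llm_invoke(
        prompt=processed_prompts['trim_start'],
        input_json={"LLM_OUTPUT": llm_output},
        strength=0.75,                            # value pinned in this release
        temperature=0,
        output_pydantic=TrimResultsStartOutput,   # response parsed into the model
        verbose=verbose,
    )
    trimmed: TrimResultsStartOutput = response['result']
    return trimmed.code_block, response.get('cost', 0.0)
```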
pdd/data/llm_model.csv
CHANGED

@@ -1,17 +1,18 @@
-provider,model,input,output,coding_arena_elo,base_url,api_key,
-OpenAI,
-
-Anthropic,
-OpenAI,
-Google,
-
-Anthropic,claude-3-7-sonnet-20250219,3,15,
-Google,gemini-2.5-pro-
-OpenAI,
-
-
-OpenAI,
-
-OpenAI,
-
-
+provider,model,input,output,coding_arena_elo,base_url,api_key,max_reasoning_tokens,structured_output,reasoning_type
+OpenAI,gpt-4.1-nano,0.1,0.4,1249,,OPENAI_API_KEY,0,True,none
+xai,xai/grok-3-beta,3.0,15.0,1332,https://api.x.ai/v1,XAI_API_KEY,0,False,none
+Anthropic,claude-3-5-haiku-20241022,.8,4,1261,,ANTHROPIC_API_KEY,0,True,none
+OpenAI,deepseek/deepseek-chat,.27,1.1,1353,https://api.deepseek.com/beta,DEEPSEEK_API_KEY,0,False,none
+Google,vertex_ai/gemini-2.5-flash-preview-04-17,0.15,0.6,1330,,VERTEX_CREDENTIALS,0,True,effort
+Google,gemini-2.5-pro-exp-03-25,1.25,10.0,1360,,GOOGLE_API_KEY,0,True,none
+Anthropic,claude-3-7-sonnet-20250219,3.0,15.0,1340,,ANTHROPIC_API_KEY,64000,True,budget
+Google,vertex_ai/gemini-2.5-pro-preview-05-06,1.25,10.0,1361,,VERTEX_CREDENTIALS,0,True,none
+OpenAI,o4-mini,1.1,4.4,1333,,OPENAI_API_KEY,0,True,effort
+OpenAI,o3,10.0,40.0,1389,,OPENAI_API_KEY,0,True,effort
+OpenAI,gpt-4.1,2.0,8.0,1335,,OPENAI_API_KEY,0,True,none
+OpenAI,deepseek/deepseek-reasoner,0.55,2.19,1337,https://api.deepseek.com/beta,DEEPSEEK_API_KEY,0,False,none
+Fireworks,fireworks_ai/accounts/fireworks/models/deepseek-r1,3.0,8.0,1338,,FIREWORKS_API_KEY,0,False,none
+OpenAI,chatgpt-4o-latest,5,15,1369,,OPENAI_API_KEY,0,False,none
+Anthropic,bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0,3.0,15.0,1339,,,64000,True,budget
+OpenAI,azure/o4-mini,1.1,4.4,1334,,OPENAI_API_KEY,0,True,effort
+OpenAI,openai/mlx-community/Qwen3-30B-A3B-4bit,0,0,1293,http://localhost:8080,,0,False,none
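Note: the table gains three columns: max_reasoning_tokens, structured_output, and reasoning_type. A small sketch of reading the new columns with the standard library, assuming a local checkout path for the CSV:

```python
# Sketch: filter the updated model table by the new columns. Column names come
# from the header above; the file path assumes a local checkout of the package.
import csv

with open("pdd/data/llm_model.csv", newline="") as f:
    rows = list(csv.DictReader(f))

structured = [r["model"] for r in rows if r["structured_output"] == "True"]
budgeted = [r["model"] for r in rows if r["reasoning_type"] == "budget"]
print(structured)   # models flagged as supporting structured output
print(budgeted)     # e.g. the claude-3-7-sonnet rows with max_reasoning_tokens=64000
```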
pdd/fix_main.py
CHANGED

@@ -289,6 +289,7 @@ def fix_main(
         rprint(f"[bold red]Markup Error in fix_main:[/bold red]")
         rprint(escape(str(e)))
     else:
-        # Print other errors normally
-
+        # Print other errors normally, escaping the error string
+        from rich.markup import escape # Ensure escape is imported
+        rprint(f"[bold red]Error:[/bold red] {escape(str(e))}")
     sys.exit(1)
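Note: the new error branch escapes the exception text before handing it to Rich. A short illustration of why, using the real rich.markup.escape helper:

```python
# Exception messages can contain square brackets, which Rich would otherwise
# try to interpret as markup tags; escape() makes them print literally.
from rich import print as rprint
from rich.markup import escape

err = ValueError("expected [int] but got [list]")
rprint(f"[bold red]Error:[/bold red] {escape(str(err))}")
# Without escape(), "[int]" and "[list]" would be treated as markup and could
# fail to render as intended.
```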
pdd/fix_verification_errors.py
CHANGED

@@ -1,10 +1,22 @@
 import re
-from typing import Dict, Any
+from typing import Dict, Any, Optional
 from rich import print as rprint
 from rich.markdown import Markdown
+from pydantic import BaseModel, Field
 from .load_prompt_template import load_prompt_template
 from .llm_invoke import llm_invoke
 
+# Define Pydantic model for structured LLM output for VERIFICATION
+class VerificationOutput(BaseModel):
+    issues_count: int = Field(description="The number of issues found during verification.")
+    details: Optional[str] = Field(description="Detailed explanation of any discrepancies or issues found. Can be null or empty if issues_count is 0.", default=None)
+
+# Define Pydantic model for structured LLM output for FIXES
+class FixerOutput(BaseModel):
+    explanation: str = Field(description="Detailed explanation of the analysis and fixes applied.")
+    fixed_code: str = Field(description="The complete, runnable, and fixed code module.")
+    fixed_program: str = Field(description="The complete, runnable, and fixed program that uses the code module.")
+
 def fix_verification_errors(
     program: str,
     prompt: str,
@@ -41,18 +53,17 @@ def fix_verification_errors(
     verification_issues_count = 0
     verification_details = None
     fix_explanation = None
-    fixed_program = program
-    fixed_code = code
+    fixed_program = program # Initialize with original program
+    fixed_code = code # Initialize with original code
     final_explanation = None
 
     # Check only essential inputs, allow empty output
     if not all([program, prompt, code]):
-        # Keep the error print for program, prompt, code missing
         rprint("[bold red]Error:[/bold red] Missing one or more required inputs (program, prompt, code).")
         return {
             "explanation": None,
-            "fixed_program": program,
-            "fixed_code": code,
+            "fixed_program": program,
+            "fixed_code": code,
             "total_cost": 0.0,
             "model_name": None,
             "verification_issues_count": 0,
@@ -104,14 +115,14 @@ def fix_verification_errors(
             input_json=verification_input_json,
             strength=strength,
             temperature=temperature,
-            verbose=False,
+            verbose=False,
+            output_pydantic=VerificationOutput
         )
         total_cost += verification_response.get('cost', 0.0)
         model_name = verification_response.get('model_name', model_name)
-        verification_result = verification_response.get('result', '')
 
         if verbose:
-            rprint(
+            rprint("[cyan]Verification LLM call complete.[/cyan]")
             rprint(f"  [dim]Model Used:[/dim] {verification_response.get('model_name', 'N/A')}")
             rprint(f"  [dim]Cost:[/dim] ${verification_response.get('cost', 0.0):.6f}")
 
@@ -123,73 +134,97 @@ def fix_verification_errors(
             "fixed_code": code,
             "total_cost": total_cost,
             "model_name": model_name,
-            "verification_issues_count":
+            "verification_issues_count": 0, # Reset on LLM call error
         }
 
-    if verbose:
-        rprint("\n[blue]Verification Result:[/blue]")
-        # Markdown object handles its own rendering, no extra needed here
-        rprint(Markdown(verification_result))
-
     issues_found = False
-
-
-
-
-
-
-
-
-
-
-        rprint("
-    # Return the specific error structure for parsing errors after verification call
-    return {
-        "explanation": None,
-        "fixed_program": program,
-        "fixed_code": code,
-        "total_cost": total_cost, # Cost incurred so far
-        "model_name": model_name, # Model used so far
-        "verification_issues_count": 0, # Reset count on parsing error
-    }
+    verification_result_obj = verification_response.get('result')
+
+    if isinstance(verification_result_obj, VerificationOutput):
+        verification_issues_count = verification_result_obj.issues_count
+        verification_details = verification_result_obj.details
+        if verbose:
+            rprint("[green]Successfully parsed structured output from verification LLM.[/green]")
+            rprint("\n[blue]Verification Result (parsed):[/blue]")
+            rprint(f"  Issues Count: {verification_issues_count}")
+            if verification_details:
+                rprint(Markdown(f"**Details:**\n{verification_details}"))
             else:
-
-        rprint("[yellow]Warning:[/yellow] Could not find <issues_count> tag in verification result. Assuming 0 issues.")
-        verification_issues_count = 0
+                rprint("  Details: None provided or no issues found.")
 
-    # Proceed to check for details tag if count > 0
         if verification_issues_count > 0:
-
-
-
-
-            issues_found = True
-            if verbose:
-                rprint(f"\n[yellow]Found {verification_issues_count} potential issues. Proceeding to fix step.[/yellow]")
-        else:
-            # Count > 0, but details empty -> Warning
-            rprint("[yellow]Warning:[/yellow] <issues_count> is > 0, but <details> tag is empty. Treating as no issues found.")
-            verification_issues_count = 0 # Reset count
+            if verification_details and verification_details.strip():
+                issues_found = True
+                if verbose:
+                    rprint(f"\n[yellow]Found {verification_issues_count} potential issues. Proceeding to fix step.[/yellow]")
            else:
-
-
-            verification_issues_count = 0 # Reset count
+                rprint(f"[yellow]Warning:[/yellow] <issues_count> is {verification_issues_count}, but <details> field is empty or missing. Treating as no actionable issues found.")
+                verification_issues_count = 0
        else:
-            # verification_issues_count is 0 (either parsed as 0 or defaulted after warning)
            if verbose:
-                rprint("\n[green]No issues found during verification.[/green]")
+                rprint("\n[green]No issues found during verification based on structured output.[/green]")
+    elif isinstance(verification_result_obj, str):
+        try:
+            issues_match = re.search(r'<issues_count>(\d+)</issues_count>', verification_result_obj)
+            if issues_match:
+                parsed_issues_count = int(issues_match.group(1))
+                details_match = re.search(r'<details>(.*?)</details>', verification_result_obj, re.DOTALL)
+                parsed_verification_details = details_match.group(1).strip() if (details_match and details_match.group(1)) else None
 
-
-
-
-
+
+                if parsed_issues_count > 0:
+                    if parsed_verification_details: # Check if details exist and are not empty
+                        issues_found = True
+                        verification_issues_count = parsed_issues_count
+                        verification_details = parsed_verification_details
+                        if verbose:
+                            rprint(f"\n[yellow]Found {verification_issues_count} potential issues in string response. Proceeding to fix step.[/yellow]")
+                    else:
+                        rprint(f"[yellow]Warning:[/yellow] <issues_count> is {parsed_issues_count} in string response, but <details> field is empty or missing. Treating as no actionable issues found.")
+                        verification_issues_count = 0
+                        issues_found = False
+                else: # parsed_issues_count == 0
+                    verification_issues_count = 0
+                    issues_found = False
+                    if verbose:
+                        rprint("\n[green]No issues found in string verification based on <issues_count> being 0.[/green]")
+            else: # issues_match is None (tag not found or content not digits)
+                rprint("[bold red]Error:[/bold red] Could not find or parse integer value from <issues_count> tag in string response.")
+                return {
+                    "explanation": None,
+                    "fixed_program": program,
+                    "fixed_code": code,
+                    "total_cost": total_cost,
+                    "model_name": model_name,
+                    "verification_issues_count": 0,
+                }
+        except ValueError: # Should not be hit if regex is \d+, but as a safeguard
+            rprint("[bold red]Error:[/bold red] Invalid non-integer value in <issues_count> tag in string response.")
+            return {
+                "explanation": None,
+                "fixed_program": program,
+                "fixed_code": code,
+                "total_cost": total_cost,
+                "model_name": model_name,
+                "verification_issues_count": 0,
+            }
+    else: # Not VerificationOutput and not a successfully parsed string
+        rprint("[bold red]Error:[/bold red] Verification LLM call did not return the expected structured output (e.g., parsing failed).")
+        rprint(f"  [dim]Expected type:[/dim] {VerificationOutput} or str")
+        rprint(f"  [dim]Received type:[/dim] {type(verification_result_obj)}")
+        content_str = str(verification_result_obj)
+        rprint(f"  [dim]Received content:[/dim] {content_str[:500]}{'...' if len(content_str) > 500 else ''}")
+        raw_text = verification_response.get('result_text')
+        if raw_text:
+            raw_text_str = str(raw_text)
+            rprint(f"  [dim]Raw LLM text (if available from llm_invoke):[/dim] {raw_text_str[:500]}{'...' if len(raw_text_str) > 500 else ''}")
        return {
            "explanation": None,
            "fixed_program": program,
            "fixed_code": code,
            "total_cost": total_cost,
            "model_name": model_name,
-            "verification_issues_count": 0,
+            "verification_issues_count": 0,
        }
 
    if issues_found and verification_details:
@@ -210,71 +245,81 @@ def fix_verification_errors(
             input_json=fix_input_json,
             strength=strength,
             temperature=temperature,
-            verbose=False,
+            verbose=False,
+            output_pydantic=FixerOutput
         )
         total_cost += fix_response.get('cost', 0.0)
-        model_name = fix_response.get('model_name', model_name)
-        fix_result = fix_response.get('result', '')
+        model_name = fix_response.get('model_name', model_name)
 
         if verbose:
             rprint(f"[cyan]Fix LLM call complete.[/cyan]")
             rprint(f"  [dim]Model Used:[/dim] {fix_response.get('model_name', 'N/A')}")
             rprint(f"  [dim]Cost:[/dim] ${fix_response.get('cost', 0.0):.6f}")
-            rprint("\n[blue]Fix Result:[/blue]")
-            # Markdown object handles its own rendering, no extra needed here
-            rprint(Markdown(fix_result))
 
-
-
-        explanation_match = re.search(r"<explanation>(.*?)</explanation>", fix_result, re.DOTALL)
+        fix_result_obj = fix_response.get('result')
+        parsed_fix_successfully = False
 
-        if
-            fixed_program =
-
-
-
+        if isinstance(fix_result_obj, FixerOutput):
+            fixed_program = fix_result_obj.fixed_program
+            fixed_code = fix_result_obj.fixed_code
+            fix_explanation = fix_result_obj.explanation
+            parsed_fix_successfully = True
+            if verbose:
+                rprint("[green]Successfully parsed structured output for fix.[/green]")
+                rprint(Markdown(f"**Explanation from LLM:**\n{fix_explanation}"))
+        elif isinstance(fix_result_obj, str):
+            program_match = re.search(r'<fixed_program>(.*?)</fixed_program>', fix_result_obj, re.DOTALL)
+            code_match = re.search(r'<fixed_code>(.*?)</fixed_code>', fix_result_obj, re.DOTALL)
+            explanation_match = re.search(r'<explanation>(.*?)</explanation>', fix_result_obj, re.DOTALL)
 
-
-
-
-
-            if verbose: rprint("[yellow]Warning:[/yellow] Could not find <fixed_code> tag in fix result. Using original code module.")
+            if program_match or code_match or explanation_match: # If any tag is found, attempt to parse
+                fixed_program_candidate = program_match.group(1).strip() if (program_match and program_match.group(1)) else None
+                fixed_code_candidate = code_match.group(1).strip() if (code_match and code_match.group(1)) else None
+                fix_explanation_candidate = explanation_match.group(1).strip() if (explanation_match and explanation_match.group(1)) else None
 
-
-
-
-
-
-
+                fixed_program = fixed_program_candidate if fixed_program_candidate else program
+                fixed_code = fixed_code_candidate if fixed_code_candidate else code
+                fix_explanation = fix_explanation_candidate if fix_explanation_candidate else "[Fix explanation not provided by LLM]"
+                parsed_fix_successfully = True
+
+                if verbose:
+                    if not program_match or not fixed_program_candidate:
+                        rprint("[yellow]Warning:[/yellow] Could not find or parse <fixed_program> tag in fix result string. Using original program.")
+                    if not code_match or not fixed_code_candidate:
+                        rprint("[yellow]Warning:[/yellow] Could not find or parse <fixed_code> tag in fix result string. Using original code module.")
+                    if not explanation_match or not fix_explanation_candidate:
+                        rprint("[yellow]Warning:[/yellow] Could not find or parse <explanation> tag in fix result string. Using default explanation.")
+            # else: string, but no relevant tags. Will fall to parsed_fix_successfully = False below
+
+        if not parsed_fix_successfully:
+            rprint(f"[bold red]Error:[/bold red] Fix generation LLM call did not return the expected structured output (e.g., parsing failed).")
+            rprint(f"  [dim]Expected type:[/dim] {FixerOutput} or str (with XML tags)")
+            rprint(f"  [dim]Received type:[/dim] {type(fix_result_obj)}")
+            content_str = str(fix_result_obj)
+            rprint(f"  [dim]Received content:[/dim] {content_str[:500]}{'...' if len(content_str) > 500 else ''}")
+            raw_text = fix_response.get('result_text')
+            if raw_text:
+                raw_text_str = str(raw_text)
+                rprint(f"  [dim]Raw LLM text (if available from llm_invoke):[/dim] {raw_text_str[:500]}{'...' if len(raw_text_str) > 500 else ''}")
+            fix_explanation = "[Error: Failed to parse structured output from LLM for fix explanation]"
+            # fixed_program and fixed_code remain original (already initialized)
 
     except Exception as e:
-        rprint(f"[bold red]Error during fix LLM call or
-
-
-
-
-
+        rprint(f"[bold red]Error during fix LLM call or processing structured output:[/bold red] {e}")
+        fix_explanation = f"[Error during fix generation: {e}]"
+        # fixed_program and fixed_code remain original
+
+    if issues_found:
+        final_explanation = (
+            f"<verification_details>{verification_details}</verification_details>\n"
+            f"<fix_explanation>{fix_explanation}</fix_explanation>"
+        )
+    else:
+        final_explanation = None # Or "" if an empty list/None is preferred per prompt for "no issues"
 
     if verbose:
         rprint(f"\n[bold blue]Total Cost for fix_verification_errors run:[/bold blue] ${total_cost:.6f}")
 
-    # Construct final explanation only if issues were initially found and processed
-    if verification_details:
-        if fix_explanation:
-            final_explanation = (
-                f"<verification_details>{verification_details}</verification_details>\n"
-                f"<fix_explanation>{fix_explanation}</fix_explanation>"
-            )
-        else:
-            # This case might occur if fix step wasn't run due to parsing issues after verification,
-            # or if fix_explanation extraction failed silently (though we added a default).
-            # Let's ensure we always provide some context if details were found.
-            final_explanation = (
-                f"<verification_details>{verification_details}</verification_details>\n"
-                f"<fix_explanation>[Fix explanation not available or fix step skipped]</fix_explanation>"
-            )
-    # If no issues were found initially (verification_details is None), final_explanation remains None
-
     return {
         "explanation": final_explanation,
         "fixed_program": fixed_program,
@@ -282,4 +327,4 @@ def fix_verification_errors(
         "total_cost": total_cost,
         "model_name": model_name,
         "verification_issues_count": verification_issues_count,
-    }
+    }
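Note: the rewritten module prefers a structured VerificationOutput result and only falls back to scanning <issues_count>/<details> tags in a plain-string reply. A standalone sketch of that fallback, using the same regexes as the new code (the sample text is invented):

```python
# Isolated sketch of the string-fallback parsing shown above.
import re
from typing import Optional, Tuple

def parse_verification_tags(text: str) -> Tuple[int, Optional[str]]:
    issues_match = re.search(r'<issues_count>(\d+)</issues_count>', text)
    if not issues_match:
        raise ValueError("no <issues_count> tag found")
    count = int(issues_match.group(1))
    details_match = re.search(r'<details>(.*?)</details>', text, re.DOTALL)
    details = details_match.group(1).strip() if details_match else None
    if count > 0 and not details:
        count = 0  # mirror the module's rule: a count without details is not actionable
    return count, details

sample = "<issues_count>2</issues_count><details>Off-by-one in loop; wrong return type.</details>"
print(parse_verification_tags(sample))  # (2, 'Off-by-one in loop; wrong return type.')
```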
pdd/fix_verification_errors_loop.py
CHANGED

@@ -86,6 +86,8 @@ def fix_verification_errors_loop(
     max_attempts: int,
     budget: float,
     verification_log_file: str = "verification.log",
+    output_code_path: Optional[str] = None,
+    output_program_path: Optional[str] = None,
     verbose: bool = False,
     program_args: Optional[list[str]] = None,
 ) -> Dict[str, Any]:
@@ -103,6 +105,8 @@ def fix_verification_errors_loop(
         max_attempts: Maximum number of fix attempts.
         budget: Maximum allowed cost in USD.
         verification_log_file: Path for detailed XML logging (default: "verification.log").
+        output_code_path: Optional path to save fixed code (default: None).
+        output_program_path: Optional path to save fixed program (default: None).
         verbose: Enable verbose logging (default: False).
         program_args: Optional list of command-line arguments for the program_file.
 
@@ -423,7 +427,7 @@ def fix_verification_errors_loop(
             output=program_output,
             strength=strength,
             temperature=temperature,
-            verbose=verbose
+            verbose=verbose
         )
 
         # 4f: Add cost
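Note: the loop now accepts optional output_code_path and output_program_path destinations. A hypothetical helper (not the module's code) showing the usual pattern such optional paths enable: write only when a path was supplied.

```python
# Hypothetical helper, for illustration only.
from typing import Optional

def maybe_save(content: str, path: Optional[str]) -> Optional[str]:
    if path is None:
        return None           # caller did not ask for a file
    with open(path, "w") as f:
        f.write(content)
    return path

saved_code = maybe_save("print('fixed')\n", "my_module_fixed.py")
saved_prog = maybe_save("import my_module\n", None)  # skipped: no path given
```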
pdd/fix_verification_main.py
CHANGED

@@ -88,6 +88,7 @@ def fix_verification_main(
     program_file: str,
     output_results: Optional[str],
     output_code: Optional[str],
+    output_program: Optional[str],
     loop: bool,
     verification_program: Optional[str], # Only used if loop=True
     max_attempts: int = DEFAULT_MAX_ATTEMPTS,
@@ -105,6 +106,7 @@ def fix_verification_main(
         program_file (str): Path to the program to run for verification.
         output_results (Optional[str]): Path to save verification results log.
         output_code (Optional[str]): Path to save the verified code file.
+        output_program (Optional[str]): Path to save the verified program file.
         loop (bool): If True, perform iterative verification and fixing.
         verification_program (Optional[str]): Path to a verification program (required if loop=True).
         max_attempts (int): Max attempts for the loop.
@@ -161,12 +163,14 @@ def fix_verification_main(
     command_options: Dict[str, Optional[str]] = {
         "output_results": output_results,
         "output_code": output_code,
+        "output_program": output_program,
     }
 
     # Initial default values (in case we need the manual fallback)
     input_strings: Dict[str, str] = {}
     output_code_path: Optional[str] = output_code
     output_results_path: Optional[str] = output_results
+    output_program_path: Optional[str] = output_program
     language: str = ""
 
     try:
@@ -180,6 +184,7 @@ def fix_verification_main(
         )
         output_code_path = output_file_paths.get("output_code")
         output_results_path = output_file_paths.get("output_results")
+        output_program_path = output_file_paths.get("output_program")
 
         if verbose:
             rich_print("[dim]Resolved output paths via construct_paths.[/dim]")
@@ -211,6 +216,9 @@ def fix_verification_main(
         if output_results_path is None:
             base, _ = os.path.splitext(program_file)
             output_results_path = f"{base}_verify_results.log"
+        if output_program_path is None:
+            base_prog, ext_prog = os.path.splitext(program_file)
+            output_program_path = f"{base_prog}_verified{ext_prog}"
 
         # Best‑effort language guess
         if program_file.endswith(".py"):
@@ -331,7 +339,7 @@ def fix_verification_main(
         model_name = fix_results['model_name']
 
         # Build results log content for single pass
-        results_log_content =
+        results_log_content = "PDD Verify Results (Single Pass)\n"
         results_log_content += f"Timestamp: {os.path.getmtime(prompt_file)}\n" # Use prompt timestamp as reference
         results_log_content += f"Prompt File: {prompt_file}\n"
         results_log_content += f"Code File: {code_file}\n"
@@ -361,6 +369,7 @@ def fix_verification_main(
     # --- Output File Writing ---
     saved_code_path: Optional[str] = None
     saved_results_path: Optional[str] = None
+    saved_program_path: Optional[str] = None
 
     if success and output_code_path:
         try:
@@ -372,6 +381,16 @@ def fix_verification_main(
         except IOError as e:
             rich_print(f"[bold red]Error:[/bold red] Failed to write verified code file '{output_code_path}': {e}")
 
+    if success and output_program_path:
+        try:
+            with open(output_program_path, "w") as f:
+                f.write(final_program)
+            saved_program_path = output_program_path
+            if not quiet:
+                rich_print(f"Successfully verified program saved to: [green]{output_program_path}[/green]")
+        except IOError as e:
+            rich_print(f"[bold red]Error:[/bold red] Failed to write verified program file '{output_program_path}': {e}")
+
     # Write results log (only for single pass, loop writes its own)
     if not loop and output_results_path:
         try:
@@ -398,6 +417,7 @@ def fix_verification_main(
             f"Total Cost: ${total_cost:.6f}\n"
             f"Model Used: {model_name}\n"
             f"Verified Code Saved: {saved_code_path or 'N/A'}\n"
+            f"Verified Program Saved: {saved_program_path or 'N/A'}\n"
             f"Results Log Saved: {saved_results_path or 'N/A'}",
             title=title,
             border_style="green" if success else "red"
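Note: when no explicit path or resolved path is available, the verified program path is derived from the program file name. The fallback shown in the hunk above, in isolation:

```python
# The added fallback naming, shown standalone (example path is invented).
import os

program_file = "examples/run_module.py"
base_prog, ext_prog = os.path.splitext(program_file)
output_program_path = f"{base_prog}_verified{ext_prog}"
print(output_program_path)  # examples/run_module_verified.py
```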
pdd/generate_output_paths.py
CHANGED

@@ -25,7 +25,7 @@ COMMAND_OUTPUT_KEYS: Dict[str, List[str]] = {
     'trace': ['output'],
     'bug': ['output'],
     'auto-deps': ['output'],
-    'verify': ['output_results', 'output_code'],
+    'verify': ['output_results', 'output_code', 'output_program'],
 }
 
 # Define default filename patterns for each output key
@@ -61,6 +61,7 @@ DEFAULT_FILENAMES: Dict[str, Dict[str, str]] = {
     'verify': {
         'output_results': '{basename}_verify_results.log',
         'output_code': '{basename}_verified{ext}',
+        'output_program': '{basename}_program_verified{ext}',
     },
 }
 
@@ -93,6 +94,7 @@ ENV_VAR_MAP: Dict[str, Dict[str, str]] = {
     'verify': {
         'output_results': 'PDD_VERIFY_RESULTS_OUTPUT_PATH',
         'output_code': 'PDD_VERIFY_CODE_OUTPUT_PATH',
+        'output_program': 'PDD_VERIFY_PROGRAM_OUTPUT_PATH',
     },
 }
 
@@ -435,4 +437,43 @@ if __name__ == '__main__':
     # Expected: {
     #   'output_results': '/path/to/cwd/module_to_verify_verify_results.log',
     #   'output_code': '/path/to/cwd/module_to_verify_verified.py'
-    #
+    #   'output_program': '/path/to/cwd/module_to_verify_program_verified.py'
+    # }
+
+    # --- Test Case 12: Verify command with user-specified output_program directory ---
+    print("\n--- Test Case 12: Verify (User Dir for output_program) ---")
+    test_dir_verify_prog = "temp_verify_prog_output"
+    os.makedirs(test_dir_verify_prog, exist_ok=True)
+    paths12 = generate_output_paths(
+        command='verify',
+        output_locations={'output_program': test_dir_verify_prog + os.path.sep},
+        basename="module_to_verify",
+        language="python",
+        file_extension=".py"
+    )
+    print(f"Result: {paths12}")
+    # Expected: {
+    #   'output_results': '/path/to/cwd/module_to_verify_verify_results.log',
+    #   'output_code': '/path/to/cwd/module_to_verify_verified.py',
+    #   'output_program': f'/path/to/cwd/{test_dir_verify_prog}/module_to_verify_program_verified.py'
+    # }
+    os.rmdir(test_dir_verify_prog) # Clean up
+
+    # --- Test Case 13: Verify command with environment variable for output_program ---
+    print("\n--- Test Case 13: Verify (Env Var for output_program) ---")
+    env_verify_prog_path = "env_verify_program_custom.py"
+    os.environ['PDD_VERIFY_PROGRAM_OUTPUT_PATH'] = env_verify_prog_path
+    paths13 = generate_output_paths(
+        command='verify',
+        output_locations={},
+        basename="another_module_verify",
+        language="python",
+        file_extension=".py"
+    )
+    print(f"Result: {paths13}")
+    # Expected: {
+    #   'output_results': '/path/to/cwd/another_module_verify_verify_results.log',
+    #   'output_code': '/path/to/cwd/another_module_verify_verified.py',
+    #   'output_program': f'/path/to/cwd/{env_verify_prog_path}'
+    # }
+    del os.environ['PDD_VERIFY_PROGRAM_OUTPUT_PATH'] # Clean up