pdd-cli 0.0.23__py3-none-any.whl → 0.0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pdd-cli might be problematic. Click here for more details.
- pdd/__init__.py +1 -1
- pdd/bug_main.py +16 -2
- pdd/data/llm_model.csv +8 -8
- pdd/fix_verification_errors.py +259 -0
- pdd/fix_verification_errors_loop.py +901 -0
- pdd/generate_output_paths.py +6 -0
- pdd/llm_invoke.py +4 -5
- pdd/pdd_completion.zsh +38 -1
- pdd/prompts/extract_prompt_split_LLM.prompt +7 -4
- pdd/prompts/find_verification_errors_LLM.prompt +25 -0
- pdd/prompts/fix_verification_errors_LLM.prompt +20 -0
- pdd/prompts/split_LLM.prompt +3 -3
- pdd/split.py +9 -9
- pdd/split_main.py +11 -11
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.24.dist-info}/METADATA +3 -3
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.24.dist-info}/RECORD +20 -16
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.24.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.24.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.24.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.23.dist-info → pdd_cli-0.0.24.dist-info}/top_level.txt +0 -0
pdd/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.0.23"
|
|
1
|
+
__version__ = "0.0.24"
|
pdd/bug_main.py
CHANGED
|
@@ -3,6 +3,7 @@ import sys
|
|
|
3
3
|
from typing import Tuple, Optional
|
|
4
4
|
import click
|
|
5
5
|
from rich import print as rprint
|
|
6
|
+
from pathlib import Path
|
|
6
7
|
|
|
7
8
|
from .construct_paths import construct_paths
|
|
8
9
|
from .bug_to_unit_test import bug_to_unit_test
|
|
@@ -74,9 +75,22 @@ def bug_main(
|
|
|
74
75
|
|
|
75
76
|
# Save results if output path is provided
|
|
76
77
|
if output_file_paths.get("output"):
|
|
78
|
+
output_path = output_file_paths["output"]
|
|
79
|
+
# Additional check to ensure the path is not empty
|
|
80
|
+
if not output_path or output_path.strip() == '':
|
|
81
|
+
# Use a default output path in the current directory
|
|
82
|
+
output_path = f"test_{Path(code_file).stem}_bug.{language.lower()}"
|
|
83
|
+
if not ctx.obj.get('quiet', False):
|
|
84
|
+
rprint(f"[yellow]Warning: Empty output path detected. Using default: {output_path}[/yellow]")
|
|
85
|
+
output_file_paths["output"] = output_path
|
|
86
|
+
|
|
77
87
|
# Create directory if it doesn't exist
|
|
78
|
-
os.
|
|
79
|
-
|
|
88
|
+
dir_path = os.path.dirname(output_path)
|
|
89
|
+
if dir_path: # Only create directory if there's a directory part in the path
|
|
90
|
+
os.makedirs(dir_path, exist_ok=True)
|
|
91
|
+
|
|
92
|
+
# Write the file
|
|
93
|
+
with open(output_path, 'w') as f:
|
|
80
94
|
f.write(unit_test)
|
|
81
95
|
|
|
82
96
|
# Provide user feedback
|
pdd/data/llm_model.csv
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
provider,model,input,output,coding_arena_elo,base_url,api_key,counter,encoder,max_tokens,max_completion_tokens,structured_output
|
|
2
|
-
OpenAI,"gpt-
|
|
3
|
-
OpenAI,"grok-
|
|
2
|
+
OpenAI,"gpt-4.1-nano",0.1,0.40,1246,,OPENAI_API_KEY,tiktoken,o200k_base,,32768,True
|
|
3
|
+
OpenAI,"grok-3-beta",3,15,1255,"https://api.x.ai/v1",XAI_API_KEY,tiktoken,o200k_base,131072,,False
|
|
4
4
|
Anthropic,"claude-3-5-haiku-20241022",1,5,1259,,ANTHROPIC_API_KEY,anthropic,claude-3-sonnet-20240229,8192,,False
|
|
5
5
|
OpenAI,"deepseek-coder",0.14,0.28,1279,https://api.deepseek.com/beta,DEEPSEEK_API_KEY,autotokenizer,deepseek-coder-7b-instruct-v1.5,8192,,False
|
|
6
|
-
Google,"gemini-2.
|
|
7
|
-
GoogleVertexAI,"gemini-2.
|
|
6
|
+
Google,"gemini-2.5-flash-preview-04-17",.15,3.5,1291,,GOOGLE_API_KEY,,,65535,,False
|
|
7
|
+
GoogleVertexAI,"gemini-2.5-pro-exp-03-25",1.25,10,1299,,VERTEX_AI_API_KEY,,,65535,,False
|
|
8
8
|
Anthropic,claude-3-7-sonnet-20250219,3,15,1312,,ANTHROPIC_API_KEY,anthropic,claude-3-sonnet-20240229,64000,,False
|
|
9
|
-
Google,gemini-2.5-pro-exp-03-25,1.25,
|
|
9
|
+
Google,gemini-2.5-pro-exp-03-25,1.25,10,1313,,GOOGLE_API_KEY,,,65535,,False
|
|
10
10
|
OpenAI,"deepseek-r1-distill-llama-70b-specdec",5,5,1314,https://api.groq.com/openai/v1,GROQ_API_KEY,autotokenizer,deepseek-coder-7b-instruct-v1.5,16384,,False
|
|
11
11
|
Ollama,"deepseek-r1:70b-llama-distill-q8_0",0.0,0.0,1315,,PWD,,,,,False
|
|
12
12
|
Ollama,deepseek-r1:32b-qwen-distill-fp16,0.0,0.0,1316,,PWD,,,,,False
|
|
13
|
-
OpenAI,"
|
|
14
|
-
OpenAI,"
|
|
15
|
-
OpenAI,"gpt-
|
|
13
|
+
OpenAI,"o4-mini",1.1,4.4,1319,,OPENAI_API_KEY,tiktoken,o200k_base,,100000,True
|
|
14
|
+
OpenAI,"o3",10,40,1331,,OPENAI_API_KEY,tiktoken,o200k_base,,100000,True
|
|
15
|
+
OpenAI,"gpt-4.1",2,8,1332,,OPENAI_API_KEY,tiktoken,o200k_base,,32768,True
|
|
16
16
|
OpenAI,"deepseek-reasoner",0.55,2.19,1336,https://api.deepseek.com/beta,DEEPSEEK_API_KEY,autotokenizer,deepseek-coder-7b-instruct-v1.5,8192,,False
|
|
17
17
|
Fireworks,accounts/fireworks/models/deepseek-r1,3,8,1338,,FIREWORKS_API_KEY,,,8192,,False
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Dict, Any
|
|
3
|
+
from rich import print as rprint
|
|
4
|
+
from rich.markdown import Markdown
|
|
5
|
+
from .load_prompt_template import load_prompt_template
|
|
6
|
+
from .llm_invoke import llm_invoke
|
|
7
|
+
|
|
8
|
+
def _build_result(
    explanation: Any,
    fixed_program: str,
    fixed_code: str,
    total_cost: float,
    model_name: Any,
    verification_issues_count: int,
) -> Dict[str, Any]:
    """Assemble the result dictionary returned by fix_verification_errors."""
    return {
        "explanation": explanation,
        "fixed_program": fixed_program,
        "fixed_code": fixed_code,
        "total_cost": total_cost,
        "model_name": model_name,
        "verification_issues_count": verification_issues_count,
    }


def _response_text(response: Dict[str, Any]) -> str:
    """Return the 'result' field of an LLM response as text.

    A missing or None result becomes '' and any other non-string value is
    converted with str(), so the regex parsing and Markdown rendering that
    follow always receive a string.
    """
    result = response.get('result', '')
    if result is None:
        return ''
    return result if isinstance(result, str) else str(result)


def _extract_tag(tag: str, text: str) -> Any:
    """Return the stripped content of the first <tag>...</tag> in text.

    Returns None when the tag pair is absent; an empty string means the tag
    was present but had no content.
    """
    match = re.search(rf"<{tag}>(.*?)</{tag}>", text, re.DOTALL)
    return match.group(1).strip() if match else None


def fix_verification_errors(
    program: str,
    prompt: str,
    code: str,
    output: str,
    strength: float,
    temperature: float = 0.0,
    verbose: bool = False,
) -> Dict[str, Any]:
    """
    Identifies and fixes issues in a code module based on verification output.

    Two LLM calls may be made: one with the "find_verification_errors_LLM"
    template to detect issues, and, only when issues with details are
    reported, one with the "fix_verification_errors_LLM" template to produce
    fixes. Errors are reported through rprint and the original program/code
    are returned unchanged; exceptions from template loading and LLM calls
    are not propagated.

    Args:
        program: The program code that ran the code module.
        prompt: The prompt used to generate the code module.
        code: The code module to be fixed.
        output: The output logs from the program run during verification.
        strength: The strength (0-1) for the LLM model selection.
        temperature: The temperature for the LLM model. Defaults to 0.
        verbose: If True, prints detailed execution information. Defaults to False.

    Returns:
        A dictionary containing:
        - 'explanation': A string with verification details and fix explanation
                       in XML format, or None if no issues were found.
        - 'fixed_program': The potentially fixed program code string.
        - 'fixed_code': The potentially fixed code module string.
        - 'total_cost': The total cost incurred from LLM calls.
        - 'model_name': The name of the LLM model used.
        - 'verification_issues_count': The number of issues found during verification.
    """
    total_cost = 0.0
    model_name = None

    # --- Input validation: nothing has been spent yet, so cost is 0.0 ---
    if not all([program, prompt, code, output]):
        rprint("[bold red]Error:[/bold red] Missing one or more required inputs (program, prompt, code, output).")
        return _build_result(None, program, code, 0.0, None, 0)
    if not (0.0 <= strength <= 1.0):
        rprint(f"[bold red]Error:[/bold red] Strength must be between 0.0 and 1.0, got {strength}.")
        return _build_result(None, program, code, 0.0, None, 0)

    # --- Load both prompt templates up front ---
    if verbose:
        rprint("[blue]Loading prompt templates...[/blue]")
    try:
        find_errors_prompt_template = load_prompt_template("find_verification_errors_LLM")
        fix_errors_prompt_template = load_prompt_template("fix_verification_errors_LLM")
        if not find_errors_prompt_template or not fix_errors_prompt_template:
            raise ValueError("One or both prompt templates could not be loaded.")
    except Exception as e:
        rprint(f"[bold red]Error loading prompt templates:[/bold red] {e}")
        return _build_result(None, program, code, total_cost, model_name, 0)
    if verbose:
        rprint("[green]Prompt templates loaded successfully.[/green]")

    # --- Verification call ---
    if verbose:
        rprint(f"\n[blue]Step 2: Running verification check (Strength: {strength}, Temp: {temperature})...[/blue]")

    verification_input_json = {
        "program": program,
        "prompt": prompt,
        "code": code,
        "output": output,
    }

    try:
        verification_response = llm_invoke(
            prompt=find_errors_prompt_template,
            input_json=verification_input_json,
            strength=strength,
            temperature=temperature,
            verbose=False,
        )
        # A None cost must not turn a successful call into a failure.
        verification_cost = verification_response.get('cost', 0.0) or 0.0
        total_cost += verification_cost
        model_name = verification_response.get('model_name', model_name)
        verification_result = _response_text(verification_response)

        if verbose:
            rprint("[cyan]Verification LLM call complete.[/cyan]")
            rprint(f" [dim]Model Used:[/dim] {verification_response.get('model_name', 'N/A')}")
            rprint(f" [dim]Cost:[/dim] ${verification_cost:.6f}")

    except Exception as e:
        rprint(f"[bold red]Error during verification LLM call:[/bold red] {e}")
        return _build_result(None, program, code, total_cost, model_name, 0)

    if verbose:
        rprint("\n[blue]Verification Result:[/blue]")
        rprint(Markdown(verification_result))

    # --- Parse <issues_count> and <details> out of the verification result ---
    issues_found = False
    verification_details = None
    verification_issues_count = 0
    try:
        count_match = re.search(r"<issues_count>(\d+)</issues_count>", verification_result)
        if count_match:
            verification_issues_count = int(count_match.group(1))
        else:
            rprint("[yellow]Warning:[/yellow] Could not find <issues_count> tag in verification result. Assuming 0 issues.")

        if verification_issues_count > 0:
            verification_details = _extract_tag("details", verification_result)
            if verification_details:
                issues_found = True
                if verbose:
                    rprint(f"\n[yellow]Found {verification_issues_count} potential issues. Proceeding to fix step.[/yellow]")
            elif verification_details is not None:
                # Tag present but empty: a count without details cannot be acted on.
                rprint("[yellow]Warning:[/yellow] <issues_count> is > 0, but <details> tag is empty. Treating as no issues found.")
                verification_issues_count = 0
            else:
                rprint("[yellow]Warning:[/yellow] <issues_count> is > 0, but could not find <details> tag. Treating as no issues found.")
                verification_issues_count = 0
        elif verbose:
            rprint("\n[green]No issues found during verification.[/green]")

    except ValueError:
        rprint("[bold red]Error:[/bold red] Could not parse integer value from <issues_count> tag.")
        return _build_result(None, program, code, total_cost, model_name, 0)
    except Exception as e:
        rprint(f"[bold red]Error parsing verification result:[/bold red] {e}")
        return _build_result(None, program, code, total_cost, model_name, 0)

    # --- Fix call: only when issues with details were reported ---
    fix_explanation = None
    fixed_program = program
    fixed_code = code

    if issues_found and verification_details:
        if verbose:
            rprint(f"\n[blue]Step 5: Running fix generation (Strength: {strength}, Temp: {temperature})...[/blue]")

        fix_input_json = {
            "program": program,
            "prompt": prompt,
            "code": code,
            "output": output,
            "issues": verification_details,
        }

        try:
            fix_response = llm_invoke(
                prompt=fix_errors_prompt_template,
                input_json=fix_input_json,
                strength=strength,
                temperature=temperature,
                verbose=False,
            )
            fix_cost = fix_response.get('cost', 0.0) or 0.0
            total_cost += fix_cost
            model_name = fix_response.get('model_name', model_name)
            fix_result = _response_text(fix_response)

            if verbose:
                rprint("[cyan]Fix LLM call complete.[/cyan]")
                rprint(f" [dim]Model Used:[/dim] {fix_response.get('model_name', 'N/A')}")
                rprint(f" [dim]Cost:[/dim] ${fix_cost:.6f}")
                rprint("\n[blue]Fix Result:[/blue]")
                rprint(Markdown(fix_result))

            extracted_program = _extract_tag("fixed_program", fix_result)
            extracted_code = _extract_tag("fixed_code", fix_result)
            extracted_explanation = _extract_tag("explanation", fix_result)

            # A missing tag keeps the corresponding original input.
            if extracted_program is not None:
                fixed_program = extracted_program
                if verbose:
                    rprint("[green]Extracted fixed program.[/green]")
            elif verbose:
                rprint("[yellow]Warning:[/yellow] Could not find <fixed_program> tag in fix result. Using original program.")

            if extracted_code is not None:
                fixed_code = extracted_code
                if verbose:
                    rprint("[green]Extracted fixed code module.[/green]")
            elif verbose:
                rprint("[yellow]Warning:[/yellow] Could not find <fixed_code> tag in fix result. Using original code module.")

            if extracted_explanation is not None:
                fix_explanation = extracted_explanation
                if verbose:
                    rprint("[green]Extracted fix explanation.[/green]")
            else:
                if verbose:
                    rprint("[yellow]Warning:[/yellow] Could not find <explanation> tag in fix result.")
                fix_explanation = "[Fix explanation not provided by LLM]"

        except Exception as e:
            # Best effort: keep whatever was extracted so far and record the failure.
            rprint(f"[bold red]Error during fix LLM call or extraction:[/bold red] {e}")
            if verification_details and fix_explanation is None:
                fix_explanation = f"[Error during fix generation: {e}]"

    if verbose:
        rprint(f"\n[bold blue]Total Cost for fix_verification_errors run:[/bold blue] ${total_cost:.6f}")

    # --- Combine verification details and fix explanation into one XML string ---
    final_explanation = None
    if issues_found and verification_details:
        if fix_explanation:
            final_explanation = (
                f"<verification_details>{verification_details}</verification_details>\n"
                f"<fix_explanation>{fix_explanation}</fix_explanation>"
            )
        else:
            final_explanation = (
                f"<verification_details>{verification_details}</verification_details>\n"
                f"<fix_explanation>[Fix explanation not available or extraction failed]</fix_explanation>"
            )

    return _build_result(
        final_explanation,
        fixed_program,
        fixed_code,
        total_cost,
        model_name,
        verification_issues_count,
    )
|