pdd-cli 0.0.22__py3-none-any.whl → 0.0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pdd-cli might be problematic.
- pdd/__init__.py +1 -0
- pdd/bug_main.py +16 -2
- pdd/cli.py +4 -3
- pdd/data/llm_model.csv +8 -8
- pdd/fix_verification_errors.py +259 -0
- pdd/fix_verification_errors_loop.py +901 -0
- pdd/generate_output_paths.py +6 -0
- pdd/install_completion.py +7 -7
- pdd/llm_invoke.py +4 -5
- pdd/pdd_completion.zsh +38 -1
- pdd/prompts/extract_prompt_split_LLM.prompt +7 -4
- pdd/prompts/find_verification_errors_LLM.prompt +25 -0
- pdd/prompts/fix_verification_errors_LLM.prompt +20 -0
- pdd/prompts/split_LLM.prompt +3 -3
- pdd/split.py +9 -9
- pdd/split_main.py +11 -11
- {pdd_cli-0.0.22.dist-info → pdd_cli-0.0.24.dist-info}/METADATA +7 -3
- {pdd_cli-0.0.22.dist-info → pdd_cli-0.0.24.dist-info}/RECORD +22 -18
- {pdd_cli-0.0.22.dist-info → pdd_cli-0.0.24.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.22.dist-info → pdd_cli-0.0.24.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.22.dist-info → pdd_cli-0.0.24.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.22.dist-info → pdd_cli-0.0.24.dist-info}/top_level.txt +0 -0
pdd/__init__.py
CHANGED
@@ -0,0 +1 @@
+__version__ = "0.0.24"
pdd/bug_main.py
CHANGED
@@ -3,6 +3,7 @@ import sys
 from typing import Tuple, Optional
 import click
 from rich import print as rprint
+from pathlib import Path
 
 from .construct_paths import construct_paths
 from .bug_to_unit_test import bug_to_unit_test
@@ -74,9 +75,22 @@ def bug_main(
 
     # Save results if output path is provided
     if output_file_paths.get("output"):
+        output_path = output_file_paths["output"]
+        # Additional check to ensure the path is not empty
+        if not output_path or output_path.strip() == '':
+            # Use a default output path in the current directory
+            output_path = f"test_{Path(code_file).stem}_bug.{language.lower()}"
+            if not ctx.obj.get('quiet', False):
+                rprint(f"[yellow]Warning: Empty output path detected. Using default: {output_path}[/yellow]")
+            output_file_paths["output"] = output_path
+
         # Create directory if it doesn't exist
-        os.
-
+        dir_path = os.path.dirname(output_path)
+        if dir_path:  # Only create directory if there's a directory part in the path
+            os.makedirs(dir_path, exist_ok=True)
+
+        # Write the file
+        with open(output_path, 'w') as f:
             f.write(unit_test)
 
     # Provide user feedback
pdd/cli.py
CHANGED
@@ -12,6 +12,7 @@ from rich.panel import Panel
 
 from .install_completion import install_completion as install_completion_main
 import pdd.install_completion
+from pdd import __version__
 
 pdd.install_completion.get_local_pdd_path()
 # ----------------------------------------------------------------------
@@ -46,7 +47,7 @@ console = Console()
 @click.option("--review-examples", is_flag=True,
               help="Review and optionally exclude few-shot examples before command execution.")
 @click.option('--local', is_flag=True, help='Run commands locally instead of in the cloud.')
-@click.version_option(version=
+@click.version_option(version=__version__)
 @click.pass_context
 def cli(
     ctx,
@@ -374,9 +375,9 @@ def crash(
 @cli.command(name="install_completion")
 def install_completion():
     """
-    Install shell completion for the PDD CLI by detecting the user
+    Install shell completion for the PDD CLI by detecting the user's shell,
     copying the relevant completion script, and appending a source command
-    to the user
+    to the user's shell RC file if not already present.
     """
     return install_completion_main()
 
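
Together with the __init__.py change above, the new import and @click.version_option(version=__version__) make pdd/__init__.py the single source of truth for the version string. A standalone sketch of the same pattern, using an illustrative command rather than the real pdd CLI:

import click

__version__ = "0.0.24"  # in pdd this is defined in pdd/__init__.py

@click.group()
@click.version_option(version=__version__)
def cli():
    """Example CLI whose --version output tracks the package version."""

if __name__ == "__main__":
    cli()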
pdd/data/llm_model.csv
CHANGED
@@ -1,17 +1,17 @@
 provider,model,input,output,coding_arena_elo,base_url,api_key,counter,encoder,max_tokens,max_completion_tokens,structured_output
-OpenAI,"gpt-
-OpenAI,"grok-
+OpenAI,"gpt-4.1-nano",0.1,0.40,1246,,OPENAI_API_KEY,tiktoken,o200k_base,,32768,True
+OpenAI,"grok-3-beta",3,15,1255,"https://api.x.ai/v1",XAI_API_KEY,tiktoken,o200k_base,131072,,False
 Anthropic,"claude-3-5-haiku-20241022",1,5,1259,,ANTHROPIC_API_KEY,anthropic,claude-3-sonnet-20240229,8192,,False
 OpenAI,"deepseek-coder",0.14,0.28,1279,https://api.deepseek.com/beta,DEEPSEEK_API_KEY,autotokenizer,deepseek-coder-7b-instruct-v1.5,8192,,False
-Google,"gemini-2.
-GoogleVertexAI,"gemini-2.
+Google,"gemini-2.5-flash-preview-04-17",.15,3.5,1291,,GOOGLE_API_KEY,,,65535,,False
+GoogleVertexAI,"gemini-2.5-pro-exp-03-25",1.25,10,1299,,VERTEX_AI_API_KEY,,,65535,,False
 Anthropic,claude-3-7-sonnet-20250219,3,15,1312,,ANTHROPIC_API_KEY,anthropic,claude-3-sonnet-20240229,64000,,False
-Google,gemini-2.5-pro-exp-03-25,1.25,
+Google,gemini-2.5-pro-exp-03-25,1.25,10,1313,,GOOGLE_API_KEY,,,65535,,False
 OpenAI,"deepseek-r1-distill-llama-70b-specdec",5,5,1314,https://api.groq.com/openai/v1,GROQ_API_KEY,autotokenizer,deepseek-coder-7b-instruct-v1.5,16384,,False
 Ollama,"deepseek-r1:70b-llama-distill-q8_0",0.0,0.0,1315,,PWD,,,,,False
 Ollama,deepseek-r1:32b-qwen-distill-fp16,0.0,0.0,1316,,PWD,,,,,False
-OpenAI,"
-OpenAI,"
-OpenAI,"gpt-
+OpenAI,"o4-mini",1.1,4.4,1319,,OPENAI_API_KEY,tiktoken,o200k_base,,100000,True
+OpenAI,"o3",10,40,1331,,OPENAI_API_KEY,tiktoken,o200k_base,,100000,True
+OpenAI,"gpt-4.1",2,8,1332,,OPENAI_API_KEY,tiktoken,o200k_base,,32768,True
 OpenAI,"deepseek-reasoner",0.55,2.19,1336,https://api.deepseek.com/beta,DEEPSEEK_API_KEY,autotokenizer,deepseek-coder-7b-instruct-v1.5,8192,,False
 Fireworks,accounts/fireworks/models/deepseek-r1,3,8,1338,,FIREWORKS_API_KEY,,,8192,,False
pdd/fix_verification_errors.py
ADDED
@@ -0,0 +1,259 @@
+import re
+from typing import Dict, Any
+from rich import print as rprint
+from rich.markdown import Markdown
+from .load_prompt_template import load_prompt_template
+from .llm_invoke import llm_invoke
+
+def fix_verification_errors(
+    program: str,
+    prompt: str,
+    code: str,
+    output: str,
+    strength: float,
+    temperature: float = 0.0,
+    verbose: bool = False,
+) -> Dict[str, Any]:
+    """
+    Identifies and fixes issues in a code module based on verification output.
+
+    Args:
+        program: The program code that ran the code module.
+        prompt: The prompt used to generate the code module.
+        code: The code module to be fixed.
+        output: The output logs from the program run during verification.
+        strength: The strength (0-1) for the LLM model selection.
+        temperature: The temperature for the LLM model. Defaults to 0.
+        verbose: If True, prints detailed execution information. Defaults to False.
+
+    Returns:
+        A dictionary containing:
+        - 'explanation': A string with verification details and fix explanation
+          in XML format, or None if no issues were found.
+        - 'fixed_program': The potentially fixed program code string.
+        - 'fixed_code': The potentially fixed code module string.
+        - 'total_cost': The total cost incurred from LLM calls.
+        - 'model_name': The name of the LLM model used.
+        - 'verification_issues_count': The number of issues found during verification.
+    """
+    total_cost = 0.0
+    model_name = None
+    verification_issues_count = 0
+    verification_details = None
+    fix_explanation = None
+    fixed_program = program
+    fixed_code = code
+    final_explanation = None
+
+    if not all([program, prompt, code, output]):
+        rprint("[bold red]Error:[/bold red] Missing one or more required inputs (program, prompt, code, output).")
+        return {
+            "explanation": None,
+            "fixed_program": program,
+            "fixed_code": code,
+            "total_cost": 0.0,
+            "model_name": None,
+            "verification_issues_count": 0,
+        }
+    if not (0.0 <= strength <= 1.0):
+        rprint(f"[bold red]Error:[/bold red] Strength must be between 0.0 and 1.0, got {strength}.")
+        return {
+            "explanation": None,
+            "fixed_program": program,
+            "fixed_code": code,
+            "total_cost": 0.0,
+            "model_name": None,
+            "verification_issues_count": 0,
+        }
+
+    if verbose:
+        rprint("[blue]Loading prompt templates...[/blue]")
+    try:
+        find_errors_prompt_template = load_prompt_template("find_verification_errors_LLM")
+        fix_errors_prompt_template = load_prompt_template("fix_verification_errors_LLM")
+        if not find_errors_prompt_template or not fix_errors_prompt_template:
+            raise ValueError("One or both prompt templates could not be loaded.")
+    except Exception as e:
+        rprint(f"[bold red]Error loading prompt templates:[/bold red] {e}")
+        return {
+            "explanation": None,
+            "fixed_program": program,
+            "fixed_code": code,
+            "total_cost": total_cost,
+            "model_name": model_name,
+            "verification_issues_count": verification_issues_count,
+        }
+    if verbose:
+        rprint("[green]Prompt templates loaded successfully.[/green]")
+
+    if verbose:
+        rprint(f"\n[blue]Step 2: Running verification check (Strength: {strength}, Temp: {temperature})...[/blue]")
+
+    verification_input_json = {
+        "program": program,
+        "prompt": prompt,
+        "code": code,
+        "output": output,
+    }
+
+    try:
+        verification_response = llm_invoke(
+            prompt=find_errors_prompt_template,
+            input_json=verification_input_json,
+            strength=strength,
+            temperature=temperature,
+            verbose=False,
+        )
+        total_cost += verification_response.get('cost', 0.0)
+        model_name = verification_response.get('model_name', model_name)
+        verification_result = verification_response.get('result', '')
+
+        if verbose:
+            rprint(f"[cyan]Verification LLM call complete.[/cyan]")
+            rprint(f" [dim]Model Used:[/dim] {verification_response.get('model_name', 'N/A')}")
+            rprint(f" [dim]Cost:[/dim] ${verification_response.get('cost', 0.0):.6f}")
+
+    except Exception as e:
+        rprint(f"[bold red]Error during verification LLM call:[/bold red] {e}")
+        return {
+            "explanation": None,
+            "fixed_program": program,
+            "fixed_code": code,
+            "total_cost": total_cost,
+            "model_name": model_name,
+            "verification_issues_count": verification_issues_count,
+        }
+
+    if verbose:
+        rprint("\n[blue]Verification Result:[/blue]")
+        rprint(Markdown(verification_result))
+
+    issues_found = False
+    try:
+        count_match = re.search(r"<issues_count>(\d+)</issues_count>", verification_result)
+        if count_match:
+            verification_issues_count = int(count_match.group(1))
+        else:
+            rprint("[yellow]Warning:[/yellow] Could not find <issues_count> tag in verification result. Assuming 0 issues.")
+            verification_issues_count = 0
+
+        if verification_issues_count > 0:
+            details_match = re.search(r"<details>(.*?)</details>", verification_result, re.DOTALL)
+            if details_match:
+                verification_details = details_match.group(1).strip()
+                if verification_details:
+                    issues_found = True
+                    if verbose:
+                        rprint(f"\n[yellow]Found {verification_issues_count} potential issues. Proceeding to fix step.[/yellow]")
+                else:
+                    rprint("[yellow]Warning:[/yellow] <issues_count> is > 0, but <details> tag is empty. Treating as no issues found.")
+                    verification_issues_count = 0
+            else:
+                rprint("[yellow]Warning:[/yellow] <issues_count> is > 0, but could not find <details> tag. Treating as no issues found.")
+                verification_issues_count = 0
+        else:
+            if verbose:
+                rprint("\n[green]No issues found during verification.[/green]")
+
+    except ValueError:
+        rprint("[bold red]Error:[/bold red] Could not parse integer value from <issues_count> tag.")
+        return {
+            "explanation": None,
+            "fixed_program": program,
+            "fixed_code": code,
+            "total_cost": total_cost,
+            "model_name": model_name,
+            "verification_issues_count": 0,
+        }
+    except Exception as e:
+        rprint(f"[bold red]Error parsing verification result:[/bold red] {e}")
+        return {
+            "explanation": None,
+            "fixed_program": program,
+            "fixed_code": code,
+            "total_cost": total_cost,
+            "model_name": model_name,
+            "verification_issues_count": 0,
+        }
+
+    if issues_found and verification_details:
+        if verbose:
+            rprint(f"\n[blue]Step 5: Running fix generation (Strength: {strength}, Temp: {temperature})...[/blue]")
+
+        fix_input_json = {
+            "program": program,
+            "prompt": prompt,
+            "code": code,
+            "output": output,
+            "issues": verification_details,
+        }
+
+        try:
+            fix_response = llm_invoke(
+                prompt=fix_errors_prompt_template,
+                input_json=fix_input_json,
+                strength=strength,
+                temperature=temperature,
+                verbose=False,
+            )
+            total_cost += fix_response.get('cost', 0.0)
+            model_name = fix_response.get('model_name', model_name)
+            fix_result = fix_response.get('result', '')
+
+            if verbose:
+                rprint(f"[cyan]Fix LLM call complete.[/cyan]")
+                rprint(f" [dim]Model Used:[/dim] {fix_response.get('model_name', 'N/A')}")
+                rprint(f" [dim]Cost:[/dim] ${fix_response.get('cost', 0.0):.6f}")
+                rprint("\n[blue]Fix Result:[/blue]")
+                rprint(Markdown(fix_result))
+
+            fixed_program_match = re.search(r"<fixed_program>(.*?)</fixed_program>", fix_result, re.DOTALL)
+            fixed_code_match = re.search(r"<fixed_code>(.*?)</fixed_code>", fix_result, re.DOTALL)
+            explanation_match = re.search(r"<explanation>(.*?)</explanation>", fix_result, re.DOTALL)
+
+            if fixed_program_match:
+                fixed_program = fixed_program_match.group(1).strip()
+                if verbose: rprint("[green]Extracted fixed program.[/green]")
+            else:
+                if verbose: rprint("[yellow]Warning:[/yellow] Could not find <fixed_program> tag in fix result. Using original program.")
+
+            if fixed_code_match:
+                fixed_code = fixed_code_match.group(1).strip()
+                if verbose: rprint("[green]Extracted fixed code module.[/green]")
+            else:
+                if verbose: rprint("[yellow]Warning:[/yellow] Could not find <fixed_code> tag in fix result. Using original code module.")
+
+            if explanation_match:
+                fix_explanation = explanation_match.group(1).strip()
+                if verbose: rprint("[green]Extracted fix explanation.[/green]")
+            else:
+                if verbose: rprint("[yellow]Warning:[/yellow] Could not find <explanation> tag in fix result.")
+                fix_explanation = "[Fix explanation not provided by LLM]"
+
+        except Exception as e:
+            rprint(f"[bold red]Error during fix LLM call or extraction:[/bold red] {e}")
+            if verification_details and fix_explanation is None:
+                fix_explanation = f"[Error during fix generation: {e}]"
+
+    if verbose:
+        rprint(f"\n[bold blue]Total Cost for fix_verification_errors run:[/bold blue] ${total_cost:.6f}")
+
+    if issues_found and verification_details and fix_explanation:
+        final_explanation = (
+            f"<verification_details>{verification_details}</verification_details>\n"
+            f"<fix_explanation>{fix_explanation}</fix_explanation>"
+        )
+    elif issues_found and verification_details:
+        final_explanation = (
+            f"<verification_details>{verification_details}</verification_details>\n"
+            f"<fix_explanation>[Fix explanation not available or extraction failed]</fix_explanation>"
+        )
+
+    return {
+        "explanation": final_explanation,
+        "fixed_program": fixed_program,
+        "fixed_code": fixed_code,
+        "total_cost": total_cost,
+        "model_name": model_name,
+        "verification_issues_count": verification_issues_count,
+    }
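
The new module's docstring spells out its contract: given the driving program, the generating prompt, the code module, and the verification run's output, it returns the possibly-fixed artifacts plus cost and model metadata. A hedged sketch of calling it directly; the file names and strength value here are illustrative only, and the in-package caller is presumably the new fix_verification_errors_loop.py:

from pdd.fix_verification_errors import fix_verification_errors

# Hypothetical inputs; in practice these would come from real files and logs.
program = open("run_example.py").read()
prompt = open("prompts/example_python.prompt").read()
code = open("example.py").read()
output = open("verification_output.log").read()

result = fix_verification_errors(
    program=program,
    prompt=prompt,
    code=code,
    output=output,
    strength=0.7,      # 0-1, drives LLM model selection
    temperature=0.0,
    verbose=True,
)

print(f"Issues found: {result['verification_issues_count']}")
print(f"Model: {result['model_name']}, total cost: ${result['total_cost']:.6f}")
if result["explanation"] is not None:
    # Only overwrite the module when a fix was actually proposed.
    with open("example.py", "w") as f:
        f.write(result["fixed_code"])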