pdd-cli 0.0.20__py3-none-any.whl → 0.0.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pdd-cli might be problematic. Click here for more details.
- pdd/cli.py +1 -1
- pdd/context_generator.py +1 -1
- pdd/data/llm_model.csv +1 -1
- pdd/edit_file.py +783 -0
- pdd/fix_error_loop.py +218 -66
- pdd/fix_errors_from_unit_tests.py +366 -206
- pdd/fix_main.py +28 -6
- pdd/increase_tests.py +6 -3
- pdd/mcp_config.json +7 -0
- pdd/preprocess.py +0 -26
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +2 -2
- pdd/prompts/generate_test_LLM.prompt +11 -4
- {pdd_cli-0.0.20.dist-info → pdd_cli-0.0.22.dist-info}/METADATA +25 -17
- {pdd_cli-0.0.20.dist-info → pdd_cli-0.0.22.dist-info}/RECORD +18 -18
- {pdd_cli-0.0.20.dist-info → pdd_cli-0.0.22.dist-info}/WHEEL +1 -1
- pdd/preprocess copy.py +0 -234
- pdd/preprocess_copy_bahrat.py +0 -287
- {pdd_cli-0.0.20.dist-info → pdd_cli-0.0.22.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.20.dist-info → pdd_cli-0.0.22.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.20.dist-info → pdd_cli-0.0.22.dist-info}/top_level.txt +0 -0
pdd/preprocess_copy_bahrat.py
DELETED
|
@@ -1,287 +0,0 @@
|
|
|
1
|
-
from rich import print as rprint
|
|
2
|
-
import re
|
|
3
|
-
import os
|
|
4
|
-
import subprocess
|
|
5
|
-
import requests
|
|
6
|
-
from bs4 import BeautifulSoup
|
|
7
|
-
from pathlib import Path
|
|
8
|
-
from typing import List, Optional
|
|
9
|
-
|
|
10
|
-
def get_file_path(file_name: str) -> str:
    """
    Resolve ``file_name`` to a path string, trying several locations in order.

    Resolution order (the first rule that applies wins):
      1. An absolute path is returned unchanged.
      2. A path that exists relative to the current working directory is
         returned fully resolved.
      3. If the ``PDD_PATH`` environment variable is set and the path exists
         underneath it, that joined path is returned.
      4. If the first path component is ``pdd`` and the remainder exists
         relative to the current working directory, the resolved remainder
         is returned.
      5. Otherwise the name is joined onto ``PDD_PATH`` (or the current
         working directory when the variable is unset) without checking
         that the result exists.

    Args:
        file_name: The name of the file to resolve

    Returns:
        The resolved path as a string
    """
    path = Path(file_name)

    # If it's an absolute path, return it as is
    if path.is_absolute():
        return str(path)

    # If path already exists relative to cwd, use it directly
    if path.exists():
        return str(path.resolve())

    # Read the environment once; an empty-string value still counts as "set",
    # matching a plain membership test on os.environ.
    env_root = os.environ.get('PDD_PATH')
    pdd_root = Path(env_root) if env_root is not None else None

    # Check if the path exists relative to PDD_PATH
    if pdd_root is not None:
        candidate = pdd_root / path
        if candidate.exists():
            return str(candidate)

    # If the path starts with a 'pdd' component, try again one level down
    parts = path.parts
    if len(parts) > 1 and parts[0] == 'pdd':
        adjusted_path = Path(*parts[1:])
        if adjusted_path.exists():
            return str(adjusted_path.resolve())

    # Nothing exists yet: build a best-effort path under the base directory
    base_path = pdd_root if pdd_root is not None else Path.cwd()

    # When the base directory is itself named 'pdd', drop a leading 'pdd/'
    # from the name so the result does not contain 'pdd/pdd/...'.
    if base_path.name == 'pdd' and isinstance(file_name, str) and file_name.startswith('pdd/'):
        # Strip any further leading separators as well: joining a remainder
        # that begins with '/' would be treated as absolute and would throw
        # the base directory away.
        remainder = file_name[len('pdd/'):].lstrip('/')
        return str(base_path / remainder)

    return str(base_path / file_name)
|
|
61
|
-
|
|
62
|
-
def preprocess(
    prompt: str,
    recursive: bool = True,
    double_curly_brackets: bool = True,
    exclude_keys: Optional[List[str]] = None
) -> str:
    """
    Preprocess a prompt string for an LLM by handling specific XML-like tags.

    Processing happens in three passes:
      1. ``<pdd>``, ``<shell>``, ``<web>`` and ``<include>`` tags are expanded
         (comment removed, command output, page text, file content). A tag
         whose expansion fails is left in the text unchanged.
      2. Inside triple-backtick spans, every ``<name>`` is replaced by the
         content of the file ``name``; if the file cannot be read an error
         is printed and the ``<name>`` text is kept.
      3. If ``double_curly_brackets`` is true, curly brackets are doubled,
         except for ``{key}`` placeholders listed in ``exclude_keys``.

    Args:
        prompt: The prompt string to preprocess
        recursive: Whether to recursively process includes in the prompt
        double_curly_brackets: Whether to double curly brackets in the prompt
        exclude_keys: List of keys to exclude from curly bracket doubling

    Returns:
        The preprocessed prompt string. An empty prompt prints an error and
        returns "". Any unexpected exception is printed and reported as the
        string "Error: <message>" rather than raised.
    """
    if not prompt:
        rprint("[bold red]Error:[/bold red] No prompt provided.")
        return ""

    if exclude_keys is None:
        exclude_keys = []

    def expand_nested(text):
        # Included content is expanded recursively but never brace-doubled
        # here: it becomes part of the outer prompt, which is doubled exactly
        # once in the final pass. Doubling at both levels would turn '{a}'
        # into '{{{{a}}}}'.
        if not recursive:
            return text
        return preprocess(
            text,
            recursive=True,
            double_curly_brackets=False,
            exclude_keys=exclude_keys,
        )

    def read_included(file_name):
        # Read a file located via get_file_path and expand anything nested in it.
        with open(get_file_path(file_name), 'r', encoding='utf-8') as handle:
            return expand_nested(handle.read())

    def tag_handler(match):
        leading = match.group(1)
        tag_type = match.group(2)
        content = match.group(3) or ""
        trailing = match.group(4)

        if tag_type == 'pdd':
            # pdd tags are comments: drop them, keep surrounding whitespace
            return leading + trailing

        if tag_type == 'shell':
            # SECURITY: the tag body is executed through the shell as-is.
            # Only preprocess prompts that come from a trusted source.
            try:
                result = subprocess.run(
                    content.strip(), shell=True, check=True, text=True, capture_output=True
                )
                return leading + result.stdout + trailing
            except Exception:
                # Return the original tag on error (critical for regression tests)
                return match.group(0)

        if tag_type == 'web':
            try:
                # A timeout keeps an unresponsive server from blocking forever.
                response = requests.get(content.strip(), timeout=30)
                response.raise_for_status()
                soup = BeautifulSoup(response.text, 'html.parser')

                # Remove scripts, styles, and navigation elements
                for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside']):
                    element.decompose()

                # Prefer the main content container, fall back to the body
                main_content = (
                    soup.find('main')
                    or soup.find('article')
                    or soup.find('div', {'id': 'content'})
                )
                source = main_content if main_content else soup.body
                return leading + source.get_text(strip=True) + trailing
            except Exception:
                # Return the original tag on error
                return match.group(0)

        # Remaining tag type is 'include'
        file_name = content.strip()
        # Skip if it contains invalid characters or looks like an example
        if len(file_name) > 255 or any(c in file_name for c in '<>"\'|*?'):
            return match.group(0)
        try:
            return leading + read_included(file_name) + trailing
        except Exception:
            # Return the original tag on error
            return match.group(0)

    def triple_backtick_include(match):
        def angle_bracket_replace(inner_match):
            file_name = inner_match.group(1)
            try:
                return read_included(file_name)
            except FileNotFoundError:
                rprint(f"[bold red]File not found:[/bold red] {file_name}")
                return f"<{file_name}>"
            except Exception as e:
                rprint(f"[bold red]Error including file {file_name}:[/bold red] {e}")
                return f"<{file_name}>"

        # Replace angle brackets in backtick content
        processed_content = re.sub(r"<([^>]+)>", angle_bracket_replace, match.group(1))
        return f"```{processed_content}```"

    try:
        # A tag is either <tag ...>body</tag>, self-closing <tag ... />, or a
        # lone opening <tag ...>. The self-closing form must consume the '>'
        # too, otherwise a stray '>' is left behind in the prompt.
        tag_pattern = r'(\s*)<(include|pdd|shell|web)(?:\s+[^>]*)?(?:>(.*?)</\2>|/>|>)(\s*)'
        prompt = re.sub(tag_pattern, tag_handler, prompt, flags=re.DOTALL)

        # Process angle brackets in triple backticks
        prompt = re.sub(r'```(.*?)```', triple_backtick_include, prompt, flags=re.DOTALL)

        if not double_curly_brackets:
            return prompt  # Preserve whitespaces

        if "\n" not in prompt and "```" not in prompt:
            return _double_braces_inline(prompt, exclude_keys)
        return _double_braces_structured(prompt, exclude_keys)
    except Exception as e:
        rprint(f"[bold red]Error during prompt processing:[/bold red] {e}")
        return f"Error: {str(e)}"


def _double_braces_inline(prompt, exclude_keys):
    """Double every brace in a single-line prompt, keeping '{key}' for excluded keys."""
    preserved = ['{' + key + '}' for key in exclude_keys]
    pieces = []
    i = 0
    while i < len(prompt):
        char = prompt[i]
        if char == '{':
            # First excluded placeholder (in list order) starting here, if any
            token = next((t for t in preserved if prompt.startswith(t, i)), None)
            if token is not None:
                pieces.append(token)
                i += len(token)
                continue
            pieces.append('{{')
        elif char == '}':
            pieces.append('}}')
        else:
            pieces.append(char)
        i += 1
    return "".join(pieces)


def _double_braces_structured(prompt, exclude_keys):
    """Double braces in a multi-line or fenced prompt, treating code fences separately."""

    def double_all(text):
        return text.replace("{", "{{").replace("}", "}}")

    def replacer(match):
        content = match.group(1)
        if not content:  # Handle empty braces: {}
            return "{{}}"
        if content in exclude_keys:
            return '{' + content + '}'
        return '{{' + content + '}}'

    # The capture group keeps the fences: odd indices are code fences,
    # even indices are the text between them.
    parts = re.split(r'(```.*?```)', prompt, flags=re.DOTALL)

    for i, part in enumerate(parts):
        if i % 2 == 0:  # Not in a code block
            if ":" in part and "{" in part and "}" in part:
                # JSON-like text: shield excluded placeholders, double every
                # remaining brace, then restore the placeholders.
                shields = {
                    key: f"__EXCLUDED_KEY_{key}_PLACEHOLDER__" for key in exclude_keys
                }
                for key, shield in shields.items():
                    part = part.replace('{' + key + '}', shield)
                part = double_all(part)
                for key, shield in shields.items():
                    part = part.replace(shield, '{' + key + '}')
                parts[i] = part
            else:
                # Regular text: only balanced, non-nested {...} groups are doubled
                parts[i] = re.sub(r'{([^{}]*)}', replacer, part)
        else:  # Inside a code block
            code_match = re.match(r'```(.*?)?\n(.*?)```', part, re.DOTALL)
            if code_match:
                # The first line (language tag) is left as written; only the
                # content after it is doubled.
                language = code_match.group(1) or ""
                parts[i] = f"```{language}\n{double_all(code_match.group(2))}```"
            else:
                # Fence with no newline: everything between the backticks is
                # content and must be doubled too.
                parts[i] = "```" + double_all(part[3:-3]) + "```"

    return "".join(parts)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|