pdd-cli 0.0.20__py3-none-any.whl → 0.0.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pdd-cli might be problematic. Click here for more details.

@@ -1,287 +0,0 @@
1
- from rich import print as rprint
2
- import re
3
- import os
4
- import subprocess
5
- import requests
6
- from bs4 import BeautifulSoup
7
- from pathlib import Path
8
- from typing import List, Optional
9
-
10
def get_file_path(file_name: str) -> str:
    """
    Resolve the location of a file referenced from a prompt.

    Candidates are tried in this order; the first that applies wins:

    1. An absolute path is returned unchanged.
    2. A path that exists relative to the current working directory is
       returned fully resolved.
    3. A path that exists under ``$PDD_PATH`` (when that variable is set)
       is returned joined to it.
    4. If the path starts with a ``pdd`` component and the remainder exists
       relative to the current working directory, the resolved remainder
       is returned.
    5. Otherwise the path is joined to ``$PDD_PATH`` (or the current working
       directory when unset) without checking that it exists. A leading
       ``pdd`` component is dropped when that base directory is itself
       named ``pdd`` so the result never contains ``pdd/pdd``.

    Args:
        file_name: The name of the file to resolve

    Returns:
        The full path to the file
    """
    path = Path(file_name)

    # If it's an absolute path, return it as is
    if path.is_absolute():
        return str(path)

    # If path already exists relative to cwd, use it directly
    if path.exists():
        return str(path.resolve())

    # Read the environment once; an empty-but-set PDD_PATH still counts as set.
    env_root = os.environ.get('PDD_PATH')
    pdd_root = Path(env_root) if env_root is not None else None

    # Check if the path exists relative to PDD_PATH
    if pdd_root is not None and (pdd_root / path).exists():
        return str(pdd_root / path)

    # Compare path components rather than the raw string so that './pdd/x'
    # and platform-specific separators are recognised as well as 'pdd/x'.
    parts = path.parts
    has_pdd_prefix = len(parts) > 1 and parts[0] == 'pdd'
    without_prefix = Path(*parts[1:]) if has_pdd_prefix else path

    # If the path starts with pdd, try removing that one level
    if has_pdd_prefix and without_prefix.exists():
        return str(without_prefix.resolve())

    # Nothing exists yet: build a best-effort path under the base directory.
    base_path = pdd_root if pdd_root is not None else Path.cwd()

    # Make sure we don't add 'pdd' twice when the base is the pdd directory.
    if has_pdd_prefix and base_path.name == 'pdd':
        return str(base_path / without_prefix)

    return str(base_path / file_name)
61
-
62
def preprocess(
    prompt: str,
    recursive: bool = True,
    double_curly_brackets: bool = True,
    exclude_keys: Optional[List[str]] = None
) -> str:
    """
    Preprocess a prompt string for an LLM by handling specific XML-like tags.

    Processing happens in three passes:

    1. ``<include>``, ``<pdd>``, ``<shell>`` and ``<web>`` tags are expanded
       (``<pdd>`` comments are removed). A tag whose expansion fails is left
       in the prompt unchanged.
    2. Inside triple-backtick spans, ``<file>`` references are replaced by
       the file's contents; failures are reported and the reference is kept.
    3. If requested, every curly bracket is doubled, except ``{key}``
       placeholders named in ``exclude_keys`` that appear outside
       triple-backtick spans.

    Included files are expanded recursively but their braces are NOT doubled
    in the nested call: doubling is applied once, by the outermost call, so
    included text is escaped exactly as much as the surrounding prompt.

    Args:
        prompt: The prompt string to preprocess
        recursive: Whether to recursively process includes in the prompt
        double_curly_brackets: Whether to double curly brackets in the prompt
        exclude_keys: List of keys to exclude from curly bracket doubling

    Returns:
        The preprocessed prompt string. An empty string if ``prompt`` is
        empty, or ``"Error: <message>"`` if an unexpected exception occurs.
    """
    if not prompt:
        rprint("[bold red]Error:[/bold red] No prompt provided.")
        return ""

    keys = list(exclude_keys) if exclude_keys else []

    try:
        prompt = _expand_tags(prompt, recursive)
        prompt = _expand_backtick_includes(prompt, recursive)
        if double_curly_brackets:
            prompt = _double_braces(prompt, keys)
        return prompt  # Preserve whitespaces
    except Exception as e:
        # Top-level boundary: report and hand back an error string rather
        # than raising, which is what callers of this function rely on.
        rprint(f"[bold red]Error during prompt processing:[/bold red] {e}")
        return f"Error: {str(e)}"


# Groups: 1 = leading whitespace, 2 = tag name, 3 = tag body (None for
# self-closing / unclosed tags), 4 = trailing whitespace. The self-closing
# alternative must consume "/>" as a unit; matching only "/" would leave a
# stray ">" behind for tags such as "<pdd/>".
_TAG_PATTERN = re.compile(
    r'(\s*)<(include|pdd|shell|web)(?:\s+[^>]*)?(?:>(.*?)</\2>|/>|>)(\s*)',
    re.DOTALL,
)
_FENCE_PATTERN = re.compile(r'```(.*?)```', re.DOTALL)
_FENCE_SPLIT_PATTERN = re.compile(r'(```.*?```)', re.DOTALL)
_ANGLE_PATTERN = re.compile(r"<([^>]+)>")

# Upper bound for <web> fetches so an unresponsive host cannot hang the run.
_WEB_TIMEOUT_SECONDS = 30


def _expand_tags(prompt: str, recursive: bool) -> str:
    """Expand include/pdd/shell/web tags; failed expansions keep the tag."""

    def expand(match):
        lead = match.group(1)
        tag = match.group(2)
        body = (match.group(3) or "").strip()
        trail = match.group(4)

        # NOTE: tags wrapped in backticks are still processed here; the
        # whitespace groups can never contain a backtick.
        if tag == 'pdd':
            return lead + trail  # Remove pdd comments

        if tag == 'shell':
            replacement = _run_shell(body)
        elif tag == 'web':
            replacement = _fetch_web_text(body)
        else:
            replacement = _read_include(body, recursive)

        if replacement is None:
            return match.group(0)  # Return the original tag on error
        return lead + replacement + trail

    return _TAG_PATTERN.sub(expand, prompt)


def _run_shell(command: str) -> Optional[str]:
    """Run a <shell> command and return its stdout, or None on any failure."""
    # SECURITY: the command text comes straight from the prompt and is run
    # through the shell by design. Only preprocess prompts you trust.
    try:
        completed = subprocess.run(
            command, shell=True, check=True, text=True, capture_output=True
        )
    except Exception:
        # Deliberately best-effort: the caller keeps the original tag.
        return None
    return completed.stdout


def _fetch_web_text(url: str) -> Optional[str]:
    """Download a page for a <web> tag and return its main text, or None."""
    try:
        response = requests.get(url, timeout=_WEB_TIMEOUT_SECONDS)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove scripts, styles, and navigation elements
        for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside']):
            element.decompose()

        # Prefer the most specific content container; fall back to <body>,
        # and to the whole document for fragments that have no <body>.
        container = (
            soup.find('main')
            or soup.find('article')
            or soup.find('div', {'id': 'content'})
            or soup.body
            or soup
        )
        return container.get_text(strip=True)
    except Exception:
        # Deliberately best-effort: the caller keeps the original tag.
        return None


def _read_include(file_name: str, recursive: bool) -> Optional[str]:
    """Return the text for an <include> tag, or None if it cannot be read."""
    # Skip if it contains invalid characters or looks like an example
    if len(file_name) > 255 or any(c in file_name for c in '<>"\'|*?'):
        return None

    try:
        with open(get_file_path(file_name), 'r', encoding='utf-8') as handle:
            text = handle.read()
        if recursive:
            # Braces are doubled once by the outermost call, not here.
            text = preprocess(text, recursive=True, double_curly_brackets=False)
        return text
    except Exception:
        # Deliberately best-effort: the caller keeps the original tag.
        return None


def _expand_backtick_includes(prompt: str, recursive: bool) -> str:
    """Replace <file> references found inside triple-backtick spans."""

    def include_file(inner):
        file_name = inner.group(1)
        try:
            with open(get_file_path(file_name), 'r', encoding='utf-8') as handle:
                text = handle.read()
            if recursive:
                # Braces are doubled once by the outermost call, not here.
                return preprocess(text, recursive=True, double_curly_brackets=False)
            return text
        except FileNotFoundError:
            rprint(f"[bold red]File not found:[/bold red] {file_name}")
            return f"<{file_name}>"
        except Exception as e:
            rprint(f"[bold red]Error including file {file_name}:[/bold red] {e}")
            return f"<{file_name}>"

    def expand_span(span):
        return f"```{_ANGLE_PATTERN.sub(include_file, span.group(1))}```"

    return _FENCE_PATTERN.sub(expand_span, prompt)


def _double_braces(text: str, exclude_keys: List[str]) -> str:
    """Double every curly bracket, keeping excluded {key} placeholders."""
    # One pattern handles both cases: an excluded "{key}" is matched as a
    # whole (and kept), any other brace is matched alone (and doubled). This
    # treats nested and unbalanced braces the same way on every input.
    if exclude_keys:
        alternation = '|'.join(re.escape(key) for key in exclude_keys)
        brace_pattern = re.compile(r'\{(?:' + alternation + r')\}|[{}]')
    else:
        brace_pattern = re.compile(r'[{}]')

    def escape(match):
        token = match.group(0)
        return token if len(token) > 1 else token * 2

    # re.split with a capturing group alternates plain text (even indices)
    # and triple-backtick spans (odd indices).
    pieces = _FENCE_SPLIT_PATTERN.split(text)
    for index, piece in enumerate(pieces):
        if index % 2:
            # Double all curly brackets in code blocks; exclusions do not
            # apply inside them.
            pieces[index] = piece.replace("{", "{{").replace("}", "}}")
        else:
            pieces[index] = brace_pattern.sub(escape, piece)
    return "".join(pieces)