pdd-cli 0.0.46__py3-none-any.whl → 0.0.48__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pdd-cli might be problematic. Click here for more details.
- pdd/__init__.py +1 -1
- pdd/cli.py +42 -0
- pdd/cmd_test_main.py +19 -2
- pdd/code_generator_main.py +14 -3
- pdd/construct_paths.py +56 -49
- pdd/context_generator_main.py +15 -6
- pdd/fix_error_loop.py +45 -6
- pdd/llm_invoke.py +53 -11
- pdd/prompts/auto_include_LLM.prompt +51 -905
- pdd/summarize_directory.py +5 -0
- pdd/sync_determine_operation.py +163 -51
- pdd/sync_orchestration.py +255 -58
- pdd/update_model_costs.py +2 -2
- {pdd_cli-0.0.46.dist-info → pdd_cli-0.0.48.dist-info}/METADATA +3 -3
- {pdd_cli-0.0.46.dist-info → pdd_cli-0.0.48.dist-info}/RECORD +19 -19
- {pdd_cli-0.0.46.dist-info → pdd_cli-0.0.48.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.46.dist-info → pdd_cli-0.0.48.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.46.dist-info → pdd_cli-0.0.48.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.46.dist-info → pdd_cli-0.0.48.dist-info}/top_level.txt +0 -0
pdd/__init__.py
CHANGED
pdd/cli.py
CHANGED
|
@@ -34,6 +34,7 @@ from .fix_main import fix_main
|
|
|
34
34
|
from .fix_verification_main import fix_verification_main
|
|
35
35
|
from .install_completion import install_completion, get_local_pdd_path
|
|
36
36
|
from .preprocess_main import preprocess_main
|
|
37
|
+
from .pytest_output import run_pytest_and_capture_output
|
|
37
38
|
from .split_main import split_main
|
|
38
39
|
from .sync_main import sync_main
|
|
39
40
|
from .trace_main import trace_main
|
|
@@ -1176,6 +1177,47 @@ def sync(
|
|
|
1176
1177
|
return None
|
|
1177
1178
|
|
|
1178
1179
|
|
|
1180
|
+
@cli.command("pytest-output")
|
|
1181
|
+
@click.argument("test_file", type=click.Path(exists=True, dir_okay=False))
|
|
1182
|
+
@click.option(
|
|
1183
|
+
"--json-only",
|
|
1184
|
+
is_flag=True,
|
|
1185
|
+
default=False,
|
|
1186
|
+
help="Output only JSON to stdout for programmatic use.",
|
|
1187
|
+
)
|
|
1188
|
+
@click.pass_context
|
|
1189
|
+
# No @track_cost since this is a utility command
|
|
1190
|
+
def pytest_output_cmd(ctx: click.Context, test_file: str, json_only: bool) -> None:
|
|
1191
|
+
"""Run pytest on a test file and capture structured output.
|
|
1192
|
+
|
|
1193
|
+
This is a utility command used internally by PDD for capturing pytest results
|
|
1194
|
+
in a structured format. It can also be used directly for debugging test issues.
|
|
1195
|
+
|
|
1196
|
+
Examples:
|
|
1197
|
+
pdd pytest-output tests/test_example.py
|
|
1198
|
+
pdd pytest-output tests/test_example.py --json-only
|
|
1199
|
+
"""
|
|
1200
|
+
command_name = "pytest-output"
|
|
1201
|
+
quiet_mode = ctx.obj.get("quiet", False)
|
|
1202
|
+
|
|
1203
|
+
try:
|
|
1204
|
+
import json
|
|
1205
|
+
pytest_output = run_pytest_and_capture_output(test_file)
|
|
1206
|
+
|
|
1207
|
+
if json_only:
|
|
1208
|
+
# Print only valid JSON to stdout for programmatic use
|
|
1209
|
+
print(json.dumps(pytest_output))
|
|
1210
|
+
else:
|
|
1211
|
+
# Pretty print the output for interactive use
|
|
1212
|
+
if not quiet_mode:
|
|
1213
|
+
console.print(f"Running pytest on: [blue]{test_file}[/blue]")
|
|
1214
|
+
from rich.pretty import pprint
|
|
1215
|
+
pprint(pytest_output, console=console)
|
|
1216
|
+
|
|
1217
|
+
except Exception as e:
|
|
1218
|
+
handle_error(e, command_name, quiet_mode)
|
|
1219
|
+
|
|
1220
|
+
|
|
1179
1221
|
@cli.command("install_completion")
|
|
1180
1222
|
@click.pass_context
|
|
1181
1223
|
# No @track_cost
|
pdd/cmd_test_main.py
CHANGED
|
@@ -3,6 +3,7 @@ Main entry point for the 'test' command.
|
|
|
3
3
|
"""
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
import click
|
|
6
|
+
from pathlib import Path
|
|
6
7
|
# pylint: disable=redefined-builtin
|
|
7
8
|
from rich import print
|
|
8
9
|
|
|
@@ -146,8 +147,20 @@ def cmd_test_main(
|
|
|
146
147
|
ctx.exit(1)
|
|
147
148
|
return "", 0.0, ""
|
|
148
149
|
|
|
149
|
-
# Handle output -
|
|
150
|
-
|
|
150
|
+
# Handle output - if output is a directory, use resolved file path from construct_paths
|
|
151
|
+
resolved_output = output_file_paths["output"]
|
|
152
|
+
if output is None:
|
|
153
|
+
output_file = resolved_output
|
|
154
|
+
else:
|
|
155
|
+
try:
|
|
156
|
+
is_dir_hint = output.endswith('/')
|
|
157
|
+
except Exception:
|
|
158
|
+
is_dir_hint = False
|
|
159
|
+
# Prefer resolved file if user passed a directory path
|
|
160
|
+
if is_dir_hint or (Path(output).exists() and Path(output).is_dir()):
|
|
161
|
+
output_file = resolved_output
|
|
162
|
+
else:
|
|
163
|
+
output_file = output
|
|
151
164
|
if merge and existing_tests:
|
|
152
165
|
output_file = existing_tests
|
|
153
166
|
|
|
@@ -165,6 +178,10 @@ def cmd_test_main(
|
|
|
165
178
|
return "", 0.0, ""
|
|
166
179
|
|
|
167
180
|
try:
|
|
181
|
+
# Ensure parent directory exists
|
|
182
|
+
output_path = Path(output_file)
|
|
183
|
+
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
184
|
+
|
|
168
185
|
with open(output_file, "w", encoding="utf-8") as file_handle:
|
|
169
186
|
file_handle.write(unit_test)
|
|
170
187
|
print(f"[bold green]Unit tests saved to:[/bold green] {output_file}")
|
pdd/code_generator_main.py
CHANGED
|
@@ -165,8 +165,19 @@ def code_generator_main(
|
|
|
165
165
|
command_options=command_options,
|
|
166
166
|
)
|
|
167
167
|
prompt_content = input_strings["prompt_file"]
|
|
168
|
-
#
|
|
169
|
-
|
|
168
|
+
# Determine final output path: if user passed a directory, use resolved file path
|
|
169
|
+
resolved_output = output_file_paths.get("output")
|
|
170
|
+
if output is None:
|
|
171
|
+
output_path = resolved_output
|
|
172
|
+
else:
|
|
173
|
+
try:
|
|
174
|
+
is_dir_hint = output.endswith(os.path.sep) or output.endswith("/")
|
|
175
|
+
except Exception:
|
|
176
|
+
is_dir_hint = False
|
|
177
|
+
if is_dir_hint or os.path.isdir(output):
|
|
178
|
+
output_path = resolved_output
|
|
179
|
+
else:
|
|
180
|
+
output_path = output
|
|
170
181
|
|
|
171
182
|
except FileNotFoundError as e:
|
|
172
183
|
console.print(f"[red]Error: Input file not found: {e.filename}[/red]")
|
|
@@ -442,4 +453,4 @@ def code_generator_main(
|
|
|
442
453
|
if verbose: console.print(traceback.format_exc())
|
|
443
454
|
return "", was_incremental_operation, total_cost, "error"
|
|
444
455
|
|
|
445
|
-
return generated_code_content or "", was_incremental_operation, total_cost, model_name
|
|
456
|
+
return generated_code_content or "", was_incremental_operation, total_cost, model_name
|
pdd/construct_paths.py
CHANGED
|
@@ -181,47 +181,29 @@ def _candidate_prompt_path(input_files: Dict[str, Path]) -> Path | None:
|
|
|
181
181
|
|
|
182
182
|
# New helper function to check if a language is known
|
|
183
183
|
def _is_known_language(language_name: str) -> bool:
|
|
184
|
-
"""
|
|
184
|
+
"""Return True if the language is recognized.
|
|
185
|
+
|
|
186
|
+
Prefer CSV in PDD_PATH if available; otherwise fall back to a built-in set
|
|
187
|
+
so basename/language inference does not fail when PDD_PATH is unset.
|
|
188
|
+
"""
|
|
189
|
+
language_name_lower = (language_name or "").lower()
|
|
190
|
+
if not language_name_lower:
|
|
191
|
+
return False
|
|
192
|
+
|
|
193
|
+
builtin_languages = {
|
|
194
|
+
'python', 'javascript', 'typescript', 'java', 'cpp', 'c', 'go', 'ruby', 'rust',
|
|
195
|
+
'kotlin', 'swift', 'csharp', 'php', 'scala', 'r', 'lua', 'perl', 'bash', 'shell',
|
|
196
|
+
'powershell', 'sql', 'prompt', 'html', 'css', 'makefile'
|
|
197
|
+
}
|
|
198
|
+
|
|
185
199
|
pdd_path_str = os.getenv('PDD_PATH')
|
|
186
200
|
if not pdd_path_str:
|
|
187
|
-
|
|
188
|
-
# Or, for an internal helper, we might decide to log and return False,
|
|
189
|
-
# but raising an error for missing config is generally safer.
|
|
190
|
-
# However, _determine_language (the caller) already raises ValueError
|
|
191
|
-
# if language cannot be found, so this path might not be strictly necessary
|
|
192
|
-
# if we assume PDD_PATH is validated earlier or by other get_extension/get_language calls.
|
|
193
|
-
# For robustness here, let's keep a check but perhaps make it less severe if called internally.
|
|
194
|
-
# For now, align with how get_extension might handle it.
|
|
195
|
-
# console.print("[error]PDD_PATH environment variable is not set. Cannot validate language.", style="error")
|
|
196
|
-
# return False # Or raise error
|
|
197
|
-
# Given this is internal and other functions (get_extension) already depend on PDD_PATH,
|
|
198
|
-
# we can assume if those ran, PDD_PATH is set. If not, they'd fail first.
|
|
199
|
-
# So, we can simplify or rely on that pre-condition.
|
|
200
|
-
# Let's assume PDD_PATH will be set if other language functions are working.
|
|
201
|
-
# If it's critical, an explicit check and raise ValueError is better.
|
|
202
|
-
# For now, let's proceed assuming PDD_PATH is available if this point is reached.
|
|
203
|
-
pass # Assuming PDD_PATH is checked by get_extension/get_language if they are called
|
|
204
|
-
|
|
205
|
-
# If PDD_PATH is not set, this will likely fail earlier if get_extension/get_language are used.
|
|
206
|
-
# If we want this helper to be fully independent, it needs robust PDD_PATH handling.
|
|
207
|
-
# Let's assume for now, PDD_PATH is available if this point is reached through normal flow.
|
|
208
|
-
|
|
209
|
-
# Re-evaluate: PDD_PATH is critical for this function. Let's keep the check.
|
|
210
|
-
if not pdd_path_str:
|
|
211
|
-
# This helper might be called before get_extension in some logic paths
|
|
212
|
-
# if _determine_language prioritizes suffix checking first.
|
|
213
|
-
# So, it needs its own PDD_PATH check.
|
|
214
|
-
# Raise ValueError to be consistent with get_extension's behavior.
|
|
215
|
-
raise ValueError("PDD_PATH environment variable is not set. Cannot validate language.")
|
|
201
|
+
return language_name_lower in builtin_languages
|
|
216
202
|
|
|
217
203
|
csv_file_path = Path(pdd_path_str) / 'data' / 'language_format.csv'
|
|
218
|
-
|
|
219
204
|
if not csv_file_path.is_file():
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
language_name_lower = language_name.lower()
|
|
224
|
-
|
|
205
|
+
return language_name_lower in builtin_languages
|
|
206
|
+
|
|
225
207
|
try:
|
|
226
208
|
with open(csv_file_path, mode='r', encoding='utf-8', newline='') as csvfile:
|
|
227
209
|
reader = csv.DictReader(csvfile)
|
|
@@ -229,10 +211,10 @@ def _is_known_language(language_name: str) -> bool:
|
|
|
229
211
|
if row.get('language', '').lower() == language_name_lower:
|
|
230
212
|
return True
|
|
231
213
|
except csv.Error as e:
|
|
232
|
-
# Log and return False or raise a custom error
|
|
233
214
|
console.print(f"[error]CSV Error reading {csv_file_path}: {e}", style="error")
|
|
234
|
-
return
|
|
235
|
-
|
|
215
|
+
return language_name_lower in builtin_languages
|
|
216
|
+
|
|
217
|
+
return language_name_lower in builtin_languages
|
|
236
218
|
|
|
237
219
|
|
|
238
220
|
def _strip_language_suffix(path_like: os.PathLike[str]) -> str:
|
|
@@ -354,7 +336,7 @@ def _determine_language(
|
|
|
354
336
|
|
|
355
337
|
# 4 - Special handling for detect command - default to prompt for LLM prompts
|
|
356
338
|
if command == "detect" and "change_file" in input_file_paths:
|
|
357
|
-
return "prompt"
|
|
339
|
+
return "prompt"
|
|
358
340
|
|
|
359
341
|
# 5 - If no language determined, raise error
|
|
360
342
|
raise ValueError("Could not determine language from input files or options.")
|
|
@@ -497,11 +479,15 @@ def construct_paths(
|
|
|
497
479
|
for key, path_str in input_file_paths.items():
|
|
498
480
|
try:
|
|
499
481
|
path = Path(path_str).expanduser()
|
|
500
|
-
# Resolve non-error files strictly first
|
|
482
|
+
# Resolve non-error files strictly first, but be more lenient for sync command
|
|
501
483
|
if key != "error_file":
|
|
502
|
-
#
|
|
503
|
-
|
|
504
|
-
|
|
484
|
+
# For sync command, be more tolerant of non-existent files since we're just determining paths
|
|
485
|
+
if command == "sync":
|
|
486
|
+
input_paths[key] = path.resolve()
|
|
487
|
+
else:
|
|
488
|
+
# Let FileNotFoundError propagate naturally if path doesn't exist
|
|
489
|
+
resolved_path = path.resolve(strict=True)
|
|
490
|
+
input_paths[key] = resolved_path
|
|
505
491
|
else:
|
|
506
492
|
# Resolve error file non-strictly, existence checked later
|
|
507
493
|
input_paths[key] = path.resolve()
|
|
@@ -531,9 +517,14 @@ def construct_paths(
|
|
|
531
517
|
|
|
532
518
|
# Check existence again, especially for error_file which might have been created
|
|
533
519
|
if not path.exists():
|
|
534
|
-
#
|
|
535
|
-
|
|
536
|
-
|
|
520
|
+
# For sync command, be more tolerant of non-existent files since we're just determining paths
|
|
521
|
+
if command == "sync":
|
|
522
|
+
# Skip reading content for non-existent files in sync mode
|
|
523
|
+
continue
|
|
524
|
+
else:
|
|
525
|
+
# This case should ideally be caught by resolve(strict=True) earlier for non-error files
|
|
526
|
+
# Raise standard FileNotFoundError
|
|
527
|
+
raise FileNotFoundError(f"{path}")
|
|
537
528
|
|
|
538
529
|
if path.is_file(): # Read only if it's a file
|
|
539
530
|
try:
|
|
@@ -598,7 +589,23 @@ def construct_paths(
|
|
|
598
589
|
style="warning"
|
|
599
590
|
)
|
|
600
591
|
|
|
601
|
-
|
|
592
|
+
|
|
593
|
+
# Try to get extension from CSV; fallback to built-in mapping if PDD_PATH/CSV unavailable
|
|
594
|
+
try:
|
|
595
|
+
file_extension = get_extension(language) # Pass determined language
|
|
596
|
+
if not file_extension and (language or '').lower() != 'prompt':
|
|
597
|
+
raise ValueError('empty extension')
|
|
598
|
+
except Exception:
|
|
599
|
+
builtin_ext_map = {
|
|
600
|
+
'python': '.py', 'javascript': '.js', 'typescript': '.ts', 'java': '.java',
|
|
601
|
+
'cpp': '.cpp', 'c': '.c', 'go': '.go', 'ruby': '.rb', 'rust': '.rs',
|
|
602
|
+
'kotlin': '.kt', 'swift': '.swift', 'csharp': '.cs', 'php': '.php',
|
|
603
|
+
'scala': '.scala', 'r': '.r', 'lua': '.lua', 'perl': '.pl', 'bash': '.sh',
|
|
604
|
+
'shell': '.sh', 'powershell': '.ps1', 'sql': '.sql', 'html': '.html', 'css': '.css',
|
|
605
|
+
'prompt': '.prompt', 'makefile': ''
|
|
606
|
+
}
|
|
607
|
+
file_extension = builtin_ext_map.get(language.lower(), f".{language.lower()}" if language else '')
|
|
608
|
+
|
|
602
609
|
|
|
603
610
|
|
|
604
611
|
# ------------- Step 3b: build output paths ---------------
|
|
@@ -688,4 +695,4 @@ def construct_paths(
|
|
|
688
695
|
resolved_config["examples_dir"] = str(Path(resolved_config.get("example_output_path", "examples")).parent)
|
|
689
696
|
|
|
690
697
|
|
|
691
|
-
return resolved_config, input_strings, output_file_paths_str_return, language
|
|
698
|
+
return resolved_config, input_strings, output_file_paths_str_return, language
|
pdd/context_generator_main.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import sys
|
|
2
2
|
from typing import Tuple, Optional
|
|
3
|
+
from pathlib import Path
|
|
3
4
|
import click
|
|
4
5
|
from rich import print as rprint
|
|
5
6
|
|
|
@@ -51,11 +52,19 @@ def context_generator_main(ctx: click.Context, prompt_file: str, code_file: str,
|
|
|
51
52
|
verbose=ctx.obj.get('verbose', False)
|
|
52
53
|
)
|
|
53
54
|
|
|
54
|
-
# Save results -
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
55
|
+
# Save results - if output is a directory, use resolved file path from construct_paths
|
|
56
|
+
resolved_output = output_file_paths["output"]
|
|
57
|
+
if output is None:
|
|
58
|
+
final_output_path = resolved_output
|
|
59
|
+
else:
|
|
60
|
+
try:
|
|
61
|
+
is_dir_hint = output.endswith('/')
|
|
62
|
+
except Exception:
|
|
63
|
+
is_dir_hint = False
|
|
64
|
+
if is_dir_hint or (Path(output).exists() and Path(output).is_dir()):
|
|
65
|
+
final_output_path = resolved_output
|
|
66
|
+
else:
|
|
67
|
+
final_output_path = output
|
|
59
68
|
if final_output_path and example_code is not None:
|
|
60
69
|
with open(final_output_path, 'w') as f:
|
|
61
70
|
f.write(example_code)
|
|
@@ -88,4 +97,4 @@ def context_generator_main(ctx: click.Context, prompt_file: str, code_file: str,
|
|
|
88
97
|
except Exception as e:
|
|
89
98
|
if not ctx.obj.get('quiet', False):
|
|
90
99
|
rprint(f"[bold red]Error:[/bold red] {str(e)}")
|
|
91
|
-
sys.exit(1)
|
|
100
|
+
sys.exit(1)
|
pdd/fix_error_loop.py
CHANGED
|
@@ -26,15 +26,46 @@ def run_pytest_on_file(test_file: str) -> tuple[int, int, int, str]:
|
|
|
26
26
|
Returns a tuple: (failures, errors, warnings, logs)
|
|
27
27
|
"""
|
|
28
28
|
try:
|
|
29
|
-
#
|
|
30
|
-
|
|
31
|
-
python_executable = detect_host_python_executable()
|
|
32
|
-
cmd = [python_executable, "-m", "pdd.pytest_output", "--json-only", test_file]
|
|
29
|
+
# Try using the pdd pytest-output command first (works with uv tool installs)
|
|
30
|
+
cmd = ["pdd", "pytest-output", "--json-only", test_file]
|
|
33
31
|
result = subprocess.run(cmd, capture_output=True, text=True)
|
|
34
32
|
|
|
33
|
+
# If pdd command failed, try fallback approaches
|
|
34
|
+
if result.returncode != 0 and ("command not found" in result.stderr.lower() or "not found" in result.stderr.lower()):
|
|
35
|
+
# Fallback 1: Try direct function call (fastest for development)
|
|
36
|
+
try:
|
|
37
|
+
from .pytest_output import run_pytest_and_capture_output
|
|
38
|
+
pytest_output = run_pytest_and_capture_output(test_file)
|
|
39
|
+
result_stdout = json.dumps(pytest_output)
|
|
40
|
+
result = type('MockResult', (), {'stdout': result_stdout, 'stderr': '', 'returncode': 0})()
|
|
41
|
+
except ImportError:
|
|
42
|
+
# Fallback 2: Try python -m approach for development installs where pdd isn't in PATH
|
|
43
|
+
python_executable = detect_host_python_executable()
|
|
44
|
+
cmd = [python_executable, "-m", "pdd.pytest_output", "--json-only", test_file]
|
|
45
|
+
result = subprocess.run(cmd, capture_output=True, text=True)
|
|
46
|
+
|
|
35
47
|
# Parse the JSON output from stdout
|
|
36
48
|
try:
|
|
37
|
-
|
|
49
|
+
# Extract just the JSON part from stdout (handles CLI contamination)
|
|
50
|
+
stdout_clean = result.stdout
|
|
51
|
+
json_start = stdout_clean.find('{')
|
|
52
|
+
if json_start == -1:
|
|
53
|
+
raise json.JSONDecodeError("No JSON found in output", stdout_clean, 0)
|
|
54
|
+
|
|
55
|
+
# Find the end of the JSON object by counting braces
|
|
56
|
+
brace_count = 0
|
|
57
|
+
json_end = json_start
|
|
58
|
+
for i, char in enumerate(stdout_clean[json_start:], json_start):
|
|
59
|
+
if char == '{':
|
|
60
|
+
brace_count += 1
|
|
61
|
+
elif char == '}':
|
|
62
|
+
brace_count -= 1
|
|
63
|
+
if brace_count == 0:
|
|
64
|
+
json_end = i + 1
|
|
65
|
+
break
|
|
66
|
+
|
|
67
|
+
json_str = stdout_clean[json_start:json_end]
|
|
68
|
+
output = json.loads(json_str)
|
|
38
69
|
test_results = output.get('test_results', [{}])[0]
|
|
39
70
|
|
|
40
71
|
# Check pytest's return code first
|
|
@@ -250,7 +281,15 @@ def fix_error_loop(unit_test_file: str,
|
|
|
250
281
|
elog.write(format_log_for_output(log_structure))
|
|
251
282
|
|
|
252
283
|
# Set success to True (already determined)
|
|
253
|
-
#
|
|
284
|
+
# Read the actual fixed files to return the successful state
|
|
285
|
+
try:
|
|
286
|
+
with open(unit_test_file, "r") as f:
|
|
287
|
+
final_unit_test = f.read()
|
|
288
|
+
with open(code_file, "r") as f:
|
|
289
|
+
final_code = f.read()
|
|
290
|
+
except Exception as e:
|
|
291
|
+
rprint(f"[yellow]Warning: Could not read fixed files: {e}[/yellow]")
|
|
292
|
+
# Keep empty strings as fallback
|
|
254
293
|
break
|
|
255
294
|
|
|
256
295
|
iteration_header = f"=== Attempt iteration {iteration} ==="
|
pdd/llm_invoke.py
CHANGED
|
@@ -5,6 +5,8 @@ import os
|
|
|
5
5
|
import pandas as pd
|
|
6
6
|
import litellm
|
|
7
7
|
import logging # ADDED FOR DETAILED LOGGING
|
|
8
|
+
import importlib.resources
|
|
9
|
+
from litellm.caching.caching import Cache # Fix for LiteLLM v1.49.3+
|
|
8
10
|
|
|
9
11
|
# --- Configure Standard Python Logging ---
|
|
10
12
|
logger = logging.getLogger("pdd.llm_invoke")
|
|
@@ -190,12 +192,20 @@ ENV_PATH = PROJECT_ROOT / ".env"
|
|
|
190
192
|
user_pdd_dir = Path.home() / ".pdd"
|
|
191
193
|
user_model_csv_path = user_pdd_dir / "llm_model.csv"
|
|
192
194
|
|
|
195
|
+
# Check in order: user-specific, project-specific, package default
|
|
193
196
|
if user_model_csv_path.is_file():
|
|
194
197
|
LLM_MODEL_CSV_PATH = user_model_csv_path
|
|
195
198
|
logger.info(f"Using user-specific LLM model CSV: {LLM_MODEL_CSV_PATH}")
|
|
196
199
|
else:
|
|
197
|
-
|
|
198
|
-
|
|
200
|
+
# Check project-specific location (.pdd directory)
|
|
201
|
+
project_model_csv_path = PROJECT_ROOT / ".pdd" / "llm_model.csv"
|
|
202
|
+
if project_model_csv_path.is_file():
|
|
203
|
+
LLM_MODEL_CSV_PATH = project_model_csv_path
|
|
204
|
+
logger.info(f"Using project-specific LLM model CSV: {LLM_MODEL_CSV_PATH}")
|
|
205
|
+
else:
|
|
206
|
+
# Neither exists, we'll use a marker path that _load_model_data will handle
|
|
207
|
+
LLM_MODEL_CSV_PATH = None
|
|
208
|
+
logger.info("No local LLM model CSV found, will use package default")
|
|
199
209
|
# ---------------------------------
|
|
200
210
|
|
|
201
211
|
# Load environment variables from .env file
|
|
@@ -225,6 +235,7 @@ if GCS_HMAC_SECRET_ACCESS_KEY:
|
|
|
225
235
|
GCS_HMAC_SECRET_ACCESS_KEY = GCS_HMAC_SECRET_ACCESS_KEY.strip()
|
|
226
236
|
|
|
227
237
|
cache_configured = False
|
|
238
|
+
configured_cache = None # Store the configured cache instance for restoration
|
|
228
239
|
|
|
229
240
|
if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
|
|
230
241
|
# Store original AWS credentials before overwriting for GCS cache setup
|
|
@@ -238,12 +249,13 @@ if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
|
|
|
238
249
|
os.environ['AWS_SECRET_ACCESS_KEY'] = GCS_HMAC_SECRET_ACCESS_KEY
|
|
239
250
|
# os.environ['AWS_REGION_NAME'] = GCS_REGION_NAME # Uncomment if needed
|
|
240
251
|
|
|
241
|
-
|
|
252
|
+
configured_cache = Cache(
|
|
242
253
|
type="s3",
|
|
243
254
|
s3_bucket_name=GCS_BUCKET_NAME,
|
|
244
255
|
s3_region_name=GCS_REGION_NAME, # Pass region explicitly to cache
|
|
245
256
|
s3_endpoint_url=GCS_ENDPOINT_URL,
|
|
246
257
|
)
|
|
258
|
+
litellm.cache = configured_cache
|
|
247
259
|
logger.info(f"LiteLLM cache configured for GCS bucket (S3 compatible): {GCS_BUCKET_NAME}")
|
|
248
260
|
cache_configured = True
|
|
249
261
|
|
|
@@ -272,7 +284,8 @@ if not cache_configured:
|
|
|
272
284
|
try:
|
|
273
285
|
# Try SQLite-based cache as a fallback
|
|
274
286
|
sqlite_cache_path = PROJECT_ROOT / "litellm_cache.sqlite"
|
|
275
|
-
|
|
287
|
+
configured_cache = Cache(type="sqlite", cache_path=str(sqlite_cache_path))
|
|
288
|
+
litellm.cache = configured_cache
|
|
276
289
|
logger.info(f"LiteLLM SQLite cache configured at {sqlite_cache_path}")
|
|
277
290
|
cache_configured = True
|
|
278
291
|
except Exception as e2:
|
|
@@ -356,12 +369,41 @@ litellm.success_callback = [_litellm_success_callback]
|
|
|
356
369
|
|
|
357
370
|
# --- Helper Functions ---
|
|
358
371
|
|
|
359
|
-
def _load_model_data(csv_path: Path) -> pd.DataFrame:
|
|
360
|
-
"""Loads and preprocesses the LLM model data from CSV.
|
|
361
|
-
|
|
362
|
-
|
|
372
|
+
def _load_model_data(csv_path: Optional[Path]) -> pd.DataFrame:
|
|
373
|
+
"""Loads and preprocesses the LLM model data from CSV.
|
|
374
|
+
|
|
375
|
+
Args:
|
|
376
|
+
csv_path: Path to CSV file, or None to use package default
|
|
377
|
+
|
|
378
|
+
Returns:
|
|
379
|
+
DataFrame with model configuration data
|
|
380
|
+
"""
|
|
381
|
+
# If csv_path is provided, try to load from it
|
|
382
|
+
if csv_path is not None:
|
|
383
|
+
if not csv_path.exists():
|
|
384
|
+
logger.warning(f"Specified LLM model CSV not found at {csv_path}, trying package default")
|
|
385
|
+
csv_path = None
|
|
386
|
+
else:
|
|
387
|
+
try:
|
|
388
|
+
df = pd.read_csv(csv_path)
|
|
389
|
+
logger.debug(f"Loaded model data from {csv_path}")
|
|
390
|
+
# Continue with the rest of the function...
|
|
391
|
+
except Exception as e:
|
|
392
|
+
logger.warning(f"Failed to load CSV from {csv_path}: {e}, trying package default")
|
|
393
|
+
csv_path = None
|
|
394
|
+
|
|
395
|
+
# If csv_path is None or loading failed, use package default
|
|
396
|
+
if csv_path is None:
|
|
397
|
+
try:
|
|
398
|
+
# Use importlib.resources to load the packaged CSV
|
|
399
|
+
csv_data = importlib.resources.files('pdd').joinpath('data/llm_model.csv').read_text()
|
|
400
|
+
import io
|
|
401
|
+
df = pd.read_csv(io.StringIO(csv_data))
|
|
402
|
+
logger.info("Loaded model data from package default")
|
|
403
|
+
except Exception as e:
|
|
404
|
+
raise FileNotFoundError(f"Failed to load default LLM model CSV from package: {e}")
|
|
405
|
+
|
|
363
406
|
try:
|
|
364
|
-
df = pd.read_csv(csv_path)
|
|
365
407
|
# Basic validation and type conversion
|
|
366
408
|
required_cols = ['provider', 'model', 'input', 'output', 'coding_arena_elo', 'api_key', 'structured_output', 'reasoning_type']
|
|
367
409
|
for col in required_cols:
|
|
@@ -1066,8 +1108,8 @@ def llm_invoke(
|
|
|
1066
1108
|
max_completion_tokens=max_tokens,
|
|
1067
1109
|
**time_kwargs
|
|
1068
1110
|
)
|
|
1069
|
-
# Re-enable cache
|
|
1070
|
-
litellm.cache =
|
|
1111
|
+
# Re-enable cache - restore original configured cache (restore to original state, even if None)
|
|
1112
|
+
litellm.cache = configured_cache
|
|
1071
1113
|
# Extract result from retry
|
|
1072
1114
|
retry_raw_result = retry_response.choices[0].message.content
|
|
1073
1115
|
if retry_raw_result is not None:
|