pdd-cli 0.0.48__py3-none-any.whl → 0.0.50__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pdd-cli might be problematic.
- pdd/__init__.py +4 -4
- pdd/bug_to_unit_test.py +2 -0
- pdd/cli.py +8 -1
- pdd/code_generator.py +3 -1
- pdd/context_generator.py +3 -1
- pdd/continue_generation.py +47 -7
- pdd/data/llm_model.csv +15 -16
- pdd/detect_change_main.py +2 -2
- pdd/generate_test.py +3 -1
- pdd/llm_invoke.py +461 -74
- pdd/load_prompt_template.py +30 -9
- pdd/pdd_completion.fish +2 -2
- pdd/pdd_completion.zsh +4 -4
- pdd/postprocess.py +2 -2
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/insert_includes_LLM.prompt +4 -4
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/summarize_directory.py +15 -2
- pdd/sync_orchestration.py +32 -4
- pdd/trace.py +131 -11
- pdd/trace_main.py +2 -2
- pdd/unfinished_prompt.py +41 -2
- {pdd_cli-0.0.48.dist-info → pdd_cli-0.0.50.dist-info}/METADATA +7 -4
- {pdd_cli-0.0.48.dist-info → pdd_cli-0.0.50.dist-info}/RECORD +27 -27
- {pdd_cli-0.0.48.dist-info → pdd_cli-0.0.50.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.48.dist-info → pdd_cli-0.0.50.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.48.dist-info → pdd_cli-0.0.50.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.48.dist-info → pdd_cli-0.0.50.dist-info}/top_level.txt +0 -0
pdd/llm_invoke.py
CHANGED
@@ -6,7 +6,7 @@ import pandas as pd
 import litellm
 import logging # ADDED FOR DETAILED LOGGING
 import importlib.resources
-from litellm.caching.caching import Cache # Fix for LiteLLM v1.
+from litellm.caching.caching import Cache # Fix for LiteLLM v1.75.5+
 
 # --- Configure Standard Python Logging ---
 logger = logging.getLogger("pdd.llm_invoke")
@@ -26,6 +26,16 @@ litellm_logger = logging.getLogger("litellm")
 litellm_log_level = os.getenv("LITELLM_LOG_LEVEL", "WARNING" if PRODUCTION_MODE else "INFO")
 litellm_logger.setLevel(getattr(logging, litellm_log_level, logging.WARNING))
 
+# Ensure LiteLLM drops provider-unsupported params instead of erroring
+# This prevents failures like UnsupportedParamsError for OpenAI gpt-5-* when
+# passing generic params (e.g., reasoning_effort) not accepted by that API path.
+try:
+    _drop_params_env = os.getenv("LITELLM_DROP_PARAMS", "true")
+    litellm.drop_params = str(_drop_params_env).lower() in ("1", "true", "yes", "on")
+except Exception:
+    # Be conservative: default to True even if env parsing fails
+    litellm.drop_params = True
+
 # Add a console handler if none exists
 if not logger.handlers:
     console_handler = logging.StreamHandler()
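The hunk above reads LITELLM_DROP_PARAMS and coerces it to a boolean before setting litellm.drop_params. A minimal standalone sketch of that truthy-string parsing, assuming only the standard library (the helper name is illustrative, not part of pdd):

    import os

    _TRUTHY = ("1", "true", "yes", "on")

    def env_flag(name: str, default: str = "true") -> bool:
        # Parse an environment variable into a boolean, defaulting to True,
        # mirroring the conservative default taken in the diff above.
        try:
            return str(os.getenv(name, default)).lower() in _TRUTHY
        except Exception:
            return True

    print(env_flag("LITELLM_DROP_PARAMS"))  # True unless the env var is set to a falsy value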
@@ -71,7 +81,7 @@ import json
 # from rich import print as rprint # Replaced with logger
 from dotenv import load_dotenv
 from pathlib import Path
-from typing import Optional, Dict, List, Any, Type, Union
+from typing import Optional, Dict, List, Any, Type, Union, Tuple
 from pydantic import BaseModel, ValidationError
 import openai # Import openai for exception handling as LiteLLM maps to its types
 from langchain_core.prompts import PromptTemplate
@@ -114,6 +124,22 @@ def _is_wsl_environment() -> bool:
     return False
 
 
+def _openai_responses_supports_response_format() -> bool:
+    """Detect if current OpenAI Python SDK supports `response_format` on Responses.create.
+
+    Returns True if the installed SDK exposes a `response_format` parameter on
+    `openai.resources.responses.Responses.create`, else False. This avoids
+    sending unsupported kwargs and triggering TypeError at runtime.
+    """
+    try:
+        import inspect
+        from openai.resources.responses import Responses
+        sig = inspect.signature(Responses.create)
+        return "response_format" in sig.parameters
+    except Exception:
+        return False
+
+
 def _get_environment_info() -> Dict[str, str]:
     """
     Get environment information for debugging and error reporting.
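The new _openai_responses_supports_response_format helper probes the installed SDK with inspect.signature instead of pinning a version number. A generic sketch of the same feature-detection pattern, using a stand-in function rather than the OpenAI SDK:

    import inspect

    def supports_kwarg(func, name: str) -> bool:
        # True if `func` accepts a keyword argument called `name`.
        try:
            return name in inspect.signature(func).parameters
        except (TypeError, ValueError):
            return False

    # Stand-in for Responses.create; the real check targets the OpenAI SDK.
    def create(model, input, reasoning=None):
        return {"model": model, "input": input, "reasoning": reasoning}

    print(supports_kwarg(create, "reasoning"))        # True
    print(supports_kwarg(create, "response_format"))  # False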
@@ -188,24 +214,80 @@ if PROJECT_ROOT is None: # Fallback to CWD if no method succeeded
 
 ENV_PATH = PROJECT_ROOT / ".env"
 # --- Determine LLM_MODEL_CSV_PATH ---
-# Prioritize ~/.pdd/llm_model.csv
+# Prioritize ~/.pdd/llm_model.csv, then a project .pdd from the current CWD,
+# then PROJECT_ROOT (which may be set from PDD_PATH), else fall back to package.
 user_pdd_dir = Path.home() / ".pdd"
 user_model_csv_path = user_pdd_dir / "llm_model.csv"
 
-
+def _detect_project_root_from_cwd(max_levels: int = 5) -> Path:
+    """Search upwards from the current working directory for common project markers.
+
+    This intentionally ignores PDD_PATH to support CLI invocations that set
+    PDD_PATH to the installed package location. We want to honor a real project
+    checkout's .pdd/llm_model.csv when running inside it.
+    """
+    try:
+        current_dir = Path.cwd().resolve()
+        for _ in range(max_levels):
+            if (
+                (current_dir / ".git").exists()
+                or (current_dir / "pyproject.toml").exists()
+                or (current_dir / "data").is_dir()
+                or (current_dir / ".env").exists()
+            ):
+                return current_dir
+            parent = current_dir.parent
+            if parent == current_dir:
+                break
+            current_dir = parent
+    except Exception:
+        pass
+    return Path.cwd().resolve()
+
+# Resolve candidates
+project_root_from_cwd = _detect_project_root_from_cwd()
+project_csv_from_cwd = project_root_from_cwd / ".pdd" / "llm_model.csv"
+project_csv_from_env = PROJECT_ROOT / ".pdd" / "llm_model.csv"
+
+# Detect whether PDD_PATH points to the installed package directory. If so,
+# don't prioritize it over the real project from CWD.
+try:
+    _installed_pkg_root = importlib.resources.files('pdd')
+    # importlib.resources.files returns a Traversable; get a FS path string if possible
+    try:
+        _installed_pkg_root_path = Path(str(_installed_pkg_root))
+    except Exception:
+        _installed_pkg_root_path = None
+except Exception:
+    _installed_pkg_root_path = None
+
+def _is_env_path_package_dir(env_path: Path) -> bool:
+    try:
+        if _installed_pkg_root_path is None:
+            return False
+        env_path = env_path.resolve()
+        pkg_path = _installed_pkg_root_path.resolve()
+        # Treat equal or subpath as package dir
+        return env_path == pkg_path or str(env_path).startswith(str(pkg_path))
+    except Exception:
+        return False
+
+# Selection order
 if user_model_csv_path.is_file():
     LLM_MODEL_CSV_PATH = user_model_csv_path
     logger.info(f"Using user-specific LLM model CSV: {LLM_MODEL_CSV_PATH}")
+elif (not _is_env_path_package_dir(PROJECT_ROOT)) and project_csv_from_env.is_file():
+    # Honor an explicitly-set PDD_PATH pointing to a real project directory
+    LLM_MODEL_CSV_PATH = project_csv_from_env
+    logger.info(f"Using project-specific LLM model CSV (from PDD_PATH): {LLM_MODEL_CSV_PATH}")
+elif project_csv_from_cwd.is_file():
+    # Otherwise, prefer the project relative to the current working directory
+    LLM_MODEL_CSV_PATH = project_csv_from_cwd
+    logger.info(f"Using project-specific LLM model CSV (from CWD): {LLM_MODEL_CSV_PATH}")
 else:
-    #
-
-
-        LLM_MODEL_CSV_PATH = project_model_csv_path
-        logger.info(f"Using project-specific LLM model CSV: {LLM_MODEL_CSV_PATH}")
-    else:
-        # Neither exists, we'll use a marker path that _load_model_data will handle
-        LLM_MODEL_CSV_PATH = None
-        logger.info("No local LLM model CSV found, will use package default")
+    # Neither exists, we'll use a marker path that _load_model_data will handle
+    LLM_MODEL_CSV_PATH = None
+    logger.info("No local LLM model CSV found, will use package default")
 # ---------------------------------
 
 # Load environment variables from .env file
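The selection logic above prefers ~/.pdd/llm_model.csv, then a project .pdd directory (from PDD_PATH or the CWD), and finally the packaged default. A simplified sketch of that precedence walk, collapsing the two project candidates into one (paths and the helper name are illustrative):

    from pathlib import Path
    from typing import Optional

    def pick_model_csv(home: Path, project_root: Path) -> Optional[Path]:
        # First existing candidate wins; None means "use the packaged default CSV".
        for candidate in (home / ".pdd" / "llm_model.csv",
                          project_root / ".pdd" / "llm_model.csv"):
            if candidate.is_file():
                return candidate
        return None

    print(pick_model_csv(Path.home(), Path.cwd()))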
@@ -280,16 +362,22 @@ if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
 elif 'AWS_REGION_NAME' in os.environ:
     pass # Or just leave it if the temporary setting wasn't done/needed
 
+# Check if caching is disabled via environment variable
+if os.getenv("LITELLM_CACHE_DISABLE") == "1":
+    logger.info("LiteLLM caching disabled via LITELLM_CACHE_DISABLE=1")
+    litellm.cache = None
+    cache_configured = True
+
 if not cache_configured:
     try:
-        # Try
+        # Try disk-based cache as a fallback
         sqlite_cache_path = PROJECT_ROOT / "litellm_cache.sqlite"
-        configured_cache = Cache(type="
+        configured_cache = Cache(type="disk", disk_cache_dir=str(sqlite_cache_path))
         litellm.cache = configured_cache
-        logger.info(f"LiteLLM
+        logger.info(f"LiteLLM disk cache configured at {sqlite_cache_path}")
         cache_configured = True
     except Exception as e2:
-        warnings.warn(f"Failed to configure LiteLLM
+        warnings.warn(f"Failed to configure LiteLLM disk cache: {e2}. Caching is disabled.")
         litellm.cache = None
 
 if not cache_configured:
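The caching changes add an explicit kill switch (LITELLM_CACHE_DISABLE=1) ahead of the disk-cache fallback. A sketch of that control flow with a stand-in factory in place of LiteLLM's Cache(type="disk", ...):

    import os

    def configure_cache(make_disk_cache):
        # Env kill switch wins; otherwise try a disk cache and degrade to "no cache".
        if os.getenv("LITELLM_CACHE_DISABLE") == "1":
            return None
        try:
            return make_disk_cache()
        except Exception as exc:
            print(f"cache setup failed, continuing without cache: {exc}")
            return None

    # Example with a stand-in factory (the real code passes a litellm Cache object):
    print(configure_cache(lambda: {"type": "disk", "dir": "litellm_cache.sqlite"}))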
@@ -327,29 +415,49 @@ def _litellm_success_callback(
         cost_val = litellm.completion_cost(completion_response=completion_response)
         calculated_cost = cost_val if cost_val is not None else 0.0
     except Exception as e1:
-        # Attempt 2:
-        #
-        # This is often needed for batch completion items.
+        # Attempt 2: Compute via tokens and model mapping. If LiteLLM mapping is
+        # missing or API differs, fall back to CSV rates in _MODEL_RATE_MAP.
         logger.debug(f"Attempting cost calculation with fallback method: {e1}")
         try:
-            model_name = kwargs.get("model")
+            model_name = kwargs.get("model")
             if model_name and usage:
-
-
-
-
-
-
-
-
+                in_tok = getattr(usage, 'prompt_tokens', None)
+                out_tok = getattr(usage, 'completion_tokens', None)
+                # Some providers may use 'input_tokens'/'output_tokens'
+                if in_tok is None:
+                    in_tok = getattr(usage, 'input_tokens', 0)
+                if out_tok is None:
+                    out_tok = getattr(usage, 'output_tokens', 0)
+
+                # Try LiteLLM helper (arg names vary across versions)
+                try:
+                    cost_val = litellm.completion_cost(
+                        model=model_name,
+                        prompt_tokens=in_tok,
+                        completion_tokens=out_tok,
+                    )
+                    calculated_cost = cost_val if cost_val is not None else 0.0
+                except TypeError:
+                    # Older/newer versions may require input/output token names
+                    try:
+                        cost_val = litellm.completion_cost(
+                            model=model_name,
+                            input_tokens=in_tok,
+                            output_tokens=out_tok,
+                        )
+                        calculated_cost = cost_val if cost_val is not None else 0.0
+                    except Exception as e3:
+                        # Final fallback: compute using CSV rates
+                        rates = _MODEL_RATE_MAP.get(str(model_name))
+                        if rates is not None:
+                            in_rate, out_rate = rates
+                            calculated_cost = (float(in_tok or 0) * in_rate + float(out_tok or 0) * out_rate) / 1_000_000.0
+                        else:
+                            calculated_cost = 0.0
+                        logger.debug(f"Cost calculation failed with LiteLLM token API; used CSV rates if available. Detail: {e3}")
             else:
-                # If we can't get model name or usage, fallback to 0
                 calculated_cost = 0.0
-            # Optional: Log the original error e1 if needed
-            # logger.warning(f"[Callback WARN] Failed to calculate cost with response object ({e1}) and fallback failed.")
         except Exception as e2:
-            # Optional: Log secondary error e2 if needed
-            # logger.warning(f"[Callback WARN] Failed to calculate cost with fallback method: {e2}")
             calculated_cost = 0.0 # Default to 0 on any error
             logger.debug(f"Cost calculation failed with fallback method: {e2}")
 
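When both LiteLLM cost helpers fail, the callback now multiplies token counts by the per-million-token rates from the model CSV. The arithmetic, as a tiny worked sketch with made-up rates:

    def estimate_cost(prompt_tokens: int, completion_tokens: int,
                      in_rate_per_m: float, out_rate_per_m: float) -> float:
        # Rates are dollars per million tokens, as in the llm_model.csv columns.
        return (prompt_tokens * in_rate_per_m
                + completion_tokens * out_rate_per_m) / 1_000_000.0

    # 1200 input tokens at $0.25/M plus 350 output tokens at $2.00/M -> $0.001
    print(f"${estimate_cost(1200, 350, 0.25, 2.00):.6f}")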
@@ -367,6 +475,23 @@ def _litellm_success_callback(
 # Register the callback with LiteLLM
 litellm.success_callback = [_litellm_success_callback]
 
+# --- Cost Mapping Support (CSV Rates) ---
+# Populate from CSV inside llm_invoke; used by callback fallback
+_MODEL_RATE_MAP: Dict[str, Tuple[float, float]] = {}
+
+def _set_model_rate_map(df: pd.DataFrame) -> None:
+    global _MODEL_RATE_MAP
+    try:
+        _MODEL_RATE_MAP = {
+            str(row['model']): (
+                float(row['input']) if pd.notna(row['input']) else 0.0,
+                float(row['output']) if pd.notna(row['output']) else 0.0,
+            )
+            for _, row in df.iterrows()
+        }
+    except Exception:
+        _MODEL_RATE_MAP = {}
+
 # --- Helper Functions ---
 
 def _load_model_data(csv_path: Optional[Path]) -> pd.DataFrame:
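_set_model_rate_map collapses the CSV's model/input/output columns into a dict the callback can read. A small illustration with a toy DataFrame (rows and rates are made up), showing how a missing rate becomes 0.0:

    import pandas as pd

    df = pd.DataFrame({
        "model": ["provider/model-a", "provider/model-b"],
        "input": [0.25, None],     # $ per million input tokens
        "output": [2.00, 4.00],    # $ per million output tokens
    })

    rate_map = {
        str(row["model"]): (
            float(row["input"]) if pd.notna(row["input"]) else 0.0,
            float(row["output"]) if pd.notna(row["output"]) else 0.0,
        )
        for _, row in df.iterrows()
    }
    print(rate_map)  # {'provider/model-a': (0.25, 2.0), 'provider/model-b': (0.0, 4.0)}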
@@ -476,11 +601,26 @@ def _select_model_candidates(
         # Try finding base model in the *original* df in case it was filtered out
         original_base = model_df[model_df['model'] == base_model_name]
         if not original_base.empty:
-
-
-
-
-
+            # Base exists but may be misconfigured (e.g., missing API key). Keep erroring loudly.
+            raise ValueError(
+                f"Base model '{base_model_name}' found in CSV but requires API key '{original_base.iloc[0]['api_key']}' which might be missing or invalid configuration."
+            )
+        # Option A': Soft fallback – choose a reasonable surrogate base and continue
+        # Strategy (simplified and deterministic): pick the first available model
+        # from the CSV as the surrogate base. This mirrors typical CSV ordering
+        # expectations and keeps behavior predictable across environments.
+        try:
+            base_model = available_df.iloc[0]
+            logger.warning(
+                f"Base model '{base_model_name}' not found in CSV. Falling back to surrogate base '{base_model['model']}' (Option A')."
+            )
+        except Exception:
+            # If any unexpected error occurs during fallback, raise a clear error
+            raise ValueError(
+                f"Specified base model '{base_model_name}' not found and fallback selection failed. Check your LLM model CSV."
+            )
+    else:
+        base_model = base_model_row.iloc[0]
 
     # 3. Determine Target and Sort
     candidates = []
@@ -491,9 +631,10 @@ def _select_model_candidates(
         # Sort remaining by ELO descending as fallback
         available_df['sort_metric'] = -available_df['coding_arena_elo'] # Negative for descending sort
         candidates = available_df.sort_values(by='sort_metric').to_dict('records')
-        # Ensure base model is first if it exists
-
-
+        # Ensure effective base model is first if it exists (supports surrogate base)
+        effective_base_name = str(base_model['model']) if isinstance(base_model, pd.Series) else base_model_name
+        if any(c['model'] == effective_base_name for c in candidates):
+            candidates.sort(key=lambda x: 0 if x['model'] == effective_base_name else 1)
         target_metric_value = f"Base Model ELO: {base_model['coding_arena_elo']}"
 
     elif strength < 0.5:
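The candidate ordering now keys the sort on "is this the (surrogate) base model", so the base floats to the front while the ELO ordering of the rest is preserved (Python's sort is stable). A standalone illustration with made-up model names:

    candidates = [
        {"model": "provider/strong"},
        {"model": "provider/base"},
        {"model": "provider/cheap"},
    ]
    effective_base_name = "provider/base"

    # 0 for the base model, 1 for everything else; stable sort keeps the rest in place.
    candidates.sort(key=lambda c: 0 if c["model"] == effective_base_name else 1)
    print([c["model"] for c in candidates])  # ['provider/base', 'provider/strong', 'provider/cheap']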
@@ -710,6 +851,49 @@ def _format_messages(prompt: str, input_data: Union[Dict[str, Any], List[Dict[st
     except Exception as e:
         raise ValueError(f"Error formatting prompt: {e}") from e
 
+# --- JSON Extraction Helpers ---
+import re
+
+def _extract_fenced_json_block(text: str) -> Optional[str]:
+    try:
+        m = re.search(r"```json\s*(\{[\s\S]*?\})\s*```", text, flags=re.IGNORECASE)
+        if m:
+            return m.group(1)
+        return None
+    except Exception:
+        return None
+
+def _extract_balanced_json_objects(text: str) -> List[str]:
+    results: List[str] = []
+    brace_stack = 0
+    start_idx = -1
+    in_string = False
+    escape = False
+    for i, ch in enumerate(text):
+        if in_string:
+            if escape:
+                escape = False
+            elif ch == '\\':
+                escape = True
+            elif ch == '"':
+                in_string = False
+            continue
+        else:
+            if ch == '"':
+                in_string = True
+                continue
+            if ch == '{':
+                if brace_stack == 0:
+                    start_idx = i
+                brace_stack += 1
+            elif ch == '}':
+                if brace_stack > 0:
+                    brace_stack -= 1
+                    if brace_stack == 0 and start_idx != -1:
+                        results.append(text[start_idx:i+1])
+                        start_idx = -1
+    return results
+
 # --- Main Function ---
 
 def llm_invoke(
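The helpers above try a fenced json code block first and only then scan for balanced braces. A small illustration of the fenced-block path using the same regex on a made-up model reply:

    import json
    import re

    reply = 'Sure:\n```json\n{"setup": "why?", "punchline": "because"}\n```\nAnything else?'

    match = re.search(r"```json\s*(\{[\s\S]*?\})\s*```", reply, flags=re.IGNORECASE)
    if match:
        data = json.loads(match.group(1))
        print(data["setup"], data["punchline"])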
@@ -852,6 +1036,16 @@ def llm_invoke(
     # --- 3. Iterate Through Candidates and Invoke LLM ---
     last_exception = None
     newly_acquired_keys: Dict[str, bool] = {} # Track keys obtained in this run
+
+    # Initialize variables for retry section
+    response_format = None
+    time_kwargs = {}
+
+    # Update global rate map for callback cost fallback
+    try:
+        _set_model_rate_map(model_df)
+    except Exception:
+        pass
 
     for model_info in candidate_models:
         model_name_litellm = model_info['model']
@@ -945,11 +1139,33 @@ def llm_invoke(
         elif verbose: # No api_key_name_from_csv in CSV for this model
             logger.info(f"[INFO] No API key name specified in CSV for model '{model_name_litellm}'. LiteLLM will use its default authentication mechanisms (e.g., standard provider env vars or ADC for Vertex AI).")
 
-        # Add api_base if present in CSV
+        # Add base_url/api_base override if present in CSV
         api_base = model_info.get('base_url')
         if pd.notna(api_base) and api_base:
+            # LiteLLM prefers `base_url`; some older paths accept `api_base`.
+            litellm_kwargs["base_url"] = str(api_base)
             litellm_kwargs["api_base"] = str(api_base)
 
+        # Provider-specific defaults (e.g., LM Studio)
+        model_name_lower = str(model_name_litellm).lower()
+        provider_lower_for_model = provider.lower()
+        is_lm_studio = model_name_lower.startswith('lm_studio/') or provider_lower_for_model == 'lm_studio'
+        if is_lm_studio:
+            # Ensure base_url is set (fallback to env LM_STUDIO_API_BASE or localhost)
+            if not litellm_kwargs.get("base_url"):
+                lm_studio_base = os.getenv("LM_STUDIO_API_BASE", "http://localhost:1234/v1")
+                litellm_kwargs["base_url"] = lm_studio_base
+                litellm_kwargs["api_base"] = lm_studio_base
+                if verbose:
+                    logger.info(f"[INFO] Using LM Studio base_url: {lm_studio_base}")
+
+            # Ensure a non-empty api_key; LM Studio accepts any non-empty token (e.g., 'lm-studio')
+            if not litellm_kwargs.get("api_key"):
+                lm_studio_key = os.getenv("LM_STUDIO_API_KEY") or "lm-studio"
+                litellm_kwargs["api_key"] = lm_studio_key
+                if verbose:
+                    logger.info("[INFO] Using LM Studio api_key placeholder (set LM_STUDIO_API_KEY to customize).")
+
         # Handle Structured Output (JSON Mode / Pydantic)
         if output_pydantic:
             # Check if model supports structured output based on CSV flag or LiteLLM check
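For LM Studio the hunk fills in base_url and api_key only when they are missing, defaulting to a localhost endpoint and a placeholder token. A standalone sketch of that defaulting (the function name is illustrative):

    import os

    def lm_studio_defaults(kwargs: dict) -> dict:
        kwargs = dict(kwargs)
        if not kwargs.get("base_url"):
            kwargs["base_url"] = os.getenv("LM_STUDIO_API_BASE", "http://localhost:1234/v1")
            kwargs["api_base"] = kwargs["base_url"]
        if not kwargs.get("api_key"):
            # LM Studio accepts any non-empty token
            kwargs["api_key"] = os.getenv("LM_STUDIO_API_KEY") or "lm-studio"
        return kwargs

    print(lm_studio_defaults({}))
    print(lm_studio_defaults({"api_key": "my-token", "base_url": "http://10.0.0.5:1234/v1"}))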
@@ -964,7 +1180,8 @@ def llm_invoke(
                 logger.info(f"[INFO] Requesting structured output (Pydantic: {output_pydantic.__name__}) for {model_name_litellm}")
                 # Pass the Pydantic model directly if supported, else use json_object
                 # LiteLLM handles passing Pydantic models for supported providers
-
+                response_format = output_pydantic
+                litellm_kwargs["response_format"] = response_format
                 # As a fallback, one could use:
                 # litellm_kwargs["response_format"] = {"type": "json_object"}
                 # And potentially enable client-side validation:
@@ -986,7 +1203,9 @@ def llm_invoke(
             # Currently known: Anthropic uses 'thinking'
             # Model name comparison is more robust than provider string
             if provider == 'anthropic': # Check provider column instead of model prefix
-
+                thinking_param = {"type": "enabled", "budget_tokens": budget}
+                litellm_kwargs["thinking"] = thinking_param
+                time_kwargs["thinking"] = thinking_param
                 if verbose:
                     logger.info(f"[INFO] Requesting Anthropic thinking (budget type) with budget: {budget} tokens for {model_name_litellm}")
             else:
@@ -1004,10 +1223,32 @@ def llm_invoke(
                     effort = "high"
                 elif time > 0.3:
                     effort = "medium"
-
-
-
-
+
+                # Map effort parameter per-provider/model family
+                model_lower = str(model_name_litellm).lower()
+                provider_lower = str(provider).lower()
+
+                if provider_lower == 'openai' and model_lower.startswith('gpt-5'):
+                    # OpenAI 5-series uses Responses API with nested 'reasoning'
+                    reasoning_obj = {"effort": effort, "summary": "auto"}
+                    litellm_kwargs["reasoning"] = reasoning_obj
+                    time_kwargs["reasoning"] = reasoning_obj
+                    if verbose:
+                        logger.info(f"[INFO] Requesting OpenAI reasoning.effort='{effort}' for {model_name_litellm} (Responses API)")
+
+                elif provider_lower == 'openai' and model_lower.startswith('o') and 'mini' not in model_lower:
+                    # Historical o* models may use LiteLLM's generic reasoning_effort param
+                    litellm_kwargs["reasoning_effort"] = effort
+                    time_kwargs["reasoning_effort"] = effort
+                    if verbose:
+                        logger.info(f"[INFO] Requesting reasoning_effort='{effort}' for {model_name_litellm}")
+
+                else:
+                    # Fallback to LiteLLM generic param when supported by provider adapter
+                    litellm_kwargs["reasoning_effort"] = effort
+                    time_kwargs["reasoning_effort"] = effort
+                    if verbose:
+                        logger.info(f"[INFO] Requesting generic reasoning_effort='{effort}' for {model_name_litellm}")
 
             elif reasoning_type == 'none':
                 if verbose:
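The reasoning section maps the 0-to-1 time knob to an effort level and then to a provider-specific parameter shape: a nested reasoning object for OpenAI gpt-5*, a flat reasoning_effort otherwise. A condensed sketch; the "high" cutoff and the "low" branch are not visible in the hunk, so those values are assumptions, and the model names in the example calls are placeholders:

    def reasoning_kwargs(provider: str, model: str, time: float) -> dict:
        # Thresholds: >0.3 -> medium is from the diff; >0.7 -> high and "low" are assumed.
        effort = "high" if time > 0.7 else "medium" if time > 0.3 else "low"
        if provider.lower() == "openai" and model.lower().startswith("gpt-5"):
            # Responses API style: nested object
            return {"reasoning": {"effort": effort, "summary": "auto"}}
        # Generic LiteLLM style: flat parameter
        return {"reasoning_effort": effort}

    print(reasoning_kwargs("openai", "gpt-5-nano", 0.8))
    print(reasoning_kwargs("anthropic", "example-model", 0.5))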
@@ -1039,6 +1280,142 @@ def llm_invoke(
             logger.debug("NOT ENABLING CACHING: litellm.cache is None at call time")
 
 
+        # Route OpenAI gpt-5* models through Responses API to support 'reasoning'
+        model_lower_for_call = str(model_name_litellm).lower()
+        provider_lower_for_call = str(provider).lower()
+
+        if (
+            not use_batch_mode
+            and provider_lower_for_call == 'openai'
+            and model_lower_for_call.startswith('gpt-5')
+        ):
+            if verbose:
+                logger.info(f"[INFO] Calling OpenAI Responses API for {model_name_litellm}...")
+            try:
+                # Build input text from messages
+                if isinstance(formatted_messages, list) and formatted_messages and isinstance(formatted_messages[0], dict):
+                    input_text = "\n\n".join(f"{m.get('role','user')}: {m.get('content','')}" for m in formatted_messages)
+                else:
+                    # Fallback: string cast
+                    input_text = str(formatted_messages)
+
+                # Derive effort mapping already computed in time_kwargs
+                reasoning_param = time_kwargs.get("reasoning")
+
+                # Optional text settings; keep simple
+                text_block = {"format": {"type": "text"}}
+
+                # If structured output requested, attempt JSON schema via Pydantic
+                # GPT-5 Responses API does not support temperature; omit it here.
+                responses_kwargs = {
+                    "model": model_name_litellm,
+                    "input": input_text,
+                    "text": text_block,
+                }
+                if verbose and temperature not in (None, 0, 0.0):
+                    logger.info("[INFO] Skipping 'temperature' for OpenAI GPT-5 Responses call (unsupported by API).")
+                if reasoning_param is not None:
+                    responses_kwargs["reasoning"] = reasoning_param
+
+                if output_pydantic:
+                    try:
+                        schema = output_pydantic.model_json_schema()
+                        if _openai_responses_supports_response_format():
+                            responses_kwargs["response_format"] = {
+                                "type": "json_schema",
+                                "json_schema": {
+                                    "name": output_pydantic.__name__,
+                                    "schema": schema,
+                                    "strict": True,
+                                },
+                            }
+                            # When enforcing JSON schema, omit text formatting
+                            responses_kwargs.pop("text", None)
+                        else:
+                            if verbose:
+                                logger.info("[INFO] OpenAI SDK lacks Responses.response_format; will validate JSON client-side with Pydantic.")
+                    except Exception as schema_e:
+                        logger.warning(f"[WARN] Failed to derive JSON schema from Pydantic: {schema_e}. Proceeding without structured response_format.")
+
+                # Initialize OpenAI client with explicit key if provided
+                try:
+                    from openai import OpenAI as _OpenAIClient
+                except Exception:
+                    _OpenAIClient = None
+                if _OpenAIClient is None:
+                    raise RuntimeError("OpenAI SDK not available to call Responses API.")
+
+                api_key_to_use = litellm_kwargs.get("api_key") or os.getenv("OPENAI_API_KEY")
+                client = _OpenAIClient(api_key=api_key_to_use) if api_key_to_use else _OpenAIClient()
+
+                # Make the Responses API call, with graceful fallback if SDK
+                # doesn't support certain newer kwargs (e.g., response_format)
+                try:
+                    resp = client.responses.create(**responses_kwargs)
+                except TypeError as te:
+                    msg = str(te)
+                    if 'response_format' in responses_kwargs and ('unexpected keyword argument' in msg or 'got an unexpected keyword argument' in msg):
+                        logger.warning("[WARN] OpenAI SDK doesn't support response_format; retrying without it.")
+                        responses_kwargs.pop('response_format', None)
+                        resp = client.responses.create(**responses_kwargs)
+                    else:
+                        raise
+
+                # Extract text result
+                result_text = getattr(resp, "output_text", None)
+                if result_text is None:
+                    try:
+                        # Fallback parse
+                        outputs = getattr(resp, "output", []) or getattr(resp, "outputs", [])
+                        if outputs:
+                            first = outputs[0]
+                            content = getattr(first, "content", [])
+                            if content and hasattr(content[0], "text"):
+                                result_text = content[0].text
+                    except Exception:
+                        result_text = None
+
+                # Calculate cost using usage + CSV rates
+                usage = getattr(resp, "usage", None)
+                total_cost = 0.0
+                if usage is not None:
+                    in_tok = getattr(usage, "input_tokens", 0) or 0
+                    out_tok = getattr(usage, "output_tokens", 0) or 0
+                    in_rate = model_info.get('input', 0.0) or 0.0
+                    out_rate = model_info.get('output', 0.0) or 0.0
+                    total_cost = (in_tok * in_rate + out_tok * out_rate) / 1_000_000.0
+
+                final_result = None
+                if output_pydantic and result_text:
+                    try:
+                        final_result = output_pydantic.model_validate_json(result_text)
+                    except Exception as e:
+                        logger.error(f"[ERROR] Pydantic parse failed on Responses output: {e}")
+                        final_result = result_text
+                else:
+                    final_result = result_text
+
+                if verbose:
+                    logger.info(f"[RESULT] Model Used: {model_name_litellm}")
+                    logger.info(f"[RESULT] Total Cost (estimated): ${total_cost:.6g}")
+
+                return {
+                    'result': final_result,
+                    'cost': total_cost,
+                    'model_name': model_name_litellm,
+                    'thinking_output': None,
+                }
+            except Exception as e:
+                last_exception = e
+                logger.error(f"[ERROR] OpenAI Responses call failed for {model_name_litellm}: {e}")
+                # Remove 'reasoning' key to avoid OpenAI Chat API unknown param errors
+                if "reasoning" in litellm_kwargs:
+                    try:
+                        litellm_kwargs.pop("reasoning", None)
+                    except Exception:
+                        pass
+                # Fall through to LiteLLM path as a fallback
+
         if use_batch_mode:
             if verbose:
                 logger.info(f"[INFO] Calling litellm.batch_completion for {model_name_litellm}...")
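The routing block above calls the OpenAI Responses API directly and reads output_text off the result. A pared-down sketch of that call path; it needs the openai package, a valid OPENAI_API_KEY, and network access, and the model id is illustrative:

    import os
    from openai import OpenAI

    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    resp = client.responses.create(
        model="gpt-5-nano",  # illustrative model id
        input="user: Summarize what llm_invoke does in one sentence.",
        reasoning={"effort": "medium", "summary": "auto"},
    )
    print(resp.output_text)  # the same accessor the diff relies on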
@@ -1105,7 +1482,6 @@ def llm_invoke(
                     messages=retry_messages,
                     temperature=temperature,
                     response_format=response_format,
-                    max_completion_tokens=max_tokens,
                     **time_kwargs
                 )
                 # Re-enable cache - restore original configured cache (restore to original state, even if None)
@@ -1149,26 +1525,39 @@ def llm_invoke(
             elif isinstance(raw_result, str):
                 json_string_to_parse = raw_result # Start with the raw string
                 try:
-                    #
-
-
-                    if
-
-                    # Basic check if it looks like JSON
-                    if potential_json.strip().startswith('{') and potential_json.strip().endswith('}'):
-                        if verbose:
-                            logger.debug(f"[DEBUG] Attempting to parse extracted JSON block: '{potential_json}'")
-                        parsed_result = output_pydantic.model_validate_json(potential_json)
-                    else:
-                        # If block extraction fails, try cleaning markdown next
-                        raise ValueError("Extracted block doesn't look like JSON")
+                    # 1) Prefer fenced ```json blocks
+                    fenced = _extract_fenced_json_block(raw_result)
+                    candidates: List[str] = []
+                    if fenced:
+                        candidates.append(fenced)
                     else:
-
-
+                        # 2) Fall back to scanning for balanced JSON objects
+                        candidates.extend(_extract_balanced_json_objects(raw_result))
+
+                    if not candidates:
+                        raise ValueError("No JSON-like content found")
+
+                    parse_err: Optional[Exception] = None
+                    for cand in candidates:
+                        try:
+                            if verbose:
+                                logger.debug(f"[DEBUG] Attempting to parse candidate JSON block: {cand}")
+                            parsed_result = output_pydantic.model_validate_json(cand)
+                            json_string_to_parse = cand
+                            parse_err = None
+                            break
+                        except (json.JSONDecodeError, ValidationError, ValueError) as pe:
+                            parse_err = pe
+
+                    if parsed_result is None:
+                        # If none of the candidates parsed, raise last error
+                        if parse_err is not None:
+                            raise parse_err
+                        raise ValueError("Unable to parse any JSON candidates")
                 except (json.JSONDecodeError, ValidationError, ValueError) as extraction_error:
                     if verbose:
-                        logger.debug(f"[DEBUG] JSON
-                    #
+                        logger.debug(f"[DEBUG] JSON extraction/validation failed ('{extraction_error}'). Trying fence cleaning.")
+                    # Last resort: strip any leading/trailing code fences and retry
                     cleaned_result_str = raw_result.strip()
                     if cleaned_result_str.startswith("```json"):
                         cleaned_result_str = cleaned_result_str[7:]
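The retooled parsing loop validates each extracted JSON candidate against the caller's Pydantic model and keeps the first one that passes. A self-contained sketch with a made-up schema (Pydantic v2, as used by the module):

    from typing import List, Optional
    from pydantic import BaseModel, ValidationError

    class Joke(BaseModel):  # illustrative output_pydantic schema
        setup: str
        punchline: str

    def first_valid(candidates: List[str]) -> Optional[Joke]:
        # Try candidates in order; return the first that validates, else None.
        for cand in candidates:
            try:
                return Joke.model_validate_json(cand)
            except ValidationError:
                continue
        return None

    print(first_valid(['{"oops": 1}', '{"setup": "a", "punchline": "b"}']))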
@@ -1177,15 +1566,13 @@ def llm_invoke(
                     if cleaned_result_str.endswith("```"):
                         cleaned_result_str = cleaned_result_str[:-3]
                     cleaned_result_str = cleaned_result_str.strip()
-                    # Check again if it looks like JSON before parsing
                     if cleaned_result_str.startswith('{') and cleaned_result_str.endswith('}'):
                         if verbose:
-                            logger.debug(f"[DEBUG] Attempting parse after
-                        json_string_to_parse = cleaned_result_str
+                            logger.debug(f"[DEBUG] Attempting parse after generic fence cleaning. Cleaned string: '{cleaned_result_str}'")
+                        json_string_to_parse = cleaned_result_str
                         parsed_result = output_pydantic.model_validate_json(json_string_to_parse)
                     else:
-
-                        raise ValueError("Content after cleaning markdown doesn't look like JSON")
+                        raise ValueError("Content after cleaning doesn't look like JSON")
 
 
             # Check if any parsing attempt succeeded
@@ -1319,7 +1706,7 @@ if __name__ == "__main__":
         response = llm_invoke(
             prompt="Tell me a short joke about {topic}.",
             input_json={"topic": "programmers"},
-            strength=0.5, # Use base model (gpt-
+            strength=0.5, # Use base model (gpt-5-nano)
             temperature=0.7,
             verbose=True
         )
@@ -1400,7 +1787,7 @@ if __name__ == "__main__":
             {"role": "system", "content": "You are a helpful assistant."},
             {"role": "user", "content": "What is the capital of France?"}
         ]
-        # Strength 0.5 should select gpt-
+        # Strength 0.5 should select gpt-5-nano
         response_messages = llm_invoke(
             messages=custom_messages,
             strength=0.5,