pdd-cli 0.0.49__py3-none-any.whl → 0.0.51__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pdd-cli might be problematic.
- pdd/__init__.py +4 -4
- pdd/bug_to_unit_test.py +2 -0
- pdd/cli.py +8 -1
- pdd/code_generator.py +3 -1
- pdd/context_generator.py +3 -1
- pdd/continue_generation.py +47 -7
- pdd/data/llm_model.csv +15 -16
- pdd/detect_change_main.py +2 -2
- pdd/generate_test.py +3 -1
- pdd/llm_invoke.py +441 -68
- pdd/load_prompt_template.py +30 -9
- pdd/pdd_completion.fish +2 -2
- pdd/pdd_completion.zsh +4 -4
- pdd/postprocess.py +2 -2
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/insert_includes_LLM.prompt +4 -4
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/summarize_directory.py +15 -2
- pdd/trace.py +131 -11
- pdd/trace_main.py +2 -2
- pdd/unfinished_prompt.py +41 -2
- {pdd_cli-0.0.49.dist-info → pdd_cli-0.0.51.dist-info}/METADATA +6 -3
- {pdd_cli-0.0.49.dist-info → pdd_cli-0.0.51.dist-info}/RECORD +26 -26
- {pdd_cli-0.0.49.dist-info → pdd_cli-0.0.51.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.49.dist-info → pdd_cli-0.0.51.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.49.dist-info → pdd_cli-0.0.51.dist-info}/licenses/LICENSE +0 -0
- {pdd_cli-0.0.49.dist-info → pdd_cli-0.0.51.dist-info}/top_level.txt +0 -0
pdd/llm_invoke.py
CHANGED
@@ -26,6 +26,16 @@ litellm_logger = logging.getLogger("litellm")
 litellm_log_level = os.getenv("LITELLM_LOG_LEVEL", "WARNING" if PRODUCTION_MODE else "INFO")
 litellm_logger.setLevel(getattr(logging, litellm_log_level, logging.WARNING))
 
+# Ensure LiteLLM drops provider-unsupported params instead of erroring
+# This prevents failures like UnsupportedParamsError for OpenAI gpt-5-* when
+# passing generic params (e.g., reasoning_effort) not accepted by that API path.
+try:
+    _drop_params_env = os.getenv("LITELLM_DROP_PARAMS", "true")
+    litellm.drop_params = str(_drop_params_env).lower() in ("1", "true", "yes", "on")
+except Exception:
+    # Be conservative: default to True even if env parsing fails
+    litellm.drop_params = True
+
 # Add a console handler if none exists
 if not logger.handlers:
     console_handler = logging.StreamHandler()
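
The block added above gates `litellm.drop_params` on a truthy-string read of `LITELLM_DROP_PARAMS`. A minimal standalone sketch of the same parsing rule (the helper name `_env_flag` is illustrative, not part of the package):

```python
import os

def _env_flag(name: str, default: str = "true") -> bool:
    # Same truthy spellings the new code accepts: "1", "true", "yes", "on".
    return str(os.getenv(name, default)).lower() in ("1", "true", "yes", "on")

os.environ["LITELLM_DROP_PARAMS"] = "off"
assert _env_flag("LITELLM_DROP_PARAMS") is False   # explicit opt-out

del os.environ["LITELLM_DROP_PARAMS"]
assert _env_flag("LITELLM_DROP_PARAMS") is True    # unset defaults to dropping params
```
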
@@ -71,7 +81,7 @@ import json
 # from rich import print as rprint # Replaced with logger
 from dotenv import load_dotenv
 from pathlib import Path
-from typing import Optional, Dict, List, Any, Type, Union
+from typing import Optional, Dict, List, Any, Type, Union, Tuple
 from pydantic import BaseModel, ValidationError
 import openai  # Import openai for exception handling as LiteLLM maps to its types
 from langchain_core.prompts import PromptTemplate
@@ -114,6 +124,22 @@ def _is_wsl_environment() -> bool:
     return False
 
 
+def _openai_responses_supports_response_format() -> bool:
+    """Detect if current OpenAI Python SDK supports `response_format` on Responses.create.
+
+    Returns True if the installed SDK exposes a `response_format` parameter on
+    `openai.resources.responses.Responses.create`, else False. This avoids
+    sending unsupported kwargs and triggering TypeError at runtime.
+    """
+    try:
+        import inspect
+        from openai.resources.responses import Responses
+        sig = inspect.signature(Responses.create)
+        return "response_format" in sig.parameters
+    except Exception:
+        return False
+
+
 def _get_environment_info() -> Dict[str, str]:
     """
     Get environment information for debugging and error reporting.
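
The helper probes the installed SDK by signature inspection instead of pinning a version. A small sketch of the same pattern against an arbitrary callable (`create` below is a stand-in, not the OpenAI SDK; the `**kwargs` handling is a slight generalization):

```python
import inspect
from typing import Callable, Optional

def supports_kwarg(func: Callable, name: str) -> bool:
    # True if the callable declares the keyword explicitly, or takes **kwargs.
    try:
        params = inspect.signature(func).parameters
        if name in params:
            return True
        return any(p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values())
    except (TypeError, ValueError):
        return False

def create(model: str, input: str, response_format: Optional[dict] = None):
    return {"model": model, "input": input, "response_format": response_format}

assert supports_kwarg(create, "response_format") is True
assert supports_kwarg(create, "reasoning") is False
```
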
@@ -188,24 +214,80 @@ if PROJECT_ROOT is None: # Fallback to CWD if no method succeeded
 
 ENV_PATH = PROJECT_ROOT / ".env"
 # --- Determine LLM_MODEL_CSV_PATH ---
-# Prioritize ~/.pdd/llm_model.csv
+# Prioritize ~/.pdd/llm_model.csv, then a project .pdd from the current CWD,
+# then PROJECT_ROOT (which may be set from PDD_PATH), else fall back to package.
 user_pdd_dir = Path.home() / ".pdd"
 user_model_csv_path = user_pdd_dir / "llm_model.csv"
 
-
+def _detect_project_root_from_cwd(max_levels: int = 5) -> Path:
+    """Search upwards from the current working directory for common project markers.
+
+    This intentionally ignores PDD_PATH to support CLI invocations that set
+    PDD_PATH to the installed package location. We want to honor a real project
+    checkout's .pdd/llm_model.csv when running inside it.
+    """
+    try:
+        current_dir = Path.cwd().resolve()
+        for _ in range(max_levels):
+            if (
+                (current_dir / ".git").exists()
+                or (current_dir / "pyproject.toml").exists()
+                or (current_dir / "data").is_dir()
+                or (current_dir / ".env").exists()
+            ):
+                return current_dir
+            parent = current_dir.parent
+            if parent == current_dir:
+                break
+            current_dir = parent
+    except Exception:
+        pass
+    return Path.cwd().resolve()
+
+# Resolve candidates
+project_root_from_cwd = _detect_project_root_from_cwd()
+project_csv_from_cwd = project_root_from_cwd / ".pdd" / "llm_model.csv"
+project_csv_from_env = PROJECT_ROOT / ".pdd" / "llm_model.csv"
+
+# Detect whether PDD_PATH points to the installed package directory. If so,
+# don't prioritize it over the real project from CWD.
+try:
+    _installed_pkg_root = importlib.resources.files('pdd')
+    # importlib.resources.files returns a Traversable; get a FS path string if possible
+    try:
+        _installed_pkg_root_path = Path(str(_installed_pkg_root))
+    except Exception:
+        _installed_pkg_root_path = None
+except Exception:
+    _installed_pkg_root_path = None
+
+def _is_env_path_package_dir(env_path: Path) -> bool:
+    try:
+        if _installed_pkg_root_path is None:
+            return False
+        env_path = env_path.resolve()
+        pkg_path = _installed_pkg_root_path.resolve()
+        # Treat equal or subpath as package dir
+        return env_path == pkg_path or str(env_path).startswith(str(pkg_path))
+    except Exception:
+        return False
+
+# Selection order
 if user_model_csv_path.is_file():
     LLM_MODEL_CSV_PATH = user_model_csv_path
     logger.info(f"Using user-specific LLM model CSV: {LLM_MODEL_CSV_PATH}")
+elif (not _is_env_path_package_dir(PROJECT_ROOT)) and project_csv_from_env.is_file():
+    # Honor an explicitly-set PDD_PATH pointing to a real project directory
+    LLM_MODEL_CSV_PATH = project_csv_from_env
+    logger.info(f"Using project-specific LLM model CSV (from PDD_PATH): {LLM_MODEL_CSV_PATH}")
+elif project_csv_from_cwd.is_file():
+    # Otherwise, prefer the project relative to the current working directory
+    LLM_MODEL_CSV_PATH = project_csv_from_cwd
+    logger.info(f"Using project-specific LLM model CSV (from CWD): {LLM_MODEL_CSV_PATH}")
 else:
-    #
-
-
-        LLM_MODEL_CSV_PATH = project_model_csv_path
-        logger.info(f"Using project-specific LLM model CSV: {LLM_MODEL_CSV_PATH}")
-    else:
-        # Neither exists, we'll use a marker path that _load_model_data will handle
-        LLM_MODEL_CSV_PATH = None
-        logger.info("No local LLM model CSV found, will use package default")
+    # Neither exists, we'll use a marker path that _load_model_data will handle
+    LLM_MODEL_CSV_PATH = None
+    logger.info("No local LLM model CSV found, will use package default")
 # ---------------------------------
 
 # Load environment variables from .env file
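
The CSV lookup now prefers a real project checkout over PDD_PATH by walking upward from the CWD until a marker (.git, pyproject.toml, data/, .env) is found. A standalone sketch of that upward search (the function name `find_root` is illustrative, not part of the package):

```python
from pathlib import Path

MARKERS = (".git", "pyproject.toml", "data", ".env")

def find_root(start: Path, max_levels: int = 5) -> Path:
    current = start.resolve()
    for _ in range(max_levels):
        if any((current / marker).exists() for marker in MARKERS):
            return current
        if current.parent == current:   # reached the filesystem root
            break
        current = current.parent
    return start.resolve()              # fall back to where we started

# Running from a nested subdirectory of a checkout resolves to the checkout root,
# so <root>/.pdd/llm_model.csv can be honored even when PDD_PATH points elsewhere.
print(find_root(Path.cwd()) / ".pdd" / "llm_model.csv")
```
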
@@ -333,29 +415,49 @@ def _litellm_success_callback(
             cost_val = litellm.completion_cost(completion_response=completion_response)
             calculated_cost = cost_val if cost_val is not None else 0.0
         except Exception as e1:
-            # Attempt 2:
-            #
-            # This is often needed for batch completion items.
+            # Attempt 2: Compute via tokens and model mapping. If LiteLLM mapping is
+            # missing or API differs, fall back to CSV rates in _MODEL_RATE_MAP.
             logger.debug(f"Attempting cost calculation with fallback method: {e1}")
             try:
-                model_name = kwargs.get("model")
+                model_name = kwargs.get("model")
                 if model_name and usage:
-
-
-
-
-
-
-
-
+                    in_tok = getattr(usage, 'prompt_tokens', None)
+                    out_tok = getattr(usage, 'completion_tokens', None)
+                    # Some providers may use 'input_tokens'/'output_tokens'
+                    if in_tok is None:
+                        in_tok = getattr(usage, 'input_tokens', 0)
+                    if out_tok is None:
+                        out_tok = getattr(usage, 'output_tokens', 0)
+
+                    # Try LiteLLM helper (arg names vary across versions)
+                    try:
+                        cost_val = litellm.completion_cost(
+                            model=model_name,
+                            prompt_tokens=in_tok,
+                            completion_tokens=out_tok,
+                        )
+                        calculated_cost = cost_val if cost_val is not None else 0.0
+                    except TypeError:
+                        # Older/newer versions may require input/output token names
+                        try:
+                            cost_val = litellm.completion_cost(
+                                model=model_name,
+                                input_tokens=in_tok,
+                                output_tokens=out_tok,
+                            )
+                            calculated_cost = cost_val if cost_val is not None else 0.0
+                        except Exception as e3:
+                            # Final fallback: compute using CSV rates
+                            rates = _MODEL_RATE_MAP.get(str(model_name))
+                            if rates is not None:
+                                in_rate, out_rate = rates
+                                calculated_cost = (float(in_tok or 0) * in_rate + float(out_tok or 0) * out_rate) / 1_000_000.0
+                            else:
+                                calculated_cost = 0.0
+                            logger.debug(f"Cost calculation failed with LiteLLM token API; used CSV rates if available. Detail: {e3}")
                 else:
-                    # If we can't get model name or usage, fallback to 0
                     calculated_cost = 0.0
-                # Optional: Log the original error e1 if needed
-                # logger.warning(f"[Callback WARN] Failed to calculate cost with response object ({e1}) and fallback failed.")
             except Exception as e2:
-                # Optional: Log secondary error e2 if needed
-                # logger.warning(f"[Callback WARN] Failed to calculate cost with fallback method: {e2}")
                 calculated_cost = 0.0  # Default to 0 on any error
                 logger.debug(f"Cost calculation failed with fallback method: {e2}")
 
@@ -373,6 +475,23 @@ def _litellm_success_callback(
 # Register the callback with LiteLLM
 litellm.success_callback = [_litellm_success_callback]
 
+# --- Cost Mapping Support (CSV Rates) ---
+# Populate from CSV inside llm_invoke; used by callback fallback
+_MODEL_RATE_MAP: Dict[str, Tuple[float, float]] = {}
+
+def _set_model_rate_map(df: pd.DataFrame) -> None:
+    global _MODEL_RATE_MAP
+    try:
+        _MODEL_RATE_MAP = {
+            str(row['model']): (
+                float(row['input']) if pd.notna(row['input']) else 0.0,
+                float(row['output']) if pd.notna(row['output']) else 0.0,
+            )
+            for _, row in df.iterrows()
+        }
+    except Exception:
+        _MODEL_RATE_MAP = {}
+
 # --- Helper Functions ---
 
 def _load_model_data(csv_path: Optional[Path]) -> pd.DataFrame:
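
Both cost fallbacks above use the same convention: the CSV `input`/`output` columns are USD per million tokens, so cost = (input_tokens * input_rate + output_tokens * output_rate) / 1e6. A short sketch that builds the rate map from a hypothetical frame and prices one call (the rows and numbers are invented):

```python
import pandas as pd

df = pd.DataFrame([
    {"model": "example/model-a", "input": 0.25, "output": 2.00},   # $/1M tokens
    {"model": "example/model-b", "input": None, "output": 1.00},
])

rate_map = {
    str(row["model"]): (
        float(row["input"]) if pd.notna(row["input"]) else 0.0,
        float(row["output"]) if pd.notna(row["output"]) else 0.0,
    )
    for _, row in df.iterrows()
}

in_rate, out_rate = rate_map["example/model-a"]
in_tok, out_tok = 12_000, 3_500
cost = (in_tok * in_rate + out_tok * out_rate) / 1_000_000.0
print(f"${cost:.4f}")   # $0.0100  (0.003 input + 0.007 output)
```
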
@@ -482,11 +601,26 @@ def _select_model_candidates(
         # Try finding base model in the *original* df in case it was filtered out
         original_base = model_df[model_df['model'] == base_model_name]
         if not original_base.empty:
-
-
-
-
-
+            # Base exists but may be misconfigured (e.g., missing API key). Keep erroring loudly.
+            raise ValueError(
+                f"Base model '{base_model_name}' found in CSV but requires API key '{original_base.iloc[0]['api_key']}' which might be missing or invalid configuration."
+            )
+        # Option A': Soft fallback – choose a reasonable surrogate base and continue
+        # Strategy (simplified and deterministic): pick the first available model
+        # from the CSV as the surrogate base. This mirrors typical CSV ordering
+        # expectations and keeps behavior predictable across environments.
+        try:
+            base_model = available_df.iloc[0]
+            logger.warning(
+                f"Base model '{base_model_name}' not found in CSV. Falling back to surrogate base '{base_model['model']}' (Option A')."
+            )
+        except Exception:
+            # If any unexpected error occurs during fallback, raise a clear error
+            raise ValueError(
+                f"Specified base model '{base_model_name}' not found and fallback selection failed. Check your LLM model CSV."
+            )
+    else:
+        base_model = base_model_row.iloc[0]
 
     # 3. Determine Target and Sort
     candidates = []
@@ -497,9 +631,10 @@ def _select_model_candidates(
         # Sort remaining by ELO descending as fallback
         available_df['sort_metric'] = -available_df['coding_arena_elo']  # Negative for descending sort
         candidates = available_df.sort_values(by='sort_metric').to_dict('records')
-        # Ensure base model is first if it exists
-
-
+        # Ensure effective base model is first if it exists (supports surrogate base)
+        effective_base_name = str(base_model['model']) if isinstance(base_model, pd.Series) else base_model_name
+        if any(c['model'] == effective_base_name for c in candidates):
+            candidates.sort(key=lambda x: 0 if x['model'] == effective_base_name else 1)
         target_metric_value = f"Base Model ELO: {base_model['coding_arena_elo']}"
 
     elif strength < 0.5:
@@ -716,6 +851,49 @@ def _format_messages(prompt: str, input_data: Union[Dict[str, Any], List[Dict[str
     except Exception as e:
         raise ValueError(f"Error formatting prompt: {e}") from e
 
+# --- JSON Extraction Helpers ---
+import re
+
+def _extract_fenced_json_block(text: str) -> Optional[str]:
+    try:
+        m = re.search(r"```json\s*(\{[\s\S]*?\})\s*```", text, flags=re.IGNORECASE)
+        if m:
+            return m.group(1)
+        return None
+    except Exception:
+        return None
+
+def _extract_balanced_json_objects(text: str) -> List[str]:
+    results: List[str] = []
+    brace_stack = 0
+    start_idx = -1
+    in_string = False
+    escape = False
+    for i, ch in enumerate(text):
+        if in_string:
+            if escape:
+                escape = False
+            elif ch == '\\':
+                escape = True
+            elif ch == '"':
+                in_string = False
+            continue
+        else:
+            if ch == '"':
+                in_string = True
+                continue
+            if ch == '{':
+                if brace_stack == 0:
+                    start_idx = i
+                brace_stack += 1
+            elif ch == '}':
+                if brace_stack > 0:
+                    brace_stack -= 1
+                    if brace_stack == 0 and start_idx != -1:
+                        results.append(text[start_idx:i+1])
+                        start_idx = -1
+    return results
+
 # --- Main Function ---
 
 def llm_invoke(
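
For comparison, the standard library can do a similar balanced-object extraction: `json.JSONDecoder.raw_decode` parses one JSON value starting at a given index and ignores trailing text, and it also copes with braces inside strings. This is only an illustration of the idea, not what the module uses:

```python
import json

def first_json_object(text: str):
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            obj, _end = decoder.raw_decode(text, start)
            return obj
        except json.JSONDecodeError:
            start = text.find("{", start + 1)
    return None

raw = 'Sure! Here is the result:\n```json\n{"answer": 42, "note": "a \\"quoted\\" brace }"}\n```'
print(first_json_object(raw))   # {'answer': 42, 'note': 'a "quoted" brace }'}
```
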
@@ -863,6 +1041,12 @@ def llm_invoke(
     response_format = None
     time_kwargs = {}
 
+    # Update global rate map for callback cost fallback
+    try:
+        _set_model_rate_map(model_df)
+    except Exception:
+        pass
+
     for model_info in candidate_models:
         model_name_litellm = model_info['model']
         api_key_name = model_info.get('api_key')
@@ -955,11 +1139,33 @@ def llm_invoke(
         elif verbose:  # No api_key_name_from_csv in CSV for this model
             logger.info(f"[INFO] No API key name specified in CSV for model '{model_name_litellm}'. LiteLLM will use its default authentication mechanisms (e.g., standard provider env vars or ADC for Vertex AI).")
 
-        # Add api_base if present in CSV
+        # Add base_url/api_base override if present in CSV
         api_base = model_info.get('base_url')
         if pd.notna(api_base) and api_base:
+            # LiteLLM prefers `base_url`; some older paths accept `api_base`.
+            litellm_kwargs["base_url"] = str(api_base)
             litellm_kwargs["api_base"] = str(api_base)
 
+        # Provider-specific defaults (e.g., LM Studio)
+        model_name_lower = str(model_name_litellm).lower()
+        provider_lower_for_model = provider.lower()
+        is_lm_studio = model_name_lower.startswith('lm_studio/') or provider_lower_for_model == 'lm_studio'
+        if is_lm_studio:
+            # Ensure base_url is set (fallback to env LM_STUDIO_API_BASE or localhost)
+            if not litellm_kwargs.get("base_url"):
+                lm_studio_base = os.getenv("LM_STUDIO_API_BASE", "http://localhost:1234/v1")
+                litellm_kwargs["base_url"] = lm_studio_base
+                litellm_kwargs["api_base"] = lm_studio_base
+                if verbose:
+                    logger.info(f"[INFO] Using LM Studio base_url: {lm_studio_base}")
+
+            # Ensure a non-empty api_key; LM Studio accepts any non-empty token (e.g., 'lm-studio')
+            if not litellm_kwargs.get("api_key"):
+                lm_studio_key = os.getenv("LM_STUDIO_API_KEY") or "lm-studio"
+                litellm_kwargs["api_key"] = lm_studio_key
+                if verbose:
+                    logger.info("[INFO] Using LM Studio api_key placeholder (set LM_STUDIO_API_KEY to customize).")
+
         # Handle Structured Output (JSON Mode / Pydantic)
         if output_pydantic:
             # Check if model supports structured output based on CSV flag or LiteLLM check
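
The LM Studio branch only fills in a local OpenAI-compatible endpoint and a placeholder key when the CSV leaves them blank. A hedged sketch of the equivalent direct LiteLLM call, mirroring the kwargs the new code assembles (the model id and port are assumptions for a local server):

```python
import os
import litellm

api_base = os.getenv("LM_STUDIO_API_BASE", "http://localhost:1234/v1")
api_key = os.getenv("LM_STUDIO_API_KEY") or "lm-studio"   # any non-empty token works

response = litellm.completion(
    model="lm_studio/qwen2.5-7b-instruct",   # hypothetical local model name
    messages=[{"role": "user", "content": "Say hello."}],
    api_base=api_base,
    api_key=api_key,
)
print(response.choices[0].message.content)
```
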
@@ -1017,11 +1223,32 @@ def llm_invoke(
                     effort = "high"
                 elif time > 0.3:
                     effort = "medium"
-
-
-
-
-
+
+                # Map effort parameter per-provider/model family
+                model_lower = str(model_name_litellm).lower()
+                provider_lower = str(provider).lower()
+
+                if provider_lower == 'openai' and model_lower.startswith('gpt-5'):
+                    # OpenAI 5-series uses Responses API with nested 'reasoning'
+                    reasoning_obj = {"effort": effort, "summary": "auto"}
+                    litellm_kwargs["reasoning"] = reasoning_obj
+                    time_kwargs["reasoning"] = reasoning_obj
+                    if verbose:
+                        logger.info(f"[INFO] Requesting OpenAI reasoning.effort='{effort}' for {model_name_litellm} (Responses API)")
+
+                elif provider_lower == 'openai' and model_lower.startswith('o') and 'mini' not in model_lower:
+                    # Historical o* models may use LiteLLM's generic reasoning_effort param
+                    litellm_kwargs["reasoning_effort"] = effort
+                    time_kwargs["reasoning_effort"] = effort
+                    if verbose:
+                        logger.info(f"[INFO] Requesting reasoning_effort='{effort}' for {model_name_litellm}")
+
+                else:
+                    # Fallback to LiteLLM generic param when supported by provider adapter
+                    litellm_kwargs["reasoning_effort"] = effort
+                    time_kwargs["reasoning_effort"] = effort
+                    if verbose:
+                        logger.info(f"[INFO] Requesting generic reasoning_effort='{effort}' for {model_name_litellm}")
 
             elif reasoning_type == 'none':
                 if verbose:
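
The effort tier is derived from the `time` argument with simple cutoffs; only the `> 0.3 -> "medium"` boundary is visible in this hunk, so the other thresholds below are assumptions. A tiny sketch of the mapping as a pure function:

```python
def effort_from_time(time: float, high_cutoff: float = 0.7, medium_cutoff: float = 0.3) -> str:
    # Cutoff values are illustrative; the hunk only shows the "medium" tier at > 0.3.
    if time > high_cutoff:
        return "high"
    if time > medium_cutoff:
        return "medium"
    return "low"

assert effort_from_time(0.9) == "high"
assert effort_from_time(0.5) == "medium"
assert effort_from_time(0.1) == "low"
```
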
@@ -1053,6 +1280,142 @@ def llm_invoke(
             logger.debug("NOT ENABLING CACHING: litellm.cache is None at call time")
 
 
+        # Route OpenAI gpt-5* models through Responses API to support 'reasoning'
+        model_lower_for_call = str(model_name_litellm).lower()
+        provider_lower_for_call = str(provider).lower()
+
+        if (
+            not use_batch_mode
+            and provider_lower_for_call == 'openai'
+            and model_lower_for_call.startswith('gpt-5')
+        ):
+            if verbose:
+                logger.info(f"[INFO] Calling OpenAI Responses API for {model_name_litellm}...")
+            try:
+                # Build input text from messages
+                if isinstance(formatted_messages, list) and formatted_messages and isinstance(formatted_messages[0], dict):
+                    input_text = "\n\n".join(f"{m.get('role','user')}: {m.get('content','')}" for m in formatted_messages)
+                else:
+                    # Fallback: string cast
+                    input_text = str(formatted_messages)
+
+                # Derive effort mapping already computed in time_kwargs
+                reasoning_param = time_kwargs.get("reasoning")
+
+                # Optional text settings; keep simple
+                text_block = {"format": {"type": "text"}}
+
+                # If structured output requested, attempt JSON schema via Pydantic
+                # GPT-5 Responses API does not support temperature; omit it here.
+                responses_kwargs = {
+                    "model": model_name_litellm,
+                    "input": input_text,
+                    "text": text_block,
+                }
+                if verbose and temperature not in (None, 0, 0.0):
+                    logger.info("[INFO] Skipping 'temperature' for OpenAI GPT-5 Responses call (unsupported by API).")
+                if reasoning_param is not None:
+                    responses_kwargs["reasoning"] = reasoning_param
+
+                if output_pydantic:
+                    try:
+                        schema = output_pydantic.model_json_schema()
+                        if _openai_responses_supports_response_format():
+                            responses_kwargs["response_format"] = {
+                                "type": "json_schema",
+                                "json_schema": {
+                                    "name": output_pydantic.__name__,
+                                    "schema": schema,
+                                    "strict": True,
+                                },
+                            }
+                            # When enforcing JSON schema, omit text formatting
+                            responses_kwargs.pop("text", None)
+                        else:
+                            if verbose:
+                                logger.info("[INFO] OpenAI SDK lacks Responses.response_format; will validate JSON client-side with Pydantic.")
+                    except Exception as schema_e:
+                        logger.warning(f"[WARN] Failed to derive JSON schema from Pydantic: {schema_e}. Proceeding without structured response_format.")
+
+                # Initialize OpenAI client with explicit key if provided
+                try:
+                    from openai import OpenAI as _OpenAIClient
+                except Exception:
+                    _OpenAIClient = None
+                if _OpenAIClient is None:
+                    raise RuntimeError("OpenAI SDK not available to call Responses API.")
+
+                api_key_to_use = litellm_kwargs.get("api_key") or os.getenv("OPENAI_API_KEY")
+                client = _OpenAIClient(api_key=api_key_to_use) if api_key_to_use else _OpenAIClient()
+
+                # Make the Responses API call, with graceful fallback if SDK
+                # doesn't support certain newer kwargs (e.g., response_format)
+                try:
+                    resp = client.responses.create(**responses_kwargs)
+                except TypeError as te:
+                    msg = str(te)
+                    if 'response_format' in responses_kwargs and ('unexpected keyword argument' in msg or 'got an unexpected keyword argument' in msg):
+                        logger.warning("[WARN] OpenAI SDK doesn't support response_format; retrying without it.")
+                        responses_kwargs.pop('response_format', None)
+                        resp = client.responses.create(**responses_kwargs)
+                    else:
+                        raise
+
+                # Extract text result
+                result_text = getattr(resp, "output_text", None)
+                if result_text is None:
+                    try:
+                        # Fallback parse
+                        outputs = getattr(resp, "output", []) or getattr(resp, "outputs", [])
+                        if outputs:
+                            first = outputs[0]
+                            content = getattr(first, "content", [])
+                            if content and hasattr(content[0], "text"):
+                                result_text = content[0].text
+                    except Exception:
+                        result_text = None
+
+                # Calculate cost using usage + CSV rates
+                usage = getattr(resp, "usage", None)
+                total_cost = 0.0
+                if usage is not None:
+                    in_tok = getattr(usage, "input_tokens", 0) or 0
+                    out_tok = getattr(usage, "output_tokens", 0) or 0
+                    in_rate = model_info.get('input', 0.0) or 0.0
+                    out_rate = model_info.get('output', 0.0) or 0.0
+                    total_cost = (in_tok * in_rate + out_tok * out_rate) / 1_000_000.0
+
+                final_result = None
+                if output_pydantic and result_text:
+                    try:
+                        final_result = output_pydantic.model_validate_json(result_text)
+                    except Exception as e:
+                        logger.error(f"[ERROR] Pydantic parse failed on Responses output: {e}")
+                        final_result = result_text
+                else:
+                    final_result = result_text
+
+                if verbose:
+                    logger.info(f"[RESULT] Model Used: {model_name_litellm}")
+                    logger.info(f"[RESULT] Total Cost (estimated): ${total_cost:.6g}")
+
+                return {
+                    'result': final_result,
+                    'cost': total_cost,
+                    'model_name': model_name_litellm,
+                    'thinking_output': None,
+                }
+            except Exception as e:
+                last_exception = e
+                logger.error(f"[ERROR] OpenAI Responses call failed for {model_name_litellm}: {e}")
+                # Remove 'reasoning' key to avoid OpenAI Chat API unknown param errors
+                if "reasoning" in litellm_kwargs:
+                    try:
+                        litellm_kwargs.pop("reasoning", None)
+                    except Exception:
+                        pass
+                # Fall through to LiteLLM path as a fallback
+
         if use_batch_mode:
             if verbose:
                 logger.info(f"[INFO] Calling litellm.batch_completion for {model_name_litellm}...")
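
Stripped of the guards and fallbacks, the new gpt-5 path reduces to one Responses API call plus `output_text`/usage extraction. A minimal hedged sketch (the model id is an assumption taken from the CSV examples; requires a recent `openai` SDK and OPENAI_API_KEY):

```python
from openai import OpenAI

client = OpenAI()   # reads OPENAI_API_KEY from the environment
resp = client.responses.create(
    model="gpt-5-nano",                                   # assumed model id
    input="user: What is the capital of France?",
    reasoning={"effort": "medium", "summary": "auto"},    # same shape the diff builds
)

print(resp.output_text)

usage = resp.usage
if usage is not None:
    # These token counts feed the CSV-rate cost estimate used above.
    print(usage.input_tokens, usage.output_tokens)
```
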
@@ -1119,7 +1482,6 @@ def llm_invoke(
                     messages=retry_messages,
                     temperature=temperature,
                     response_format=response_format,
-                    max_completion_tokens=max_tokens,
                     **time_kwargs
                 )
                 # Re-enable cache - restore original configured cache (restore to original state, even if None)
@@ -1163,26 +1525,39 @@ def llm_invoke(
             elif isinstance(raw_result, str):
                 json_string_to_parse = raw_result  # Start with the raw string
                 try:
-                    #
-
-
-                    if
-
-                    # Basic check if it looks like JSON
-                    if potential_json.strip().startswith('{') and potential_json.strip().endswith('}'):
-                        if verbose:
-                            logger.debug(f"[DEBUG] Attempting to parse extracted JSON block: '{potential_json}'")
-                        parsed_result = output_pydantic.model_validate_json(potential_json)
-                    else:
-                        # If block extraction fails, try cleaning markdown next
-                        raise ValueError("Extracted block doesn't look like JSON")
+                    # 1) Prefer fenced ```json blocks
+                    fenced = _extract_fenced_json_block(raw_result)
+                    candidates: List[str] = []
+                    if fenced:
+                        candidates.append(fenced)
                     else:
-
-
+                        # 2) Fall back to scanning for balanced JSON objects
+                        candidates.extend(_extract_balanced_json_objects(raw_result))
+
+                    if not candidates:
+                        raise ValueError("No JSON-like content found")
+
+                    parse_err: Optional[Exception] = None
+                    for cand in candidates:
+                        try:
+                            if verbose:
+                                logger.debug(f"[DEBUG] Attempting to parse candidate JSON block: {cand}")
+                            parsed_result = output_pydantic.model_validate_json(cand)
+                            json_string_to_parse = cand
+                            parse_err = None
+                            break
+                        except (json.JSONDecodeError, ValidationError, ValueError) as pe:
+                            parse_err = pe
+
+                    if parsed_result is None:
+                        # If none of the candidates parsed, raise last error
+                        if parse_err is not None:
+                            raise parse_err
+                        raise ValueError("Unable to parse any JSON candidates")
                 except (json.JSONDecodeError, ValidationError, ValueError) as extraction_error:
                     if verbose:
-                        logger.debug(f"[DEBUG] JSON
-                    #
+                        logger.debug(f"[DEBUG] JSON extraction/validation failed ('{extraction_error}'). Trying fence cleaning.")
+                    # Last resort: strip any leading/trailing code fences and retry
                     cleaned_result_str = raw_result.strip()
                     if cleaned_result_str.startswith("```json"):
                         cleaned_result_str = cleaned_result_str[7:]
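
Taken together, the structured-output recovery is: collect candidate JSON strings (fenced block first, then balanced-brace scan), then try `model_validate_json` on each until one validates. A compact end-to-end sketch with a hypothetical schema:

```python
import re
from typing import List, Optional
from pydantic import BaseModel, ValidationError

class Joke(BaseModel):          # hypothetical output schema
    setup: str
    punchline: str

def parse_first_valid(candidates: List[str]) -> Optional[Joke]:
    for cand in candidates:
        try:
            return Joke.model_validate_json(cand)
        except (ValidationError, ValueError):
            continue
    return None

raw = ('Here you go:\n```json\n'
       '{"setup": "Why do programmers prefer dark mode?", '
       '"punchline": "Because light attracts bugs."}\n```')
m = re.search(r"```json\s*(\{[\s\S]*?\})\s*```", raw, flags=re.IGNORECASE)
candidates = [m.group(1)] if m else []
print(parse_first_valid(candidates))
```
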
@@ -1191,15 +1566,13 @@ def llm_invoke(
                     if cleaned_result_str.endswith("```"):
                         cleaned_result_str = cleaned_result_str[:-3]
                     cleaned_result_str = cleaned_result_str.strip()
-                    # Check again if it looks like JSON before parsing
                     if cleaned_result_str.startswith('{') and cleaned_result_str.endswith('}'):
                         if verbose:
-                            logger.debug(f"[DEBUG] Attempting parse after
-                        json_string_to_parse = cleaned_result_str
+                            logger.debug(f"[DEBUG] Attempting parse after generic fence cleaning. Cleaned string: '{cleaned_result_str}'")
+                        json_string_to_parse = cleaned_result_str
                         parsed_result = output_pydantic.model_validate_json(json_string_to_parse)
                     else:
-
-                        raise ValueError("Content after cleaning markdown doesn't look like JSON")
+                        raise ValueError("Content after cleaning doesn't look like JSON")
 
 
             # Check if any parsing attempt succeeded
@@ -1333,7 +1706,7 @@ if __name__ == "__main__":
     response = llm_invoke(
         prompt="Tell me a short joke about {topic}.",
         input_json={"topic": "programmers"},
-        strength=0.5, # Use base model (gpt-
+        strength=0.5, # Use base model (gpt-5-nano)
         temperature=0.7,
         verbose=True
     )
@@ -1414,7 +1787,7 @@ if __name__ == "__main__":
         {"role": "system", "content": "You are a helpful assistant."},
         {"role": "user", "content": "What is the capital of France?"}
     ]
-    # Strength 0.5 should select gpt-
+    # Strength 0.5 should select gpt-5-nano
    response_messages = llm_invoke(
        messages=custom_messages,
        strength=0.5,