pdd-cli 0.0.48__py3-none-any.whl → 0.0.50__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pdd-cli might be problematic.

pdd/llm_invoke.py CHANGED
@@ -6,7 +6,7 @@ import pandas as pd
  import litellm
  import logging # ADDED FOR DETAILED LOGGING
  import importlib.resources
- from litellm.caching.caching import Cache # Fix for LiteLLM v1.49.3+
+ from litellm.caching.caching import Cache # Fix for LiteLLM v1.75.5+

  # --- Configure Standard Python Logging ---
  logger = logging.getLogger("pdd.llm_invoke")
@@ -26,6 +26,16 @@ litellm_logger = logging.getLogger("litellm")
  litellm_log_level = os.getenv("LITELLM_LOG_LEVEL", "WARNING" if PRODUCTION_MODE else "INFO")
  litellm_logger.setLevel(getattr(logging, litellm_log_level, logging.WARNING))

+ # Ensure LiteLLM drops provider-unsupported params instead of erroring
+ # This prevents failures like UnsupportedParamsError for OpenAI gpt-5-* when
+ # passing generic params (e.g., reasoning_effort) not accepted by that API path.
+ try:
+     _drop_params_env = os.getenv("LITELLM_DROP_PARAMS", "true")
+     litellm.drop_params = str(_drop_params_env).lower() in ("1", "true", "yes", "on")
+ except Exception:
+     # Be conservative: default to True even if env parsing fails
+     litellm.drop_params = True
+
  # Add a console handler if none exists
  if not logger.handlers:
      console_handler = logging.StreamHandler()
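
Note (outside the diff): a minimal sketch of the truthy-string parsing used for LITELLM_DROP_PARAMS above, standard library only; the helper name `_env_flag` is illustrative, not from the package.

    import os

    def _env_flag(name: str, default: str = "true") -> bool:
        # Same rule as the diff: "1", "true", "yes", or "on" (case-insensitive) enables the flag.
        return str(os.getenv(name, default)).lower() in ("1", "true", "yes", "on")

    # e.g. with LITELLM_DROP_PARAMS=off set, _env_flag("LITELLM_DROP_PARAMS") is False,
    # so litellm.drop_params would be turned off for that run.
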
@@ -71,7 +81,7 @@ import json
  # from rich import print as rprint # Replaced with logger
  from dotenv import load_dotenv
  from pathlib import Path
- from typing import Optional, Dict, List, Any, Type, Union
+ from typing import Optional, Dict, List, Any, Type, Union, Tuple
  from pydantic import BaseModel, ValidationError
  import openai # Import openai for exception handling as LiteLLM maps to its types
  from langchain_core.prompts import PromptTemplate
@@ -114,6 +124,22 @@ def _is_wsl_environment() -> bool:
      return False


+ def _openai_responses_supports_response_format() -> bool:
+     """Detect if current OpenAI Python SDK supports `response_format` on Responses.create.
+
+     Returns True if the installed SDK exposes a `response_format` parameter on
+     `openai.resources.responses.Responses.create`, else False. This avoids
+     sending unsupported kwargs and triggering TypeError at runtime.
+     """
+     try:
+         import inspect
+         from openai.resources.responses import Responses
+         sig = inspect.signature(Responses.create)
+         return "response_format" in sig.parameters
+     except Exception:
+         return False
+
+
  def _get_environment_info() -> Dict[str, str]:
      """
      Get environment information for debugging and error reporting.
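
Note (outside the diff): the helper above is a feature-detection pattern based on signature introspection; a generic sketch of the same idea, with an illustrative name:

    import inspect

    def _callable_accepts(func, param_name: str) -> bool:
        # True when `param_name` is an explicit parameter of `func`.
        # Conservative by design: callables that only take **kwargs report False.
        try:
            return param_name in inspect.signature(func).parameters
        except (TypeError, ValueError):
            return False

    # Example with a local stand-in for Responses.create:
    # def create(model, input, text=None): ...
    # _callable_accepts(create, "text") -> True
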
@@ -188,24 +214,80 @@ if PROJECT_ROOT is None: # Fallback to CWD if no method succeeded

  ENV_PATH = PROJECT_ROOT / ".env"
  # --- Determine LLM_MODEL_CSV_PATH ---
- # Prioritize ~/.pdd/llm_model.csv
+ # Prioritize ~/.pdd/llm_model.csv, then a project .pdd from the current CWD,
+ # then PROJECT_ROOT (which may be set from PDD_PATH), else fall back to package.
  user_pdd_dir = Path.home() / ".pdd"
  user_model_csv_path = user_pdd_dir / "llm_model.csv"

- # Check in order: user-specific, project-specific, package default
+ def _detect_project_root_from_cwd(max_levels: int = 5) -> Path:
+     """Search upwards from the current working directory for common project markers.
+
+     This intentionally ignores PDD_PATH to support CLI invocations that set
+     PDD_PATH to the installed package location. We want to honor a real project
+     checkout's .pdd/llm_model.csv when running inside it.
+     """
+     try:
+         current_dir = Path.cwd().resolve()
+         for _ in range(max_levels):
+             if (
+                 (current_dir / ".git").exists()
+                 or (current_dir / "pyproject.toml").exists()
+                 or (current_dir / "data").is_dir()
+                 or (current_dir / ".env").exists()
+             ):
+                 return current_dir
+             parent = current_dir.parent
+             if parent == current_dir:
+                 break
+             current_dir = parent
+     except Exception:
+         pass
+     return Path.cwd().resolve()
+
+ # Resolve candidates
+ project_root_from_cwd = _detect_project_root_from_cwd()
+ project_csv_from_cwd = project_root_from_cwd / ".pdd" / "llm_model.csv"
+ project_csv_from_env = PROJECT_ROOT / ".pdd" / "llm_model.csv"
+
+ # Detect whether PDD_PATH points to the installed package directory. If so,
+ # don't prioritize it over the real project from CWD.
+ try:
+     _installed_pkg_root = importlib.resources.files('pdd')
+     # importlib.resources.files returns a Traversable; get a FS path string if possible
+     try:
+         _installed_pkg_root_path = Path(str(_installed_pkg_root))
+     except Exception:
+         _installed_pkg_root_path = None
+ except Exception:
+     _installed_pkg_root_path = None
+
+ def _is_env_path_package_dir(env_path: Path) -> bool:
+     try:
+         if _installed_pkg_root_path is None:
+             return False
+         env_path = env_path.resolve()
+         pkg_path = _installed_pkg_root_path.resolve()
+         # Treat equal or subpath as package dir
+         return env_path == pkg_path or str(env_path).startswith(str(pkg_path))
+     except Exception:
+         return False
+
+ # Selection order
  if user_model_csv_path.is_file():
      LLM_MODEL_CSV_PATH = user_model_csv_path
      logger.info(f"Using user-specific LLM model CSV: {LLM_MODEL_CSV_PATH}")
+ elif (not _is_env_path_package_dir(PROJECT_ROOT)) and project_csv_from_env.is_file():
+     # Honor an explicitly-set PDD_PATH pointing to a real project directory
+     LLM_MODEL_CSV_PATH = project_csv_from_env
+     logger.info(f"Using project-specific LLM model CSV (from PDD_PATH): {LLM_MODEL_CSV_PATH}")
+ elif project_csv_from_cwd.is_file():
+     # Otherwise, prefer the project relative to the current working directory
+     LLM_MODEL_CSV_PATH = project_csv_from_cwd
+     logger.info(f"Using project-specific LLM model CSV (from CWD): {LLM_MODEL_CSV_PATH}")
  else:
-     # Check project-specific location (.pdd directory)
-     project_model_csv_path = PROJECT_ROOT / ".pdd" / "llm_model.csv"
-     if project_model_csv_path.is_file():
-         LLM_MODEL_CSV_PATH = project_model_csv_path
-         logger.info(f"Using project-specific LLM model CSV: {LLM_MODEL_CSV_PATH}")
-     else:
-         # Neither exists, we'll use a marker path that _load_model_data will handle
-         LLM_MODEL_CSV_PATH = None
-         logger.info("No local LLM model CSV found, will use package default")
+     # Neither exists, we'll use a marker path that _load_model_data will handle
+     LLM_MODEL_CSV_PATH = None
+     logger.info("No local LLM model CSV found, will use package default")
  # ---------------------------------

  # Load environment variables from .env file
@@ -280,16 +362,22 @@ if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
      elif 'AWS_REGION_NAME' in os.environ:
          pass # Or just leave it if the temporary setting wasn't done/needed

+ # Check if caching is disabled via environment variable
+ if os.getenv("LITELLM_CACHE_DISABLE") == "1":
+     logger.info("LiteLLM caching disabled via LITELLM_CACHE_DISABLE=1")
+     litellm.cache = None
+     cache_configured = True
+
  if not cache_configured:
      try:
-         # Try SQLite-based cache as a fallback
+         # Try disk-based cache as a fallback
          sqlite_cache_path = PROJECT_ROOT / "litellm_cache.sqlite"
-         configured_cache = Cache(type="sqlite", cache_path=str(sqlite_cache_path))
+         configured_cache = Cache(type="disk", disk_cache_dir=str(sqlite_cache_path))
          litellm.cache = configured_cache
-         logger.info(f"LiteLLM SQLite cache configured at {sqlite_cache_path}")
+         logger.info(f"LiteLLM disk cache configured at {sqlite_cache_path}")
          cache_configured = True
      except Exception as e2:
-         warnings.warn(f"Failed to configure LiteLLM SQLite cache: {e2}. Caching is disabled.")
+         warnings.warn(f"Failed to configure LiteLLM disk cache: {e2}. Caching is disabled.")
          litellm.cache = None

  if not cache_configured:
@@ -327,29 +415,49 @@ def _litellm_success_callback(
          cost_val = litellm.completion_cost(completion_response=completion_response)
          calculated_cost = cost_val if cost_val is not None else 0.0
      except Exception as e1:
-         # Attempt 2: If response object failed (e.g., missing provider in model name),
-         # try again using explicit model from kwargs and tokens from usage.
-         # This is often needed for batch completion items.
+         # Attempt 2: Compute via tokens and model mapping. If LiteLLM mapping is
+         # missing or API differs, fall back to CSV rates in _MODEL_RATE_MAP.
          logger.debug(f"Attempting cost calculation with fallback method: {e1}")
          try:
-             model_name = kwargs.get("model") # Get original model name from input kwargs
+             model_name = kwargs.get("model")
              if model_name and usage:
-                 prompt_tokens = getattr(usage, 'prompt_tokens', 0)
-                 completion_tokens = getattr(usage, 'completion_tokens', 0)
-                 cost_val = litellm.completion_cost(
-                     model=model_name,
-                     prompt_tokens=prompt_tokens,
-                     completion_tokens=completion_tokens
-                 )
-                 calculated_cost = cost_val if cost_val is not None else 0.0
+                 in_tok = getattr(usage, 'prompt_tokens', None)
+                 out_tok = getattr(usage, 'completion_tokens', None)
+                 # Some providers may use 'input_tokens'/'output_tokens'
+                 if in_tok is None:
+                     in_tok = getattr(usage, 'input_tokens', 0)
+                 if out_tok is None:
+                     out_tok = getattr(usage, 'output_tokens', 0)
+
+                 # Try LiteLLM helper (arg names vary across versions)
+                 try:
+                     cost_val = litellm.completion_cost(
+                         model=model_name,
+                         prompt_tokens=in_tok,
+                         completion_tokens=out_tok,
+                     )
+                     calculated_cost = cost_val if cost_val is not None else 0.0
+                 except TypeError:
+                     # Older/newer versions may require input/output token names
+                     try:
+                         cost_val = litellm.completion_cost(
+                             model=model_name,
+                             input_tokens=in_tok,
+                             output_tokens=out_tok,
+                         )
+                         calculated_cost = cost_val if cost_val is not None else 0.0
+                     except Exception as e3:
+                         # Final fallback: compute using CSV rates
+                         rates = _MODEL_RATE_MAP.get(str(model_name))
+                         if rates is not None:
+                             in_rate, out_rate = rates
+                             calculated_cost = (float(in_tok or 0) * in_rate + float(out_tok or 0) * out_rate) / 1_000_000.0
+                         else:
+                             calculated_cost = 0.0
+                         logger.debug(f"Cost calculation failed with LiteLLM token API; used CSV rates if available. Detail: {e3}")
              else:
-                 # If we can't get model name or usage, fallback to 0
                  calculated_cost = 0.0
-             # Optional: Log the original error e1 if needed
-             # logger.warning(f"[Callback WARN] Failed to calculate cost with response object ({e1}) and fallback failed.")
          except Exception as e2:
-             # Optional: Log secondary error e2 if needed
-             # logger.warning(f"[Callback WARN] Failed to calculate cost with fallback method: {e2}")
              calculated_cost = 0.0 # Default to 0 on any error
              logger.debug(f"Cost calculation failed with fallback method: {e2}")

@@ -367,6 +475,23 @@ def _litellm_success_callback(
  # Register the callback with LiteLLM
  litellm.success_callback = [_litellm_success_callback]

+ # --- Cost Mapping Support (CSV Rates) ---
+ # Populate from CSV inside llm_invoke; used by callback fallback
+ _MODEL_RATE_MAP: Dict[str, Tuple[float, float]] = {}
+
+ def _set_model_rate_map(df: pd.DataFrame) -> None:
+     global _MODEL_RATE_MAP
+     try:
+         _MODEL_RATE_MAP = {
+             str(row['model']): (
+                 float(row['input']) if pd.notna(row['input']) else 0.0,
+                 float(row['output']) if pd.notna(row['output']) else 0.0,
+             )
+             for _, row in df.iterrows()
+         }
+     except Exception:
+         _MODEL_RATE_MAP = {}
+
  # --- Helper Functions ---

  def _load_model_data(csv_path: Optional[Path]) -> pd.DataFrame:
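
Note (outside the diff): the rate map above keys each model to (input, output) prices taken from the CSV, expressed per million tokens; a minimal sketch of the arithmetic the callback fallback performs:

    def _cost_from_rates(input_tokens: int, output_tokens: int, in_rate: float, out_rate: float) -> float:
        # Rates are USD per 1M tokens, matching the CSV columns used above.
        return (input_tokens * in_rate + output_tokens * out_rate) / 1_000_000.0

    # Example: 1200 input tokens at $0.25/M plus 300 output tokens at $2.00/M
    # -> (1200 * 0.25 + 300 * 2.00) / 1_000_000 = 0.0009 USD
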
@@ -476,11 +601,26 @@ def _select_model_candidates(
          # Try finding base model in the *original* df in case it was filtered out
          original_base = model_df[model_df['model'] == base_model_name]
          if not original_base.empty:
-             raise ValueError(f"Base model '{base_model_name}' found in CSV but requires API key '{original_base.iloc[0]['api_key']}' which might be missing or invalid configuration.")
-         else:
-             raise ValueError(f"Specified base model '{base_model_name}' not found in the LLM model CSV.")
-
-     base_model = base_model_row.iloc[0]
+             # Base exists but may be misconfigured (e.g., missing API key). Keep erroring loudly.
+             raise ValueError(
+                 f"Base model '{base_model_name}' found in CSV but requires API key '{original_base.iloc[0]['api_key']}' which might be missing or invalid configuration."
+             )
+         # Option A': Soft fallback – choose a reasonable surrogate base and continue
+         # Strategy (simplified and deterministic): pick the first available model
+         # from the CSV as the surrogate base. This mirrors typical CSV ordering
+         # expectations and keeps behavior predictable across environments.
+         try:
+             base_model = available_df.iloc[0]
+             logger.warning(
+                 f"Base model '{base_model_name}' not found in CSV. Falling back to surrogate base '{base_model['model']}' (Option A')."
+             )
+         except Exception:
+             # If any unexpected error occurs during fallback, raise a clear error
+             raise ValueError(
+                 f"Specified base model '{base_model_name}' not found and fallback selection failed. Check your LLM model CSV."
+             )
+     else:
+         base_model = base_model_row.iloc[0]

      # 3. Determine Target and Sort
      candidates = []
@@ -491,9 +631,10 @@ def _select_model_candidates(
          # Sort remaining by ELO descending as fallback
          available_df['sort_metric'] = -available_df['coding_arena_elo'] # Negative for descending sort
          candidates = available_df.sort_values(by='sort_metric').to_dict('records')
-         # Ensure base model is first if it exists
-         if any(c['model'] == base_model_name for c in candidates):
-             candidates.sort(key=lambda x: 0 if x['model'] == base_model_name else 1)
+         # Ensure effective base model is first if it exists (supports surrogate base)
+         effective_base_name = str(base_model['model']) if isinstance(base_model, pd.Series) else base_model_name
+         if any(c['model'] == effective_base_name for c in candidates):
+             candidates.sort(key=lambda x: 0 if x['model'] == effective_base_name else 1)
          target_metric_value = f"Base Model ELO: {base_model['coding_arena_elo']}"

      elif strength < 0.5:
@@ -710,6 +851,49 @@ def _format_messages(prompt: str, input_data: Union[Dict[str, Any], List[Dict[st
      except Exception as e:
          raise ValueError(f"Error formatting prompt: {e}") from e

+ # --- JSON Extraction Helpers ---
+ import re
+
+ def _extract_fenced_json_block(text: str) -> Optional[str]:
+     try:
+         m = re.search(r"```json\s*(\{[\s\S]*?\})\s*```", text, flags=re.IGNORECASE)
+         if m:
+             return m.group(1)
+         return None
+     except Exception:
+         return None
+
+ def _extract_balanced_json_objects(text: str) -> List[str]:
+     results: List[str] = []
+     brace_stack = 0
+     start_idx = -1
+     in_string = False
+     escape = False
+     for i, ch in enumerate(text):
+         if in_string:
+             if escape:
+                 escape = False
+             elif ch == '\\':
+                 escape = True
+             elif ch == '"':
+                 in_string = False
+             continue
+         else:
+             if ch == '"':
+                 in_string = True
+                 continue
+             if ch == '{':
+                 if brace_stack == 0:
+                     start_idx = i
+                 brace_stack += 1
+             elif ch == '}':
+                 if brace_stack > 0:
+                     brace_stack -= 1
+                 if brace_stack == 0 and start_idx != -1:
+                     results.append(text[start_idx:i+1])
+                     start_idx = -1
+     return results
+
  # --- Main Function ---

  def llm_invoke(
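
Note (outside the diff): a quick usage sketch of the two helpers added above; the sample strings are made up.

    sample = 'Here you go:\n```json\n{"name": "pdd", "ok": true}\n```\nAnything else?'
    _extract_fenced_json_block(sample)
    # -> '{"name": "pdd", "ok": true}'
    _extract_balanced_json_objects('noise {"a": 1} more noise {"b": {"c": 2}}')
    # -> ['{"a": 1}', '{"b": {"c": 2}}']
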
@@ -852,6 +1036,16 @@ def llm_invoke(
      # --- 3. Iterate Through Candidates and Invoke LLM ---
      last_exception = None
      newly_acquired_keys: Dict[str, bool] = {} # Track keys obtained in this run
+
+     # Initialize variables for retry section
+     response_format = None
+     time_kwargs = {}
+
+     # Update global rate map for callback cost fallback
+     try:
+         _set_model_rate_map(model_df)
+     except Exception:
+         pass

      for model_info in candidate_models:
          model_name_litellm = model_info['model']
@@ -945,11 +1139,33 @@ def llm_invoke(
          elif verbose: # No api_key_name_from_csv in CSV for this model
              logger.info(f"[INFO] No API key name specified in CSV for model '{model_name_litellm}'. LiteLLM will use its default authentication mechanisms (e.g., standard provider env vars or ADC for Vertex AI).")

-         # Add api_base if present in CSV
+         # Add base_url/api_base override if present in CSV
          api_base = model_info.get('base_url')
          if pd.notna(api_base) and api_base:
+             # LiteLLM prefers `base_url`; some older paths accept `api_base`.
+             litellm_kwargs["base_url"] = str(api_base)
              litellm_kwargs["api_base"] = str(api_base)

+         # Provider-specific defaults (e.g., LM Studio)
+         model_name_lower = str(model_name_litellm).lower()
+         provider_lower_for_model = provider.lower()
+         is_lm_studio = model_name_lower.startswith('lm_studio/') or provider_lower_for_model == 'lm_studio'
+         if is_lm_studio:
+             # Ensure base_url is set (fallback to env LM_STUDIO_API_BASE or localhost)
+             if not litellm_kwargs.get("base_url"):
+                 lm_studio_base = os.getenv("LM_STUDIO_API_BASE", "http://localhost:1234/v1")
+                 litellm_kwargs["base_url"] = lm_studio_base
+                 litellm_kwargs["api_base"] = lm_studio_base
+                 if verbose:
+                     logger.info(f"[INFO] Using LM Studio base_url: {lm_studio_base}")
+
+             # Ensure a non-empty api_key; LM Studio accepts any non-empty token (e.g., 'lm-studio')
+             if not litellm_kwargs.get("api_key"):
+                 lm_studio_key = os.getenv("LM_STUDIO_API_KEY") or "lm-studio"
+                 litellm_kwargs["api_key"] = lm_studio_key
+                 if verbose:
+                     logger.info("[INFO] Using LM Studio api_key placeholder (set LM_STUDIO_API_KEY to customize).")
+
          # Handle Structured Output (JSON Mode / Pydantic)
          if output_pydantic:
              # Check if model supports structured output based on CSV flag or LiteLLM check
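
Note (outside the diff): the LM Studio defaults above only apply when nothing is configured; a small sketch of the environment they assume, with illustrative values.

    import os

    # Matches the fallbacks in the diff: a local server and any non-empty token.
    os.environ.setdefault("LM_STUDIO_API_BASE", "http://localhost:1234/v1")
    os.environ.setdefault("LM_STUDIO_API_KEY", "lm-studio")
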
@@ -964,7 +1180,8 @@ def llm_invoke(
                  logger.info(f"[INFO] Requesting structured output (Pydantic: {output_pydantic.__name__}) for {model_name_litellm}")
                  # Pass the Pydantic model directly if supported, else use json_object
                  # LiteLLM handles passing Pydantic models for supported providers
-                 litellm_kwargs["response_format"] = output_pydantic
+                 response_format = output_pydantic
+                 litellm_kwargs["response_format"] = response_format
                  # As a fallback, one could use:
                  # litellm_kwargs["response_format"] = {"type": "json_object"}
                  # And potentially enable client-side validation:
@@ -986,7 +1203,9 @@ def llm_invoke(
                  # Currently known: Anthropic uses 'thinking'
                  # Model name comparison is more robust than provider string
                  if provider == 'anthropic': # Check provider column instead of model prefix
-                     litellm_kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
+                     thinking_param = {"type": "enabled", "budget_tokens": budget}
+                     litellm_kwargs["thinking"] = thinking_param
+                     time_kwargs["thinking"] = thinking_param
                      if verbose:
                          logger.info(f"[INFO] Requesting Anthropic thinking (budget type) with budget: {budget} tokens for {model_name_litellm}")
                  else:
@@ -1004,10 +1223,32 @@ def llm_invoke(
                      effort = "high"
                  elif time > 0.3:
                      effort = "medium"
-                 # Use the common 'reasoning_effort' param LiteLLM provides
-                 litellm_kwargs["reasoning_effort"] = effort
-                 if verbose:
-                     logger.info(f"[INFO] Requesting reasoning_effort='{effort}' (effort type) for {model_name_litellm} based on time={time}")
+
+                 # Map effort parameter per-provider/model family
+                 model_lower = str(model_name_litellm).lower()
+                 provider_lower = str(provider).lower()
+
+                 if provider_lower == 'openai' and model_lower.startswith('gpt-5'):
+                     # OpenAI 5-series uses Responses API with nested 'reasoning'
+                     reasoning_obj = {"effort": effort, "summary": "auto"}
+                     litellm_kwargs["reasoning"] = reasoning_obj
+                     time_kwargs["reasoning"] = reasoning_obj
+                     if verbose:
+                         logger.info(f"[INFO] Requesting OpenAI reasoning.effort='{effort}' for {model_name_litellm} (Responses API)")
+
+                 elif provider_lower == 'openai' and model_lower.startswith('o') and 'mini' not in model_lower:
+                     # Historical o* models may use LiteLLM's generic reasoning_effort param
+                     litellm_kwargs["reasoning_effort"] = effort
+                     time_kwargs["reasoning_effort"] = effort
+                     if verbose:
+                         logger.info(f"[INFO] Requesting reasoning_effort='{effort}' for {model_name_litellm}")
+
+                 else:
+                     # Fallback to LiteLLM generic param when supported by provider adapter
+                     litellm_kwargs["reasoning_effort"] = effort
+                     time_kwargs["reasoning_effort"] = effort
+                     if verbose:
+                         logger.info(f"[INFO] Requesting generic reasoning_effort='{effort}' for {model_name_litellm}")

              elif reasoning_type == 'none':
                  if verbose:
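
Note (outside the diff): a small sketch of the time-to-effort thresholds used above; the "low" starting value is an assumption, since the line that initializes `effort` sits outside this hunk.

    def _effort_from_time(time: float, default: str = "low") -> str:
        # Thresholds mirror the diff: > 0.7 -> "high", > 0.3 -> "medium".
        effort = default
        if time > 0.7:
            effort = "high"
        elif time > 0.3:
            effort = "medium"
        return effort

    # _effort_from_time(0.5) -> "medium"; _effort_from_time(0.9) -> "high"
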
@@ -1039,6 +1280,142 @@ def llm_invoke(
              logger.debug("NOT ENABLING CACHING: litellm.cache is None at call time")


+         # Route OpenAI gpt-5* models through Responses API to support 'reasoning'
+         model_lower_for_call = str(model_name_litellm).lower()
+         provider_lower_for_call = str(provider).lower()
+
+         if (
+             not use_batch_mode
+             and provider_lower_for_call == 'openai'
+             and model_lower_for_call.startswith('gpt-5')
+         ):
+             if verbose:
+                 logger.info(f"[INFO] Calling OpenAI Responses API for {model_name_litellm}...")
+             try:
+                 # Build input text from messages
+                 if isinstance(formatted_messages, list) and formatted_messages and isinstance(formatted_messages[0], dict):
+                     input_text = "\n\n".join(f"{m.get('role','user')}: {m.get('content','')}" for m in formatted_messages)
+                 else:
+                     # Fallback: string cast
+                     input_text = str(formatted_messages)
+
+                 # Derive effort mapping already computed in time_kwargs
+                 reasoning_param = time_kwargs.get("reasoning")
+
+                 # Optional text settings; keep simple
+                 text_block = {"format": {"type": "text"}}
+
+                 # If structured output requested, attempt JSON schema via Pydantic
+                 # GPT-5 Responses API does not support temperature; omit it here.
+                 responses_kwargs = {
+                     "model": model_name_litellm,
+                     "input": input_text,
+                     "text": text_block,
+                 }
+                 if verbose and temperature not in (None, 0, 0.0):
+                     logger.info("[INFO] Skipping 'temperature' for OpenAI GPT-5 Responses call (unsupported by API).")
+                 if reasoning_param is not None:
+                     responses_kwargs["reasoning"] = reasoning_param
+
+                 if output_pydantic:
+                     try:
+                         schema = output_pydantic.model_json_schema()
+                         if _openai_responses_supports_response_format():
+                             responses_kwargs["response_format"] = {
+                                 "type": "json_schema",
+                                 "json_schema": {
+                                     "name": output_pydantic.__name__,
+                                     "schema": schema,
+                                     "strict": True,
+                                 },
+                             }
+                             # When enforcing JSON schema, omit text formatting
+                             responses_kwargs.pop("text", None)
+                         else:
+                             if verbose:
+                                 logger.info("[INFO] OpenAI SDK lacks Responses.response_format; will validate JSON client-side with Pydantic.")
+                     except Exception as schema_e:
+                         logger.warning(f"[WARN] Failed to derive JSON schema from Pydantic: {schema_e}. Proceeding without structured response_format.")
+
+                 # Initialize OpenAI client with explicit key if provided
+                 try:
+                     from openai import OpenAI as _OpenAIClient
+                 except Exception:
+                     _OpenAIClient = None
+                 if _OpenAIClient is None:
+                     raise RuntimeError("OpenAI SDK not available to call Responses API.")
+
+                 api_key_to_use = litellm_kwargs.get("api_key") or os.getenv("OPENAI_API_KEY")
+                 client = _OpenAIClient(api_key=api_key_to_use) if api_key_to_use else _OpenAIClient()
+
+                 # Make the Responses API call, with graceful fallback if SDK
+                 # doesn't support certain newer kwargs (e.g., response_format)
+                 try:
+                     resp = client.responses.create(**responses_kwargs)
+                 except TypeError as te:
+                     msg = str(te)
+                     if 'response_format' in responses_kwargs and ('unexpected keyword argument' in msg or 'got an unexpected keyword argument' in msg):
+                         logger.warning("[WARN] OpenAI SDK doesn't support response_format; retrying without it.")
+                         responses_kwargs.pop('response_format', None)
+                         resp = client.responses.create(**responses_kwargs)
+                     else:
+                         raise
+
+                 # Extract text result
+                 result_text = getattr(resp, "output_text", None)
+                 if result_text is None:
+                     try:
+                         # Fallback parse
+                         outputs = getattr(resp, "output", []) or getattr(resp, "outputs", [])
+                         if outputs:
+                             first = outputs[0]
+                             content = getattr(first, "content", [])
+                             if content and hasattr(content[0], "text"):
+                                 result_text = content[0].text
+                     except Exception:
+                         result_text = None
+
+                 # Calculate cost using usage + CSV rates
+                 usage = getattr(resp, "usage", None)
+                 total_cost = 0.0
+                 if usage is not None:
+                     in_tok = getattr(usage, "input_tokens", 0) or 0
+                     out_tok = getattr(usage, "output_tokens", 0) or 0
+                     in_rate = model_info.get('input', 0.0) or 0.0
+                     out_rate = model_info.get('output', 0.0) or 0.0
+                     total_cost = (in_tok * in_rate + out_tok * out_rate) / 1_000_000.0
+
+                 final_result = None
+                 if output_pydantic and result_text:
+                     try:
+                         final_result = output_pydantic.model_validate_json(result_text)
+                     except Exception as e:
+                         logger.error(f"[ERROR] Pydantic parse failed on Responses output: {e}")
+                         final_result = result_text
+                 else:
+                     final_result = result_text
+
+                 if verbose:
+                     logger.info(f"[RESULT] Model Used: {model_name_litellm}")
+                     logger.info(f"[RESULT] Total Cost (estimated): ${total_cost:.6g}")
+
+                 return {
+                     'result': final_result,
+                     'cost': total_cost,
+                     'model_name': model_name_litellm,
+                     'thinking_output': None,
+                 }
+             except Exception as e:
+                 last_exception = e
+                 logger.error(f"[ERROR] OpenAI Responses call failed for {model_name_litellm}: {e}")
+                 # Remove 'reasoning' key to avoid OpenAI Chat API unknown param errors
+                 if "reasoning" in litellm_kwargs:
+                     try:
+                         litellm_kwargs.pop("reasoning", None)
+                     except Exception:
+                         pass
+                 # Fall through to LiteLLM path as a fallback
+
          if use_batch_mode:
              if verbose:
                  logger.info(f"[INFO] Calling litellm.batch_completion for {model_name_litellm}...")
@@ -1105,7 +1482,6 @@ def llm_invoke(
                      messages=retry_messages,
                      temperature=temperature,
                      response_format=response_format,
-                     max_completion_tokens=max_tokens,
                      **time_kwargs
                  )
                  # Re-enable cache - restore original configured cache (restore to original state, even if None)
@@ -1149,26 +1525,39 @@ def llm_invoke(
                  elif isinstance(raw_result, str):
                      json_string_to_parse = raw_result # Start with the raw string
                      try:
-                         # Look for first { and last }
-                         start_brace = json_string_to_parse.find('{')
-                         end_brace = json_string_to_parse.rfind('}')
-                         if start_brace != -1 and end_brace != -1 and end_brace > start_brace:
-                             potential_json = json_string_to_parse[start_brace:end_brace+1]
-                             # Basic check if it looks like JSON
-                             if potential_json.strip().startswith('{') and potential_json.strip().endswith('}'):
-                                 if verbose:
-                                     logger.debug(f"[DEBUG] Attempting to parse extracted JSON block: '{potential_json}'")
-                                 parsed_result = output_pydantic.model_validate_json(potential_json)
-                             else:
-                                 # If block extraction fails, try cleaning markdown next
-                                 raise ValueError("Extracted block doesn't look like JSON")
+                         # 1) Prefer fenced ```json blocks
+                         fenced = _extract_fenced_json_block(raw_result)
+                         candidates: List[str] = []
+                         if fenced:
+                             candidates.append(fenced)
                          else:
-                             # If no braces found, try cleaning markdown next
-                             raise ValueError("Could not find enclosing {}")
+                             # 2) Fall back to scanning for balanced JSON objects
+                             candidates.extend(_extract_balanced_json_objects(raw_result))
+
+                         if not candidates:
+                             raise ValueError("No JSON-like content found")
+
+                         parse_err: Optional[Exception] = None
+                         for cand in candidates:
+                             try:
+                                 if verbose:
+                                     logger.debug(f"[DEBUG] Attempting to parse candidate JSON block: {cand}")
+                                 parsed_result = output_pydantic.model_validate_json(cand)
+                                 json_string_to_parse = cand
+                                 parse_err = None
+                                 break
+                             except (json.JSONDecodeError, ValidationError, ValueError) as pe:
+                                 parse_err = pe
+
+                         if parsed_result is None:
+                             # If none of the candidates parsed, raise last error
+                             if parse_err is not None:
+                                 raise parse_err
+                             raise ValueError("Unable to parse any JSON candidates")
                      except (json.JSONDecodeError, ValidationError, ValueError) as extraction_error:
                          if verbose:
-                             logger.debug(f"[DEBUG] JSON block extraction/validation failed ('{extraction_error}'). Trying markdown cleaning.")
-                         # Fallback: Clean markdown fences and retry JSON validation
+                             logger.debug(f"[DEBUG] JSON extraction/validation failed ('{extraction_error}'). Trying fence cleaning.")
+                         # Last resort: strip any leading/trailing code fences and retry
                          cleaned_result_str = raw_result.strip()
                          if cleaned_result_str.startswith("```json"):
                              cleaned_result_str = cleaned_result_str[7:]
@@ -1177,15 +1566,13 @@ def llm_invoke(
                          if cleaned_result_str.endswith("```"):
                              cleaned_result_str = cleaned_result_str[:-3]
                          cleaned_result_str = cleaned_result_str.strip()
-                         # Check again if it looks like JSON before parsing
                          if cleaned_result_str.startswith('{') and cleaned_result_str.endswith('}'):
                              if verbose:
-                                 logger.debug(f"[DEBUG] Attempting parse after cleaning markdown fences. Cleaned string: '{cleaned_result_str}'")
-                             json_string_to_parse = cleaned_result_str # Update string for error reporting
+                                 logger.debug(f"[DEBUG] Attempting parse after generic fence cleaning. Cleaned string: '{cleaned_result_str}'")
+                             json_string_to_parse = cleaned_result_str
                              parsed_result = output_pydantic.model_validate_json(json_string_to_parse)
                          else:
-                             # If still doesn't look like JSON, raise error
-                             raise ValueError("Content after cleaning markdown doesn't look like JSON")
+                             raise ValueError("Content after cleaning doesn't look like JSON")


                  # Check if any parsing attempt succeeded
@@ -1319,7 +1706,7 @@ if __name__ == "__main__":
          response = llm_invoke(
              prompt="Tell me a short joke about {topic}.",
              input_json={"topic": "programmers"},
-             strength=0.5, # Use base model (gpt-4.1-nano)
+             strength=0.5, # Use base model (gpt-5-nano)
              temperature=0.7,
              verbose=True
          )
@@ -1400,7 +1787,7 @@ if __name__ == "__main__":
              {"role": "system", "content": "You are a helpful assistant."},
              {"role": "user", "content": "What is the capital of France?"}
          ]
-         # Strength 0.5 should select gpt-4.1-nano
+         # Strength 0.5 should select gpt-5-nano
          response_messages = llm_invoke(
              messages=custom_messages,
              strength=0.5,