pdd-cli 0.0.49__py3-none-any.whl → 0.0.50__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this release of pdd-cli has been flagged as potentially problematic.

pdd/llm_invoke.py CHANGED
@@ -26,6 +26,16 @@ litellm_logger = logging.getLogger("litellm")
  litellm_log_level = os.getenv("LITELLM_LOG_LEVEL", "WARNING" if PRODUCTION_MODE else "INFO")
  litellm_logger.setLevel(getattr(logging, litellm_log_level, logging.WARNING))

+ # Ensure LiteLLM drops provider-unsupported params instead of erroring
+ # This prevents failures like UnsupportedParamsError for OpenAI gpt-5-* when
+ # passing generic params (e.g., reasoning_effort) not accepted by that API path.
+ try:
+     _drop_params_env = os.getenv("LITELLM_DROP_PARAMS", "true")
+     litellm.drop_params = str(_drop_params_env).lower() in ("1", "true", "yes", "on")
+ except Exception:
+     # Be conservative: default to True even if env parsing fails
+     litellm.drop_params = True
+
  # Add a console handler if none exists
  if not logger.handlers:
      console_handler = logging.StreamHandler()
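The LITELLM_DROP_PARAMS toggle above reduces to a simple truthy check; a standalone sketch of the same parsing, shown for illustration only (it mirrors the released code rather than importing it):

    import os

    def drop_params_enabled() -> bool:
        # "1", "true", "yes", "on" (any casing) enable dropping unsupported params;
        # anything else, e.g. "0" or "false", disables it.
        value = os.getenv("LITELLM_DROP_PARAMS", "true")
        return str(value).lower() in ("1", "true", "yes", "on")

    os.environ["LITELLM_DROP_PARAMS"] = "false"
    print(drop_params_enabled())  # False
    os.environ["LITELLM_DROP_PARAMS"] = "YES"
    print(drop_params_enabled())  # True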
@@ -71,7 +81,7 @@ import json
  # from rich import print as rprint # Replaced with logger
  from dotenv import load_dotenv
  from pathlib import Path
- from typing import Optional, Dict, List, Any, Type, Union
+ from typing import Optional, Dict, List, Any, Type, Union, Tuple
  from pydantic import BaseModel, ValidationError
  import openai # Import openai for exception handling as LiteLLM maps to its types
  from langchain_core.prompts import PromptTemplate
@@ -114,6 +124,22 @@ def _is_wsl_environment() -> bool:
      return False


+ def _openai_responses_supports_response_format() -> bool:
+     """Detect if current OpenAI Python SDK supports `response_format` on Responses.create.
+
+     Returns True if the installed SDK exposes a `response_format` parameter on
+     `openai.resources.responses.Responses.create`, else False. This avoids
+     sending unsupported kwargs and triggering TypeError at runtime.
+     """
+     try:
+         import inspect
+         from openai.resources.responses import Responses
+         sig = inspect.signature(Responses.create)
+         return "response_format" in sig.parameters
+     except Exception:
+         return False
+
+
  def _get_environment_info() -> Dict[str, str]:
      """
      Get environment information for debugging and error reporting.
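The detection above relies on inspect.signature; here is the same idea applied to a plain local function so the sketch runs without the OpenAI SDK installed (the create function below is hypothetical, not part of the package):

    import inspect

    def create(model, input, response_format=None):
        return {"model": model, "input": input}

    # True only if the callable accepts a `response_format` keyword.
    supports = "response_format" in inspect.signature(create).parameters
    print(supports)  # True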
@@ -188,24 +214,80 @@ if PROJECT_ROOT is None: # Fallback to CWD if no method succeeded

  ENV_PATH = PROJECT_ROOT / ".env"
  # --- Determine LLM_MODEL_CSV_PATH ---
- # Prioritize ~/.pdd/llm_model.csv
+ # Prioritize ~/.pdd/llm_model.csv, then a project .pdd from the current CWD,
+ # then PROJECT_ROOT (which may be set from PDD_PATH), else fall back to package.
  user_pdd_dir = Path.home() / ".pdd"
  user_model_csv_path = user_pdd_dir / "llm_model.csv"

- # Check in order: user-specific, project-specific, package default
+ def _detect_project_root_from_cwd(max_levels: int = 5) -> Path:
+     """Search upwards from the current working directory for common project markers.
+
+     This intentionally ignores PDD_PATH to support CLI invocations that set
+     PDD_PATH to the installed package location. We want to honor a real project
+     checkout's .pdd/llm_model.csv when running inside it.
+     """
+     try:
+         current_dir = Path.cwd().resolve()
+         for _ in range(max_levels):
+             if (
+                 (current_dir / ".git").exists()
+                 or (current_dir / "pyproject.toml").exists()
+                 or (current_dir / "data").is_dir()
+                 or (current_dir / ".env").exists()
+             ):
+                 return current_dir
+             parent = current_dir.parent
+             if parent == current_dir:
+                 break
+             current_dir = parent
+     except Exception:
+         pass
+     return Path.cwd().resolve()
+
+ # Resolve candidates
+ project_root_from_cwd = _detect_project_root_from_cwd()
+ project_csv_from_cwd = project_root_from_cwd / ".pdd" / "llm_model.csv"
+ project_csv_from_env = PROJECT_ROOT / ".pdd" / "llm_model.csv"
+
+ # Detect whether PDD_PATH points to the installed package directory. If so,
+ # don't prioritize it over the real project from CWD.
+ try:
+     _installed_pkg_root = importlib.resources.files('pdd')
+     # importlib.resources.files returns a Traversable; get a FS path string if possible
+     try:
+         _installed_pkg_root_path = Path(str(_installed_pkg_root))
+     except Exception:
+         _installed_pkg_root_path = None
+ except Exception:
+     _installed_pkg_root_path = None
+
+ def _is_env_path_package_dir(env_path: Path) -> bool:
+     try:
+         if _installed_pkg_root_path is None:
+             return False
+         env_path = env_path.resolve()
+         pkg_path = _installed_pkg_root_path.resolve()
+         # Treat equal or subpath as package dir
+         return env_path == pkg_path or str(env_path).startswith(str(pkg_path))
+     except Exception:
+         return False
+
+ # Selection order
  if user_model_csv_path.is_file():
      LLM_MODEL_CSV_PATH = user_model_csv_path
      logger.info(f"Using user-specific LLM model CSV: {LLM_MODEL_CSV_PATH}")
+ elif (not _is_env_path_package_dir(PROJECT_ROOT)) and project_csv_from_env.is_file():
+     # Honor an explicitly-set PDD_PATH pointing to a real project directory
+     LLM_MODEL_CSV_PATH = project_csv_from_env
+     logger.info(f"Using project-specific LLM model CSV (from PDD_PATH): {LLM_MODEL_CSV_PATH}")
+ elif project_csv_from_cwd.is_file():
+     # Otherwise, prefer the project relative to the current working directory
+     LLM_MODEL_CSV_PATH = project_csv_from_cwd
+     logger.info(f"Using project-specific LLM model CSV (from CWD): {LLM_MODEL_CSV_PATH}")
  else:
-     # Check project-specific location (.pdd directory)
-     project_model_csv_path = PROJECT_ROOT / ".pdd" / "llm_model.csv"
-     if project_model_csv_path.is_file():
-         LLM_MODEL_CSV_PATH = project_model_csv_path
-         logger.info(f"Using project-specific LLM model CSV: {LLM_MODEL_CSV_PATH}")
-     else:
-         # Neither exists, we'll use a marker path that _load_model_data will handle
-         LLM_MODEL_CSV_PATH = None
-         logger.info("No local LLM model CSV found, will use package default")
+     # Neither exists, we'll use a marker path that _load_model_data will handle
+     LLM_MODEL_CSV_PATH = None
+     logger.info("No local LLM model CSV found, will use package default")
  # ---------------------------------

  # Load environment variables from .env file
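The selection order above reduces to "first existing file wins" over an ordered candidate list; a minimal sketch of that pattern (the candidate paths are simplified, and the released code additionally distinguishes a PDD_PATH that points at the installed package from a real project checkout):

    from pathlib import Path
    from typing import Optional

    def pick_model_csv(project_root: Path) -> Optional[Path]:
        candidates = [
            Path.home() / ".pdd" / "llm_model.csv",   # user-level override
            project_root / ".pdd" / "llm_model.csv",  # project-level override
        ]
        for candidate in candidates:
            if candidate.is_file():
                return candidate
        return None  # caller falls back to the packaged default CSV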
@@ -333,29 +415,49 @@ def _litellm_success_callback(
          cost_val = litellm.completion_cost(completion_response=completion_response)
          calculated_cost = cost_val if cost_val is not None else 0.0
      except Exception as e1:
-         # Attempt 2: If response object failed (e.g., missing provider in model name),
-         # try again using explicit model from kwargs and tokens from usage.
-         # This is often needed for batch completion items.
+         # Attempt 2: Compute via tokens and model mapping. If LiteLLM mapping is
+         # missing or API differs, fall back to CSV rates in _MODEL_RATE_MAP.
          logger.debug(f"Attempting cost calculation with fallback method: {e1}")
          try:
-             model_name = kwargs.get("model") # Get original model name from input kwargs
+             model_name = kwargs.get("model")
              if model_name and usage:
-                 prompt_tokens = getattr(usage, 'prompt_tokens', 0)
-                 completion_tokens = getattr(usage, 'completion_tokens', 0)
-                 cost_val = litellm.completion_cost(
-                     model=model_name,
-                     prompt_tokens=prompt_tokens,
-                     completion_tokens=completion_tokens
-                 )
-                 calculated_cost = cost_val if cost_val is not None else 0.0
+                 in_tok = getattr(usage, 'prompt_tokens', None)
+                 out_tok = getattr(usage, 'completion_tokens', None)
+                 # Some providers may use 'input_tokens'/'output_tokens'
+                 if in_tok is None:
+                     in_tok = getattr(usage, 'input_tokens', 0)
+                 if out_tok is None:
+                     out_tok = getattr(usage, 'output_tokens', 0)
+
+                 # Try LiteLLM helper (arg names vary across versions)
+                 try:
+                     cost_val = litellm.completion_cost(
+                         model=model_name,
+                         prompt_tokens=in_tok,
+                         completion_tokens=out_tok,
+                     )
+                     calculated_cost = cost_val if cost_val is not None else 0.0
+                 except TypeError:
+                     # Older/newer versions may require input/output token names
+                     try:
+                         cost_val = litellm.completion_cost(
+                             model=model_name,
+                             input_tokens=in_tok,
+                             output_tokens=out_tok,
+                         )
+                         calculated_cost = cost_val if cost_val is not None else 0.0
+                     except Exception as e3:
+                         # Final fallback: compute using CSV rates
+                         rates = _MODEL_RATE_MAP.get(str(model_name))
+                         if rates is not None:
+                             in_rate, out_rate = rates
+                             calculated_cost = (float(in_tok or 0) * in_rate + float(out_tok or 0) * out_rate) / 1_000_000.0
+                         else:
+                             calculated_cost = 0.0
+                         logger.debug(f"Cost calculation failed with LiteLLM token API; used CSV rates if available. Detail: {e3}")
              else:
-                 # If we can't get model name or usage, fallback to 0
                  calculated_cost = 0.0
-             # Optional: Log the original error e1 if needed
-             # logger.warning(f"[Callback WARN] Failed to calculate cost with response object ({e1}) and fallback failed.")
          except Exception as e2:
-             # Optional: Log secondary error e2 if needed
-             # logger.warning(f"[Callback WARN] Failed to calculate cost with fallback method: {e2}")
              calculated_cost = 0.0 # Default to 0 on any error
              logger.debug(f"Cost calculation failed with fallback method: {e2}")

@@ -373,6 +475,23 @@ def _litellm_success_callback(
  # Register the callback with LiteLLM
  litellm.success_callback = [_litellm_success_callback]

+ # --- Cost Mapping Support (CSV Rates) ---
+ # Populate from CSV inside llm_invoke; used by callback fallback
+ _MODEL_RATE_MAP: Dict[str, Tuple[float, float]] = {}
+
+ def _set_model_rate_map(df: pd.DataFrame) -> None:
+     global _MODEL_RATE_MAP
+     try:
+         _MODEL_RATE_MAP = {
+             str(row['model']): (
+                 float(row['input']) if pd.notna(row['input']) else 0.0,
+                 float(row['output']) if pd.notna(row['output']) else 0.0,
+             )
+             for _, row in df.iterrows()
+         }
+     except Exception:
+         _MODEL_RATE_MAP = {}
+
  # --- Helper Functions ---

  def _load_model_data(csv_path: Optional[Path]) -> pd.DataFrame:
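The CSV 'input'/'output' columns feed the rate map as dollars per million tokens, so the callback's last-resort cost is a linear combination; a worked example with a made-up model name and rates:

    MODEL_RATE_MAP = {"openai/gpt-5-mini": (0.25, 2.00)}  # ($ per 1M input tok, $ per 1M output tok)

    def estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float:
        in_rate, out_rate = MODEL_RATE_MAP.get(model, (0.0, 0.0))
        return (input_tokens * in_rate + output_tokens * out_rate) / 1_000_000.0

    # 10,000 prompt tokens and 2,000 completion tokens:
    print(f"${estimate_cost('openai/gpt-5-mini', 10_000, 2_000):.6f}")  # $0.006500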
@@ -482,11 +601,26 @@ def _select_model_candidates(
          # Try finding base model in the *original* df in case it was filtered out
          original_base = model_df[model_df['model'] == base_model_name]
          if not original_base.empty:
-             raise ValueError(f"Base model '{base_model_name}' found in CSV but requires API key '{original_base.iloc[0]['api_key']}' which might be missing or invalid configuration.")
-         else:
-             raise ValueError(f"Specified base model '{base_model_name}' not found in the LLM model CSV.")
-
-     base_model = base_model_row.iloc[0]
+             # Base exists but may be misconfigured (e.g., missing API key). Keep erroring loudly.
+             raise ValueError(
+                 f"Base model '{base_model_name}' found in CSV but requires API key '{original_base.iloc[0]['api_key']}' which might be missing or invalid configuration."
+             )
+         # Option A': Soft fallback – choose a reasonable surrogate base and continue
+         # Strategy (simplified and deterministic): pick the first available model
+         # from the CSV as the surrogate base. This mirrors typical CSV ordering
+         # expectations and keeps behavior predictable across environments.
+         try:
+             base_model = available_df.iloc[0]
+             logger.warning(
+                 f"Base model '{base_model_name}' not found in CSV. Falling back to surrogate base '{base_model['model']}' (Option A')."
+             )
+         except Exception:
+             # If any unexpected error occurs during fallback, raise a clear error
+             raise ValueError(
+                 f"Specified base model '{base_model_name}' not found and fallback selection failed. Check your LLM model CSV."
+             )
+     else:
+         base_model = base_model_row.iloc[0]

      # 3. Determine Target and Sort
      candidates = []
@@ -497,9 +631,10 @@
          # Sort remaining by ELO descending as fallback
          available_df['sort_metric'] = -available_df['coding_arena_elo'] # Negative for descending sort
          candidates = available_df.sort_values(by='sort_metric').to_dict('records')
-         # Ensure base model is first if it exists
-         if any(c['model'] == base_model_name for c in candidates):
-             candidates.sort(key=lambda x: 0 if x['model'] == base_model_name else 1)
+         # Ensure effective base model is first if it exists (supports surrogate base)
+         effective_base_name = str(base_model['model']) if isinstance(base_model, pd.Series) else base_model_name
+         if any(c['model'] == effective_base_name for c in candidates):
+             candidates.sort(key=lambda x: 0 if x['model'] == effective_base_name else 1)
          target_metric_value = f"Base Model ELO: {base_model['coding_arena_elo']}"

      elif strength < 0.5:
@@ -716,6 +851,49 @@ def _format_messages(prompt: str, input_data: Union[Dict[str, Any], List[Dict[st
      except Exception as e:
          raise ValueError(f"Error formatting prompt: {e}") from e

+ # --- JSON Extraction Helpers ---
+ import re
+
+ def _extract_fenced_json_block(text: str) -> Optional[str]:
+     try:
+         m = re.search(r"```json\s*(\{[\s\S]*?\})\s*```", text, flags=re.IGNORECASE)
+         if m:
+             return m.group(1)
+         return None
+     except Exception:
+         return None
+
+ def _extract_balanced_json_objects(text: str) -> List[str]:
+     results: List[str] = []
+     brace_stack = 0
+     start_idx = -1
+     in_string = False
+     escape = False
+     for i, ch in enumerate(text):
+         if in_string:
+             if escape:
+                 escape = False
+             elif ch == '\\':
+                 escape = True
+             elif ch == '"':
+                 in_string = False
+             continue
+         else:
+             if ch == '"':
+                 in_string = True
+                 continue
+         if ch == '{':
+             if brace_stack == 0:
+                 start_idx = i
+             brace_stack += 1
+         elif ch == '}':
+             if brace_stack > 0:
+                 brace_stack -= 1
+                 if brace_stack == 0 and start_idx != -1:
+                     results.append(text[start_idx:i+1])
+                     start_idx = -1
+     return results
+
  # --- Main Function ---

  def llm_invoke(
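Assuming the helpers behave as reconstructed above, extraction prefers a fenced ```json block and otherwise scans for balanced top-level objects while ignoring braces inside string literals. Expected behavior on a typical chatty reply (illustrative usage, relying on the module-private helpers defined in this hunk):

    reply = 'Sure! Here it is: {"name": "Ada", "note": "mind the {braces} here"} Anything else?'
    assert _extract_fenced_json_block(reply) is None  # no ```json fence present
    assert _extract_balanced_json_objects(reply) == ['{"name": "Ada", "note": "mind the {braces} here"}']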
@@ -863,6 +1041,12 @@ def llm_invoke(
      response_format = None
      time_kwargs = {}

+     # Update global rate map for callback cost fallback
+     try:
+         _set_model_rate_map(model_df)
+     except Exception:
+         pass
+
      for model_info in candidate_models:
          model_name_litellm = model_info['model']
          api_key_name = model_info.get('api_key')
@@ -955,11 +1139,33 @@ def llm_invoke(
          elif verbose: # No api_key_name_from_csv in CSV for this model
              logger.info(f"[INFO] No API key name specified in CSV for model '{model_name_litellm}'. LiteLLM will use its default authentication mechanisms (e.g., standard provider env vars or ADC for Vertex AI).")

-         # Add api_base if present in CSV
+         # Add base_url/api_base override if present in CSV
          api_base = model_info.get('base_url')
          if pd.notna(api_base) and api_base:
+             # LiteLLM prefers `base_url`; some older paths accept `api_base`.
+             litellm_kwargs["base_url"] = str(api_base)
              litellm_kwargs["api_base"] = str(api_base)

+         # Provider-specific defaults (e.g., LM Studio)
+         model_name_lower = str(model_name_litellm).lower()
+         provider_lower_for_model = provider.lower()
+         is_lm_studio = model_name_lower.startswith('lm_studio/') or provider_lower_for_model == 'lm_studio'
+         if is_lm_studio:
+             # Ensure base_url is set (fallback to env LM_STUDIO_API_BASE or localhost)
+             if not litellm_kwargs.get("base_url"):
+                 lm_studio_base = os.getenv("LM_STUDIO_API_BASE", "http://localhost:1234/v1")
+                 litellm_kwargs["base_url"] = lm_studio_base
+                 litellm_kwargs["api_base"] = lm_studio_base
+                 if verbose:
+                     logger.info(f"[INFO] Using LM Studio base_url: {lm_studio_base}")
+
+             # Ensure a non-empty api_key; LM Studio accepts any non-empty token (e.g., 'lm-studio')
+             if not litellm_kwargs.get("api_key"):
+                 lm_studio_key = os.getenv("LM_STUDIO_API_KEY") or "lm-studio"
+                 litellm_kwargs["api_key"] = lm_studio_key
+                 if verbose:
+                     logger.info("[INFO] Using LM Studio api_key placeholder (set LM_STUDIO_API_KEY to customize).")
+
          # Handle Structured Output (JSON Mode / Pydantic)
          if output_pydantic:
              # Check if model supports structured output based on CSV flag or LiteLLM check
@@ -1017,11 +1223,32 @@ def llm_invoke(
                      effort = "high"
                  elif time > 0.3:
                      effort = "medium"
-                 # Use the common 'reasoning_effort' param LiteLLM provides
-                 litellm_kwargs["reasoning_effort"] = effort
-                 time_kwargs["reasoning_effort"] = effort
-                 if verbose:
-                     logger.info(f"[INFO] Requesting reasoning_effort='{effort}' (effort type) for {model_name_litellm} based on time={time}")
+
+                 # Map effort parameter per-provider/model family
+                 model_lower = str(model_name_litellm).lower()
+                 provider_lower = str(provider).lower()
+
+                 if provider_lower == 'openai' and model_lower.startswith('gpt-5'):
+                     # OpenAI 5-series uses Responses API with nested 'reasoning'
+                     reasoning_obj = {"effort": effort, "summary": "auto"}
+                     litellm_kwargs["reasoning"] = reasoning_obj
+                     time_kwargs["reasoning"] = reasoning_obj
+                     if verbose:
+                         logger.info(f"[INFO] Requesting OpenAI reasoning.effort='{effort}' for {model_name_litellm} (Responses API)")
+
+                 elif provider_lower == 'openai' and model_lower.startswith('o') and 'mini' not in model_lower:
+                     # Historical o* models may use LiteLLM's generic reasoning_effort param
+                     litellm_kwargs["reasoning_effort"] = effort
+                     time_kwargs["reasoning_effort"] = effort
+                     if verbose:
+                         logger.info(f"[INFO] Requesting reasoning_effort='{effort}' for {model_name_litellm}")
+
+                 else:
+                     # Fallback to LiteLLM generic param when supported by provider adapter
+                     litellm_kwargs["reasoning_effort"] = effort
+                     time_kwargs["reasoning_effort"] = effort
+                     if verbose:
+                         logger.info(f"[INFO] Requesting generic reasoning_effort='{effort}' for {model_name_litellm}")

              elif reasoning_type == 'none':
                  if verbose:
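The parameter-shaping step in the hunk above can be read as a pure function of provider, model, and effort; a sketch of just that mapping (the o-series branch produces the same flat parameter as the default, so it is folded in here, and the effort string itself is derived earlier from the `time` argument):

    def reasoning_kwargs(provider: str, model: str, effort: str) -> dict:
        model_lower, provider_lower = model.lower(), provider.lower()
        if provider_lower == "openai" and model_lower.startswith("gpt-5"):
            # Responses API expects a nested reasoning object
            return {"reasoning": {"effort": effort, "summary": "auto"}}
        # Other adapters take LiteLLM's flat generic parameter
        return {"reasoning_effort": effort}

    print(reasoning_kwargs("openai", "gpt-5-mini", "medium"))
    # {'reasoning': {'effort': 'medium', 'summary': 'auto'}}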
@@ -1053,6 +1280,142 @@ def llm_invoke(
              logger.debug("NOT ENABLING CACHING: litellm.cache is None at call time")


+         # Route OpenAI gpt-5* models through Responses API to support 'reasoning'
+         model_lower_for_call = str(model_name_litellm).lower()
+         provider_lower_for_call = str(provider).lower()
+
+         if (
+             not use_batch_mode
+             and provider_lower_for_call == 'openai'
+             and model_lower_for_call.startswith('gpt-5')
+         ):
+             if verbose:
+                 logger.info(f"[INFO] Calling OpenAI Responses API for {model_name_litellm}...")
+             try:
+                 # Build input text from messages
+                 if isinstance(formatted_messages, list) and formatted_messages and isinstance(formatted_messages[0], dict):
+                     input_text = "\n\n".join(f"{m.get('role','user')}: {m.get('content','')}" for m in formatted_messages)
+                 else:
+                     # Fallback: string cast
+                     input_text = str(formatted_messages)
+
+                 # Derive effort mapping already computed in time_kwargs
+                 reasoning_param = time_kwargs.get("reasoning")
+
+                 # Optional text settings; keep simple
+                 text_block = {"format": {"type": "text"}}
+
+                 # If structured output requested, attempt JSON schema via Pydantic
+                 # GPT-5 Responses API does not support temperature; omit it here.
+                 responses_kwargs = {
+                     "model": model_name_litellm,
+                     "input": input_text,
+                     "text": text_block,
+                 }
+                 if verbose and temperature not in (None, 0, 0.0):
+                     logger.info("[INFO] Skipping 'temperature' for OpenAI GPT-5 Responses call (unsupported by API).")
+                 if reasoning_param is not None:
+                     responses_kwargs["reasoning"] = reasoning_param
+
+                 if output_pydantic:
+                     try:
+                         schema = output_pydantic.model_json_schema()
+                         if _openai_responses_supports_response_format():
+                             responses_kwargs["response_format"] = {
+                                 "type": "json_schema",
+                                 "json_schema": {
+                                     "name": output_pydantic.__name__,
+                                     "schema": schema,
+                                     "strict": True,
+                                 },
+                             }
+                             # When enforcing JSON schema, omit text formatting
+                             responses_kwargs.pop("text", None)
+                         else:
+                             if verbose:
+                                 logger.info("[INFO] OpenAI SDK lacks Responses.response_format; will validate JSON client-side with Pydantic.")
+                     except Exception as schema_e:
+                         logger.warning(f"[WARN] Failed to derive JSON schema from Pydantic: {schema_e}. Proceeding without structured response_format.")
+
+                 # Initialize OpenAI client with explicit key if provided
+                 try:
+                     from openai import OpenAI as _OpenAIClient
+                 except Exception:
+                     _OpenAIClient = None
+                 if _OpenAIClient is None:
+                     raise RuntimeError("OpenAI SDK not available to call Responses API.")
+
+                 api_key_to_use = litellm_kwargs.get("api_key") or os.getenv("OPENAI_API_KEY")
+                 client = _OpenAIClient(api_key=api_key_to_use) if api_key_to_use else _OpenAIClient()
+
+                 # Make the Responses API call, with graceful fallback if SDK
+                 # doesn't support certain newer kwargs (e.g., response_format)
+                 try:
+                     resp = client.responses.create(**responses_kwargs)
+                 except TypeError as te:
+                     msg = str(te)
+                     if 'response_format' in responses_kwargs and ('unexpected keyword argument' in msg or 'got an unexpected keyword argument' in msg):
+                         logger.warning("[WARN] OpenAI SDK doesn't support response_format; retrying without it.")
+                         responses_kwargs.pop('response_format', None)
+                         resp = client.responses.create(**responses_kwargs)
+                     else:
+                         raise
+
+                 # Extract text result
+                 result_text = getattr(resp, "output_text", None)
+                 if result_text is None:
+                     try:
+                         # Fallback parse
+                         outputs = getattr(resp, "output", []) or getattr(resp, "outputs", [])
+                         if outputs:
+                             first = outputs[0]
+                             content = getattr(first, "content", [])
+                             if content and hasattr(content[0], "text"):
+                                 result_text = content[0].text
+                     except Exception:
+                         result_text = None
+
+                 # Calculate cost using usage + CSV rates
+                 usage = getattr(resp, "usage", None)
+                 total_cost = 0.0
+                 if usage is not None:
+                     in_tok = getattr(usage, "input_tokens", 0) or 0
+                     out_tok = getattr(usage, "output_tokens", 0) or 0
+                     in_rate = model_info.get('input', 0.0) or 0.0
+                     out_rate = model_info.get('output', 0.0) or 0.0
+                     total_cost = (in_tok * in_rate + out_tok * out_rate) / 1_000_000.0
+
+                 final_result = None
+                 if output_pydantic and result_text:
+                     try:
+                         final_result = output_pydantic.model_validate_json(result_text)
+                     except Exception as e:
+                         logger.error(f"[ERROR] Pydantic parse failed on Responses output: {e}")
+                         final_result = result_text
+                 else:
+                     final_result = result_text
+
+                 if verbose:
+                     logger.info(f"[RESULT] Model Used: {model_name_litellm}")
+                     logger.info(f"[RESULT] Total Cost (estimated): ${total_cost:.6g}")
+
+                 return {
+                     'result': final_result,
+                     'cost': total_cost,
+                     'model_name': model_name_litellm,
+                     'thinking_output': None,
+                 }
+             except Exception as e:
+                 last_exception = e
+                 logger.error(f"[ERROR] OpenAI Responses call failed for {model_name_litellm}: {e}")
+                 # Remove 'reasoning' key to avoid OpenAI Chat API unknown param errors
+                 if "reasoning" in litellm_kwargs:
+                     try:
+                         litellm_kwargs.pop("reasoning", None)
+                     except Exception:
+                         pass
+                 # Fall through to LiteLLM path as a fallback
+
          if use_batch_mode:
              if verbose:
                  logger.info(f"[INFO] Calling litellm.batch_completion for {model_name_litellm}...")
@@ -1119,7 +1482,6 @@ def llm_invoke(
                      messages=retry_messages,
                      temperature=temperature,
                      response_format=response_format,
-                     max_completion_tokens=max_tokens,
                      **time_kwargs
                  )
                  # Re-enable cache - restore original configured cache (restore to original state, even if None)
@@ -1163,26 +1525,39 @@ def llm_invoke(
              elif isinstance(raw_result, str):
                  json_string_to_parse = raw_result # Start with the raw string
                  try:
-                     # Look for first { and last }
-                     start_brace = json_string_to_parse.find('{')
-                     end_brace = json_string_to_parse.rfind('}')
-                     if start_brace != -1 and end_brace != -1 and end_brace > start_brace:
-                         potential_json = json_string_to_parse[start_brace:end_brace+1]
-                         # Basic check if it looks like JSON
-                         if potential_json.strip().startswith('{') and potential_json.strip().endswith('}'):
-                             if verbose:
-                                 logger.debug(f"[DEBUG] Attempting to parse extracted JSON block: '{potential_json}'")
-                             parsed_result = output_pydantic.model_validate_json(potential_json)
-                         else:
-                             # If block extraction fails, try cleaning markdown next
-                             raise ValueError("Extracted block doesn't look like JSON")
+                     # 1) Prefer fenced ```json blocks
+                     fenced = _extract_fenced_json_block(raw_result)
+                     candidates: List[str] = []
+                     if fenced:
+                         candidates.append(fenced)
                      else:
-                         # If no braces found, try cleaning markdown next
-                         raise ValueError("Could not find enclosing {}")
+                         # 2) Fall back to scanning for balanced JSON objects
+                         candidates.extend(_extract_balanced_json_objects(raw_result))
+
+                     if not candidates:
+                         raise ValueError("No JSON-like content found")
+
+                     parse_err: Optional[Exception] = None
+                     for cand in candidates:
+                         try:
+                             if verbose:
+                                 logger.debug(f"[DEBUG] Attempting to parse candidate JSON block: {cand}")
+                             parsed_result = output_pydantic.model_validate_json(cand)
+                             json_string_to_parse = cand
+                             parse_err = None
+                             break
+                         except (json.JSONDecodeError, ValidationError, ValueError) as pe:
+                             parse_err = pe
+
+                     if parsed_result is None:
+                         # If none of the candidates parsed, raise last error
+                         if parse_err is not None:
+                             raise parse_err
+                         raise ValueError("Unable to parse any JSON candidates")
                  except (json.JSONDecodeError, ValidationError, ValueError) as extraction_error:
                      if verbose:
-                         logger.debug(f"[DEBUG] JSON block extraction/validation failed ('{extraction_error}'). Trying markdown cleaning.")
-                     # Fallback: Clean markdown fences and retry JSON validation
+                         logger.debug(f"[DEBUG] JSON extraction/validation failed ('{extraction_error}'). Trying fence cleaning.")
+                     # Last resort: strip any leading/trailing code fences and retry
                      cleaned_result_str = raw_result.strip()
                      if cleaned_result_str.startswith("```json"):
                          cleaned_result_str = cleaned_result_str[7:]
@@ -1191,15 +1566,13 @@ def llm_invoke(
                      if cleaned_result_str.endswith("```"):
                          cleaned_result_str = cleaned_result_str[:-3]
                      cleaned_result_str = cleaned_result_str.strip()
-                     # Check again if it looks like JSON before parsing
                      if cleaned_result_str.startswith('{') and cleaned_result_str.endswith('}'):
                          if verbose:
-                             logger.debug(f"[DEBUG] Attempting parse after cleaning markdown fences. Cleaned string: '{cleaned_result_str}'")
-                         json_string_to_parse = cleaned_result_str # Update string for error reporting
+                             logger.debug(f"[DEBUG] Attempting parse after generic fence cleaning. Cleaned string: '{cleaned_result_str}'")
+                         json_string_to_parse = cleaned_result_str
                          parsed_result = output_pydantic.model_validate_json(json_string_to_parse)
                      else:
-                         # If still doesn't look like JSON, raise error
-                         raise ValueError("Content after cleaning markdown doesn't look like JSON")
+                         raise ValueError("Content after cleaning doesn't look like JSON")


              # Check if any parsing attempt succeeded
@@ -1333,7 +1706,7 @@ if __name__ == "__main__":
      response = llm_invoke(
          prompt="Tell me a short joke about {topic}.",
          input_json={"topic": "programmers"},
-         strength=0.5, # Use base model (gpt-4.1-nano)
+         strength=0.5, # Use base model (gpt-5-nano)
          temperature=0.7,
          verbose=True
      )
@@ -1414,7 +1787,7 @@ if __name__ == "__main__":
          {"role": "system", "content": "You are a helpful assistant."},
          {"role": "user", "content": "What is the capital of France?"}
      ]
-     # Strength 0.5 should select gpt-4.1-nano
+     # Strength 0.5 should select gpt-5-nano
      response_messages = llm_invoke(
          messages=custom_messages,
          strength=0.5,