pdd-cli 0.0.42__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. pdd/__init__.py +4 -4
  2. pdd/agentic_common.py +863 -0
  3. pdd/agentic_crash.py +534 -0
  4. pdd/agentic_fix.py +1179 -0
  5. pdd/agentic_langtest.py +162 -0
  6. pdd/agentic_update.py +370 -0
  7. pdd/agentic_verify.py +183 -0
  8. pdd/auto_deps_main.py +15 -5
  9. pdd/auto_include.py +63 -5
  10. pdd/bug_main.py +3 -2
  11. pdd/bug_to_unit_test.py +2 -0
  12. pdd/change_main.py +11 -4
  13. pdd/cli.py +22 -1181
  14. pdd/cmd_test_main.py +80 -19
  15. pdd/code_generator.py +58 -18
  16. pdd/code_generator_main.py +672 -25
  17. pdd/commands/__init__.py +42 -0
  18. pdd/commands/analysis.py +248 -0
  19. pdd/commands/fix.py +140 -0
  20. pdd/commands/generate.py +257 -0
  21. pdd/commands/maintenance.py +174 -0
  22. pdd/commands/misc.py +79 -0
  23. pdd/commands/modify.py +230 -0
  24. pdd/commands/report.py +144 -0
  25. pdd/commands/templates.py +215 -0
  26. pdd/commands/utility.py +110 -0
  27. pdd/config_resolution.py +58 -0
  28. pdd/conflicts_main.py +8 -3
  29. pdd/construct_paths.py +281 -81
  30. pdd/context_generator.py +10 -2
  31. pdd/context_generator_main.py +113 -11
  32. pdd/continue_generation.py +47 -7
  33. pdd/core/__init__.py +0 -0
  34. pdd/core/cli.py +503 -0
  35. pdd/core/dump.py +554 -0
  36. pdd/core/errors.py +63 -0
  37. pdd/core/utils.py +90 -0
  38. pdd/crash_main.py +44 -11
  39. pdd/data/language_format.csv +71 -62
  40. pdd/data/llm_model.csv +20 -18
  41. pdd/detect_change_main.py +5 -4
  42. pdd/fix_code_loop.py +331 -77
  43. pdd/fix_error_loop.py +209 -60
  44. pdd/fix_errors_from_unit_tests.py +4 -3
  45. pdd/fix_main.py +75 -18
  46. pdd/fix_verification_errors.py +12 -100
  47. pdd/fix_verification_errors_loop.py +319 -272
  48. pdd/fix_verification_main.py +57 -17
  49. pdd/generate_output_paths.py +93 -10
  50. pdd/generate_test.py +16 -5
  51. pdd/get_jwt_token.py +48 -9
  52. pdd/get_run_command.py +73 -0
  53. pdd/get_test_command.py +68 -0
  54. pdd/git_update.py +70 -19
  55. pdd/increase_tests.py +7 -0
  56. pdd/incremental_code_generator.py +2 -2
  57. pdd/insert_includes.py +11 -3
  58. pdd/llm_invoke.py +1278 -110
  59. pdd/load_prompt_template.py +36 -10
  60. pdd/pdd_completion.fish +25 -2
  61. pdd/pdd_completion.sh +30 -4
  62. pdd/pdd_completion.zsh +79 -4
  63. pdd/postprocess.py +10 -3
  64. pdd/preprocess.py +228 -15
  65. pdd/preprocess_main.py +8 -5
  66. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  67. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  68. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  69. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  70. pdd/prompts/agentic_update_LLM.prompt +1071 -0
  71. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  72. pdd/prompts/auto_include_LLM.prompt +98 -101
  73. pdd/prompts/change_LLM.prompt +1 -3
  74. pdd/prompts/detect_change_LLM.prompt +562 -3
  75. pdd/prompts/example_generator_LLM.prompt +22 -1
  76. pdd/prompts/extract_code_LLM.prompt +5 -1
  77. pdd/prompts/extract_program_code_fix_LLM.prompt +14 -2
  78. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  79. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  80. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  81. pdd/prompts/fix_code_module_errors_LLM.prompt +16 -4
  82. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +6 -41
  83. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  84. pdd/prompts/generate_test_LLM.prompt +21 -6
  85. pdd/prompts/increase_tests_LLM.prompt +1 -2
  86. pdd/prompts/insert_includes_LLM.prompt +1181 -6
  87. pdd/prompts/split_LLM.prompt +1 -62
  88. pdd/prompts/trace_LLM.prompt +25 -22
  89. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  90. pdd/prompts/update_prompt_LLM.prompt +22 -1
  91. pdd/prompts/xml_convertor_LLM.prompt +3246 -7
  92. pdd/pytest_output.py +188 -21
  93. pdd/python_env_detector.py +151 -0
  94. pdd/render_mermaid.py +236 -0
  95. pdd/setup_tool.py +648 -0
  96. pdd/simple_math.py +2 -0
  97. pdd/split_main.py +3 -2
  98. pdd/summarize_directory.py +56 -7
  99. pdd/sync_determine_operation.py +918 -186
  100. pdd/sync_main.py +82 -32
  101. pdd/sync_orchestration.py +1456 -453
  102. pdd/sync_tui.py +848 -0
  103. pdd/template_registry.py +264 -0
  104. pdd/templates/architecture/architecture_json.prompt +242 -0
  105. pdd/templates/generic/generate_prompt.prompt +174 -0
  106. pdd/trace.py +168 -12
  107. pdd/trace_main.py +4 -3
  108. pdd/track_cost.py +151 -61
  109. pdd/unfinished_prompt.py +49 -3
  110. pdd/update_main.py +549 -67
  111. pdd/update_model_costs.py +2 -2
  112. pdd/update_prompt.py +19 -4
  113. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +20 -7
  114. pdd_cli-0.0.90.dist-info/RECORD +153 -0
  115. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
  116. pdd_cli-0.0.42.dist-info/RECORD +0 -115
  117. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
  118. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
  119. {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
pdd/llm_invoke.py CHANGED
@@ -5,6 +5,8 @@ import os
 import pandas as pd
 import litellm
 import logging # ADDED FOR DETAILED LOGGING
+import importlib.resources
+from litellm.caching.caching import Cache # Fix for LiteLLM v1.75.5+
 
 # --- Configure Standard Python Logging ---
 logger = logging.getLogger("pdd.llm_invoke")
@@ -24,6 +26,16 @@ litellm_logger = logging.getLogger("litellm")
24
26
  litellm_log_level = os.getenv("LITELLM_LOG_LEVEL", "WARNING" if PRODUCTION_MODE else "INFO")
25
27
  litellm_logger.setLevel(getattr(logging, litellm_log_level, logging.WARNING))
26
28
 
29
+ # Ensure LiteLLM drops provider-unsupported params instead of erroring
30
+ # This prevents failures like UnsupportedParamsError for OpenAI gpt-5-* when
31
+ # passing generic params (e.g., reasoning_effort) not accepted by that API path.
32
+ try:
33
+ _drop_params_env = os.getenv("LITELLM_DROP_PARAMS", "true")
34
+ litellm.drop_params = str(_drop_params_env).lower() in ("1", "true", "yes", "on")
35
+ except Exception:
36
+ # Be conservative: default to True even if env parsing fails
37
+ litellm.drop_params = True
38
+
27
39
  # Add a console handler if none exists
28
40
  if not logger.handlers:
29
41
  console_handler = logging.StreamHandler()
@@ -69,7 +81,7 @@ import json
 # from rich import print as rprint # Replaced with logger
 from dotenv import load_dotenv
 from pathlib import Path
-from typing import Optional, Dict, List, Any, Type, Union
+from typing import Optional, Dict, List, Any, Type, Union, Tuple
 from pydantic import BaseModel, ValidationError
 import openai # Import openai for exception handling as LiteLLM maps to its types
 from langchain_core.prompts import PromptTemplate
@@ -79,7 +91,11 @@ import time as time_module # Alias to avoid conflict with 'time' parameter
 from pdd import DEFAULT_LLM_MODEL
 
 # Opt-in to future pandas behavior regarding downcasting
-pd.set_option('future.no_silent_downcasting', True)
+try:
+    pd.set_option('future.no_silent_downcasting', True)
+except pd._config.config.OptionError:
+    # Skip if option doesn't exist in older pandas versions
+    pass
 
 
 def _is_wsl_environment() -> bool:
@@ -108,6 +124,22 @@ def _is_wsl_environment() -> bool:
     return False
 
 
+def _openai_responses_supports_response_format() -> bool:
+    """Detect if current OpenAI Python SDK supports `response_format` on Responses.create.
+
+    Returns True if the installed SDK exposes a `response_format` parameter on
+    `openai.resources.responses.Responses.create`, else False. This avoids
+    sending unsupported kwargs and triggering TypeError at runtime.
+    """
+    try:
+        import inspect
+        from openai.resources.responses import Responses
+        sig = inspect.signature(Responses.create)
+        return "response_format" in sig.parameters
+    except Exception:
+        return False
+
+
 def _get_environment_info() -> Dict[str, str]:
     """
     Get environment information for debugging and error reporting.
@@ -152,8 +184,8 @@ if PDD_PATH_ENV:
 
 if PROJECT_ROOT is None: # If PDD_PATH wasn't set or was invalid
     try:
-        # Start from the directory containing this script
-        current_dir = Path(__file__).resolve().parent
+        # Start from the current working directory (where user is running PDD)
+        current_dir = Path.cwd().resolve()
         # Look for project markers (e.g., .git, pyproject.toml, data/, .env)
         # Go up a maximum of 5 levels to prevent infinite loops
         for _ in range(5):
@@ -164,7 +196,7 @@ if PROJECT_ROOT is None: # If PDD_PATH wasn't set or was invalid
 
             if has_git or has_pyproject or has_data or has_dotenv:
                 PROJECT_ROOT = current_dir
-                logger.debug(f"Determined PROJECT_ROOT by marker search: {PROJECT_ROOT}")
+                logger.debug(f"Determined PROJECT_ROOT by marker search from CWD: {PROJECT_ROOT}")
                 break
 
             parent_dir = current_dir.parent
@@ -172,10 +204,8 @@ if PROJECT_ROOT is None: # If PDD_PATH wasn't set or was invalid
                 break
             current_dir = parent_dir
 
-    except NameError: # __file__ might not be defined (e.g., interactive session)
-        warnings.warn("__file__ not defined. Cannot automatically detect project root from script location.")
     except Exception as e: # Catch potential permission errors etc.
-        warnings.warn(f"Error during project root auto-detection: {e}")
+        warnings.warn(f"Error during project root auto-detection from current working directory: {e}")
 
 if PROJECT_ROOT is None: # Fallback to CWD if no method succeeded
     PROJECT_ROOT = Path.cwd().resolve()
@@ -184,16 +214,80 @@ if PROJECT_ROOT is None: # Fallback to CWD if no method succeeded
184
214
 
185
215
  ENV_PATH = PROJECT_ROOT / ".env"
186
216
  # --- Determine LLM_MODEL_CSV_PATH ---
187
- # Prioritize ~/.pdd/llm_model.csv
217
+ # Prioritize ~/.pdd/llm_model.csv, then a project .pdd from the current CWD,
218
+ # then PROJECT_ROOT (which may be set from PDD_PATH), else fall back to package.
188
219
  user_pdd_dir = Path.home() / ".pdd"
189
220
  user_model_csv_path = user_pdd_dir / "llm_model.csv"
190
221
 
222
+ def _detect_project_root_from_cwd(max_levels: int = 5) -> Path:
223
+ """Search upwards from the current working directory for common project markers.
224
+
225
+ This intentionally ignores PDD_PATH to support CLI invocations that set
226
+ PDD_PATH to the installed package location. We want to honor a real project
227
+ checkout's .pdd/llm_model.csv when running inside it.
228
+ """
229
+ try:
230
+ current_dir = Path.cwd().resolve()
231
+ for _ in range(max_levels):
232
+ if (
233
+ (current_dir / ".git").exists()
234
+ or (current_dir / "pyproject.toml").exists()
235
+ or (current_dir / "data").is_dir()
236
+ or (current_dir / ".env").exists()
237
+ ):
238
+ return current_dir
239
+ parent = current_dir.parent
240
+ if parent == current_dir:
241
+ break
242
+ current_dir = parent
243
+ except Exception:
244
+ pass
245
+ return Path.cwd().resolve()
246
+
247
+ # Resolve candidates
248
+ project_root_from_cwd = _detect_project_root_from_cwd()
249
+ project_csv_from_cwd = project_root_from_cwd / ".pdd" / "llm_model.csv"
250
+ project_csv_from_env = PROJECT_ROOT / ".pdd" / "llm_model.csv"
251
+
252
+ # Detect whether PDD_PATH points to the installed package directory. If so,
253
+ # don't prioritize it over the real project from CWD.
254
+ try:
255
+ _installed_pkg_root = importlib.resources.files('pdd')
256
+ # importlib.resources.files returns a Traversable; get a FS path string if possible
257
+ try:
258
+ _installed_pkg_root_path = Path(str(_installed_pkg_root))
259
+ except Exception:
260
+ _installed_pkg_root_path = None
261
+ except Exception:
262
+ _installed_pkg_root_path = None
263
+
264
+ def _is_env_path_package_dir(env_path: Path) -> bool:
265
+ try:
266
+ if _installed_pkg_root_path is None:
267
+ return False
268
+ env_path = env_path.resolve()
269
+ pkg_path = _installed_pkg_root_path.resolve()
270
+ # Treat equal or subpath as package dir
271
+ return env_path == pkg_path or str(env_path).startswith(str(pkg_path))
272
+ except Exception:
273
+ return False
274
+
275
+ # Selection order
191
276
  if user_model_csv_path.is_file():
192
277
  LLM_MODEL_CSV_PATH = user_model_csv_path
193
278
  logger.info(f"Using user-specific LLM model CSV: {LLM_MODEL_CSV_PATH}")
279
+ elif (not _is_env_path_package_dir(PROJECT_ROOT)) and project_csv_from_env.is_file():
280
+ # Honor an explicitly-set PDD_PATH pointing to a real project directory
281
+ LLM_MODEL_CSV_PATH = project_csv_from_env
282
+ logger.info(f"Using project-specific LLM model CSV (from PDD_PATH): {LLM_MODEL_CSV_PATH}")
283
+ elif project_csv_from_cwd.is_file():
284
+ # Otherwise, prefer the project relative to the current working directory
285
+ LLM_MODEL_CSV_PATH = project_csv_from_cwd
286
+ logger.info(f"Using project-specific LLM model CSV (from CWD): {LLM_MODEL_CSV_PATH}")
194
287
  else:
195
- LLM_MODEL_CSV_PATH = PROJECT_ROOT / "data" / "llm_model.csv"
196
- logger.info(f"Using project LLM model CSV: {LLM_MODEL_CSV_PATH}")
288
+ # Neither exists, we'll use a marker path that _load_model_data will handle
289
+ LLM_MODEL_CSV_PATH = None
290
+ logger.info("No local LLM model CSV found, will use package default")
197
291
  # ---------------------------------
198
292
 
199
293
  # Load environment variables from .env file
@@ -223,6 +317,7 @@ if GCS_HMAC_SECRET_ACCESS_KEY:
     GCS_HMAC_SECRET_ACCESS_KEY = GCS_HMAC_SECRET_ACCESS_KEY.strip()
 
 cache_configured = False
+configured_cache = None # Store the configured cache instance for restoration
 
 if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
     # Store original AWS credentials before overwriting for GCS cache setup
@@ -236,12 +331,13 @@ if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
     os.environ['AWS_SECRET_ACCESS_KEY'] = GCS_HMAC_SECRET_ACCESS_KEY
     # os.environ['AWS_REGION_NAME'] = GCS_REGION_NAME # Uncomment if needed
 
-    litellm.cache = litellm.Cache(
+    configured_cache = Cache(
         type="s3",
         s3_bucket_name=GCS_BUCKET_NAME,
         s3_region_name=GCS_REGION_NAME, # Pass region explicitly to cache
         s3_endpoint_url=GCS_ENDPOINT_URL,
     )
+    litellm.cache = configured_cache
     logger.info(f"LiteLLM cache configured for GCS bucket (S3 compatible): {GCS_BUCKET_NAME}")
     cache_configured = True
 
@@ -266,15 +362,22 @@ if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
     elif 'AWS_REGION_NAME' in os.environ:
         pass # Or just leave it if the temporary setting wasn't done/needed
 
+# Check if caching is disabled via environment variable
+if os.getenv("LITELLM_CACHE_DISABLE") == "1":
+    logger.info("LiteLLM caching disabled via LITELLM_CACHE_DISABLE=1")
+    litellm.cache = None
+    cache_configured = True
+
 if not cache_configured:
     try:
-        # Try SQLite-based cache as a fallback
+        # Try disk-based cache as a fallback
         sqlite_cache_path = PROJECT_ROOT / "litellm_cache.sqlite"
-        litellm.cache = litellm.Cache(type="sqlite", cache_path=str(sqlite_cache_path))
-        logger.info(f"LiteLLM SQLite cache configured at {sqlite_cache_path}")
+        configured_cache = Cache(type="disk", disk_cache_dir=str(sqlite_cache_path))
+        litellm.cache = configured_cache
+        logger.info(f"LiteLLM disk cache configured at {sqlite_cache_path}")
         cache_configured = True
     except Exception as e2:
-        warnings.warn(f"Failed to configure LiteLLM SQLite cache: {e2}. Caching is disabled.")
+        warnings.warn(f"Failed to configure LiteLLM disk cache: {e2}. Caching is disabled.")
         litellm.cache = None
 
 if not cache_configured:
@@ -312,29 +415,49 @@ def _litellm_success_callback(
312
415
  cost_val = litellm.completion_cost(completion_response=completion_response)
313
416
  calculated_cost = cost_val if cost_val is not None else 0.0
314
417
  except Exception as e1:
315
- # Attempt 2: If response object failed (e.g., missing provider in model name),
316
- # try again using explicit model from kwargs and tokens from usage.
317
- # This is often needed for batch completion items.
418
+ # Attempt 2: Compute via tokens and model mapping. If LiteLLM mapping is
419
+ # missing or API differs, fall back to CSV rates in _MODEL_RATE_MAP.
318
420
  logger.debug(f"Attempting cost calculation with fallback method: {e1}")
319
421
  try:
320
- model_name = kwargs.get("model") # Get original model name from input kwargs
422
+ model_name = kwargs.get("model")
321
423
  if model_name and usage:
322
- prompt_tokens = getattr(usage, 'prompt_tokens', 0)
323
- completion_tokens = getattr(usage, 'completion_tokens', 0)
324
- cost_val = litellm.completion_cost(
325
- model=model_name,
326
- prompt_tokens=prompt_tokens,
327
- completion_tokens=completion_tokens
328
- )
329
- calculated_cost = cost_val if cost_val is not None else 0.0
424
+ in_tok = getattr(usage, 'prompt_tokens', None)
425
+ out_tok = getattr(usage, 'completion_tokens', None)
426
+ # Some providers may use 'input_tokens'/'output_tokens'
427
+ if in_tok is None:
428
+ in_tok = getattr(usage, 'input_tokens', 0)
429
+ if out_tok is None:
430
+ out_tok = getattr(usage, 'output_tokens', 0)
431
+
432
+ # Try LiteLLM helper (arg names vary across versions)
433
+ try:
434
+ cost_val = litellm.completion_cost(
435
+ model=model_name,
436
+ prompt_tokens=in_tok,
437
+ completion_tokens=out_tok,
438
+ )
439
+ calculated_cost = cost_val if cost_val is not None else 0.0
440
+ except TypeError:
441
+ # Older/newer versions may require input/output token names
442
+ try:
443
+ cost_val = litellm.completion_cost(
444
+ model=model_name,
445
+ input_tokens=in_tok,
446
+ output_tokens=out_tok,
447
+ )
448
+ calculated_cost = cost_val if cost_val is not None else 0.0
449
+ except Exception as e3:
450
+ # Final fallback: compute using CSV rates
451
+ rates = _MODEL_RATE_MAP.get(str(model_name))
452
+ if rates is not None:
453
+ in_rate, out_rate = rates
454
+ calculated_cost = (float(in_tok or 0) * in_rate + float(out_tok or 0) * out_rate) / 1_000_000.0
455
+ else:
456
+ calculated_cost = 0.0
457
+ logger.debug(f"Cost calculation failed with LiteLLM token API; used CSV rates if available. Detail: {e3}")
330
458
  else:
331
- # If we can't get model name or usage, fallback to 0
332
459
  calculated_cost = 0.0
333
- # Optional: Log the original error e1 if needed
334
- # logger.warning(f"[Callback WARN] Failed to calculate cost with response object ({e1}) and fallback failed.")
335
460
  except Exception as e2:
336
- # Optional: Log secondary error e2 if needed
337
- # logger.warning(f"[Callback WARN] Failed to calculate cost with fallback method: {e2}")
338
461
  calculated_cost = 0.0 # Default to 0 on any error
339
462
  logger.debug(f"Cost calculation failed with fallback method: {e2}")
340
463
 
@@ -352,14 +475,108 @@ def _litellm_success_callback(
352
475
  # Register the callback with LiteLLM
353
476
  litellm.success_callback = [_litellm_success_callback]
354
477
 
478
+ # --- Cost Mapping Support (CSV Rates) ---
479
+ # Populate from CSV inside llm_invoke; used by callback fallback
480
+ _MODEL_RATE_MAP: Dict[str, Tuple[float, float]] = {}
481
+
482
+ def _set_model_rate_map(df: pd.DataFrame) -> None:
483
+ global _MODEL_RATE_MAP
484
+ try:
485
+ _MODEL_RATE_MAP = {
486
+ str(row['model']): (
487
+ float(row['input']) if pd.notna(row['input']) else 0.0,
488
+ float(row['output']) if pd.notna(row['output']) else 0.0,
489
+ )
490
+ for _, row in df.iterrows()
491
+ }
492
+ except Exception:
493
+ _MODEL_RATE_MAP = {}
494
+
355
495
  # --- Helper Functions ---
356
496
 
357
- def _load_model_data(csv_path: Path) -> pd.DataFrame:
358
- """Loads and preprocesses the LLM model data from CSV."""
359
- if not csv_path.exists():
360
- raise FileNotFoundError(f"LLM model CSV not found at {csv_path}")
497
+ def _is_malformed_json_response(content: str, threshold: int = 100) -> bool:
498
+ """
499
+ Detect if a JSON response appears malformed due to excessive trailing newlines.
500
+
501
+ This can happen when Gemini generates thousands of \n characters in a JSON string value,
502
+ causing the response to be truncated and missing closing braces.
503
+
504
+ Args:
505
+ content: The raw response content string
506
+ threshold: Number of consecutive trailing \n sequences to consider malformed
507
+
508
+ Returns:
509
+ True if the response appears malformed, False otherwise
510
+ """
511
+ if not content or not isinstance(content, str):
512
+ return False
513
+
514
+ # Check if it starts like JSON but doesn't end properly
515
+ stripped = content.strip()
516
+ if not stripped.startswith('{'):
517
+ return False
518
+
519
+ # If it ends with }, it's probably fine
520
+ if stripped.endswith('}'):
521
+ return False
522
+
523
+ # Count trailing \n sequences (escaped newlines in JSON strings)
524
+ # The pattern \n in a JSON string appears as \\n in the raw content
525
+ trailing_newline_count = 0
526
+ check_content = stripped
527
+ while check_content.endswith('\\n'):
528
+ trailing_newline_count += 1
529
+ check_content = check_content[:-2]
530
+
531
+ # If there are many trailing \n sequences, it's likely malformed
532
+ if trailing_newline_count >= threshold:
533
+ return True
534
+
535
+ # Also check for response that looks truncated mid-string
536
+ # (ends with characters that suggest we're inside a JSON string value)
537
+ if not stripped.endswith('}') and not stripped.endswith(']') and not stripped.endswith('"'):
538
+ # Could be truncated in the middle of an escaped sequence
539
+ if stripped.endswith('\\'):
540
+ return True
541
+
542
+ return False
543
+
544
+
545
+ def _load_model_data(csv_path: Optional[Path]) -> pd.DataFrame:
546
+ """Loads and preprocesses the LLM model data from CSV.
547
+
548
+ Args:
549
+ csv_path: Path to CSV file, or None to use package default
550
+
551
+ Returns:
552
+ DataFrame with model configuration data
553
+ """
554
+ # If csv_path is provided, try to load from it
555
+ if csv_path is not None:
556
+ if not csv_path.exists():
557
+ logger.warning(f"Specified LLM model CSV not found at {csv_path}, trying package default")
558
+ csv_path = None
559
+ else:
560
+ try:
561
+ df = pd.read_csv(csv_path)
562
+ logger.debug(f"Loaded model data from {csv_path}")
563
+ # Continue with the rest of the function...
564
+ except Exception as e:
565
+ logger.warning(f"Failed to load CSV from {csv_path}: {e}, trying package default")
566
+ csv_path = None
567
+
568
+ # If csv_path is None or loading failed, use package default
569
+ if csv_path is None:
570
+ try:
571
+ # Use importlib.resources to load the packaged CSV
572
+ csv_data = importlib.resources.files('pdd').joinpath('data/llm_model.csv').read_text()
573
+ import io
574
+ df = pd.read_csv(io.StringIO(csv_data))
575
+ logger.info("Loaded model data from package default")
576
+ except Exception as e:
577
+ raise FileNotFoundError(f"Failed to load default LLM model CSV from package: {e}")
578
+
361
579
  try:
362
- df = pd.read_csv(csv_path)
363
580
  # Basic validation and type conversion
364
581
  required_cols = ['provider', 'model', 'input', 'output', 'coding_arena_elo', 'api_key', 'structured_output', 'reasoning_type']
365
582
  for col in required_cols:
@@ -432,11 +649,26 @@ def _select_model_candidates(
432
649
  # Try finding base model in the *original* df in case it was filtered out
433
650
  original_base = model_df[model_df['model'] == base_model_name]
434
651
  if not original_base.empty:
435
- raise ValueError(f"Base model '{base_model_name}' found in CSV but requires API key '{original_base.iloc[0]['api_key']}' which might be missing or invalid configuration.")
436
- else:
437
- raise ValueError(f"Specified base model '{base_model_name}' not found in the LLM model CSV.")
438
-
439
- base_model = base_model_row.iloc[0]
652
+ # Base exists but may be misconfigured (e.g., missing API key). Keep erroring loudly.
653
+ raise ValueError(
654
+ f"Base model '{base_model_name}' found in CSV but requires API key '{original_base.iloc[0]['api_key']}' which might be missing or invalid configuration."
655
+ )
656
+ # Option A': Soft fallback – choose a reasonable surrogate base and continue
657
+ # Strategy (simplified and deterministic): pick the first available model
658
+ # from the CSV as the surrogate base. This mirrors typical CSV ordering
659
+ # expectations and keeps behavior predictable across environments.
660
+ try:
661
+ base_model = available_df.iloc[0]
662
+ logger.warning(
663
+ f"Base model '{base_model_name}' not found in CSV. Falling back to surrogate base '{base_model['model']}' (Option A')."
664
+ )
665
+ except Exception:
666
+ # If any unexpected error occurs during fallback, raise a clear error
667
+ raise ValueError(
668
+ f"Specified base model '{base_model_name}' not found and fallback selection failed. Check your LLM model CSV."
669
+ )
670
+ else:
671
+ base_model = base_model_row.iloc[0]
440
672
 
441
673
  # 3. Determine Target and Sort
442
674
  candidates = []
@@ -447,9 +679,10 @@ def _select_model_candidates(
447
679
  # Sort remaining by ELO descending as fallback
448
680
  available_df['sort_metric'] = -available_df['coding_arena_elo'] # Negative for descending sort
449
681
  candidates = available_df.sort_values(by='sort_metric').to_dict('records')
450
- # Ensure base model is first if it exists
451
- if any(c['model'] == base_model_name for c in candidates):
452
- candidates.sort(key=lambda x: 0 if x['model'] == base_model_name else 1)
682
+ # Ensure effective base model is first if it exists (supports surrogate base)
683
+ effective_base_name = str(base_model['model']) if isinstance(base_model, pd.Series) else base_model_name
684
+ if any(c['model'] == effective_base_name for c in candidates):
685
+ candidates.sort(key=lambda x: 0 if x['model'] == effective_base_name else 1)
453
686
  target_metric_value = f"Base Model ELO: {base_model['coding_arena_elo']}"
454
687
 
455
688
  elif strength < 0.5:
@@ -666,6 +899,378 @@ def _format_messages(prompt: str, input_data: Union[Dict[str, Any], List[Dict[st
666
899
  except Exception as e:
667
900
  raise ValueError(f"Error formatting prompt: {e}") from e
668
901
 
902
+ # --- JSON Extraction Helpers ---
903
+ import re
904
+
905
+ def _extract_fenced_json_block(text: str) -> Optional[str]:
906
+ try:
907
+ m = re.search(r"```json\s*(\{[\s\S]*?\})\s*```", text, flags=re.IGNORECASE)
908
+ if m:
909
+ return m.group(1)
910
+ return None
911
+ except Exception:
912
+ return None
913
+
914
+ def _extract_balanced_json_objects(text: str) -> List[str]:
915
+ results: List[str] = []
916
+ brace_stack = 0
917
+ start_idx = -1
918
+ in_string = False
919
+ escape = False
920
+ for i, ch in enumerate(text):
921
+ if in_string:
922
+ if escape:
923
+ escape = False
924
+ elif ch == '\\':
925
+ escape = True
926
+ elif ch == '"':
927
+ in_string = False
928
+ continue
929
+ else:
930
+ if ch == '"':
931
+ in_string = True
932
+ continue
933
+ if ch == '{':
934
+ if brace_stack == 0:
935
+ start_idx = i
936
+ brace_stack += 1
937
+ elif ch == '}':
938
+ if brace_stack > 0:
939
+ brace_stack -= 1
940
+ if brace_stack == 0 and start_idx != -1:
941
+ results.append(text[start_idx:i+1])
942
+ start_idx = -1
943
+ return results
944
+
945
+
946
+ def _looks_like_python_code(s: str) -> bool:
947
+ """
948
+ Heuristic check if a string looks like Python code.
949
+
950
+ Used to determine if we should attempt Python syntax repair on a string field.
951
+ """
952
+ if not s or len(s) < 10:
953
+ return False
954
+ # Check for common Python patterns
955
+ code_indicators = ('def ', 'class ', 'import ', 'from ', 'if __name__', 'return ', 'print(')
956
+ return any(indicator in s for indicator in code_indicators)
957
+
958
+
959
+ def _repair_python_syntax(code: str) -> str:
960
+ """
961
+ Validate Python code syntax and attempt repairs if invalid.
962
+
963
+ Sometimes LLMs include spurious characters at string boundaries,
964
+ especially when the code contains quotes. This function attempts
965
+ to detect and repair such issues.
966
+
967
+ Args:
968
+ code: Python code string to validate/repair
969
+
970
+ Returns:
971
+ Repaired code if a fix was found, otherwise original code
972
+ """
973
+ import ast
974
+
975
+ if not code or not code.strip():
976
+ return code
977
+
978
+ # First, try to parse as-is
979
+ try:
980
+ ast.parse(code)
981
+ return code # Valid, no repair needed
982
+ except SyntaxError:
983
+ pass
984
+
985
+ # Try common repairs
986
+ repaired = code
987
+
988
+ # Repair 1: Trailing spurious quote (the specific issue we've seen)
989
+ for quote in ['"', "'"]:
990
+ if repaired.rstrip().endswith(quote):
991
+ candidate = repaired.rstrip()[:-1]
992
+ try:
993
+ ast.parse(candidate)
994
+ logger.info(f"[INFO] Repaired code by removing trailing {quote!r}")
995
+ return candidate
996
+ except SyntaxError:
997
+ pass
998
+
999
+ # Repair 2: Leading spurious quote
1000
+ for quote in ['"', "'"]:
1001
+ if repaired.lstrip().startswith(quote):
1002
+ candidate = repaired.lstrip()[1:]
1003
+ try:
1004
+ ast.parse(candidate)
1005
+ logger.info(f"[INFO] Repaired code by removing leading {quote!r}")
1006
+ return candidate
1007
+ except SyntaxError:
1008
+ pass
1009
+
1010
+ # Repair 3: Both leading and trailing spurious quotes
1011
+ for quote in ['"', "'"]:
1012
+ stripped = repaired.strip()
1013
+ if stripped.startswith(quote) and stripped.endswith(quote):
1014
+ candidate = stripped[1:-1]
1015
+ try:
1016
+ ast.parse(candidate)
1017
+ logger.info(f"[INFO] Repaired code by removing surrounding {quote!r}")
1018
+ return candidate
1019
+ except SyntaxError:
1020
+ pass
1021
+
1022
+ # If no repair worked, return original (let it fail downstream)
1023
+ return code
1024
+
1025
+
1026
+ def _smart_unescape_code(code: str) -> str:
1027
+ """
1028
+ Unescape literal \\n sequences in code while preserving them inside string literals.
1029
+
1030
+ When LLMs return code as JSON, newlines get double-escaped. After JSON parsing,
1031
+ we have literal backslash-n (2 chars) that should be actual newlines for code
1032
+ structure, BUT escape sequences inside Python strings (like print("\\n")) should
1033
+ remain as escape sequences.
1034
+
1035
+ Args:
1036
+ code: Python code that may have literal \\n sequences
1037
+
1038
+ Returns:
1039
+ Code with structural newlines unescaped but string literals preserved
1040
+ """
1041
+ LITERAL_BACKSLASH_N = '\\' + 'n' # Literal \n (2 chars)
1042
+
1043
+ if LITERAL_BACKSLASH_N not in code:
1044
+ return code
1045
+
1046
+ # First, check if the code already has actual newlines (mixed state)
1047
+ # If it does, we need to be more careful
1048
+ has_actual_newlines = '\n' in code
1049
+
1050
+ if not has_actual_newlines:
1051
+ # All newlines are escaped - this is the double-escaped case
1052
+ # We need to unescape them but preserve \n inside string literals
1053
+
1054
+ # Strategy: Use a placeholder for \n inside strings, unescape all, then restore
1055
+ # We detect string literals by tracking quote state
1056
+
1057
+ result = []
1058
+ i = 0
1059
+ in_string = False
1060
+ string_char = None
1061
+ in_fstring = False
1062
+
1063
+ # Placeholder that won't appear in code
1064
+ PLACEHOLDER = '\x00NEWLINE_ESCAPE\x00'
1065
+
1066
+ while i < len(code):
1067
+ # Check for escape sequences (both actual and literal)
1068
+ if i + 1 < len(code) and code[i] == '\\':
1069
+ next_char = code[i + 1]
1070
+
1071
+ if in_string:
1072
+ # Inside a string - preserve escape sequences
1073
+ if next_char == 'n':
1074
+ result.append(PLACEHOLDER)
1075
+ i += 2
1076
+ continue
1077
+ elif next_char == 't':
1078
+ result.append('\\' + 't') # Keep \t as-is in strings
1079
+ i += 2
1080
+ continue
1081
+ elif next_char == 'r':
1082
+ result.append('\\' + 'r') # Keep \r as-is in strings
1083
+ i += 2
1084
+ continue
1085
+ elif next_char in ('"', "'", '\\'):
1086
+ # Keep escaped quotes and backslashes
1087
+ result.append(code[i:i+2])
1088
+ i += 2
1089
+ continue
1090
+
1091
+ # Check for string delimiters
1092
+ if not in_string:
1093
+ # Check for triple quotes first
1094
+ if i + 2 < len(code) and code[i:i+3] in ('"""', "'''"):
1095
+ in_string = True
1096
+ string_char = code[i:i+3]
1097
+ # Check if preceded by 'f' for f-string
1098
+ in_fstring = i > 0 and code[i-1] == 'f'
1099
+ result.append(code[i:i+3])
1100
+ i += 3
1101
+ continue
1102
+ elif code[i] in ('"', "'"):
1103
+ in_string = True
1104
+ string_char = code[i]
1105
+ in_fstring = i > 0 and code[i-1] == 'f'
1106
+ result.append(code[i])
1107
+ i += 1
1108
+ continue
1109
+ else:
1110
+ # Check for end of string
1111
+ if len(string_char) == 3: # Triple quote
1112
+ if i + 2 < len(code) and code[i:i+3] == string_char:
1113
+ in_string = False
1114
+ in_fstring = False
1115
+ result.append(code[i:i+3])
1116
+ i += 3
1117
+ continue
1118
+ else: # Single quote
1119
+ if code[i] == string_char:
1120
+ in_string = False
1121
+ in_fstring = False
1122
+ result.append(code[i])
1123
+ i += 1
1124
+ continue
1125
+
1126
+ result.append(code[i])
1127
+ i += 1
1128
+
1129
+ intermediate = ''.join(result)
1130
+
1131
+ # Now unescape all remaining \n (these are structural)
1132
+ LITERAL_BACKSLASH_R_N = '\\' + 'r' + '\\' + 'n'
1133
+ LITERAL_BACKSLASH_T = '\\' + 't'
1134
+
1135
+ intermediate = intermediate.replace(LITERAL_BACKSLASH_R_N, '\r\n')
1136
+ intermediate = intermediate.replace(LITERAL_BACKSLASH_N, '\n')
1137
+ intermediate = intermediate.replace(LITERAL_BACKSLASH_T, '\t')
1138
+
1139
+ # Restore placeholders to \n (as escape sequences in strings)
1140
+ result_code = intermediate.replace(PLACEHOLDER, '\\n')
1141
+
1142
+ return result_code
1143
+ else:
1144
+ # Mixed state - some actual newlines, some literal \n
1145
+ # This means the JSON parsing already converted some, but not all
1146
+ # The literal \n remaining are likely in strings, so leave them alone
1147
+ return code
1148
+
1149
+
1150
+ def _unescape_code_newlines(obj: Any) -> Any:
1151
+ """
1152
+ Fix double-escaped newlines in Pydantic model string fields.
1153
+
1154
+ Some models (e.g., Gemini) return JSON with \\\\n instead of \\n in code strings,
1155
+ resulting in literal backslash-n text instead of actual newlines after JSON parsing.
1156
+ This function recursively unescapes these in string fields of Pydantic models.
1157
+
1158
+ Also repairs Python syntax errors in code-like string fields (e.g., trailing quotes).
1159
+
1160
+ The check uses literal backslash-n (2 chars) vs actual newline (1 char):
1161
+ - '\\\\n' in Python source = literal backslash + n (2 chars) - needs fixing
1162
+ - '\\n' in Python source = newline character (1 char) - already correct
1163
+
1164
+ Args:
1165
+ obj: A Pydantic model, dict, list, or primitive value
1166
+
1167
+ Returns:
1168
+ The same object with string fields unescaped and code fields repaired
1169
+ """
1170
+ if obj is None:
1171
+ return obj
1172
+
1173
+ def _process_string(s: str) -> str:
1174
+ """Process a string: unescape newlines and repair Python syntax if needed."""
1175
+ result = s
1176
+ # Smart unescape that preserves \n inside string literals
1177
+ if _looks_like_python_code(result):
1178
+ result = _smart_unescape_code(result)
1179
+ result = _repair_python_syntax(result)
1180
+ else:
1181
+ # For non-code strings, do simple unescape
1182
+ LITERAL_BACKSLASH_N = '\\' + 'n'
1183
+ LITERAL_BACKSLASH_R_N = '\\' + 'r' + '\\' + 'n'
1184
+ LITERAL_BACKSLASH_T = '\\' + 't'
1185
+ if LITERAL_BACKSLASH_N in result:
1186
+ result = result.replace(LITERAL_BACKSLASH_R_N, '\r\n')
1187
+ result = result.replace(LITERAL_BACKSLASH_N, '\n')
1188
+ result = result.replace(LITERAL_BACKSLASH_T, '\t')
1189
+ return result
1190
+
1191
+ # Handle Pydantic models
1192
+ if isinstance(obj, BaseModel):
1193
+ # Get all field values and process strings
1194
+ for field_name in obj.model_fields:
1195
+ value = getattr(obj, field_name)
1196
+ if isinstance(value, str):
1197
+ processed = _process_string(value)
1198
+ if processed != value:
1199
+ object.__setattr__(obj, field_name, processed)
1200
+ elif isinstance(value, (dict, list, BaseModel)):
1201
+ _unescape_code_newlines(value)
1202
+ return obj
1203
+
1204
+ # Handle dicts
1205
+ if isinstance(obj, dict):
1206
+ for key, value in obj.items():
1207
+ if isinstance(value, str):
1208
+ obj[key] = _process_string(value)
1209
+ elif isinstance(value, (dict, list)):
1210
+ _unescape_code_newlines(value)
1211
+ return obj
1212
+
1213
+ # Handle lists
1214
+ if isinstance(obj, list):
1215
+ for i, item in enumerate(obj):
1216
+ if isinstance(item, str):
1217
+ obj[i] = _process_string(item)
1218
+ elif isinstance(item, (dict, list, BaseModel)):
1219
+ _unescape_code_newlines(item)
1220
+ return obj
1221
+
1222
+ return obj
1223
+
1224
+
1225
+ def _has_invalid_python_code(obj: Any) -> bool:
1226
+ """
1227
+ Check if any code-like string fields have invalid Python syntax.
1228
+
1229
+ This is used after _unescape_code_newlines to detect if repair failed
1230
+ and we should retry with cache disabled.
1231
+
1232
+ Args:
1233
+ obj: A Pydantic model, dict, list, or primitive value
1234
+
1235
+ Returns:
1236
+ True if there are invalid code fields that couldn't be repaired
1237
+ """
1238
+ import ast
1239
+
1240
+ if obj is None:
1241
+ return False
1242
+
1243
+ if isinstance(obj, str):
1244
+ if _looks_like_python_code(obj):
1245
+ try:
1246
+ ast.parse(obj)
1247
+ return False # Valid
1248
+ except SyntaxError:
1249
+ return True # Invalid
1250
+ return False
1251
+
1252
+ if isinstance(obj, BaseModel):
1253
+ for field_name in obj.model_fields:
1254
+ value = getattr(obj, field_name)
1255
+ if _has_invalid_python_code(value):
1256
+ return True
1257
+ return False
1258
+
1259
+ if isinstance(obj, dict):
1260
+ for value in obj.values():
1261
+ if _has_invalid_python_code(value):
1262
+ return True
1263
+ return False
1264
+
1265
+ if isinstance(obj, list):
1266
+ for item in obj:
1267
+ if _has_invalid_python_code(item):
1268
+ return True
1269
+ return False
1270
+
1271
+ return False
1272
+
1273
+
669
1274
  # --- Main Function ---
670
1275
 
671
1276
  def llm_invoke(
@@ -675,6 +1280,7 @@ def llm_invoke(
675
1280
  temperature: float = 0.1,
676
1281
  verbose: bool = False,
677
1282
  output_pydantic: Optional[Type[BaseModel]] = None,
1283
+ output_schema: Optional[Dict[str, Any]] = None,
678
1284
  time: float = 0.25,
679
1285
  use_batch_mode: bool = False,
680
1286
  messages: Optional[Union[List[Dict[str, str]], List[List[Dict[str, str]]]]] = None,
@@ -691,6 +1297,7 @@ def llm_invoke(
691
1297
  temperature: LLM temperature.
692
1298
  verbose: Print detailed logs.
693
1299
  output_pydantic: Optional Pydantic model for structured output.
1300
+ output_schema: Optional raw JSON schema dictionary for structured output (alternative to output_pydantic).
694
1301
  time: Relative thinking time (0-1, default 0.25).
695
1302
  use_batch_mode: Use batch completion if True.
696
1303
  messages: Pre-formatted list of messages (or list of lists for batch). If provided, ignores prompt and input_json.
@@ -808,6 +1415,16 @@ def llm_invoke(
808
1415
  # --- 3. Iterate Through Candidates and Invoke LLM ---
809
1416
  last_exception = None
810
1417
  newly_acquired_keys: Dict[str, bool] = {} # Track keys obtained in this run
1418
+
1419
+ # Initialize variables for retry section
1420
+ response_format = None
1421
+ time_kwargs = {}
1422
+
1423
+ # Update global rate map for callback cost fallback
1424
+ try:
1425
+ _set_model_rate_map(model_df)
1426
+ except Exception:
1427
+ pass
811
1428
 
812
1429
  for model_info in candidate_models:
813
1430
  model_name_litellm = model_info['model']
@@ -818,6 +1435,9 @@ def llm_invoke(
818
1435
  logger.info(f"\n[ATTEMPT] Trying model: {model_name_litellm} (Provider: {provider})")
819
1436
 
820
1437
  retry_with_same_model = True
1438
+ # Track per-model temperature adjustment attempt (avoid infinite loop)
1439
+ current_temperature = temperature
1440
+ temp_adjustment_done = False
821
1441
  while retry_with_same_model:
822
1442
  retry_with_same_model = False # Assume success unless auth error on new key
823
1443
 
@@ -832,7 +1452,8 @@ def llm_invoke(
832
1452
  litellm_kwargs: Dict[str, Any] = {
833
1453
  "model": model_name_litellm,
834
1454
  "messages": formatted_messages,
835
- "temperature": temperature,
1455
+ # Use a local adjustable temperature to allow provider-specific fallbacks
1456
+ "temperature": current_temperature,
836
1457
  }
837
1458
 
838
1459
  api_key_name_from_csv = model_info.get('api_key') # From CSV
@@ -845,7 +1466,14 @@ def llm_invoke(
845
1466
  if is_vertex_model and api_key_name_from_csv == 'VERTEX_CREDENTIALS':
846
1467
  credentials_file_path = os.getenv("VERTEX_CREDENTIALS") # Path from env var
847
1468
  vertex_project_env = os.getenv("VERTEX_PROJECT")
848
- vertex_location_env = os.getenv("VERTEX_LOCATION")
1469
+ # Check for per-model location override, fall back to env var
1470
+ model_location = model_info.get('location')
1471
+ if pd.notna(model_location) and str(model_location).strip():
1472
+ vertex_location_env = str(model_location).strip()
1473
+ if verbose:
1474
+ logger.info(f"[INFO] Using per-model location override: '{vertex_location_env}' for model '{model_name_litellm}'")
1475
+ else:
1476
+ vertex_location_env = os.getenv("VERTEX_LOCATION")
849
1477
 
850
1478
  if credentials_file_path and vertex_project_env and vertex_location_env:
851
1479
  try:
@@ -859,14 +1487,23 @@ def llm_invoke(
859
1487
  if verbose:
860
1488
  logger.info(f"[INFO] For Vertex AI: using vertex_credentials from '{credentials_file_path}', project '{vertex_project_env}', location '{vertex_location_env}'.")
861
1489
  except FileNotFoundError:
1490
+ # Still pass project and location so ADC can work
1491
+ litellm_kwargs["vertex_project"] = vertex_project_env
1492
+ litellm_kwargs["vertex_location"] = vertex_location_env
862
1493
  if verbose:
863
- logger.error(f"[ERROR] Vertex credentials file not found at path specified by VERTEX_CREDENTIALS env var: '{credentials_file_path}'. LiteLLM may try ADC or fail.")
1494
+ logger.warning(f"[WARN] Vertex credentials file not found at '{credentials_file_path}'. Using ADC with project '{vertex_project_env}', location '{vertex_location_env}'.")
864
1495
  except json.JSONDecodeError:
1496
+ # Still pass project and location so ADC can work
1497
+ litellm_kwargs["vertex_project"] = vertex_project_env
1498
+ litellm_kwargs["vertex_location"] = vertex_location_env
865
1499
  if verbose:
866
- logger.error(f"[ERROR] Failed to decode JSON from Vertex credentials file: '{credentials_file_path}'. Check file content. LiteLLM may try ADC or fail.")
1500
+ logger.error(f"[ERROR] Failed to decode JSON from Vertex credentials file: '{credentials_file_path}'. Using ADC with project '{vertex_project_env}', location '{vertex_location_env}'.")
867
1501
  except Exception as e:
1502
+ # Still pass project and location so ADC can work
1503
+ litellm_kwargs["vertex_project"] = vertex_project_env
1504
+ litellm_kwargs["vertex_location"] = vertex_location_env
868
1505
  if verbose:
869
- logger.error(f"[ERROR] Failed to load or process Vertex credentials from '{credentials_file_path}': {e}. LiteLLM may try ADC or fail.")
1506
+ logger.error(f"[ERROR] Failed to load Vertex credentials from '{credentials_file_path}': {e}. Using ADC with project '{vertex_project_env}', location '{vertex_location_env}'.")
870
1507
  else:
871
1508
  if verbose:
872
1509
  logger.warning(f"[WARN] For Vertex AI (using '{api_key_name_from_csv}'): One or more required environment variables (VERTEX_CREDENTIALS, VERTEX_PROJECT, VERTEX_LOCATION) are missing.")
@@ -885,9 +1522,16 @@ def llm_invoke(
885
1522
 
886
1523
  # If this model is Vertex AI AND uses a direct API key string (not VERTEX_CREDENTIALS from CSV),
887
1524
  # also pass project and location from env vars.
888
- if is_vertex_model:
1525
+ if is_vertex_model:
889
1526
  vertex_project_env = os.getenv("VERTEX_PROJECT")
890
- vertex_location_env = os.getenv("VERTEX_LOCATION")
1527
+ # Check for per-model location override, fall back to env var
1528
+ model_location = model_info.get('location')
1529
+ if pd.notna(model_location) and str(model_location).strip():
1530
+ vertex_location_env = str(model_location).strip()
1531
+ if verbose:
1532
+ logger.info(f"[INFO] Using per-model location override: '{vertex_location_env}' for model '{model_name_litellm}'")
1533
+ else:
1534
+ vertex_location_env = os.getenv("VERTEX_LOCATION")
891
1535
  if vertex_project_env and vertex_location_env:
892
1536
  litellm_kwargs["vertex_project"] = vertex_project_env
893
1537
  litellm_kwargs["vertex_location"] = vertex_location_env
@@ -901,13 +1545,36 @@ def llm_invoke(
901
1545
  elif verbose: # No api_key_name_from_csv in CSV for this model
902
1546
  logger.info(f"[INFO] No API key name specified in CSV for model '{model_name_litellm}'. LiteLLM will use its default authentication mechanisms (e.g., standard provider env vars or ADC for Vertex AI).")
903
1547
 
904
- # Add api_base if present in CSV
1548
+ # Add base_url/api_base override if present in CSV
905
1549
  api_base = model_info.get('base_url')
906
1550
  if pd.notna(api_base) and api_base:
1551
+ # LiteLLM prefers `base_url`; some older paths accept `api_base`.
1552
+ litellm_kwargs["base_url"] = str(api_base)
907
1553
  litellm_kwargs["api_base"] = str(api_base)
908
1554
 
909
- # Handle Structured Output (JSON Mode / Pydantic)
910
- if output_pydantic:
1555
+ # Provider-specific defaults (e.g., LM Studio)
1556
+ model_name_lower = str(model_name_litellm).lower()
1557
+ provider_lower_for_model = provider.lower()
1558
+ is_lm_studio = model_name_lower.startswith('lm_studio/') or provider_lower_for_model == 'lm_studio'
1559
+ is_groq = model_name_lower.startswith('groq/') or provider_lower_for_model == 'groq'
1560
+ if is_lm_studio:
1561
+ # Ensure base_url is set (fallback to env LM_STUDIO_API_BASE or localhost)
1562
+ if not litellm_kwargs.get("base_url"):
1563
+ lm_studio_base = os.getenv("LM_STUDIO_API_BASE", "http://localhost:1234/v1")
1564
+ litellm_kwargs["base_url"] = lm_studio_base
1565
+ litellm_kwargs["api_base"] = lm_studio_base
1566
+ if verbose:
1567
+ logger.info(f"[INFO] Using LM Studio base_url: {lm_studio_base}")
1568
+
1569
+ # Ensure a non-empty api_key; LM Studio accepts any non-empty token (e.g., 'lm-studio')
1570
+ if not litellm_kwargs.get("api_key"):
1571
+ lm_studio_key = os.getenv("LM_STUDIO_API_KEY") or "lm-studio"
1572
+ litellm_kwargs["api_key"] = lm_studio_key
1573
+ if verbose:
1574
+ logger.info("[INFO] Using LM Studio api_key placeholder (set LM_STUDIO_API_KEY to customize).")
1575
+
1576
+ # Handle Structured Output (JSON Mode / Pydantic / JSON Schema)
1577
+ if output_pydantic or output_schema:
911
1578
  # Check if model supports structured output based on CSV flag or LiteLLM check
912
1579
  supports_structured = model_info.get('structured_output', False)
913
1580
  # Optional: Add litellm.supports_response_schema check if CSV flag is unreliable
@@ -916,18 +1583,87 @@ def llm_invoke(
916
1583
  # except: pass # Ignore errors in supports_response_schema check
917
1584
 
918
1585
  if supports_structured:
919
- if verbose:
920
- logger.info(f"[INFO] Requesting structured output (Pydantic: {output_pydantic.__name__}) for {model_name_litellm}")
921
- # Pass the Pydantic model directly if supported, else use json_object
922
- # LiteLLM handles passing Pydantic models for supported providers
923
- litellm_kwargs["response_format"] = output_pydantic
1586
+ if output_pydantic:
1587
+ if verbose:
1588
+ logger.info(f"[INFO] Requesting structured output (Pydantic: {output_pydantic.__name__}) for {model_name_litellm}")
1589
+ # Use explicit json_object format with response_schema for better Gemini/Vertex AI compatibility
1590
+ # Passing Pydantic class directly may not trigger native structured output for all providers
1591
+ response_format = {
1592
+ "type": "json_object",
1593
+ "response_schema": output_pydantic.model_json_schema()
1594
+ }
1595
+ else: # output_schema is set
1596
+ if verbose:
1597
+ logger.info(f"[INFO] Requesting structured output (JSON Schema) for {model_name_litellm}")
1598
+ # LiteLLM expects {"type": "json_schema", "json_schema": {"name": "response", "schema": schema_dict, "strict": true}}
1599
+ # OR for some providers just the schema dict if type is json_object.
1600
+ # Best practice for broad compatibility via LiteLLM is usually the dict directly or wrapped.
1601
+ # For now, let's assume we pass the schema dict as 'response_format' which LiteLLM handles for many providers
1602
+ # or wrap it if needed. LiteLLM 1.40+ supports passing the dict directly for many.
1603
+ response_format = {
1604
+ "type": "json_schema",
1605
+ "json_schema": {
1606
+ "name": "response",
1607
+ "schema": output_schema,
1608
+ "strict": False
1609
+ }
1610
+ }
1611
+
1612
+ litellm_kwargs["response_format"] = response_format
1613
+
1614
+ # LM Studio requires "json_schema" format, not "json_object"
1615
+ # Use extra_body to bypass litellm.drop_params stripping the schema
1616
+ if is_lm_studio and response_format and response_format.get("type") == "json_object":
1617
+ schema = response_format.get("response_schema", {})
1618
+ lm_studio_response_format = {
1619
+ "type": "json_schema",
1620
+ "json_schema": {
1621
+ "name": "response",
1622
+ "strict": True,
1623
+ "schema": schema
1624
+ }
1625
+ }
1626
+ # Use extra_body to bypass drop_params - passes directly to API
1627
+ litellm_kwargs["extra_body"] = {"response_format": lm_studio_response_format}
1628
+ # Remove from regular response_format to avoid conflicts
1629
+ if "response_format" in litellm_kwargs:
1630
+ del litellm_kwargs["response_format"]
1631
+ if verbose:
1632
+ logger.info(f"[INFO] Using extra_body for LM Studio response_format to bypass drop_params")
1633
+
1634
+ # Groq has issues with tool-based structured output - use JSON mode with schema in prompt
1635
+ if is_groq and response_format:
1636
+ # Get the schema to include in system prompt
1637
+ if output_pydantic:
1638
+ schema = output_pydantic.model_json_schema()
1639
+ else:
1640
+ schema = output_schema
1641
+
1642
+ # Use simple json_object mode (Groq's tool_use often fails)
1643
+ litellm_kwargs["response_format"] = {"type": "json_object"}
1644
+
1645
+ # Prepend schema instruction to messages (json module is imported at top of file)
1646
+ schema_instruction = f"You must respond with valid JSON matching this schema:\n```json\n{json.dumps(schema, indent=2)}\n```\nRespond ONLY with the JSON object, no other text."
1647
+
1648
+ # Find or create system message to prepend schema
1649
+ messages_list = litellm_kwargs.get("messages", [])
1650
+ if messages_list and messages_list[0].get("role") == "system":
1651
+ messages_list[0]["content"] = schema_instruction + "\n\n" + messages_list[0]["content"]
1652
+ else:
1653
+ messages_list.insert(0, {"role": "system", "content": schema_instruction})
1654
+ litellm_kwargs["messages"] = messages_list
1655
+
1656
+ if verbose:
1657
+ logger.info(f"[INFO] Using JSON object mode with schema in prompt for Groq (avoiding tool_use issues)")
1658
+
924
1659
  # As a fallback, one could use:
925
1660
  # litellm_kwargs["response_format"] = {"type": "json_object"}
926
1661
  # And potentially enable client-side validation:
927
1662
  # litellm.enable_json_schema_validation = True # Enable globally if needed
928
1663
  else:
1664
+ schema_name = output_pydantic.__name__ if output_pydantic else "JSON Schema"
929
1665
  if verbose:
930
- logger.warning(f"[WARN] Model {model_name_litellm} does not support structured output via CSV flag. Output might not be valid {output_pydantic.__name__}.")
1666
+ logger.warning(f"[WARN] Model {model_name_litellm} does not support structured output via CSV flag. Output might not be valid {schema_name}.")
931
1667
  # Proceed without forcing JSON mode, parsing will be attempted later
932
1668
 
933
1669
  # --- NEW REASONING LOGIC ---
@@ -942,7 +1678,9 @@ def llm_invoke(
942
1678
  # Currently known: Anthropic uses 'thinking'
943
1679
  # Model name comparison is more robust than provider string
944
1680
  if provider == 'anthropic': # Check provider column instead of model prefix
945
- litellm_kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
1681
+ thinking_param = {"type": "enabled", "budget_tokens": budget}
1682
+ litellm_kwargs["thinking"] = thinking_param
1683
+ time_kwargs["thinking"] = thinking_param
946
1684
  if verbose:
947
1685
  logger.info(f"[INFO] Requesting Anthropic thinking (budget type) with budget: {budget} tokens for {model_name_litellm}")
948
1686
  else:
@@ -960,10 +1698,32 @@ def llm_invoke(
960
1698
  effort = "high"
961
1699
  elif time > 0.3:
962
1700
  effort = "medium"
963
- # Use the common 'reasoning_effort' param LiteLLM provides
964
- litellm_kwargs["reasoning_effort"] = effort
965
- if verbose:
966
- logger.info(f"[INFO] Requesting reasoning_effort='{effort}' (effort type) for {model_name_litellm} based on time={time}")
1701
+
1702
+ # Map effort parameter per-provider/model family
1703
+ model_lower = str(model_name_litellm).lower()
1704
+ provider_lower = str(provider).lower()
1705
+
1706
+ if provider_lower == 'openai' and model_lower.startswith('gpt-5'):
1707
+ # OpenAI 5-series uses Responses API with nested 'reasoning'
1708
+ reasoning_obj = {"effort": effort, "summary": "auto"}
1709
+ litellm_kwargs["reasoning"] = reasoning_obj
1710
+ time_kwargs["reasoning"] = reasoning_obj
1711
+ if verbose:
1712
+ logger.info(f"[INFO] Requesting OpenAI reasoning.effort='{effort}' for {model_name_litellm} (Responses API)")
1713
+
1714
+ elif provider_lower == 'openai' and model_lower.startswith('o') and 'mini' not in model_lower:
1715
+ # Historical o* models may use LiteLLM's generic reasoning_effort param
1716
+ litellm_kwargs["reasoning_effort"] = effort
1717
+ time_kwargs["reasoning_effort"] = effort
1718
+ if verbose:
1719
+ logger.info(f"[INFO] Requesting reasoning_effort='{effort}' for {model_name_litellm}")
1720
+
1721
+ else:
1722
+ # Fallback to LiteLLM generic param when supported by provider adapter
1723
+ litellm_kwargs["reasoning_effort"] = effort
1724
+ time_kwargs["reasoning_effort"] = effort
1725
+ if verbose:
1726
+ logger.info(f"[INFO] Requesting generic reasoning_effort='{effort}' for {model_name_litellm}")
967
1727
 
968
1728
  elif reasoning_type == 'none':
969
1729
  if verbose:
@@ -995,6 +1755,166 @@ def llm_invoke(
995
1755
  logger.debug("NOT ENABLING CACHING: litellm.cache is None at call time")
996
1756
 
997
1757
 
1758
+ # Route OpenAI gpt-5* models through Responses API to support 'reasoning'
1759
+ model_lower_for_call = str(model_name_litellm).lower()
1760
+ provider_lower_for_call = str(provider).lower()
1761
+
1762
+ if (
1763
+ not use_batch_mode
1764
+ and provider_lower_for_call == 'openai'
1765
+ and model_lower_for_call.startswith('gpt-5')
1766
+ ):
1767
+ if verbose:
1768
+ logger.info(f"[INFO] Calling LiteLLM Responses API for {model_name_litellm}...")
1769
+ try:
1770
+ # Build input text from messages
1771
+ if isinstance(formatted_messages, list) and formatted_messages and isinstance(formatted_messages[0], dict):
1772
+ input_text = "\n\n".join(f"{m.get('role','user')}: {m.get('content','')}" for m in formatted_messages)
1773
+ else:
1774
+ # Fallback: string cast
1775
+ input_text = str(formatted_messages)
1776
+
1777
+ # Derive effort mapping already computed in time_kwargs
1778
+ reasoning_param = time_kwargs.get("reasoning")
1779
+
1780
+ # Build text.format block for structured output
1781
+ # Default to plain text format
1782
+ text_block = {"format": {"type": "text"}}
1783
+
1784
+ # If structured output requested, use text.format with json_schema
1785
+ # This is the correct way to enforce structured output via litellm.responses()
1786
+ if output_pydantic or output_schema:
1787
+ try:
1788
+ if output_pydantic:
1789
+ schema = output_pydantic.model_json_schema()
1790
+ name = output_pydantic.__name__
1791
+ else:
1792
+ schema = output_schema
1793
+ name = "response"
1794
+
1795
+ # Add additionalProperties: false for strict mode (required by OpenAI)
1796
+ schema['additionalProperties'] = False
1797
+
1798
+ # Use text.format with json_schema for structured output
1799
+ text_block = {
1800
+ "format": {
1801
+ "type": "json_schema",
1802
+ "name": name,
1803
+ "strict": True,
1804
+ "schema": schema,
1805
+ }
1806
+ }
1807
+ if verbose:
1808
+ logger.info(f"[INFO] Using structured output via text.format for Responses API")
1809
+ except Exception as schema_e:
1810
+ logger.warning(f"[WARN] Failed to derive JSON schema: {schema_e}. Proceeding with plain text format.")
1811
+
1812
+ # Build kwargs for litellm.responses()
1813
+ responses_kwargs = {
1814
+ "model": model_name_litellm,
1815
+ "input": input_text,
1816
+ "text": text_block,
1817
+ }
1818
+ if verbose and temperature not in (None, 0, 0.0):
1819
+ logger.info("[INFO] Skipping 'temperature' for OpenAI GPT-5 Responses call (unsupported by API).")
1820
+ if reasoning_param is not None:
1821
+ responses_kwargs["reasoning"] = reasoning_param
1822
+
1823
+ # Call litellm.responses() which handles the API interaction
1824
+ resp = litellm.responses(**responses_kwargs)
1825
+
1826
+ # Extract text result from response
1827
+ result_text = None
1828
+ try:
1829
+ # LiteLLM responses return output as a list of items
1830
+ for item in resp.output:
1831
+ if getattr(item, 'type', None) == 'message' and hasattr(item, 'content') and item.content:
1832
+ for content_item in item.content:
1833
+ if hasattr(content_item, 'text'):
1834
+ result_text = content_item.text
1835
+ break
1836
+ if result_text:
1837
+ break
1838
+ except Exception:
1839
+ result_text = None
1840
+
1841
+ # Calculate cost using usage + CSV rates
1842
+ total_cost = 0.0
1843
+ usage = getattr(resp, "usage", None)
1844
+ if usage is not None:
1845
+ in_tok = getattr(usage, "input_tokens", 0) or 0
1846
+ out_tok = getattr(usage, "output_tokens", 0) or 0
1847
+ in_rate = model_info.get('input', 0.0) or 0.0
1848
+ out_rate = model_info.get('output', 0.0) or 0.0
1849
+ total_cost = (in_tok * in_rate + out_tok * out_rate) / 1_000_000.0
1850
+
1851
+ # Parse result if Pydantic output requested
1852
+ final_result = None
1853
+ if output_pydantic and result_text:
1854
+ try:
1855
+ final_result = output_pydantic.model_validate_json(result_text)
1856
+ except Exception as e:
1857
+ # With structured output, parsing should succeed
1858
+ # But if it fails, try JSON repair as fallback
1859
+ logger.warning(f"[WARN] Pydantic parse failed on Responses output: {e}. Attempting JSON repair...")
1860
+
1861
+ # Try extracting from fenced JSON blocks first
1862
+ fenced = _extract_fenced_json_block(result_text)
1863
+ candidates: List[str] = []
1864
+ if fenced:
1865
+ candidates.append(fenced)
1866
+ else:
1867
+ candidates.extend(_extract_balanced_json_objects(result_text))
1868
+
1869
+ # Also try the raw text as-is after stripping fences
1870
+ cleaned = result_text.strip()
1871
+ if cleaned.startswith("```json"):
1872
+ cleaned = cleaned[7:]
1873
+ elif cleaned.startswith("```"):
1874
+ cleaned = cleaned[3:]
1875
+ if cleaned.endswith("```"):
1876
+ cleaned = cleaned[:-3]
1877
+ cleaned = cleaned.strip()
1878
+ if cleaned and cleaned not in candidates:
1879
+ candidates.append(cleaned)
1880
+
1881
+ parse_succeeded = False
1882
+ for cand in candidates:
1883
+ try:
1884
+ final_result = output_pydantic.model_validate_json(cand)
1885
+ parse_succeeded = True
1886
+ logger.info(f"[SUCCESS] JSON repair succeeded for Responses output")
1887
+ break
1888
+ except Exception:
1889
+ continue
1890
+
1891
+ if not parse_succeeded:
1892
+ logger.error(f"[ERROR] All JSON repair attempts failed for Responses output. Original error: {e}")
1893
+ final_result = f"ERROR: Failed to parse structured output from Responses API. Raw: {repr(result_text)[:200]}"
1894
+ else:
1895
+ final_result = result_text
1896
+
1897
+ if verbose:
1898
+ logger.info(f"[RESULT] Model Used: {model_name_litellm}")
1899
+ logger.info(f"[RESULT] Total Cost (estimated): ${total_cost:.6g}")
1900
+
1901
+ return {
1902
+ 'result': final_result,
1903
+ 'cost': total_cost,
1904
+ 'model_name': model_name_litellm,
1905
+ 'thinking_output': None,
1906
+ }
1907
+ except Exception as e:
1908
+ last_exception = e
1909
+ logger.error(f"[ERROR] OpenAI Responses call failed for {model_name_litellm}: {e}")
1910
+ # Remove 'reasoning' key to avoid OpenAI Chat API unknown param errors
1911
+ if "reasoning" in litellm_kwargs:
1912
+ try:
1913
+ litellm_kwargs.pop("reasoning", None)
1914
+ except Exception:
1915
+ pass
1916
+ # Fall through to LiteLLM path as a fallback
1917
+
998
1918
  if use_batch_mode:
999
1919
  if verbose:
1000
1920
  logger.info(f"[INFO] Calling litellm.batch_completion for {model_name_litellm}...")
@@ -1002,6 +1922,16 @@ def llm_invoke(
1002
1922
 
1003
1923
 
1004
1924
  else:
1925
+ # Anthropic requirement: when 'thinking' is enabled, temperature must be 1
1926
+ try:
1927
+ if provider.lower() == 'anthropic' and 'thinking' in litellm_kwargs:
1928
+ if litellm_kwargs.get('temperature') != 1:
1929
+ if verbose:
1930
+ logger.info("[INFO] Anthropic thinking enabled: forcing temperature=1 for compliance.")
1931
+ litellm_kwargs['temperature'] = 1
1932
+ current_temperature = 1
1933
+ except Exception:
1934
+ pass
1005
1935
  if verbose:
1006
1936
  logger.info(f"[INFO] Calling litellm.completion for {model_name_litellm}...")
1007
1937
  response = litellm.completion(**litellm_kwargs)
@@ -1059,13 +1989,12 @@ def llm_invoke(
1059
1989
  retry_response = litellm.completion(
1060
1990
  model=model_name_litellm,
1061
1991
  messages=retry_messages,
1062
- temperature=temperature,
1992
+ temperature=current_temperature,
1063
1993
  response_format=response_format,
1064
- max_completion_tokens=max_tokens,
1065
1994
  **time_kwargs
1066
1995
  )
1067
- # Re-enable cache
1068
- litellm.cache = Cache()
1996
+ # Re-enable cache by restoring the originally configured cache (may be None)
1997
+ litellm.cache = configured_cache
1069
1998
  # Extract result from retry
1070
1999
  retry_raw_result = retry_response.choices[0].message.content
1071
2000
  if retry_raw_result is not None:
@@ -1083,21 +2012,66 @@ def llm_invoke(
1083
2012
  logger.error(f"[ERROR] Cannot retry - batch mode or missing prompt/input_json")
1084
2013
  results.append("ERROR: LLM returned None content and cannot retry")
1085
2014
  continue
1086
-
1087
- if output_pydantic:
2015
+
2016
+ # Check for malformed JSON response (excessive trailing newlines causing truncation)
2017
+ # This can happen when Gemini generates thousands of \n in JSON string values
2018
+ if isinstance(raw_result, str) and _is_malformed_json_response(raw_result):
2019
+ logger.warning(f"[WARNING] Detected malformed JSON response with excessive trailing newlines for item {i}. Retrying with cache bypass...")
2020
+ if not use_batch_mode and prompt and input_json is not None:
2021
+ # Add a small space to bypass cache
2022
+ modified_prompt = prompt + " "
2023
+ try:
2024
+ retry_messages = _format_messages(modified_prompt, input_json, use_batch_mode)
2025
+ # Disable cache for retry
2026
+ original_cache = litellm.cache
2027
+ litellm.cache = None
2028
+ retry_response = litellm.completion(
2029
+ model=model_name_litellm,
2030
+ messages=retry_messages,
2031
+ temperature=current_temperature,
2032
+ response_format=response_format,
2033
+ **time_kwargs
2034
+ )
2035
+ # Re-enable cache
2036
+ litellm.cache = original_cache
2037
+ # Extract result from retry
2038
+ retry_raw_result = retry_response.choices[0].message.content
2039
+ if retry_raw_result is not None and not _is_malformed_json_response(retry_raw_result):
2040
+ logger.info(f"[SUCCESS] Cache bypass retry for malformed JSON succeeded for item {i}")
2041
+ raw_result = retry_raw_result
2042
+ else:
2043
+ # Retry also failed, but we'll continue with repair logic below
2044
+ logger.warning(f"[WARNING] Cache bypass retry also returned malformed JSON for item {i}, attempting repair...")
2045
+ except Exception as retry_e:
2046
+ logger.warning(f"[WARNING] Cache bypass retry for malformed JSON failed for item {i}: {retry_e}, attempting repair...")
2047
+ else:
2048
+ logger.warning(f"[WARNING] Cannot retry malformed JSON - batch mode or missing prompt/input_json, attempting repair...")
2049
+
2050
+ if output_pydantic or output_schema:
1088
2051
  parsed_result = None
1089
2052
  json_string_to_parse = None
1090
2053
 
1091
2054
  try:
1092
- # Attempt 1: Check if LiteLLM already parsed it
1093
- if isinstance(raw_result, output_pydantic):
2055
+ # Attempt 1: Check if LiteLLM already parsed it (only for Pydantic)
2056
+ if output_pydantic and isinstance(raw_result, output_pydantic):
1094
2057
  parsed_result = raw_result
1095
2058
  if verbose:
1096
2059
  logger.debug("[DEBUG] Pydantic object received directly from LiteLLM.")
1097
2060
 
1098
2061
  # Attempt 2: Check if raw_result is dict-like and validate
1099
2062
  elif isinstance(raw_result, dict):
1100
- parsed_result = output_pydantic.model_validate(raw_result)
2063
+ if output_pydantic:
2064
+ parsed_result = output_pydantic.model_validate(raw_result)
2065
+ else:
2066
+ # Validate against JSON schema
2067
+ try:
2068
+ import jsonschema
2069
+ jsonschema.validate(instance=raw_result, schema=output_schema)
2070
+ parsed_result = json.dumps(raw_result) # Return as JSON string for consistency
2071
+ except ImportError:
2072
+ logger.warning("jsonschema not installed, skipping validation")
2073
+ parsed_result = json.dumps(raw_result)
2074
+
1101
2075
  if verbose:
1102
2076
  logger.debug("[DEBUG] Validated dictionary-like object directly.")
1103
2077
 
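When a raw output_schema is supplied instead of a Pydantic model, the dict branch above validates with jsonschema when it is installed and otherwise passes the value through as a JSON string. A small illustration of that best-effort contract (the schema itself is made up):

    import json

    def validate_dict_result(raw_result: dict, output_schema: dict) -> str:
        try:
            import jsonschema
            jsonschema.validate(instance=raw_result, schema=output_schema)
        except ImportError:
            pass  # validation is best-effort when jsonschema is missing
        return json.dumps(raw_result)  # returned as a JSON string for consistency

    schema = {"type": "object",
              "properties": {"answer": {"type": "string"}},
              "required": ["answer"],
              "additionalProperties": False}
    print(validate_dict_result({"answer": "Paris"}, schema))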
@@ -1105,26 +2079,59 @@ def llm_invoke(
1105
2079
  elif isinstance(raw_result, str):
1106
2080
  json_string_to_parse = raw_result # Start with the raw string
1107
2081
  try:
1108
- # Look for first { and last }
1109
- start_brace = json_string_to_parse.find('{')
1110
- end_brace = json_string_to_parse.rfind('}')
1111
- if start_brace != -1 and end_brace != -1 and end_brace > start_brace:
1112
- potential_json = json_string_to_parse[start_brace:end_brace+1]
1113
- # Basic check if it looks like JSON
1114
- if potential_json.strip().startswith('{') and potential_json.strip().endswith('}'):
1115
- if verbose:
1116
- logger.debug(f"[DEBUG] Attempting to parse extracted JSON block: '{potential_json}'")
1117
- parsed_result = output_pydantic.model_validate_json(potential_json)
1118
- else:
1119
- # If block extraction fails, try cleaning markdown next
1120
- raise ValueError("Extracted block doesn't look like JSON")
2082
+ # 1) Prefer fenced ```json blocks
2083
+ fenced = _extract_fenced_json_block(raw_result)
2084
+ candidates: List[str] = []
2085
+ if fenced:
2086
+ candidates.append(fenced)
1121
2087
  else:
1122
- # If no braces found, try cleaning markdown next
1123
- raise ValueError("Could not find enclosing {}")
1124
- except (json.JSONDecodeError, ValidationError, ValueError) as extraction_error:
2088
+ # 2) Fall back to scanning for balanced JSON objects
2089
+ candidates.extend(_extract_balanced_json_objects(raw_result))
2090
+
2091
+ if not candidates:
2092
+ raise ValueError("No JSON-like content found")
2093
+
2094
+ parse_err: Optional[Exception] = None
2095
+ for cand in candidates:
2096
+ try:
2097
+ if verbose:
2098
+ logger.debug(f"[DEBUG] Attempting to parse candidate JSON block: {cand}")
2099
+
2100
+ if output_pydantic:
2101
+ parsed_result = output_pydantic.model_validate_json(cand)
2102
+ else:
2103
+ # Parse JSON and validate against schema
2104
+ loaded = json.loads(cand)
2105
+ try:
2106
+ import jsonschema
2107
+ jsonschema.validate(instance=loaded, schema=output_schema)
2108
+ except ImportError:
2109
+ pass # Skip validation if lib missing
2110
+ parsed_result = cand # Return string if valid
2111
+
2112
+ json_string_to_parse = cand
2113
+ parse_err = None
2114
+ break
2115
+ except (json.JSONDecodeError, ValidationError, ValueError) as pe:
2116
+ # Also catch jsonschema.ValidationError if imported
2117
+ parse_err = pe
2118
+ try:
2119
+ import jsonschema
2120
+ if isinstance(pe, jsonschema.ValidationError):
2121
+ parse_err = pe
2122
+ except ImportError:
2123
+ pass
2124
+
2125
+ if parsed_result is None:
2126
+ # If none of the candidates parsed, raise last error
2127
+ if parse_err is not None:
2128
+ raise parse_err
2129
+ raise ValueError("Unable to parse any JSON candidates")
2130
+ except (json.JSONDecodeError, ValidationError, ValueError, Exception) as extraction_error:
2131
+ # Catch generic Exception to handle jsonschema errors without explicit import here
1125
2132
  if verbose:
1126
- logger.debug(f"[DEBUG] JSON block extraction/validation failed ('{extraction_error}'). Trying markdown cleaning.")
1127
- # Fallback: Clean markdown fences and retry JSON validation
2133
+ logger.debug(f"[DEBUG] JSON extraction/validation failed ('{extraction_error}'). Trying fence cleaning.")
2134
+ # Last resort: strip any leading/trailing code fences and retry
1128
2135
  cleaned_result_str = raw_result.strip()
1129
2136
  if cleaned_result_str.startswith("```json"):
1130
2137
  cleaned_result_str = cleaned_result_str[7:]
@@ -1133,35 +2140,166 @@ def llm_invoke(
1133
2140
  if cleaned_result_str.endswith("```"):
1134
2141
  cleaned_result_str = cleaned_result_str[:-3]
1135
2142
  cleaned_result_str = cleaned_result_str.strip()
1136
- # Check again if it looks like JSON before parsing
1137
- if cleaned_result_str.startswith('{') and cleaned_result_str.endswith('}'):
2143
+ # Check for complete JSON object or array
2144
+ is_complete_object = cleaned_result_str.startswith('{') and cleaned_result_str.endswith('}')
2145
+ is_complete_array = cleaned_result_str.startswith('[') and cleaned_result_str.endswith(']')
2146
+ if is_complete_object or is_complete_array:
1138
2147
  if verbose:
1139
- logger.debug(f"[DEBUG] Attempting parse after cleaning markdown fences. Cleaned string: '{cleaned_result_str}'")
1140
- json_string_to_parse = cleaned_result_str # Update string for error reporting
1141
- parsed_result = output_pydantic.model_validate_json(json_string_to_parse)
2148
+ logger.debug(f"[DEBUG] Attempting parse after generic fence cleaning. Cleaned string: '{cleaned_result_str}'")
2149
+ json_string_to_parse = cleaned_result_str
2150
+
2151
+ if output_pydantic:
2152
+ parsed_result = output_pydantic.model_validate_json(json_string_to_parse)
2153
+ else:
2154
+ loaded = json.loads(json_string_to_parse)
2155
+ try:
2156
+ import jsonschema
2157
+ jsonschema.validate(instance=loaded, schema=output_schema)
2158
+ except ImportError:
2159
+ pass
2160
+ parsed_result = json_string_to_parse
2161
+ elif cleaned_result_str.startswith('{') or cleaned_result_str.startswith('['):
2162
+ # Attempt to repair truncated JSON (e.g., missing closing braces)
2163
+ # This can happen when Gemini generates excessive trailing content
2164
+ # that causes token limit truncation
2165
+ if verbose:
2166
+ logger.debug(f"[DEBUG] JSON appears truncated (missing closing brace). Attempting repair.")
2167
+
2168
+ # Try to find the last valid JSON structure
2169
+ # For simple schemas like {"extracted_code": "..."}, we can try to close it
2170
+ repaired = cleaned_result_str.rstrip()
2171
+
2172
+ # Strip trailing escaped newline sequences (\\n in the JSON string)
2173
+ # These appear as literal backslash-n when Gemini generates excessive newlines
2174
+ while repaired.endswith('\\n'):
2175
+ repaired = repaired[:-2]
2176
+ # Also strip trailing literal backslashes that might be orphaned
2177
+ repaired = repaired.rstrip('\\')
2178
+
2179
+ # If the output was cut off in the middle of a string value, try to close it.
2181
+ # No quote counting is done to detect string state; a simple heuristic just
2182
+ # appends likely closing sequences below until one of them parses.
2182
+ is_array = cleaned_result_str.startswith('[')
2183
+ expected_end = ']' if is_array else '}'
2184
+ if not repaired.endswith(expected_end):
2185
+ # Try adding various closures to repair
2186
+ if is_array:
2187
+ repair_attempts = [
2188
+ repaired + '}]', # Close object and array
2189
+ repaired + '"}]', # Close string, object and array
2190
+ repaired + '"}}]', # Close string, nested object and array
2191
+ repaired.rstrip(',') + ']', # Remove trailing comma and close array
2192
+ repaired.rstrip('"') + '"}]', # Handle partial string end
2193
+ ]
2194
+ else:
2195
+ repair_attempts = [
2196
+ repaired + '"}', # Close string and object
2197
+ repaired + '"}\n}', # Close string and nested object
2198
+ repaired + '"}}}', # Deeper nesting
2199
+ repaired.rstrip(',') + '}', # Remove trailing comma
2200
+ repaired.rstrip('"') + '"}', # Handle partial string end
2201
+ ]
2202
+
2203
+ for attempt in repair_attempts:
2204
+ try:
2205
+ if output_pydantic:
2206
+ parsed_result = output_pydantic.model_validate_json(attempt)
2207
+ else:
2208
+ loaded = json.loads(attempt)
2209
+ try:
2210
+ import jsonschema
2211
+ jsonschema.validate(instance=loaded, schema=output_schema)
2212
+ except ImportError:
2213
+ pass
2214
+ parsed_result = attempt
2215
+
2216
+ if verbose:
2217
+ logger.info(f"[INFO] Successfully repaired truncated JSON response")
2218
+ json_string_to_parse = attempt
2219
+ break
2220
+ except (json.JSONDecodeError, ValidationError, ValueError):
2221
+ continue
2222
+
2223
+ if parsed_result is None:
2224
+ raise ValueError("Content after cleaning doesn't look like JSON (and repair attempts failed)")
1142
2225
  else:
1143
- # If still doesn't look like JSON, raise error
1144
- raise ValueError("Content after cleaning markdown doesn't look like JSON")
2226
+ raise ValueError("Content after cleaning doesn't look like JSON")
1145
2227
 
1146
2228
 
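Worked through on a tiny input, the repair path above strips trailing escaped-newline runs and then tries a handful of closing sequences until one parses; the truncated string below is a fabricated example of the Gemini failure mode being handled:

    import json

    truncated = '{"extracted_code": "print(1)\\n\\n\\n'  # closing quote/brace lost to truncation
    repaired = truncated.rstrip()
    while repaired.endswith('\\n'):      # drop literal backslash-n pairs
        repaired = repaired[:-2]
    repaired = repaired.rstrip('\\')

    for attempt in (repaired + '"}', repaired + '"}\n}', repaired.rstrip(',') + '}'):
        try:
            print(json.loads(attempt))   # {'extracted_code': 'print(1)'}
            break
        except json.JSONDecodeError:
            continue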
1147
2229
  # Check if any parsing attempt succeeded
1148
2230
  if parsed_result is None:
2231
+ target_name = output_pydantic.__name__ if output_pydantic else "JSON Schema"
1149
2232
  # This case should ideally be caught by exceptions above, but as a safeguard:
1150
- raise TypeError(f"Raw result type {type(raw_result)} or content could not be validated/parsed against {output_pydantic.__name__}.")
2233
+ raise TypeError(f"Raw result type {type(raw_result)} or content could not be validated/parsed against {target_name}.")
1151
2234
 
1152
- except (ValidationError, json.JSONDecodeError, TypeError, ValueError) as parse_error:
1153
- logger.error(f"[ERROR] Failed to parse response into Pydantic model {output_pydantic.__name__} for item {i}: {parse_error}")
2235
+ except (ValidationError, json.JSONDecodeError, TypeError, ValueError, Exception) as parse_error:
2236
+ target_name = output_pydantic.__name__ if output_pydantic else "JSON Schema"
2237
+ logger.error(f"[ERROR] Failed to parse response into {target_name} for item {i}: {parse_error}")
1154
2238
  # Use the string that was last attempted for parsing in the error message
1155
2239
  error_content = json_string_to_parse if json_string_to_parse is not None else raw_result
1156
2240
  logger.error("[ERROR] Content attempted for parsing: %s", repr(error_content)) # CORRECTED (or use f-string)
1157
- results.append(f"ERROR: Failed to parse Pydantic. Raw: {repr(raw_result)}")
2241
+ results.append(f"ERROR: Failed to parse structured output. Raw: {repr(raw_result)}")
1158
2242
  continue # Skip appending result below if parsing failed
1159
2243
 
1160
- # If parsing succeeded, append the parsed_result
2244
+ # Post-process: unescape newlines and repair Python syntax
2245
+ _unescape_code_newlines(parsed_result)
2246
+
2247
+ # Check if code fields still have invalid Python syntax after repair
2248
+ # If so, retry without cache to get a fresh response
2249
+ if _has_invalid_python_code(parsed_result):
2250
+ logger.warning(f"[WARNING] Detected invalid Python syntax in code fields for item {i} after repair. Retrying with cache bypass...")
2251
+ if not use_batch_mode and prompt and input_json is not None:
2252
+ # Add a small variation to bypass cache
2253
+ modified_prompt = prompt + "  "  # Two spaces to differentiate from other retries
2254
+ try:
2255
+ retry_messages = _format_messages(modified_prompt, input_json, use_batch_mode)
2256
+ # Disable cache for retry
2257
+ original_cache = litellm.cache
2258
+ litellm.cache = None
2259
+ retry_response = litellm.completion(
2260
+ model=model_name_litellm,
2261
+ messages=retry_messages,
2262
+ temperature=current_temperature,
2263
+ response_format=response_format,
2264
+ **time_kwargs
2265
+ )
2266
+ # Re-enable cache
2267
+ litellm.cache = original_cache
2268
+ # Extract and re-parse the retry result
2269
+ retry_raw_result = retry_response.choices[0].message.content
2270
+ if retry_raw_result is not None:
2271
+ # Re-parse the retry result
2272
+ retry_parsed = None
2273
+ if output_pydantic:
2274
+ if isinstance(retry_raw_result, output_pydantic):
2275
+ retry_parsed = retry_raw_result
2276
+ elif isinstance(retry_raw_result, dict):
2277
+ retry_parsed = output_pydantic.model_validate(retry_raw_result)
2278
+ elif isinstance(retry_raw_result, str):
2279
+ retry_parsed = output_pydantic.model_validate_json(retry_raw_result)
2280
+ elif output_schema and isinstance(retry_raw_result, str):
2281
+ retry_parsed = retry_raw_result # Keep as string for schema validation
2282
+
2283
+ if retry_parsed is not None:
2284
+ _unescape_code_newlines(retry_parsed)
2285
+ if not _has_invalid_python_code(retry_parsed):
2286
+ logger.info(f"[SUCCESS] Cache bypass retry for invalid Python code succeeded for item {i}")
2287
+ parsed_result = retry_parsed
2288
+ else:
2289
+ logger.warning(f"[WARNING] Cache bypass retry still has invalid Python code for item {i}, using original")
2290
+ else:
2291
+ logger.warning(f"[WARNING] Cache bypass retry returned unparseable result for item {i}")
2292
+ else:
2293
+ logger.warning(f"[WARNING] Cache bypass retry returned None for item {i}")
2294
+ except Exception as retry_e:
2295
+ logger.warning(f"[WARNING] Cache bypass retry for invalid Python code failed for item {i}: {retry_e}")
2296
+ else:
2297
+ logger.warning(f"[WARNING] Cannot retry invalid Python code - batch mode or missing prompt/input_json")
2298
+
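_unescape_code_newlines and _has_invalid_python_code are defined earlier in the file and their implementations are not shown in this diff. Purely as a hypothetical sketch of what the syntax check could look like, an ast.parse pass over code-like string fields is one plausible shape:

    import ast

    def has_invalid_python_code(parsed) -> bool:  # hypothetical stand-in
        fields = parsed if isinstance(parsed, dict) else vars(parsed)
        for name, value in fields.items():
            if "code" in name and isinstance(value, str):
                try:
                    ast.parse(value)
                except SyntaxError:
                    return True
        return False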
1161
2299
  results.append(parsed_result)
1162
2300
 
1163
2301
  else:
1164
- # If output_pydantic was not requested, append the raw result
2302
+ # If output_pydantic/schema was not requested, append the raw result
1165
2303
  results.append(raw_result)
1166
2304
 
1167
2305
  except (AttributeError, IndexError) as e:
@@ -1244,10 +2382,40 @@ def llm_invoke(
1244
2382
  Exception) as e: # Catch generic Exception last
1245
2383
  last_exception = e
1246
2384
  error_type = type(e).__name__
2385
+ error_str = str(e)
2386
+
2387
+ # Provider-specific handling for Anthropic temperature + thinking rules.
2388
+ # Two scenarios we auto-correct:
2389
+ # 1) temperature==1 without thinking -> retry with 0.99
2390
+ # 2) thinking enabled but temperature!=1 -> retry with 1
2391
+ lower_err = error_str.lower()
2392
+ if (not temp_adjustment_done) and ("temperature" in lower_err) and ("thinking" in lower_err):
2393
+ anthropic_thinking_sent = ('thinking' in litellm_kwargs) and (provider.lower() == 'anthropic')
2394
+ # Decide direction of adjustment based on whether thinking was enabled in the call
2395
+ if anthropic_thinking_sent:
2396
+ # thinking enabled -> force temperature=1
2397
+ adjusted_temp = 1
2398
+ logger.warning(
2399
+ f"[WARN] {model_name_litellm}: Anthropic with thinking requires temperature=1. "
2400
+ f"Retrying with temperature={adjusted_temp}."
2401
+ )
2402
+ else:
2403
+ # thinking not enabled -> avoid temperature=1
2404
+ adjusted_temp = 0.99
2405
+ logger.warning(
2406
+ f"[WARN] {model_name_litellm}: Provider rejected temperature=1 without thinking. "
2407
+ f"Retrying with temperature={adjusted_temp}."
2408
+ )
2409
+ current_temperature = adjusted_temp
2410
+ temp_adjustment_done = True
2411
+ retry_with_same_model = True
2412
+ if verbose:
2413
+ logger.debug(f"Retrying {model_name_litellm} with adjusted temperature {current_temperature}")
2414
+ continue
2415
+
1247
2416
  logger.error(f"[ERROR] Invocation failed for {model_name_litellm} ({error_type}): {e}. Trying next model.")
1248
2417
  # Log more details in verbose mode
1249
2418
  if verbose:
1250
- # import traceback # Not needed if using exc_info=True
1251
2419
  logger.debug(f"Detailed exception traceback for {model_name_litellm}:", exc_info=True)
1252
2420
  break # Break inner loop, try next model candidate
1253
2421
 
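The two auto-correction scenarios listed above collapse into a small decision: if the provider error mentions both temperature and thinking, force temperature=1 when thinking was sent, otherwise back away from 1. The error strings below are illustrative, not actual provider messages:

    def adjusted_temperature(error_str: str, thinking_sent: bool, current: float):
        lower_err = error_str.lower()
        if "temperature" in lower_err and "thinking" in lower_err:
            # thinking enabled  -> Anthropic requires temperature=1
            # thinking disabled -> retry the same model with 0.99 instead of 1
            return 1 if thinking_sent else 0.99
        return current

    print(adjusted_temperature("thinking requires temperature=1", True, 0.7))              # 1
    print(adjusted_temperature("temperature may not be 1 without thinking", False, 1.0))   # 0.99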
@@ -1275,7 +2443,7 @@ if __name__ == "__main__":
1275
2443
  response = llm_invoke(
1276
2444
  prompt="Tell me a short joke about {topic}.",
1277
2445
  input_json={"topic": "programmers"},
1278
- strength=0.5, # Use base model (gpt-4.1-nano)
2446
+ strength=0.5, # Use base model (gpt-5-nano)
1279
2447
  temperature=0.7,
1280
2448
  verbose=True
1281
2449
  )
@@ -1356,7 +2524,7 @@ if __name__ == "__main__":
1356
2524
  {"role": "system", "content": "You are a helpful assistant."},
1357
2525
  {"role": "user", "content": "What is the capital of France?"}
1358
2526
  ]
1359
- # Strength 0.5 should select gpt-4.1-nano
2527
+ # Strength 0.5 should select gpt-5-nano
1360
2528
  response_messages = llm_invoke(
1361
2529
  messages=custom_messages,
1362
2530
  strength=0.5,