pdd-cli 0.0.45__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. pdd/__init__.py +4 -4
  2. pdd/agentic_common.py +863 -0
  3. pdd/agentic_crash.py +534 -0
  4. pdd/agentic_fix.py +1179 -0
  5. pdd/agentic_langtest.py +162 -0
  6. pdd/agentic_update.py +370 -0
  7. pdd/agentic_verify.py +183 -0
  8. pdd/auto_deps_main.py +15 -5
  9. pdd/auto_include.py +63 -5
  10. pdd/bug_main.py +3 -2
  11. pdd/bug_to_unit_test.py +2 -0
  12. pdd/change_main.py +11 -4
  13. pdd/cli.py +22 -1181
  14. pdd/cmd_test_main.py +73 -21
  15. pdd/code_generator.py +58 -18
  16. pdd/code_generator_main.py +672 -25
  17. pdd/commands/__init__.py +42 -0
  18. pdd/commands/analysis.py +248 -0
  19. pdd/commands/fix.py +140 -0
  20. pdd/commands/generate.py +257 -0
  21. pdd/commands/maintenance.py +174 -0
  22. pdd/commands/misc.py +79 -0
  23. pdd/commands/modify.py +230 -0
  24. pdd/commands/report.py +144 -0
  25. pdd/commands/templates.py +215 -0
  26. pdd/commands/utility.py +110 -0
  27. pdd/config_resolution.py +58 -0
  28. pdd/conflicts_main.py +8 -3
  29. pdd/construct_paths.py +258 -82
  30. pdd/context_generator.py +10 -2
  31. pdd/context_generator_main.py +113 -11
  32. pdd/continue_generation.py +47 -7
  33. pdd/core/__init__.py +0 -0
  34. pdd/core/cli.py +503 -0
  35. pdd/core/dump.py +554 -0
  36. pdd/core/errors.py +63 -0
  37. pdd/core/utils.py +90 -0
  38. pdd/crash_main.py +44 -11
  39. pdd/data/language_format.csv +71 -63
  40. pdd/data/llm_model.csv +20 -18
  41. pdd/detect_change_main.py +5 -4
  42. pdd/fix_code_loop.py +330 -76
  43. pdd/fix_error_loop.py +207 -61
  44. pdd/fix_errors_from_unit_tests.py +4 -3
  45. pdd/fix_main.py +75 -18
  46. pdd/fix_verification_errors.py +12 -100
  47. pdd/fix_verification_errors_loop.py +306 -272
  48. pdd/fix_verification_main.py +28 -9
  49. pdd/generate_output_paths.py +93 -10
  50. pdd/generate_test.py +16 -5
  51. pdd/get_jwt_token.py +9 -2
  52. pdd/get_run_command.py +73 -0
  53. pdd/get_test_command.py +68 -0
  54. pdd/git_update.py +70 -19
  55. pdd/incremental_code_generator.py +2 -2
  56. pdd/insert_includes.py +11 -3
  57. pdd/llm_invoke.py +1269 -103
  58. pdd/load_prompt_template.py +36 -10
  59. pdd/pdd_completion.fish +25 -2
  60. pdd/pdd_completion.sh +30 -4
  61. pdd/pdd_completion.zsh +79 -4
  62. pdd/postprocess.py +10 -3
  63. pdd/preprocess.py +228 -15
  64. pdd/preprocess_main.py +8 -5
  65. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  66. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  67. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  68. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  69. pdd/prompts/agentic_update_LLM.prompt +1071 -0
  70. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  71. pdd/prompts/auto_include_LLM.prompt +100 -905
  72. pdd/prompts/detect_change_LLM.prompt +122 -20
  73. pdd/prompts/example_generator_LLM.prompt +22 -1
  74. pdd/prompts/extract_code_LLM.prompt +5 -1
  75. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  76. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  77. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  78. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  79. pdd/prompts/fix_code_module_errors_LLM.prompt +4 -2
  80. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +8 -0
  81. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  82. pdd/prompts/generate_test_LLM.prompt +21 -6
  83. pdd/prompts/increase_tests_LLM.prompt +1 -5
  84. pdd/prompts/insert_includes_LLM.prompt +228 -108
  85. pdd/prompts/trace_LLM.prompt +25 -22
  86. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  87. pdd/prompts/update_prompt_LLM.prompt +22 -1
  88. pdd/pytest_output.py +127 -12
  89. pdd/render_mermaid.py +236 -0
  90. pdd/setup_tool.py +648 -0
  91. pdd/simple_math.py +2 -0
  92. pdd/split_main.py +3 -2
  93. pdd/summarize_directory.py +49 -6
  94. pdd/sync_determine_operation.py +543 -98
  95. pdd/sync_main.py +81 -31
  96. pdd/sync_orchestration.py +1334 -751
  97. pdd/sync_tui.py +848 -0
  98. pdd/template_registry.py +264 -0
  99. pdd/templates/architecture/architecture_json.prompt +242 -0
  100. pdd/templates/generic/generate_prompt.prompt +174 -0
  101. pdd/trace.py +168 -12
  102. pdd/trace_main.py +4 -3
  103. pdd/track_cost.py +151 -61
  104. pdd/unfinished_prompt.py +49 -3
  105. pdd/update_main.py +549 -67
  106. pdd/update_model_costs.py +2 -2
  107. pdd/update_prompt.py +19 -4
  108. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +19 -6
  109. pdd_cli-0.0.90.dist-info/RECORD +153 -0
  110. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
  111. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  112. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
  113. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
  114. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
pdd/llm_invoke.py CHANGED
@@ -5,6 +5,8 @@ import os
5
5
  import pandas as pd
6
6
  import litellm
7
7
  import logging # ADDED FOR DETAILED LOGGING
8
+ import importlib.resources
9
+ from litellm.caching.caching import Cache # Fix for LiteLLM v1.75.5+
8
10
 
9
11
  # --- Configure Standard Python Logging ---
10
12
  logger = logging.getLogger("pdd.llm_invoke")
@@ -24,6 +26,16 @@ litellm_logger = logging.getLogger("litellm")
24
26
  litellm_log_level = os.getenv("LITELLM_LOG_LEVEL", "WARNING" if PRODUCTION_MODE else "INFO")
25
27
  litellm_logger.setLevel(getattr(logging, litellm_log_level, logging.WARNING))
26
28
 
29
+ # Ensure LiteLLM drops provider-unsupported params instead of erroring
30
+ # This prevents failures like UnsupportedParamsError for OpenAI gpt-5-* when
31
+ # passing generic params (e.g., reasoning_effort) not accepted by that API path.
32
+ try:
33
+ _drop_params_env = os.getenv("LITELLM_DROP_PARAMS", "true")
34
+ litellm.drop_params = str(_drop_params_env).lower() in ("1", "true", "yes", "on")
35
+ except Exception:
36
+ # Be conservative: default to True even if env parsing fails
37
+ litellm.drop_params = True
38
+
27
39
  # Add a console handler if none exists
28
40
  if not logger.handlers:
29
41
  console_handler = logging.StreamHandler()
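The drop_params block added above tells LiteLLM to silently drop request parameters a provider does not accept (for example, reasoning_effort on some OpenAI endpoints) instead of raising UnsupportedParamsError. A minimal standalone sketch of the same truthy-env-flag pattern; the helper name _env_flag is illustrative, not part of the package:

    import os
    import litellm

    def _env_flag(name: str, default: str = "true") -> bool:
        # Treat the usual truthy spellings as True, anything else as False.
        return str(os.getenv(name, default)).strip().lower() in ("1", "true", "yes", "on")

    litellm.drop_params = _env_flag("LITELLM_DROP_PARAMS")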
@@ -69,7 +81,7 @@ import json
69
81
  # from rich import print as rprint # Replaced with logger
70
82
  from dotenv import load_dotenv
71
83
  from pathlib import Path
72
- from typing import Optional, Dict, List, Any, Type, Union
84
+ from typing import Optional, Dict, List, Any, Type, Union, Tuple
73
85
  from pydantic import BaseModel, ValidationError
74
86
  import openai # Import openai for exception handling as LiteLLM maps to its types
75
87
  from langchain_core.prompts import PromptTemplate
@@ -112,6 +124,22 @@ def _is_wsl_environment() -> bool:
112
124
  return False
113
125
 
114
126
 
127
+ def _openai_responses_supports_response_format() -> bool:
128
+ """Detect if current OpenAI Python SDK supports `response_format` on Responses.create.
129
+
130
+ Returns True if the installed SDK exposes a `response_format` parameter on
131
+ `openai.resources.responses.Responses.create`, else False. This avoids
132
+ sending unsupported kwargs and triggering TypeError at runtime.
133
+ """
134
+ try:
135
+ import inspect
136
+ from openai.resources.responses import Responses
137
+ sig = inspect.signature(Responses.create)
138
+ return "response_format" in sig.parameters
139
+ except Exception:
140
+ return False
141
+
142
+
115
143
  def _get_environment_info() -> Dict[str, str]:
116
144
  """
117
145
  Get environment information for debugging and error reporting.
@@ -186,16 +214,80 @@ if PROJECT_ROOT is None: # Fallback to CWD if no method succeeded
186
214
 
187
215
  ENV_PATH = PROJECT_ROOT / ".env"
188
216
  # --- Determine LLM_MODEL_CSV_PATH ---
189
- # Prioritize ~/.pdd/llm_model.csv
217
+ # Prioritize ~/.pdd/llm_model.csv, then a project .pdd reached via PDD_PATH (when it
218
+ # points at a real project rather than the installed package), then a project .pdd found from the CWD, else fall back to the packaged default.
190
219
  user_pdd_dir = Path.home() / ".pdd"
191
220
  user_model_csv_path = user_pdd_dir / "llm_model.csv"
192
221
 
222
+ def _detect_project_root_from_cwd(max_levels: int = 5) -> Path:
223
+ """Search upwards from the current working directory for common project markers.
224
+
225
+ This intentionally ignores PDD_PATH to support CLI invocations that set
226
+ PDD_PATH to the installed package location. We want to honor a real project
227
+ checkout's .pdd/llm_model.csv when running inside it.
228
+ """
229
+ try:
230
+ current_dir = Path.cwd().resolve()
231
+ for _ in range(max_levels):
232
+ if (
233
+ (current_dir / ".git").exists()
234
+ or (current_dir / "pyproject.toml").exists()
235
+ or (current_dir / "data").is_dir()
236
+ or (current_dir / ".env").exists()
237
+ ):
238
+ return current_dir
239
+ parent = current_dir.parent
240
+ if parent == current_dir:
241
+ break
242
+ current_dir = parent
243
+ except Exception:
244
+ pass
245
+ return Path.cwd().resolve()
246
+
247
+ # Resolve candidates
248
+ project_root_from_cwd = _detect_project_root_from_cwd()
249
+ project_csv_from_cwd = project_root_from_cwd / ".pdd" / "llm_model.csv"
250
+ project_csv_from_env = PROJECT_ROOT / ".pdd" / "llm_model.csv"
251
+
252
+ # Detect whether PDD_PATH points to the installed package directory. If so,
253
+ # don't prioritize it over the real project from CWD.
254
+ try:
255
+ _installed_pkg_root = importlib.resources.files('pdd')
256
+ # importlib.resources.files returns a Traversable; get a FS path string if possible
257
+ try:
258
+ _installed_pkg_root_path = Path(str(_installed_pkg_root))
259
+ except Exception:
260
+ _installed_pkg_root_path = None
261
+ except Exception:
262
+ _installed_pkg_root_path = None
263
+
264
+ def _is_env_path_package_dir(env_path: Path) -> bool:
265
+ try:
266
+ if _installed_pkg_root_path is None:
267
+ return False
268
+ env_path = env_path.resolve()
269
+ pkg_path = _installed_pkg_root_path.resolve()
270
+ # Treat equal or subpath as package dir
271
+ return env_path == pkg_path or str(env_path).startswith(str(pkg_path))
272
+ except Exception:
273
+ return False
274
+
275
+ # Selection order
193
276
  if user_model_csv_path.is_file():
194
277
  LLM_MODEL_CSV_PATH = user_model_csv_path
195
278
  logger.info(f"Using user-specific LLM model CSV: {LLM_MODEL_CSV_PATH}")
279
+ elif (not _is_env_path_package_dir(PROJECT_ROOT)) and project_csv_from_env.is_file():
280
+ # Honor an explicitly-set PDD_PATH pointing to a real project directory
281
+ LLM_MODEL_CSV_PATH = project_csv_from_env
282
+ logger.info(f"Using project-specific LLM model CSV (from PDD_PATH): {LLM_MODEL_CSV_PATH}")
283
+ elif project_csv_from_cwd.is_file():
284
+ # Otherwise, prefer the project relative to the current working directory
285
+ LLM_MODEL_CSV_PATH = project_csv_from_cwd
286
+ logger.info(f"Using project-specific LLM model CSV (from CWD): {LLM_MODEL_CSV_PATH}")
196
287
  else:
197
- LLM_MODEL_CSV_PATH = PROJECT_ROOT / "data" / "llm_model.csv"
198
- logger.info(f"Using project LLM model CSV: {LLM_MODEL_CSV_PATH}")
288
+ # Neither exists; use None as a marker that _load_model_data resolves to the package default
289
+ LLM_MODEL_CSV_PATH = None
290
+ logger.info("No local LLM model CSV found, will use package default")
199
291
  # ---------------------------------
200
292
 
201
293
  # Load environment variables from .env file
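The selection logic above resolves llm_model.csv in a fixed priority order. A condensed sketch of that order (the PDD_PATH-vs-installed-package check is omitted here, and the function name is illustrative):

    from pathlib import Path
    from typing import Optional

    def resolve_model_csv(project_root: Path, cwd_root: Path) -> Optional[Path]:
        # 1) per-user override, 2) project .pdd via PDD_PATH, 3) project .pdd
        # found from the CWD; None means "fall back to the packaged default CSV".
        for candidate in (
            Path.home() / ".pdd" / "llm_model.csv",
            project_root / ".pdd" / "llm_model.csv",
            cwd_root / ".pdd" / "llm_model.csv",
        ):
            if candidate.is_file():
                return candidate
        return None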
@@ -225,6 +317,7 @@ if GCS_HMAC_SECRET_ACCESS_KEY:
225
317
  GCS_HMAC_SECRET_ACCESS_KEY = GCS_HMAC_SECRET_ACCESS_KEY.strip()
226
318
 
227
319
  cache_configured = False
320
+ configured_cache = None # Store the configured cache instance for restoration
228
321
 
229
322
  if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
230
323
  # Store original AWS credentials before overwriting for GCS cache setup
@@ -238,12 +331,13 @@ if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
238
331
  os.environ['AWS_SECRET_ACCESS_KEY'] = GCS_HMAC_SECRET_ACCESS_KEY
239
332
  # os.environ['AWS_REGION_NAME'] = GCS_REGION_NAME # Uncomment if needed
240
333
 
241
- litellm.cache = litellm.Cache(
334
+ configured_cache = Cache(
242
335
  type="s3",
243
336
  s3_bucket_name=GCS_BUCKET_NAME,
244
337
  s3_region_name=GCS_REGION_NAME, # Pass region explicitly to cache
245
338
  s3_endpoint_url=GCS_ENDPOINT_URL,
246
339
  )
340
+ litellm.cache = configured_cache
247
341
  logger.info(f"LiteLLM cache configured for GCS bucket (S3 compatible): {GCS_BUCKET_NAME}")
248
342
  cache_configured = True
249
343
 
@@ -268,15 +362,22 @@ if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
268
362
  elif 'AWS_REGION_NAME' in os.environ:
269
363
  pass # Or just leave it if the temporary setting wasn't done/needed
270
364
 
365
+ # Check if caching is disabled via environment variable
366
+ if os.getenv("LITELLM_CACHE_DISABLE") == "1":
367
+ logger.info("LiteLLM caching disabled via LITELLM_CACHE_DISABLE=1")
368
+ litellm.cache = None
369
+ cache_configured = True
370
+
271
371
  if not cache_configured:
272
372
  try:
273
- # Try SQLite-based cache as a fallback
373
+ # Try disk-based cache as a fallback
274
374
  sqlite_cache_path = PROJECT_ROOT / "litellm_cache.sqlite"
275
- litellm.cache = litellm.Cache(type="sqlite", cache_path=str(sqlite_cache_path))
276
- logger.info(f"LiteLLM SQLite cache configured at {sqlite_cache_path}")
375
+ configured_cache = Cache(type="disk", disk_cache_dir=str(sqlite_cache_path))
376
+ litellm.cache = configured_cache
377
+ logger.info(f"LiteLLM disk cache configured at {sqlite_cache_path}")
277
378
  cache_configured = True
278
379
  except Exception as e2:
279
- warnings.warn(f"Failed to configure LiteLLM SQLite cache: {e2}. Caching is disabled.")
380
+ warnings.warn(f"Failed to configure LiteLLM disk cache: {e2}. Caching is disabled.")
280
381
  litellm.cache = None
281
382
 
282
383
  if not cache_configured:
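Cache setup now prefers the GCS (S3-compatible) cache, honors LITELLM_CACHE_DISABLE=1, and otherwise falls back to LiteLLM's disk cache. A minimal sketch of that fallback behavior, assuming a writable working directory:

    import os
    import litellm
    from litellm.caching.caching import Cache

    if os.getenv("LITELLM_CACHE_DISABLE") == "1":
        litellm.cache = None  # caching explicitly turned off
    else:
        # Disk-backed cache; the directory is created if it does not exist.
        litellm.cache = Cache(type="disk", disk_cache_dir="litellm_cache")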
@@ -314,29 +415,49 @@ def _litellm_success_callback(
314
415
  cost_val = litellm.completion_cost(completion_response=completion_response)
315
416
  calculated_cost = cost_val if cost_val is not None else 0.0
316
417
  except Exception as e1:
317
- # Attempt 2: If response object failed (e.g., missing provider in model name),
318
- # try again using explicit model from kwargs and tokens from usage.
319
- # This is often needed for batch completion items.
418
+ # Attempt 2: Compute via tokens and model mapping. If LiteLLM mapping is
419
+ # missing or API differs, fall back to CSV rates in _MODEL_RATE_MAP.
320
420
  logger.debug(f"Attempting cost calculation with fallback method: {e1}")
321
421
  try:
322
- model_name = kwargs.get("model") # Get original model name from input kwargs
422
+ model_name = kwargs.get("model")
323
423
  if model_name and usage:
324
- prompt_tokens = getattr(usage, 'prompt_tokens', 0)
325
- completion_tokens = getattr(usage, 'completion_tokens', 0)
326
- cost_val = litellm.completion_cost(
327
- model=model_name,
328
- prompt_tokens=prompt_tokens,
329
- completion_tokens=completion_tokens
330
- )
331
- calculated_cost = cost_val if cost_val is not None else 0.0
424
+ in_tok = getattr(usage, 'prompt_tokens', None)
425
+ out_tok = getattr(usage, 'completion_tokens', None)
426
+ # Some providers may use 'input_tokens'/'output_tokens'
427
+ if in_tok is None:
428
+ in_tok = getattr(usage, 'input_tokens', 0)
429
+ if out_tok is None:
430
+ out_tok = getattr(usage, 'output_tokens', 0)
431
+
432
+ # Try LiteLLM helper (arg names vary across versions)
433
+ try:
434
+ cost_val = litellm.completion_cost(
435
+ model=model_name,
436
+ prompt_tokens=in_tok,
437
+ completion_tokens=out_tok,
438
+ )
439
+ calculated_cost = cost_val if cost_val is not None else 0.0
440
+ except TypeError:
441
+ # Older/newer versions may require input/output token names
442
+ try:
443
+ cost_val = litellm.completion_cost(
444
+ model=model_name,
445
+ input_tokens=in_tok,
446
+ output_tokens=out_tok,
447
+ )
448
+ calculated_cost = cost_val if cost_val is not None else 0.0
449
+ except Exception as e3:
450
+ # Final fallback: compute using CSV rates
451
+ rates = _MODEL_RATE_MAP.get(str(model_name))
452
+ if rates is not None:
453
+ in_rate, out_rate = rates
454
+ calculated_cost = (float(in_tok or 0) * in_rate + float(out_tok or 0) * out_rate) / 1_000_000.0
455
+ else:
456
+ calculated_cost = 0.0
457
+ logger.debug(f"Cost calculation failed with LiteLLM token API; used CSV rates if available. Detail: {e3}")
332
458
  else:
333
- # If we can't get model name or usage, fallback to 0
334
459
  calculated_cost = 0.0
335
- # Optional: Log the original error e1 if needed
336
- # logger.warning(f"[Callback WARN] Failed to calculate cost with response object ({e1}) and fallback failed.")
337
460
  except Exception as e2:
338
- # Optional: Log secondary error e2 if needed
339
- # logger.warning(f"[Callback WARN] Failed to calculate cost with fallback method: {e2}")
340
461
  calculated_cost = 0.0 # Default to 0 on any error
341
462
  logger.debug(f"Cost calculation failed with fallback method: {e2}")
342
463
 
@@ -354,14 +475,108 @@ def _litellm_success_callback(
354
475
  # Register the callback with LiteLLM
355
476
  litellm.success_callback = [_litellm_success_callback]
356
477
 
478
+ # --- Cost Mapping Support (CSV Rates) ---
479
+ # Populate from CSV inside llm_invoke; used by callback fallback
480
+ _MODEL_RATE_MAP: Dict[str, Tuple[float, float]] = {}
481
+
482
+ def _set_model_rate_map(df: pd.DataFrame) -> None:
483
+ global _MODEL_RATE_MAP
484
+ try:
485
+ _MODEL_RATE_MAP = {
486
+ str(row['model']): (
487
+ float(row['input']) if pd.notna(row['input']) else 0.0,
488
+ float(row['output']) if pd.notna(row['output']) else 0.0,
489
+ )
490
+ for _, row in df.iterrows()
491
+ }
492
+ except Exception:
493
+ _MODEL_RATE_MAP = {}
494
+
357
495
  # --- Helper Functions ---
358
496
 
359
- def _load_model_data(csv_path: Path) -> pd.DataFrame:
360
- """Loads and preprocesses the LLM model data from CSV."""
361
- if not csv_path.exists():
362
- raise FileNotFoundError(f"LLM model CSV not found at {csv_path}")
497
+ def _is_malformed_json_response(content: str, threshold: int = 100) -> bool:
498
+ """
499
+ Detect if a JSON response appears malformed due to excessive trailing newlines.
500
+
501
+ This can happen when Gemini generates thousands of \n characters in a JSON string value,
502
+ causing the response to be truncated and missing closing braces.
503
+
504
+ Args:
505
+ content: The raw response content string
506
+ threshold: Number of consecutive trailing \n sequences to consider malformed
507
+
508
+ Returns:
509
+ True if the response appears malformed, False otherwise
510
+ """
511
+ if not content or not isinstance(content, str):
512
+ return False
513
+
514
+ # Check if it starts like JSON but doesn't end properly
515
+ stripped = content.strip()
516
+ if not stripped.startswith('{'):
517
+ return False
518
+
519
+ # If it ends with }, it's probably fine
520
+ if stripped.endswith('}'):
521
+ return False
522
+
523
+ # Count trailing \n sequences (escaped newlines in JSON strings)
524
+ # The pattern \n in a JSON string appears as \\n in the raw content
525
+ trailing_newline_count = 0
526
+ check_content = stripped
527
+ while check_content.endswith('\\n'):
528
+ trailing_newline_count += 1
529
+ check_content = check_content[:-2]
530
+
531
+ # If there are many trailing \n sequences, it's likely malformed
532
+ if trailing_newline_count >= threshold:
533
+ return True
534
+
535
+ # Also check for response that looks truncated mid-string
536
+ # (ends with characters that suggest we're inside a JSON string value)
537
+ if not stripped.endswith('}') and not stripped.endswith(']') and not stripped.endswith('"'):
538
+ # Could be truncated in the middle of an escaped sequence
539
+ if stripped.endswith('\\'):
540
+ return True
541
+
542
+ return False
543
+
544
+
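A quick check of the truncation heuristic above, using the default threshold of 100 trailing escaped newlines; the sample payloads are made up:

    good = '{"code": "print(1)"}'
    bad = '{"code": "x = 1' + '\\n' * 250   # no closing brace, long \\n tail

    assert _is_malformed_json_response(good) is False
    assert _is_malformed_json_response(bad) is True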
545
+ def _load_model_data(csv_path: Optional[Path]) -> pd.DataFrame:
546
+ """Loads and preprocesses the LLM model data from CSV.
547
+
548
+ Args:
549
+ csv_path: Path to CSV file, or None to use package default
550
+
551
+ Returns:
552
+ DataFrame with model configuration data
553
+ """
554
+ # If csv_path is provided, try to load from it
555
+ if csv_path is not None:
556
+ if not csv_path.exists():
557
+ logger.warning(f"Specified LLM model CSV not found at {csv_path}, trying package default")
558
+ csv_path = None
559
+ else:
560
+ try:
561
+ df = pd.read_csv(csv_path)
562
+ logger.debug(f"Loaded model data from {csv_path}")
563
+ # Continue with the rest of the function...
564
+ except Exception as e:
565
+ logger.warning(f"Failed to load CSV from {csv_path}: {e}, trying package default")
566
+ csv_path = None
567
+
568
+ # If csv_path is None or loading failed, use package default
569
+ if csv_path is None:
570
+ try:
571
+ # Use importlib.resources to load the packaged CSV
572
+ csv_data = importlib.resources.files('pdd').joinpath('data/llm_model.csv').read_text()
573
+ import io
574
+ df = pd.read_csv(io.StringIO(csv_data))
575
+ logger.info("Loaded model data from package default")
576
+ except Exception as e:
577
+ raise FileNotFoundError(f"Failed to load default LLM model CSV from package: {e}")
578
+
363
579
  try:
364
- df = pd.read_csv(csv_path)
365
580
  # Basic validation and type conversion
366
581
  required_cols = ['provider', 'model', 'input', 'output', 'coding_arena_elo', 'api_key', 'structured_output', 'reasoning_type']
367
582
  for col in required_cols:
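When no user or project CSV exists, _load_model_data now reads the copy bundled with the wheel. The same packaged-data pattern in isolation (importlib.resources.files requires Python 3.9+):

    import importlib.resources
    import io
    import pandas as pd

    csv_text = importlib.resources.files("pdd").joinpath("data/llm_model.csv").read_text()
    df = pd.read_csv(io.StringIO(csv_text))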
@@ -434,11 +649,26 @@ def _select_model_candidates(
434
649
  # Try finding base model in the *original* df in case it was filtered out
435
650
  original_base = model_df[model_df['model'] == base_model_name]
436
651
  if not original_base.empty:
437
- raise ValueError(f"Base model '{base_model_name}' found in CSV but requires API key '{original_base.iloc[0]['api_key']}' which might be missing or invalid configuration.")
438
- else:
439
- raise ValueError(f"Specified base model '{base_model_name}' not found in the LLM model CSV.")
440
-
441
- base_model = base_model_row.iloc[0]
652
+ # Base exists but may be misconfigured (e.g., missing API key). Keep erroring loudly.
653
+ raise ValueError(
654
+ f"Base model '{base_model_name}' found in CSV but requires API key '{original_base.iloc[0]['api_key']}' which might be missing or invalid configuration."
655
+ )
656
+ # Option A': Soft fallback – choose a reasonable surrogate base and continue
657
+ # Strategy (simplified and deterministic): pick the first available model
658
+ # from the CSV as the surrogate base. This mirrors typical CSV ordering
659
+ # expectations and keeps behavior predictable across environments.
660
+ try:
661
+ base_model = available_df.iloc[0]
662
+ logger.warning(
663
+ f"Base model '{base_model_name}' not found in CSV. Falling back to surrogate base '{base_model['model']}' (Option A')."
664
+ )
665
+ except Exception:
666
+ # If any unexpected error occurs during fallback, raise a clear error
667
+ raise ValueError(
668
+ f"Specified base model '{base_model_name}' not found and fallback selection failed. Check your LLM model CSV."
669
+ )
670
+ else:
671
+ base_model = base_model_row.iloc[0]
442
672
 
443
673
  # 3. Determine Target and Sort
444
674
  candidates = []
@@ -449,9 +679,10 @@ def _select_model_candidates(
449
679
  # Sort remaining by ELO descending as fallback
450
680
  available_df['sort_metric'] = -available_df['coding_arena_elo'] # Negative for descending sort
451
681
  candidates = available_df.sort_values(by='sort_metric').to_dict('records')
452
- # Ensure base model is first if it exists
453
- if any(c['model'] == base_model_name for c in candidates):
454
- candidates.sort(key=lambda x: 0 if x['model'] == base_model_name else 1)
682
+ # Ensure effective base model is first if it exists (supports surrogate base)
683
+ effective_base_name = str(base_model['model']) if isinstance(base_model, pd.Series) else base_model_name
684
+ if any(c['model'] == effective_base_name for c in candidates):
685
+ candidates.sort(key=lambda x: 0 if x['model'] == effective_base_name else 1)
455
686
  target_metric_value = f"Base Model ELO: {base_model['coding_arena_elo']}"
456
687
 
457
688
  elif strength < 0.5:
@@ -668,6 +899,378 @@ def _format_messages(prompt: str, input_data: Union[Dict[str, Any], List[Dict[st
668
899
  except Exception as e:
669
900
  raise ValueError(f"Error formatting prompt: {e}") from e
670
901
 
902
+ # --- JSON Extraction Helpers ---
903
+ import re
904
+
905
+ def _extract_fenced_json_block(text: str) -> Optional[str]:
906
+ try:
907
+ m = re.search(r"```json\s*(\{[\s\S]*?\})\s*```", text, flags=re.IGNORECASE)
908
+ if m:
909
+ return m.group(1)
910
+ return None
911
+ except Exception:
912
+ return None
913
+
914
+ def _extract_balanced_json_objects(text: str) -> List[str]:
915
+ results: List[str] = []
916
+ brace_stack = 0
917
+ start_idx = -1
918
+ in_string = False
919
+ escape = False
920
+ for i, ch in enumerate(text):
921
+ if in_string:
922
+ if escape:
923
+ escape = False
924
+ elif ch == '\\':
925
+ escape = True
926
+ elif ch == '"':
927
+ in_string = False
928
+ continue
929
+ else:
930
+ if ch == '"':
931
+ in_string = True
932
+ continue
933
+ if ch == '{':
934
+ if brace_stack == 0:
935
+ start_idx = i
936
+ brace_stack += 1
937
+ elif ch == '}':
938
+ if brace_stack > 0:
939
+ brace_stack -= 1
940
+ if brace_stack == 0 and start_idx != -1:
941
+ results.append(text[start_idx:i+1])
942
+ start_idx = -1
943
+ return results
944
+
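The brace-matching extractor above pulls complete top-level JSON objects out of surrounding prose while ignoring braces that appear inside string values. Example usage against the helper as defined here:

    text = 'Result follows:\n{"name": "a {test}", "ok": true}\ntrailing notes'
    objs = _extract_balanced_json_objects(text)
    # objs == ['{"name": "a {test}", "ok": true}']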
945
+
946
+ def _looks_like_python_code(s: str) -> bool:
947
+ """
948
+ Heuristic check if a string looks like Python code.
949
+
950
+ Used to determine if we should attempt Python syntax repair on a string field.
951
+ """
952
+ if not s or len(s) < 10:
953
+ return False
954
+ # Check for common Python patterns
955
+ code_indicators = ('def ', 'class ', 'import ', 'from ', 'if __name__', 'return ', 'print(')
956
+ return any(indicator in s for indicator in code_indicators)
957
+
958
+
959
+ def _repair_python_syntax(code: str) -> str:
960
+ """
961
+ Validate Python code syntax and attempt repairs if invalid.
962
+
963
+ Sometimes LLMs include spurious characters at string boundaries,
964
+ especially when the code contains quotes. This function attempts
965
+ to detect and repair such issues.
966
+
967
+ Args:
968
+ code: Python code string to validate/repair
969
+
970
+ Returns:
971
+ Repaired code if a fix was found, otherwise original code
972
+ """
973
+ import ast
974
+
975
+ if not code or not code.strip():
976
+ return code
977
+
978
+ # First, try to parse as-is
979
+ try:
980
+ ast.parse(code)
981
+ return code # Valid, no repair needed
982
+ except SyntaxError:
983
+ pass
984
+
985
+ # Try common repairs
986
+ repaired = code
987
+
988
+ # Repair 1: Trailing spurious quote (the specific issue we've seen)
989
+ for quote in ['"', "'"]:
990
+ if repaired.rstrip().endswith(quote):
991
+ candidate = repaired.rstrip()[:-1]
992
+ try:
993
+ ast.parse(candidate)
994
+ logger.info(f"[INFO] Repaired code by removing trailing {quote!r}")
995
+ return candidate
996
+ except SyntaxError:
997
+ pass
998
+
999
+ # Repair 2: Leading spurious quote
1000
+ for quote in ['"', "'"]:
1001
+ if repaired.lstrip().startswith(quote):
1002
+ candidate = repaired.lstrip()[1:]
1003
+ try:
1004
+ ast.parse(candidate)
1005
+ logger.info(f"[INFO] Repaired code by removing leading {quote!r}")
1006
+ return candidate
1007
+ except SyntaxError:
1008
+ pass
1009
+
1010
+ # Repair 3: Both leading and trailing spurious quotes
1011
+ for quote in ['"', "'"]:
1012
+ stripped = repaired.strip()
1013
+ if stripped.startswith(quote) and stripped.endswith(quote):
1014
+ candidate = stripped[1:-1]
1015
+ try:
1016
+ ast.parse(candidate)
1017
+ logger.info(f"[INFO] Repaired code by removing surrounding {quote!r}")
1018
+ return candidate
1019
+ except SyntaxError:
1020
+ pass
1021
+
1022
+ # If no repair worked, return original (let it fail downstream)
1023
+ return code
1024
+
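Example of the syntax-repair helper above on code that arrives with a spurious trailing quote, the failure mode called out in its docstring:

    broken = 'def add(a, b):\n    return a + b\n"'   # stray quote breaks ast.parse
    fixed = _repair_python_syntax(broken)
    # fixed == 'def add(a, b):\n    return a + b' (quote and trailing newline stripped)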
1025
+
1026
+ def _smart_unescape_code(code: str) -> str:
1027
+ """
1028
+ Unescape literal \\n sequences in code while preserving them inside string literals.
1029
+
1030
+ When LLMs return code as JSON, newlines get double-escaped. After JSON parsing,
1031
+ we have literal backslash-n (2 chars) that should be actual newlines for code
1032
+ structure, BUT escape sequences inside Python strings (like print("\\n")) should
1033
+ remain as escape sequences.
1034
+
1035
+ Args:
1036
+ code: Python code that may have literal \\n sequences
1037
+
1038
+ Returns:
1039
+ Code with structural newlines unescaped but string literals preserved
1040
+ """
1041
+ LITERAL_BACKSLASH_N = '\\' + 'n' # Literal \n (2 chars)
1042
+
1043
+ if LITERAL_BACKSLASH_N not in code:
1044
+ return code
1045
+
1046
+ # First, check if the code already has actual newlines (mixed state)
1047
+ # If it does, we need to be more careful
1048
+ has_actual_newlines = '\n' in code
1049
+
1050
+ if not has_actual_newlines:
1051
+ # All newlines are escaped - this is the double-escaped case
1052
+ # We need to unescape them but preserve \n inside string literals
1053
+
1054
+ # Strategy: Use a placeholder for \n inside strings, unescape all, then restore
1055
+ # We detect string literals by tracking quote state
1056
+
1057
+ result = []
1058
+ i = 0
1059
+ in_string = False
1060
+ string_char = None
1061
+ in_fstring = False
1062
+
1063
+ # Placeholder that won't appear in code
1064
+ PLACEHOLDER = '\x00NEWLINE_ESCAPE\x00'
1065
+
1066
+ while i < len(code):
1067
+ # Check for escape sequences (both actual and literal)
1068
+ if i + 1 < len(code) and code[i] == '\\':
1069
+ next_char = code[i + 1]
1070
+
1071
+ if in_string:
1072
+ # Inside a string - preserve escape sequences
1073
+ if next_char == 'n':
1074
+ result.append(PLACEHOLDER)
1075
+ i += 2
1076
+ continue
1077
+ elif next_char == 't':
1078
+ result.append('\\' + 't') # Keep \t as-is in strings
1079
+ i += 2
1080
+ continue
1081
+ elif next_char == 'r':
1082
+ result.append('\\' + 'r') # Keep \r as-is in strings
1083
+ i += 2
1084
+ continue
1085
+ elif next_char in ('"', "'", '\\'):
1086
+ # Keep escaped quotes and backslashes
1087
+ result.append(code[i:i+2])
1088
+ i += 2
1089
+ continue
1090
+
1091
+ # Check for string delimiters
1092
+ if not in_string:
1093
+ # Check for triple quotes first
1094
+ if i + 2 < len(code) and code[i:i+3] in ('"""', "'''"):
1095
+ in_string = True
1096
+ string_char = code[i:i+3]
1097
+ # Check if preceded by 'f' for f-string
1098
+ in_fstring = i > 0 and code[i-1] == 'f'
1099
+ result.append(code[i:i+3])
1100
+ i += 3
1101
+ continue
1102
+ elif code[i] in ('"', "'"):
1103
+ in_string = True
1104
+ string_char = code[i]
1105
+ in_fstring = i > 0 and code[i-1] == 'f'
1106
+ result.append(code[i])
1107
+ i += 1
1108
+ continue
1109
+ else:
1110
+ # Check for end of string
1111
+ if len(string_char) == 3: # Triple quote
1112
+ if i + 2 < len(code) and code[i:i+3] == string_char:
1113
+ in_string = False
1114
+ in_fstring = False
1115
+ result.append(code[i:i+3])
1116
+ i += 3
1117
+ continue
1118
+ else: # Single quote
1119
+ if code[i] == string_char:
1120
+ in_string = False
1121
+ in_fstring = False
1122
+ result.append(code[i])
1123
+ i += 1
1124
+ continue
1125
+
1126
+ result.append(code[i])
1127
+ i += 1
1128
+
1129
+ intermediate = ''.join(result)
1130
+
1131
+ # Now unescape all remaining \n (these are structural)
1132
+ LITERAL_BACKSLASH_R_N = '\\' + 'r' + '\\' + 'n'
1133
+ LITERAL_BACKSLASH_T = '\\' + 't'
1134
+
1135
+ intermediate = intermediate.replace(LITERAL_BACKSLASH_R_N, '\r\n')
1136
+ intermediate = intermediate.replace(LITERAL_BACKSLASH_N, '\n')
1137
+ intermediate = intermediate.replace(LITERAL_BACKSLASH_T, '\t')
1138
+
1139
+ # Restore placeholders to \n (as escape sequences in strings)
1140
+ result_code = intermediate.replace(PLACEHOLDER, '\\n')
1141
+
1142
+ return result_code
1143
+ else:
1144
+ # Mixed state - some actual newlines, some literal \n
1145
+ # This means the JSON parsing already converted some, but not all
1146
+ # The literal \n remaining are likely in strings, so leave them alone
1147
+ return code
1148
+
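The smart unescape above restores structural newlines in double-escaped code while keeping escape sequences inside string literals intact. A small illustration, assuming the helper as defined here:

    escaped = 'import os\\nprint("a\\nb")'       # literal backslash-n, no real newlines
    unescaped = _smart_unescape_code(escaped)
    # unescaped == 'import os\nprint("a\\nb")'   # structural newline restored,
    #                                            # the \n inside the string stays escaped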
1149
+
1150
+ def _unescape_code_newlines(obj: Any) -> Any:
1151
+ """
1152
+ Fix double-escaped newlines in Pydantic model string fields.
1153
+
1154
+ Some models (e.g., Gemini) return JSON with \\\\n instead of \\n in code strings,
1155
+ resulting in literal backslash-n text instead of actual newlines after JSON parsing.
1156
+ This function recursively unescapes these in string fields of Pydantic models.
1157
+
1158
+ Also repairs Python syntax errors in code-like string fields (e.g., trailing quotes).
1159
+
1160
+ The check uses literal backslash-n (2 chars) vs actual newline (1 char):
1161
+ - '\\\\n' in Python source = literal backslash + n (2 chars) - needs fixing
1162
+ - '\\n' in Python source = newline character (1 char) - already correct
1163
+
1164
+ Args:
1165
+ obj: A Pydantic model, dict, list, or primitive value
1166
+
1167
+ Returns:
1168
+ The same object with string fields unescaped and code fields repaired
1169
+ """
1170
+ if obj is None:
1171
+ return obj
1172
+
1173
+ def _process_string(s: str) -> str:
1174
+ """Process a string: unescape newlines and repair Python syntax if needed."""
1175
+ result = s
1176
+ # Smart unescape that preserves \n inside string literals
1177
+ if _looks_like_python_code(result):
1178
+ result = _smart_unescape_code(result)
1179
+ result = _repair_python_syntax(result)
1180
+ else:
1181
+ # For non-code strings, do simple unescape
1182
+ LITERAL_BACKSLASH_N = '\\' + 'n'
1183
+ LITERAL_BACKSLASH_R_N = '\\' + 'r' + '\\' + 'n'
1184
+ LITERAL_BACKSLASH_T = '\\' + 't'
1185
+ if LITERAL_BACKSLASH_N in result:
1186
+ result = result.replace(LITERAL_BACKSLASH_R_N, '\r\n')
1187
+ result = result.replace(LITERAL_BACKSLASH_N, '\n')
1188
+ result = result.replace(LITERAL_BACKSLASH_T, '\t')
1189
+ return result
1190
+
1191
+ # Handle Pydantic models
1192
+ if isinstance(obj, BaseModel):
1193
+ # Get all field values and process strings
1194
+ for field_name in obj.model_fields:
1195
+ value = getattr(obj, field_name)
1196
+ if isinstance(value, str):
1197
+ processed = _process_string(value)
1198
+ if processed != value:
1199
+ object.__setattr__(obj, field_name, processed)
1200
+ elif isinstance(value, (dict, list, BaseModel)):
1201
+ _unescape_code_newlines(value)
1202
+ return obj
1203
+
1204
+ # Handle dicts
1205
+ if isinstance(obj, dict):
1206
+ for key, value in obj.items():
1207
+ if isinstance(value, str):
1208
+ obj[key] = _process_string(value)
1209
+ elif isinstance(value, (dict, list)):
1210
+ _unescape_code_newlines(value)
1211
+ return obj
1212
+
1213
+ # Handle lists
1214
+ if isinstance(obj, list):
1215
+ for i, item in enumerate(obj):
1216
+ if isinstance(item, str):
1217
+ obj[i] = _process_string(item)
1218
+ elif isinstance(item, (dict, list, BaseModel)):
1219
+ _unescape_code_newlines(item)
1220
+ return obj
1221
+
1222
+ return obj
1223
+
1224
+
1225
+ def _has_invalid_python_code(obj: Any) -> bool:
1226
+ """
1227
+ Check if any code-like string fields have invalid Python syntax.
1228
+
1229
+ This is used after _unescape_code_newlines to detect if repair failed
1230
+ and we should retry with cache disabled.
1231
+
1232
+ Args:
1233
+ obj: A Pydantic model, dict, list, or primitive value
1234
+
1235
+ Returns:
1236
+ True if there are invalid code fields that couldn't be repaired
1237
+ """
1238
+ import ast
1239
+
1240
+ if obj is None:
1241
+ return False
1242
+
1243
+ if isinstance(obj, str):
1244
+ if _looks_like_python_code(obj):
1245
+ try:
1246
+ ast.parse(obj)
1247
+ return False # Valid
1248
+ except SyntaxError:
1249
+ return True # Invalid
1250
+ return False
1251
+
1252
+ if isinstance(obj, BaseModel):
1253
+ for field_name in obj.model_fields:
1254
+ value = getattr(obj, field_name)
1255
+ if _has_invalid_python_code(value):
1256
+ return True
1257
+ return False
1258
+
1259
+ if isinstance(obj, dict):
1260
+ for value in obj.values():
1261
+ if _has_invalid_python_code(value):
1262
+ return True
1263
+ return False
1264
+
1265
+ if isinstance(obj, list):
1266
+ for item in obj:
1267
+ if _has_invalid_python_code(item):
1268
+ return True
1269
+ return False
1270
+
1271
+ return False
1272
+
1273
+
671
1274
  # --- Main Function ---
672
1275
 
673
1276
  def llm_invoke(
@@ -677,6 +1280,7 @@ def llm_invoke(
677
1280
  temperature: float = 0.1,
678
1281
  verbose: bool = False,
679
1282
  output_pydantic: Optional[Type[BaseModel]] = None,
1283
+ output_schema: Optional[Dict[str, Any]] = None,
680
1284
  time: float = 0.25,
681
1285
  use_batch_mode: bool = False,
682
1286
  messages: Optional[Union[List[Dict[str, str]], List[List[Dict[str, str]]]]] = None,
@@ -693,6 +1297,7 @@ def llm_invoke(
693
1297
  temperature: LLM temperature.
694
1298
  verbose: Print detailed logs.
695
1299
  output_pydantic: Optional Pydantic model for structured output.
1300
+ output_schema: Optional raw JSON schema dictionary for structured output (alternative to output_pydantic).
696
1301
  time: Relative thinking time (0-1, default 0.25).
697
1302
  use_batch_mode: Use batch completion if True.
698
1303
  messages: Pre-formatted list of messages (or list of lists for batch). If provided, ignores prompt and input_json.
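A minimal call sketch for the extended signature. The prompt, input_json, and strength arguments follow the docstring above; the Pydantic model here is illustrative, and actual behavior depends on the configured models and API keys:

    from pydantic import BaseModel
    from pdd.llm_invoke import llm_invoke

    class CodeAnswer(BaseModel):
        explanation: str
        code: str

    result = llm_invoke(
        prompt="Write a function that computes {target}.",
        input_json={"target": "the nth Fibonacci number"},
        strength=0.5,
        temperature=0.1,
        output_pydantic=CodeAnswer,   # or output_schema={...} for a raw JSON schema
    )
    answer = result["result"]          # CodeAnswer instance when parsing succeeds
    print(result["cost"], result["model_name"])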
@@ -810,6 +1415,16 @@ def llm_invoke(
810
1415
  # --- 3. Iterate Through Candidates and Invoke LLM ---
811
1416
  last_exception = None
812
1417
  newly_acquired_keys: Dict[str, bool] = {} # Track keys obtained in this run
1418
+
1419
+ # Initialize variables for retry section
1420
+ response_format = None
1421
+ time_kwargs = {}
1422
+
1423
+ # Update global rate map for callback cost fallback
1424
+ try:
1425
+ _set_model_rate_map(model_df)
1426
+ except Exception:
1427
+ pass
813
1428
 
814
1429
  for model_info in candidate_models:
815
1430
  model_name_litellm = model_info['model']
@@ -820,6 +1435,9 @@ def llm_invoke(
820
1435
  logger.info(f"\n[ATTEMPT] Trying model: {model_name_litellm} (Provider: {provider})")
821
1436
 
822
1437
  retry_with_same_model = True
1438
+ # Track per-model temperature adjustment attempt (avoid infinite loop)
1439
+ current_temperature = temperature
1440
+ temp_adjustment_done = False
823
1441
  while retry_with_same_model:
824
1442
  retry_with_same_model = False # Assume success unless auth error on new key
825
1443
 
@@ -834,7 +1452,8 @@ def llm_invoke(
834
1452
  litellm_kwargs: Dict[str, Any] = {
835
1453
  "model": model_name_litellm,
836
1454
  "messages": formatted_messages,
837
- "temperature": temperature,
1455
+ # Use a local adjustable temperature to allow provider-specific fallbacks
1456
+ "temperature": current_temperature,
838
1457
  }
839
1458
 
840
1459
  api_key_name_from_csv = model_info.get('api_key') # From CSV
@@ -847,7 +1466,14 @@ def llm_invoke(
847
1466
  if is_vertex_model and api_key_name_from_csv == 'VERTEX_CREDENTIALS':
848
1467
  credentials_file_path = os.getenv("VERTEX_CREDENTIALS") # Path from env var
849
1468
  vertex_project_env = os.getenv("VERTEX_PROJECT")
850
- vertex_location_env = os.getenv("VERTEX_LOCATION")
1469
+ # Check for per-model location override, fall back to env var
1470
+ model_location = model_info.get('location')
1471
+ if pd.notna(model_location) and str(model_location).strip():
1472
+ vertex_location_env = str(model_location).strip()
1473
+ if verbose:
1474
+ logger.info(f"[INFO] Using per-model location override: '{vertex_location_env}' for model '{model_name_litellm}'")
1475
+ else:
1476
+ vertex_location_env = os.getenv("VERTEX_LOCATION")
851
1477
 
852
1478
  if credentials_file_path and vertex_project_env and vertex_location_env:
853
1479
  try:
@@ -861,14 +1487,23 @@ def llm_invoke(
861
1487
  if verbose:
862
1488
  logger.info(f"[INFO] For Vertex AI: using vertex_credentials from '{credentials_file_path}', project '{vertex_project_env}', location '{vertex_location_env}'.")
863
1489
  except FileNotFoundError:
1490
+ # Still pass project and location so ADC can work
1491
+ litellm_kwargs["vertex_project"] = vertex_project_env
1492
+ litellm_kwargs["vertex_location"] = vertex_location_env
864
1493
  if verbose:
865
- logger.error(f"[ERROR] Vertex credentials file not found at path specified by VERTEX_CREDENTIALS env var: '{credentials_file_path}'. LiteLLM may try ADC or fail.")
1494
+ logger.warning(f"[WARN] Vertex credentials file not found at '{credentials_file_path}'. Using ADC with project '{vertex_project_env}', location '{vertex_location_env}'.")
866
1495
  except json.JSONDecodeError:
1496
+ # Still pass project and location so ADC can work
1497
+ litellm_kwargs["vertex_project"] = vertex_project_env
1498
+ litellm_kwargs["vertex_location"] = vertex_location_env
867
1499
  if verbose:
868
- logger.error(f"[ERROR] Failed to decode JSON from Vertex credentials file: '{credentials_file_path}'. Check file content. LiteLLM may try ADC or fail.")
1500
+ logger.error(f"[ERROR] Failed to decode JSON from Vertex credentials file: '{credentials_file_path}'. Using ADC with project '{vertex_project_env}', location '{vertex_location_env}'.")
869
1501
  except Exception as e:
1502
+ # Still pass project and location so ADC can work
1503
+ litellm_kwargs["vertex_project"] = vertex_project_env
1504
+ litellm_kwargs["vertex_location"] = vertex_location_env
870
1505
  if verbose:
871
- logger.error(f"[ERROR] Failed to load or process Vertex credentials from '{credentials_file_path}': {e}. LiteLLM may try ADC or fail.")
1506
+ logger.error(f"[ERROR] Failed to load Vertex credentials from '{credentials_file_path}': {e}. Using ADC with project '{vertex_project_env}', location '{vertex_location_env}'.")
872
1507
  else:
873
1508
  if verbose:
874
1509
  logger.warning(f"[WARN] For Vertex AI (using '{api_key_name_from_csv}'): One or more required environment variables (VERTEX_CREDENTIALS, VERTEX_PROJECT, VERTEX_LOCATION) are missing.")
@@ -887,9 +1522,16 @@ def llm_invoke(
887
1522
 
888
1523
  # If this model is Vertex AI AND uses a direct API key string (not VERTEX_CREDENTIALS from CSV),
889
1524
  # also pass project and location from env vars.
890
- if is_vertex_model:
1525
+ if is_vertex_model:
891
1526
  vertex_project_env = os.getenv("VERTEX_PROJECT")
892
- vertex_location_env = os.getenv("VERTEX_LOCATION")
1527
+ # Check for per-model location override, fall back to env var
1528
+ model_location = model_info.get('location')
1529
+ if pd.notna(model_location) and str(model_location).strip():
1530
+ vertex_location_env = str(model_location).strip()
1531
+ if verbose:
1532
+ logger.info(f"[INFO] Using per-model location override: '{vertex_location_env}' for model '{model_name_litellm}'")
1533
+ else:
1534
+ vertex_location_env = os.getenv("VERTEX_LOCATION")
893
1535
  if vertex_project_env and vertex_location_env:
894
1536
  litellm_kwargs["vertex_project"] = vertex_project_env
895
1537
  litellm_kwargs["vertex_location"] = vertex_location_env
@@ -903,13 +1545,36 @@ def llm_invoke(
903
1545
  elif verbose: # No api_key_name_from_csv in CSV for this model
904
1546
  logger.info(f"[INFO] No API key name specified in CSV for model '{model_name_litellm}'. LiteLLM will use its default authentication mechanisms (e.g., standard provider env vars or ADC for Vertex AI).")
905
1547
 
906
- # Add api_base if present in CSV
1548
+ # Add base_url/api_base override if present in CSV
907
1549
  api_base = model_info.get('base_url')
908
1550
  if pd.notna(api_base) and api_base:
1551
+ # LiteLLM prefers `base_url`; some older paths accept `api_base`.
1552
+ litellm_kwargs["base_url"] = str(api_base)
909
1553
  litellm_kwargs["api_base"] = str(api_base)
910
1554
 
911
- # Handle Structured Output (JSON Mode / Pydantic)
912
- if output_pydantic:
1555
+ # Provider-specific defaults (e.g., LM Studio)
1556
+ model_name_lower = str(model_name_litellm).lower()
1557
+ provider_lower_for_model = provider.lower()
1558
+ is_lm_studio = model_name_lower.startswith('lm_studio/') or provider_lower_for_model == 'lm_studio'
1559
+ is_groq = model_name_lower.startswith('groq/') or provider_lower_for_model == 'groq'
1560
+ if is_lm_studio:
1561
+ # Ensure base_url is set (fallback to env LM_STUDIO_API_BASE or localhost)
1562
+ if not litellm_kwargs.get("base_url"):
1563
+ lm_studio_base = os.getenv("LM_STUDIO_API_BASE", "http://localhost:1234/v1")
1564
+ litellm_kwargs["base_url"] = lm_studio_base
1565
+ litellm_kwargs["api_base"] = lm_studio_base
1566
+ if verbose:
1567
+ logger.info(f"[INFO] Using LM Studio base_url: {lm_studio_base}")
1568
+
1569
+ # Ensure a non-empty api_key; LM Studio accepts any non-empty token (e.g., 'lm-studio')
1570
+ if not litellm_kwargs.get("api_key"):
1571
+ lm_studio_key = os.getenv("LM_STUDIO_API_KEY") or "lm-studio"
1572
+ litellm_kwargs["api_key"] = lm_studio_key
1573
+ if verbose:
1574
+ logger.info("[INFO] Using LM Studio api_key placeholder (set LM_STUDIO_API_KEY to customize).")
1575
+
1576
+ # Handle Structured Output (JSON Mode / Pydantic / JSON Schema)
1577
+ if output_pydantic or output_schema:
913
1578
  # Check if model supports structured output based on CSV flag or LiteLLM check
914
1579
  supports_structured = model_info.get('structured_output', False)
915
1580
  # Optional: Add litellm.supports_response_schema check if CSV flag is unreliable
@@ -918,18 +1583,87 @@ def llm_invoke(
918
1583
  # except: pass # Ignore errors in supports_response_schema check
919
1584
 
920
1585
  if supports_structured:
921
- if verbose:
922
- logger.info(f"[INFO] Requesting structured output (Pydantic: {output_pydantic.__name__}) for {model_name_litellm}")
923
- # Pass the Pydantic model directly if supported, else use json_object
924
- # LiteLLM handles passing Pydantic models for supported providers
925
- litellm_kwargs["response_format"] = output_pydantic
1586
+ if output_pydantic:
1587
+ if verbose:
1588
+ logger.info(f"[INFO] Requesting structured output (Pydantic: {output_pydantic.__name__}) for {model_name_litellm}")
1589
+ # Use explicit json_object format with response_schema for better Gemini/Vertex AI compatibility
1590
+ # Passing Pydantic class directly may not trigger native structured output for all providers
1591
+ response_format = {
1592
+ "type": "json_object",
1593
+ "response_schema": output_pydantic.model_json_schema()
1594
+ }
1595
+ else: # output_schema is set
1596
+ if verbose:
1597
+ logger.info(f"[INFO] Requesting structured output (JSON Schema) for {model_name_litellm}")
1598
+ # LiteLLM expects {"type": "json_schema", "json_schema": {"name": "response", "schema": schema_dict, "strict": true}}
1599
+ # OR for some providers just the schema dict if type is json_object.
1600
+ # Best practice for broad compatibility via LiteLLM is usually the dict directly or wrapped.
1601
+ # For now, let's assume we pass the schema dict as 'response_format' which LiteLLM handles for many providers
1602
+ # or wrap it if needed. LiteLLM 1.40+ supports passing the dict directly for many.
1603
+ response_format = {
1604
+ "type": "json_schema",
1605
+ "json_schema": {
1606
+ "name": "response",
1607
+ "schema": output_schema,
1608
+ "strict": False
1609
+ }
1610
+ }
1611
+
1612
+ litellm_kwargs["response_format"] = response_format
1613
+
1614
+ # LM Studio requires "json_schema" format, not "json_object"
1615
+ # Use extra_body to bypass litellm.drop_params stripping the schema
1616
+ if is_lm_studio and response_format and response_format.get("type") == "json_object":
1617
+ schema = response_format.get("response_schema", {})
1618
+ lm_studio_response_format = {
1619
+ "type": "json_schema",
1620
+ "json_schema": {
1621
+ "name": "response",
1622
+ "strict": True,
1623
+ "schema": schema
1624
+ }
1625
+ }
1626
+ # Use extra_body to bypass drop_params - passes directly to API
1627
+ litellm_kwargs["extra_body"] = {"response_format": lm_studio_response_format}
1628
+ # Remove from regular response_format to avoid conflicts
1629
+ if "response_format" in litellm_kwargs:
1630
+ del litellm_kwargs["response_format"]
1631
+ if verbose:
1632
+ logger.info(f"[INFO] Using extra_body for LM Studio response_format to bypass drop_params")
1633
+
1634
+ # Groq has issues with tool-based structured output - use JSON mode with schema in prompt
1635
+ if is_groq and response_format:
1636
+ # Get the schema to include in system prompt
1637
+ if output_pydantic:
1638
+ schema = output_pydantic.model_json_schema()
1639
+ else:
1640
+ schema = output_schema
1641
+
1642
+ # Use simple json_object mode (Groq's tool_use often fails)
1643
+ litellm_kwargs["response_format"] = {"type": "json_object"}
1644
+
1645
+ # Prepend schema instruction to messages (json module is imported at top of file)
1646
+ schema_instruction = f"You must respond with valid JSON matching this schema:\n```json\n{json.dumps(schema, indent=2)}\n```\nRespond ONLY with the JSON object, no other text."
1647
+
1648
+ # Find or create system message to prepend schema
1649
+ messages_list = litellm_kwargs.get("messages", [])
1650
+ if messages_list and messages_list[0].get("role") == "system":
1651
+ messages_list[0]["content"] = schema_instruction + "\n\n" + messages_list[0]["content"]
1652
+ else:
1653
+ messages_list.insert(0, {"role": "system", "content": schema_instruction})
1654
+ litellm_kwargs["messages"] = messages_list
1655
+
1656
+ if verbose:
1657
+ logger.info(f"[INFO] Using JSON object mode with schema in prompt for Groq (avoiding tool_use issues)")
1658
+
926
1659
  # As a fallback, one could use:
927
1660
  # litellm_kwargs["response_format"] = {"type": "json_object"}
928
1661
  # And potentially enable client-side validation:
929
1662
  # litellm.enable_json_schema_validation = True # Enable globally if needed
930
1663
  else:
1664
+ schema_name = output_pydantic.__name__ if output_pydantic else "JSON Schema"
931
1665
  if verbose:
932
- logger.warning(f"[WARN] Model {model_name_litellm} does not support structured output via CSV flag. Output might not be valid {output_pydantic.__name__}.")
1666
+ logger.warning(f"[WARN] Model {model_name_litellm} does not support structured output via CSV flag. Output might not be valid {schema_name}.")
933
1667
  # Proceed without forcing JSON mode, parsing will be attempted later
934
1668
 
935
1669
  # --- NEW REASONING LOGIC ---
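The structured-output branch above selects different response_format payloads per provider path. The two shapes it constructs, shown standalone (MyModel and my_schema are placeholders):

    # Default path (json_object plus response_schema, used for Gemini/Vertex AI):
    response_format = {
        "type": "json_object",
        "response_schema": MyModel.model_json_schema(),
    }

    # Raw-schema path (json_schema wrapper, used when output_schema is supplied):
    response_format = {
        "type": "json_schema",
        "json_schema": {"name": "response", "schema": my_schema, "strict": False},
    }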
@@ -944,7 +1678,9 @@ def llm_invoke(
944
1678
  # Currently known: Anthropic uses 'thinking'
945
1679
  # Model name comparison is more robust than provider string
946
1680
  if provider == 'anthropic': # Check provider column instead of model prefix
947
- litellm_kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
1681
+ thinking_param = {"type": "enabled", "budget_tokens": budget}
1682
+ litellm_kwargs["thinking"] = thinking_param
1683
+ time_kwargs["thinking"] = thinking_param
948
1684
  if verbose:
949
1685
  logger.info(f"[INFO] Requesting Anthropic thinking (budget type) with budget: {budget} tokens for {model_name_litellm}")
950
1686
  else:
@@ -962,10 +1698,32 @@ def llm_invoke(
962
1698
  effort = "high"
963
1699
  elif time > 0.3:
964
1700
  effort = "medium"
965
- # Use the common 'reasoning_effort' param LiteLLM provides
966
- litellm_kwargs["reasoning_effort"] = effort
967
- if verbose:
968
- logger.info(f"[INFO] Requesting reasoning_effort='{effort}' (effort type) for {model_name_litellm} based on time={time}")
1701
+
1702
+ # Map effort parameter per-provider/model family
1703
+ model_lower = str(model_name_litellm).lower()
1704
+ provider_lower = str(provider).lower()
1705
+
1706
+ if provider_lower == 'openai' and model_lower.startswith('gpt-5'):
1707
+ # OpenAI 5-series uses Responses API with nested 'reasoning'
1708
+ reasoning_obj = {"effort": effort, "summary": "auto"}
1709
+ litellm_kwargs["reasoning"] = reasoning_obj
1710
+ time_kwargs["reasoning"] = reasoning_obj
1711
+ if verbose:
1712
+ logger.info(f"[INFO] Requesting OpenAI reasoning.effort='{effort}' for {model_name_litellm} (Responses API)")
1713
+
1714
+ elif provider_lower == 'openai' and model_lower.startswith('o') and 'mini' not in model_lower:
1715
+ # Historical o* models may use LiteLLM's generic reasoning_effort param
1716
+ litellm_kwargs["reasoning_effort"] = effort
1717
+ time_kwargs["reasoning_effort"] = effort
1718
+ if verbose:
1719
+ logger.info(f"[INFO] Requesting reasoning_effort='{effort}' for {model_name_litellm}")
1720
+
1721
+ else:
1722
+ # Fallback to LiteLLM generic param when supported by provider adapter
1723
+ litellm_kwargs["reasoning_effort"] = effort
1724
+ time_kwargs["reasoning_effort"] = effort
1725
+ if verbose:
1726
+ logger.info(f"[INFO] Requesting generic reasoning_effort='{effort}' for {model_name_litellm}")
969
1727
 
970
1728
  elif reasoning_type == 'none':
971
1729
  if verbose:
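The reasoning branch above maps the relative time parameter to a provider effort level. The thresholds shown in this hunk are time > 0.7 for "high" and time > 0.3 for "medium"; the default for smaller values is not visible here and is assumed to be "low" in this sketch:

    def effort_from_time(time: float) -> str:
        if time > 0.7:
            return "high"
        if time > 0.3:
            return "medium"
        return "low"   # assumed default for time <= 0.3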
@@ -997,6 +1755,166 @@ def llm_invoke(
997
1755
  logger.debug("NOT ENABLING CACHING: litellm.cache is None at call time")
998
1756
 
999
1757
 
1758
+ # Route OpenAI gpt-5* models through Responses API to support 'reasoning'
1759
+ model_lower_for_call = str(model_name_litellm).lower()
+ provider_lower_for_call = str(provider).lower()
+
+ if (
+ not use_batch_mode
+ and provider_lower_for_call == 'openai'
+ and model_lower_for_call.startswith('gpt-5')
+ ):
+ if verbose:
+ logger.info(f"[INFO] Calling LiteLLM Responses API for {model_name_litellm}...")
+ try:
+ # Build input text from messages
+ if isinstance(formatted_messages, list) and formatted_messages and isinstance(formatted_messages[0], dict):
+ input_text = "\n\n".join(f"{m.get('role','user')}: {m.get('content','')}" for m in formatted_messages)
+ else:
+ # Fallback: string cast
+ input_text = str(formatted_messages)
+
+ # Derive effort mapping already computed in time_kwargs
+ reasoning_param = time_kwargs.get("reasoning")
+
+ # Build text.format block for structured output
+ # Default to plain text format
+ text_block = {"format": {"type": "text"}}
+
+ # If structured output requested, use text.format with json_schema
+ # This is the correct way to enforce structured output via litellm.responses()
+ if output_pydantic or output_schema:
+ try:
+ if output_pydantic:
+ schema = output_pydantic.model_json_schema()
+ name = output_pydantic.__name__
+ else:
+ schema = output_schema
+ name = "response"
+
+ # Add additionalProperties: false for strict mode (required by OpenAI)
+ schema['additionalProperties'] = False
+
+ # Use text.format with json_schema for structured output
+ text_block = {
+ "format": {
+ "type": "json_schema",
+ "name": name,
+ "strict": True,
+ "schema": schema,
+ }
+ }
+ if verbose:
+ logger.info(f"[INFO] Using structured output via text.format for Responses API")
+ except Exception as schema_e:
+ logger.warning(f"[WARN] Failed to derive JSON schema: {schema_e}. Proceeding with plain text format.")
+
+ # Build kwargs for litellm.responses()
+ responses_kwargs = {
+ "model": model_name_litellm,
+ "input": input_text,
+ "text": text_block,
+ }
+ if verbose and temperature not in (None, 0, 0.0):
+ logger.info("[INFO] Skipping 'temperature' for OpenAI GPT-5 Responses call (unsupported by API).")
+ if reasoning_param is not None:
+ responses_kwargs["reasoning"] = reasoning_param
+
+ # Call litellm.responses() which handles the API interaction
+ resp = litellm.responses(**responses_kwargs)
+
+ # Extract text result from response
+ result_text = None
+ try:
+ # LiteLLM responses return output as a list of items
+ for item in resp.output:
+ if getattr(item, 'type', None) == 'message' and hasattr(item, 'content') and item.content:
+ for content_item in item.content:
+ if hasattr(content_item, 'text'):
+ result_text = content_item.text
+ break
+ if result_text:
+ break
+ except Exception:
+ result_text = None
+
+ # Calculate cost using usage + CSV rates
+ total_cost = 0.0
+ usage = getattr(resp, "usage", None)
+ if usage is not None:
+ in_tok = getattr(usage, "input_tokens", 0) or 0
+ out_tok = getattr(usage, "output_tokens", 0) or 0
+ in_rate = model_info.get('input', 0.0) or 0.0
+ out_rate = model_info.get('output', 0.0) or 0.0
+ total_cost = (in_tok * in_rate + out_tok * out_rate) / 1_000_000.0
+
+ # Parse result if Pydantic output requested
+ final_result = None
+ if output_pydantic and result_text:
+ try:
+ final_result = output_pydantic.model_validate_json(result_text)
+ except Exception as e:
+ # With structured output, parsing should succeed
+ # But if it fails, try JSON repair as fallback
+ logger.warning(f"[WARN] Pydantic parse failed on Responses output: {e}. Attempting JSON repair...")
+
+ # Try extracting from fenced JSON blocks first
+ fenced = _extract_fenced_json_block(result_text)
+ candidates: List[str] = []
+ if fenced:
+ candidates.append(fenced)
+ else:
+ candidates.extend(_extract_balanced_json_objects(result_text))
+
+ # Also try the raw text as-is after stripping fences
+ cleaned = result_text.strip()
+ if cleaned.startswith("```json"):
+ cleaned = cleaned[7:]
+ elif cleaned.startswith("```"):
+ cleaned = cleaned[3:]
+ if cleaned.endswith("```"):
+ cleaned = cleaned[:-3]
+ cleaned = cleaned.strip()
+ if cleaned and cleaned not in candidates:
+ candidates.append(cleaned)
+
+ parse_succeeded = False
+ for cand in candidates:
+ try:
+ final_result = output_pydantic.model_validate_json(cand)
+ parse_succeeded = True
+ logger.info(f"[SUCCESS] JSON repair succeeded for Responses output")
+ break
+ except Exception:
+ continue
+
+ if not parse_succeeded:
+ logger.error(f"[ERROR] All JSON repair attempts failed for Responses output. Original error: {e}")
+ final_result = f"ERROR: Failed to parse structured output from Responses API. Raw: {repr(result_text)[:200]}"
+ else:
+ final_result = result_text
+
+ if verbose:
+ logger.info(f"[RESULT] Model Used: {model_name_litellm}")
+ logger.info(f"[RESULT] Total Cost (estimated): ${total_cost:.6g}")
+
+ return {
+ 'result': final_result,
+ 'cost': total_cost,
+ 'model_name': model_name_litellm,
+ 'thinking_output': None,
+ }
+ except Exception as e:
+ last_exception = e
+ logger.error(f"[ERROR] OpenAI Responses call failed for {model_name_litellm}: {e}")
+ # Remove 'reasoning' key to avoid OpenAI Chat API unknown param errors
+ if "reasoning" in litellm_kwargs:
+ try:
+ litellm_kwargs.pop("reasoning", None)
+ except Exception:
+ pass
+ # Fall through to LiteLLM path as a fallback
+
  if use_batch_mode:
  if verbose:
  logger.info(f"[INFO] Calling litellm.batch_completion for {model_name_litellm}...")
@@ -1004,6 +1922,16 @@ def llm_invoke(


  else:
+ # Anthropic requirement: when 'thinking' is enabled, temperature must be 1
+ try:
+ if provider.lower() == 'anthropic' and 'thinking' in litellm_kwargs:
+ if litellm_kwargs.get('temperature') != 1:
+ if verbose:
+ logger.info("[INFO] Anthropic thinking enabled: forcing temperature=1 for compliance.")
+ litellm_kwargs['temperature'] = 1
+ current_temperature = 1
+ except Exception:
+ pass
  if verbose:
  logger.info(f"[INFO] Calling litellm.completion for {model_name_litellm}...")
  response = litellm.completion(**litellm_kwargs)
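The guard added above encodes a single provider rule; a minimal sketch isolating it (the helper name is illustrative, not part of the module):

def apply_anthropic_thinking_rule(provider: str, completion_kwargs: dict) -> dict:
    # Anthropic rejects extended 'thinking' unless temperature is exactly 1,
    # so force it before the call rather than waiting for the API error.
    if provider.lower() == "anthropic" and "thinking" in completion_kwargs:
        completion_kwargs["temperature"] = 1
    return completion_kwargs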
@@ -1061,13 +1989,12 @@ def llm_invoke(
  retry_response = litellm.completion(
  model=model_name_litellm,
  messages=retry_messages,
- temperature=temperature,
+ temperature=current_temperature,
  response_format=response_format,
- max_completion_tokens=max_tokens,
  **time_kwargs
  )
- # Re-enable cache
- litellm.cache = Cache()
+ # Re-enable cache - restore original configured cache (restore to original state, even if None)
+ litellm.cache = configured_cache
  # Extract result from retry
  retry_raw_result = retry_response.choices[0].message.content
  if retry_raw_result is not None:
@@ -1085,21 +2012,66 @@ def llm_invoke(
  logger.error(f"[ERROR] Cannot retry - batch mode or missing prompt/input_json")
  results.append("ERROR: LLM returned None content and cannot retry")
  continue
-
- if output_pydantic:
+
+ # Check for malformed JSON response (excessive trailing newlines causing truncation)
+ # This can happen when Gemini generates thousands of \n in JSON string values
+ if isinstance(raw_result, str) and _is_malformed_json_response(raw_result):
+ logger.warning(f"[WARNING] Detected malformed JSON response with excessive trailing newlines for item {i}. Retrying with cache bypass...")
+ if not use_batch_mode and prompt and input_json is not None:
+ # Add a small space to bypass cache
+ modified_prompt = prompt + " "
+ try:
+ retry_messages = _format_messages(modified_prompt, input_json, use_batch_mode)
+ # Disable cache for retry
+ original_cache = litellm.cache
+ litellm.cache = None
+ retry_response = litellm.completion(
+ model=model_name_litellm,
+ messages=retry_messages,
+ temperature=current_temperature,
+ response_format=response_format,
+ **time_kwargs
+ )
+ # Re-enable cache
+ litellm.cache = original_cache
+ # Extract result from retry
+ retry_raw_result = retry_response.choices[0].message.content
+ if retry_raw_result is not None and not _is_malformed_json_response(retry_raw_result):
+ logger.info(f"[SUCCESS] Cache bypass retry for malformed JSON succeeded for item {i}")
+ raw_result = retry_raw_result
+ else:
+ # Retry also failed, but we'll continue with repair logic below
+ logger.warning(f"[WARNING] Cache bypass retry also returned malformed JSON for item {i}, attempting repair...")
+ except Exception as retry_e:
+ logger.warning(f"[WARNING] Cache bypass retry for malformed JSON failed for item {i}: {retry_e}, attempting repair...")
+ else:
+ logger.warning(f"[WARNING] Cannot retry malformed JSON - batch mode or missing prompt/input_json, attempting repair...")
+
+ if output_pydantic or output_schema:
  parsed_result = None
  json_string_to_parse = None

  try:
- # Attempt 1: Check if LiteLLM already parsed it
- if isinstance(raw_result, output_pydantic):
+ # Attempt 1: Check if LiteLLM already parsed it (only for Pydantic)
+ if output_pydantic and isinstance(raw_result, output_pydantic):
  parsed_result = raw_result
  if verbose:
  logger.debug("[DEBUG] Pydantic object received directly from LiteLLM.")

  # Attempt 2: Check if raw_result is dict-like and validate
  elif isinstance(raw_result, dict):
- parsed_result = output_pydantic.model_validate(raw_result)
+ if output_pydantic:
+ parsed_result = output_pydantic.model_validate(raw_result)
+ else:
+ # Validate against JSON schema
+ try:
+ import jsonschema
+ jsonschema.validate(instance=raw_result, schema=output_schema)
+ parsed_result = json.dumps(raw_result) # Return as JSON string for consistency
+ except ImportError:
+ logger.warning("jsonschema not installed, skipping validation")
+ parsed_result = json.dumps(raw_result)
+
  if verbose:
  logger.debug("[DEBUG] Validated dictionary-like object directly.")

@@ -1107,26 +2079,59 @@ def llm_invoke(
  elif isinstance(raw_result, str):
  json_string_to_parse = raw_result # Start with the raw string
  try:
- # Look for first { and last }
- start_brace = json_string_to_parse.find('{')
- end_brace = json_string_to_parse.rfind('}')
- if start_brace != -1 and end_brace != -1 and end_brace > start_brace:
- potential_json = json_string_to_parse[start_brace:end_brace+1]
- # Basic check if it looks like JSON
- if potential_json.strip().startswith('{') and potential_json.strip().endswith('}'):
- if verbose:
- logger.debug(f"[DEBUG] Attempting to parse extracted JSON block: '{potential_json}'")
- parsed_result = output_pydantic.model_validate_json(potential_json)
- else:
- # If block extraction fails, try cleaning markdown next
- raise ValueError("Extracted block doesn't look like JSON")
+ # 1) Prefer fenced ```json blocks
+ fenced = _extract_fenced_json_block(raw_result)
+ candidates: List[str] = []
+ if fenced:
+ candidates.append(fenced)
  else:
- # If no braces found, try cleaning markdown next
- raise ValueError("Could not find enclosing {}")
- except (json.JSONDecodeError, ValidationError, ValueError) as extraction_error:
+ # 2) Fall back to scanning for balanced JSON objects
+ candidates.extend(_extract_balanced_json_objects(raw_result))
+
+ if not candidates:
+ raise ValueError("No JSON-like content found")
+
+ parse_err: Optional[Exception] = None
+ for cand in candidates:
+ try:
+ if verbose:
+ logger.debug(f"[DEBUG] Attempting to parse candidate JSON block: {cand}")
+
+ if output_pydantic:
+ parsed_result = output_pydantic.model_validate_json(cand)
+ else:
+ # Parse JSON and validate against schema
+ loaded = json.loads(cand)
+ try:
+ import jsonschema
+ jsonschema.validate(instance=loaded, schema=output_schema)
+ except ImportError:
+ pass # Skip validation if lib missing
+ parsed_result = cand # Return string if valid
+
+ json_string_to_parse = cand
+ parse_err = None
+ break
+ except (json.JSONDecodeError, ValidationError, ValueError) as pe:
+ # Also catch jsonschema.ValidationError if imported
+ parse_err = pe
+ try:
+ import jsonschema
+ if isinstance(pe, jsonschema.ValidationError):
+ parse_err = pe
+ except ImportError:
+ pass
+
+ if parsed_result is None:
+ # If none of the candidates parsed, raise last error
+ if parse_err is not None:
+ raise parse_err
+ raise ValueError("Unable to parse any JSON candidates")
+ except (json.JSONDecodeError, ValidationError, ValueError, Exception) as extraction_error:
+ # Catch generic Exception to handle jsonschema errors without explicit import here
  if verbose:
- logger.debug(f"[DEBUG] JSON block extraction/validation failed ('{extraction_error}'). Trying markdown cleaning.")
- # Fallback: Clean markdown fences and retry JSON validation
+ logger.debug(f"[DEBUG] JSON extraction/validation failed ('{extraction_error}'). Trying fence cleaning.")
+ # Last resort: strip any leading/trailing code fences and retry
  cleaned_result_str = raw_result.strip()
  if cleaned_result_str.startswith("```json"):
  cleaned_result_str = cleaned_result_str[7:]
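The candidate-based parsing introduced above (fenced block first, then balanced objects, each validated in turn) follows roughly the pattern below; this self-contained sketch substitutes a simple regex for the module's private _extract_fenced_json_block/_extract_balanced_json_objects helpers, so it is illustrative rather than the package's actual code:

import json
import re
from typing import Optional

def parse_first_valid_json(raw: str, schema: Optional[dict] = None) -> Optional[str]:
    # Gather candidates: a fenced ```json block if present, then the raw text itself.
    candidates = []
    fenced = re.search(r"```json\s*(.*?)```", raw, re.DOTALL)
    if fenced:
        candidates.append(fenced.group(1).strip())
    candidates.append(raw.strip())

    for cand in candidates:
        try:
            loaded = json.loads(cand)
        except json.JSONDecodeError:
            continue
        if schema is not None:
            try:
                import jsonschema
                jsonschema.validate(instance=loaded, schema=schema)
            except ImportError:
                pass  # mirror llm_invoke: skip validation when jsonschema is absent
            except Exception:
                continue  # validation failed; try the next candidate
        return cand  # keep the string form, as the output_schema path does
    return None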
@@ -1135,35 +2140,166 @@ def llm_invoke(
  if cleaned_result_str.endswith("```"):
  cleaned_result_str = cleaned_result_str[:-3]
  cleaned_result_str = cleaned_result_str.strip()
- # Check again if it looks like JSON before parsing
- if cleaned_result_str.startswith('{') and cleaned_result_str.endswith('}'):
+ # Check for complete JSON object or array
+ is_complete_object = cleaned_result_str.startswith('{') and cleaned_result_str.endswith('}')
+ is_complete_array = cleaned_result_str.startswith('[') and cleaned_result_str.endswith(']')
+ if is_complete_object or is_complete_array:
  if verbose:
- logger.debug(f"[DEBUG] Attempting parse after cleaning markdown fences. Cleaned string: '{cleaned_result_str}'")
- json_string_to_parse = cleaned_result_str # Update string for error reporting
- parsed_result = output_pydantic.model_validate_json(json_string_to_parse)
+ logger.debug(f"[DEBUG] Attempting parse after generic fence cleaning. Cleaned string: '{cleaned_result_str}'")
+ json_string_to_parse = cleaned_result_str
+
+ if output_pydantic:
+ parsed_result = output_pydantic.model_validate_json(json_string_to_parse)
+ else:
+ loaded = json.loads(json_string_to_parse)
+ try:
+ import jsonschema
+ jsonschema.validate(instance=loaded, schema=output_schema)
+ except ImportError:
+ pass
+ parsed_result = json_string_to_parse
+ elif cleaned_result_str.startswith('{') or cleaned_result_str.startswith('['):
+ # Attempt to repair truncated JSON (e.g., missing closing braces)
+ # This can happen when Gemini generates excessive trailing content
+ # that causes token limit truncation
+ if verbose:
+ logger.debug(f"[DEBUG] JSON appears truncated (missing closing brace). Attempting repair.")
+
+ # Try to find the last valid JSON structure
+ # For simple schemas like {"extracted_code": "..."}, we can try to close it
+ repaired = cleaned_result_str.rstrip()
+
+ # Strip trailing escaped newline sequences (\\n in the JSON string)
+ # These appear as literal backslash-n when Gemini generates excessive newlines
+ while repaired.endswith('\\n'):
+ repaired = repaired[:-2]
+ # Also strip trailing literal backslashes that might be orphaned
+ repaired = repaired.rstrip('\\')
+
+ # If we're in the middle of a string value, try to close it
+ # Count unescaped quotes to determine if we're inside a string
+ # Simple heuristic: if it ends without proper closure, add closing
+ is_array = cleaned_result_str.startswith('[')
+ expected_end = ']' if is_array else '}'
+ if not repaired.endswith(expected_end):
+ # Try adding various closures to repair
+ if is_array:
+ repair_attempts = [
+ repaired + '}]', # Close object and array
+ repaired + '"}]', # Close string, object and array
+ repaired + '"}}]', # Close string, nested object and array
+ repaired.rstrip(',') + ']', # Remove trailing comma and close array
+ repaired.rstrip('"') + '"}]', # Handle partial string end
+ ]
+ else:
+ repair_attempts = [
+ repaired + '"}', # Close string and object
+ repaired + '"}\n}', # Close string and nested object
+ repaired + '"}}}', # Deeper nesting
+ repaired.rstrip(',') + '}', # Remove trailing comma
+ repaired.rstrip('"') + '"}', # Handle partial string end
+ ]
+
+ for attempt in repair_attempts:
+ try:
+ if output_pydantic:
+ parsed_result = output_pydantic.model_validate_json(attempt)
+ else:
+ loaded = json.loads(attempt)
+ try:
+ import jsonschema
+ jsonschema.validate(instance=loaded, schema=output_schema)
+ except ImportError:
+ pass
+ parsed_result = attempt
+
+ if verbose:
+ logger.info(f"[INFO] Successfully repaired truncated JSON response")
+ json_string_to_parse = attempt
+ break
+ except (json.JSONDecodeError, ValidationError, ValueError):
+ continue
+
+ if parsed_result is None:
+ raise ValueError("Content after cleaning doesn't look like JSON (and repair attempts failed)")
  else:
- # If still doesn't look like JSON, raise error
- raise ValueError("Content after cleaning markdown doesn't look like JSON")
+ raise ValueError("Content after cleaning doesn't look like JSON")


  # Check if any parsing attempt succeeded
  if parsed_result is None:
+ target_name = output_pydantic.__name__ if output_pydantic else "JSON Schema"
  # This case should ideally be caught by exceptions above, but as a safeguard:
- raise TypeError(f"Raw result type {type(raw_result)} or content could not be validated/parsed against {output_pydantic.__name__}.")
+ raise TypeError(f"Raw result type {type(raw_result)} or content could not be validated/parsed against {target_name}.")

- except (ValidationError, json.JSONDecodeError, TypeError, ValueError) as parse_error:
- logger.error(f"[ERROR] Failed to parse response into Pydantic model {output_pydantic.__name__} for item {i}: {parse_error}")
+ except (ValidationError, json.JSONDecodeError, TypeError, ValueError, Exception) as parse_error:
+ target_name = output_pydantic.__name__ if output_pydantic else "JSON Schema"
+ logger.error(f"[ERROR] Failed to parse response into {target_name} for item {i}: {parse_error}")
  # Use the string that was last attempted for parsing in the error message
  error_content = json_string_to_parse if json_string_to_parse is not None else raw_result
  logger.error("[ERROR] Content attempted for parsing: %s", repr(error_content)) # CORRECTED (or use f-string)
- results.append(f"ERROR: Failed to parse Pydantic. Raw: {repr(raw_result)}")
+ results.append(f"ERROR: Failed to parse structured output. Raw: {repr(raw_result)}")
  continue # Skip appending result below if parsing failed

- # If parsing succeeded, append the parsed_result
+ # Post-process: unescape newlines and repair Python syntax
+ _unescape_code_newlines(parsed_result)
+
+ # Check if code fields still have invalid Python syntax after repair
+ # If so, retry without cache to get a fresh response
+ if _has_invalid_python_code(parsed_result):
+ logger.warning(f"[WARNING] Detected invalid Python syntax in code fields for item {i} after repair. Retrying with cache bypass...")
+ if not use_batch_mode and prompt and input_json is not None:
+ # Add a small variation to bypass cache
+ modified_prompt = prompt + "  " # Two spaces to differentiate from other retries
+ try:
+ retry_messages = _format_messages(modified_prompt, input_json, use_batch_mode)
+ # Disable cache for retry
+ original_cache = litellm.cache
+ litellm.cache = None
+ retry_response = litellm.completion(
+ model=model_name_litellm,
+ messages=retry_messages,
+ temperature=current_temperature,
+ response_format=response_format,
+ **time_kwargs
+ )
+ # Re-enable cache
+ litellm.cache = original_cache
+ # Extract and re-parse the retry result
+ retry_raw_result = retry_response.choices[0].message.content
+ if retry_raw_result is not None:
+ # Re-parse the retry result
+ retry_parsed = None
+ if output_pydantic:
+ if isinstance(retry_raw_result, output_pydantic):
+ retry_parsed = retry_raw_result
+ elif isinstance(retry_raw_result, dict):
+ retry_parsed = output_pydantic.model_validate(retry_raw_result)
+ elif isinstance(retry_raw_result, str):
+ retry_parsed = output_pydantic.model_validate_json(retry_raw_result)
+ elif output_schema and isinstance(retry_raw_result, str):
+ retry_parsed = retry_raw_result # Keep as string for schema validation
+
+ if retry_parsed is not None:
+ _unescape_code_newlines(retry_parsed)
+ if not _has_invalid_python_code(retry_parsed):
+ logger.info(f"[SUCCESS] Cache bypass retry for invalid Python code succeeded for item {i}")
+ parsed_result = retry_parsed
+ else:
+ logger.warning(f"[WARNING] Cache bypass retry still has invalid Python code for item {i}, using original")
+ else:
+ logger.warning(f"[WARNING] Cache bypass retry returned unparseable result for item {i}")
+ else:
+ logger.warning(f"[WARNING] Cache bypass retry returned None for item {i}")
+ except Exception as retry_e:
+ logger.warning(f"[WARNING] Cache bypass retry for invalid Python code failed for item {i}: {retry_e}")
+ else:
+ logger.warning(f"[WARNING] Cannot retry invalid Python code - batch mode or missing prompt/input_json")
+
  results.append(parsed_result)

  else:
- # If output_pydantic was not requested, append the raw result
+ # If output_pydantic/schema was not requested, append the raw result
  results.append(raw_result)

  except (AttributeError, IndexError) as e:
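The two cache-bypass retries above (malformed JSON and invalid Python code) share one pattern: perturb the prompt so the cache key changes, disable LiteLLM's cache for a single call, then restore it. A condensed sketch of that pattern (the module restores the cache inline; the finally block here is just a tidier equivalent):

import litellm

def completion_without_cache(model: str, messages: list, **kwargs):
    saved_cache = litellm.cache
    litellm.cache = None  # force a fresh completion instead of a cached reply
    try:
        return litellm.completion(model=model, messages=messages, **kwargs)
    finally:
        litellm.cache = saved_cache  # always restore whatever cache was configured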
@@ -1246,10 +2382,40 @@ def llm_invoke(
  Exception) as e: # Catch generic Exception last
  last_exception = e
  error_type = type(e).__name__
+ error_str = str(e)
+
+ # Provider-specific handling for Anthropic temperature + thinking rules.
+ # Two scenarios we auto-correct:
+ # 1) temperature==1 without thinking -> retry with 0.99
+ # 2) thinking enabled but temperature!=1 -> retry with 1
+ lower_err = error_str.lower()
+ if (not temp_adjustment_done) and ("temperature" in lower_err) and ("thinking" in lower_err):
+ anthropic_thinking_sent = ('thinking' in litellm_kwargs) and (provider.lower() == 'anthropic')
+ # Decide direction of adjustment based on whether thinking was enabled in the call
+ if anthropic_thinking_sent:
+ # thinking enabled -> force temperature=1
+ adjusted_temp = 1
+ logger.warning(
+ f"[WARN] {model_name_litellm}: Anthropic with thinking requires temperature=1. "
+ f"Retrying with temperature={adjusted_temp}."
+ )
+ else:
+ # thinking not enabled -> avoid temperature=1
+ adjusted_temp = 0.99
+ logger.warning(
+ f"[WARN] {model_name_litellm}: Provider rejected temperature=1 without thinking. "
+ f"Retrying with temperature={adjusted_temp}."
+ )
+ current_temperature = adjusted_temp
+ temp_adjustment_done = True
+ retry_with_same_model = True
+ if verbose:
+ logger.debug(f"Retrying {model_name_litellm} with adjusted temperature {current_temperature}")
+ continue
+
  logger.error(f"[ERROR] Invocation failed for {model_name_litellm} ({error_type}): {e}. Trying next model.")
  # Log more details in verbose mode
  if verbose:
- # import traceback # Not needed if using exc_info=True
  logger.debug(f"Detailed exception traceback for {model_name_litellm}:", exc_info=True)
  break # Break inner loop, try next model candidate

@@ -1277,7 +2443,7 @@ if __name__ == "__main__":
  response = llm_invoke(
  prompt="Tell me a short joke about {topic}.",
  input_json={"topic": "programmers"},
- strength=0.5, # Use base model (gpt-4.1-nano)
+ strength=0.5, # Use base model (gpt-5-nano)
  temperature=0.7,
  verbose=True
  )
@@ -1358,7 +2524,7 @@ if __name__ == "__main__":
  {"role": "system", "content": "You are a helpful assistant."},
  {"role": "user", "content": "What is the capital of France?"}
  ]
- # Strength 0.5 should select gpt-4.1-nano
+ # Strength 0.5 should select gpt-5-nano
  response_messages = llm_invoke(
  messages=custom_messages,
  strength=0.5,