pdd-cli 0.0.45__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdd/__init__.py +4 -4
- pdd/agentic_common.py +863 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_fix.py +1179 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +370 -0
- pdd/agentic_verify.py +183 -0
- pdd/auto_deps_main.py +15 -5
- pdd/auto_include.py +63 -5
- pdd/bug_main.py +3 -2
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +73 -21
- pdd/code_generator.py +58 -18
- pdd/code_generator_main.py +672 -25
- pdd/commands/__init__.py +42 -0
- pdd/commands/analysis.py +248 -0
- pdd/commands/fix.py +140 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +174 -0
- pdd/commands/misc.py +79 -0
- pdd/commands/modify.py +230 -0
- pdd/commands/report.py +144 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +258 -82
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +113 -11
- pdd/continue_generation.py +47 -7
- pdd/core/__init__.py +0 -0
- pdd/core/cli.py +503 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +63 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +44 -11
- pdd/data/language_format.csv +71 -63
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/fix_code_loop.py +330 -76
- pdd/fix_error_loop.py +207 -61
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +75 -18
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +306 -272
- pdd/fix_verification_main.py +28 -9
- pdd/generate_output_paths.py +93 -10
- pdd/generate_test.py +16 -5
- pdd/get_jwt_token.py +9 -2
- pdd/get_run_command.py +73 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +11 -3
- pdd/llm_invoke.py +1269 -103
- pdd/load_prompt_template.py +36 -10
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +10 -3
- pdd/preprocess.py +228 -15
- pdd/preprocess_main.py +8 -5
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +1071 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +100 -905
- pdd/prompts/detect_change_LLM.prompt +122 -20
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +4 -2
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +8 -0
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +21 -6
- pdd/prompts/increase_tests_LLM.prompt +1 -5
- pdd/prompts/insert_includes_LLM.prompt +228 -108
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/pytest_output.py +127 -12
- pdd/render_mermaid.py +236 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +49 -6
- pdd/sync_determine_operation.py +543 -98
- pdd/sync_main.py +81 -31
- pdd/sync_orchestration.py +1334 -751
- pdd/sync_tui.py +848 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +242 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +151 -61
- pdd/unfinished_prompt.py +49 -3
- pdd/update_main.py +549 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +19 -6
- pdd_cli-0.0.90.dist-info/RECORD +153 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.45.dist-info/RECORD +0 -116
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
pdd/llm_invoke.py
CHANGED
@@ -5,6 +5,8 @@ import os
 import pandas as pd
 import litellm
 import logging # ADDED FOR DETAILED LOGGING
+import importlib.resources
+from litellm.caching.caching import Cache # Fix for LiteLLM v1.75.5+
 
 # --- Configure Standard Python Logging ---
 logger = logging.getLogger("pdd.llm_invoke")
@@ -24,6 +26,16 @@ litellm_logger = logging.getLogger("litellm")
 litellm_log_level = os.getenv("LITELLM_LOG_LEVEL", "WARNING" if PRODUCTION_MODE else "INFO")
 litellm_logger.setLevel(getattr(logging, litellm_log_level, logging.WARNING))
 
+# Ensure LiteLLM drops provider-unsupported params instead of erroring
+# This prevents failures like UnsupportedParamsError for OpenAI gpt-5-* when
+# passing generic params (e.g., reasoning_effort) not accepted by that API path.
+try:
+    _drop_params_env = os.getenv("LITELLM_DROP_PARAMS", "true")
+    litellm.drop_params = str(_drop_params_env).lower() in ("1", "true", "yes", "on")
+except Exception:
+    # Be conservative: default to True even if env parsing fails
+    litellm.drop_params = True
+
 # Add a console handler if none exists
 if not logger.handlers:
     console_handler = logging.StreamHandler()
@@ -69,7 +81,7 @@ import json
 # from rich import print as rprint # Replaced with logger
 from dotenv import load_dotenv
 from pathlib import Path
-from typing import Optional, Dict, List, Any, Type, Union
+from typing import Optional, Dict, List, Any, Type, Union, Tuple
 from pydantic import BaseModel, ValidationError
 import openai # Import openai for exception handling as LiteLLM maps to its types
 from langchain_core.prompts import PromptTemplate
@@ -112,6 +124,22 @@ def _is_wsl_environment() -> bool:
     return False
 
 
+def _openai_responses_supports_response_format() -> bool:
+    """Detect if current OpenAI Python SDK supports `response_format` on Responses.create.
+
+    Returns True if the installed SDK exposes a `response_format` parameter on
+    `openai.resources.responses.Responses.create`, else False. This avoids
+    sending unsupported kwargs and triggering TypeError at runtime.
+    """
+    try:
+        import inspect
+        from openai.resources.responses import Responses
+        sig = inspect.signature(Responses.create)
+        return "response_format" in sig.parameters
+    except Exception:
+        return False
+
+
 def _get_environment_info() -> Dict[str, str]:
     """
     Get environment information for debugging and error reporting.
@@ -186,16 +214,80 @@ if PROJECT_ROOT is None: # Fallback to CWD if no method succeeded
 
 ENV_PATH = PROJECT_ROOT / ".env"
 # --- Determine LLM_MODEL_CSV_PATH ---
-# Prioritize ~/.pdd/llm_model.csv
+# Prioritize ~/.pdd/llm_model.csv, then a project .pdd from the current CWD,
+# then PROJECT_ROOT (which may be set from PDD_PATH), else fall back to package.
 user_pdd_dir = Path.home() / ".pdd"
 user_model_csv_path = user_pdd_dir / "llm_model.csv"
 
+def _detect_project_root_from_cwd(max_levels: int = 5) -> Path:
+    """Search upwards from the current working directory for common project markers.
+
+    This intentionally ignores PDD_PATH to support CLI invocations that set
+    PDD_PATH to the installed package location. We want to honor a real project
+    checkout's .pdd/llm_model.csv when running inside it.
+    """
+    try:
+        current_dir = Path.cwd().resolve()
+        for _ in range(max_levels):
+            if (
+                (current_dir / ".git").exists()
+                or (current_dir / "pyproject.toml").exists()
+                or (current_dir / "data").is_dir()
+                or (current_dir / ".env").exists()
+            ):
+                return current_dir
+            parent = current_dir.parent
+            if parent == current_dir:
+                break
+            current_dir = parent
+    except Exception:
+        pass
+    return Path.cwd().resolve()
+
+# Resolve candidates
+project_root_from_cwd = _detect_project_root_from_cwd()
+project_csv_from_cwd = project_root_from_cwd / ".pdd" / "llm_model.csv"
+project_csv_from_env = PROJECT_ROOT / ".pdd" / "llm_model.csv"
+
+# Detect whether PDD_PATH points to the installed package directory. If so,
+# don't prioritize it over the real project from CWD.
+try:
+    _installed_pkg_root = importlib.resources.files('pdd')
+    # importlib.resources.files returns a Traversable; get a FS path string if possible
+    try:
+        _installed_pkg_root_path = Path(str(_installed_pkg_root))
+    except Exception:
+        _installed_pkg_root_path = None
+except Exception:
+    _installed_pkg_root_path = None
+
+def _is_env_path_package_dir(env_path: Path) -> bool:
+    try:
+        if _installed_pkg_root_path is None:
+            return False
+        env_path = env_path.resolve()
+        pkg_path = _installed_pkg_root_path.resolve()
+        # Treat equal or subpath as package dir
+        return env_path == pkg_path or str(env_path).startswith(str(pkg_path))
+    except Exception:
+        return False
+
+# Selection order
 if user_model_csv_path.is_file():
     LLM_MODEL_CSV_PATH = user_model_csv_path
     logger.info(f"Using user-specific LLM model CSV: {LLM_MODEL_CSV_PATH}")
+elif (not _is_env_path_package_dir(PROJECT_ROOT)) and project_csv_from_env.is_file():
+    # Honor an explicitly-set PDD_PATH pointing to a real project directory
+    LLM_MODEL_CSV_PATH = project_csv_from_env
+    logger.info(f"Using project-specific LLM model CSV (from PDD_PATH): {LLM_MODEL_CSV_PATH}")
+elif project_csv_from_cwd.is_file():
+    # Otherwise, prefer the project relative to the current working directory
+    LLM_MODEL_CSV_PATH = project_csv_from_cwd
+    logger.info(f"Using project-specific LLM model CSV (from CWD): {LLM_MODEL_CSV_PATH}")
 else:
-
-
+    # Neither exists, we'll use a marker path that _load_model_data will handle
+    LLM_MODEL_CSV_PATH = None
+    logger.info("No local LLM model CSV found, will use package default")
 # ---------------------------------
 
 # Load environment variables from .env file
@@ -225,6 +317,7 @@ if GCS_HMAC_SECRET_ACCESS_KEY:
     GCS_HMAC_SECRET_ACCESS_KEY = GCS_HMAC_SECRET_ACCESS_KEY.strip()
 
 cache_configured = False
+configured_cache = None # Store the configured cache instance for restoration
 
 if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
     # Store original AWS credentials before overwriting for GCS cache setup
@@ -238,12 +331,13 @@ if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
     os.environ['AWS_SECRET_ACCESS_KEY'] = GCS_HMAC_SECRET_ACCESS_KEY
     # os.environ['AWS_REGION_NAME'] = GCS_REGION_NAME # Uncomment if needed
 
-
+    configured_cache = Cache(
         type="s3",
         s3_bucket_name=GCS_BUCKET_NAME,
         s3_region_name=GCS_REGION_NAME, # Pass region explicitly to cache
         s3_endpoint_url=GCS_ENDPOINT_URL,
     )
+    litellm.cache = configured_cache
     logger.info(f"LiteLLM cache configured for GCS bucket (S3 compatible): {GCS_BUCKET_NAME}")
     cache_configured = True
 
@@ -268,15 +362,22 @@ if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
     elif 'AWS_REGION_NAME' in os.environ:
         pass # Or just leave it if the temporary setting wasn't done/needed
 
+# Check if caching is disabled via environment variable
+if os.getenv("LITELLM_CACHE_DISABLE") == "1":
+    logger.info("LiteLLM caching disabled via LITELLM_CACHE_DISABLE=1")
+    litellm.cache = None
+    cache_configured = True
+
 if not cache_configured:
     try:
-        # Try
+        # Try disk-based cache as a fallback
         sqlite_cache_path = PROJECT_ROOT / "litellm_cache.sqlite"
-
-
+        configured_cache = Cache(type="disk", disk_cache_dir=str(sqlite_cache_path))
+        litellm.cache = configured_cache
+        logger.info(f"LiteLLM disk cache configured at {sqlite_cache_path}")
         cache_configured = True
     except Exception as e2:
-        warnings.warn(f"Failed to configure LiteLLM
+        warnings.warn(f"Failed to configure LiteLLM disk cache: {e2}. Caching is disabled.")
         litellm.cache = None
 
 if not cache_configured:
@@ -314,29 +415,49 @@ def _litellm_success_callback(
         cost_val = litellm.completion_cost(completion_response=completion_response)
         calculated_cost = cost_val if cost_val is not None else 0.0
     except Exception as e1:
-        # Attempt 2:
-        #
-        # This is often needed for batch completion items.
+        # Attempt 2: Compute via tokens and model mapping. If LiteLLM mapping is
+        # missing or API differs, fall back to CSV rates in _MODEL_RATE_MAP.
         logger.debug(f"Attempting cost calculation with fallback method: {e1}")
         try:
-            model_name = kwargs.get("model")
+            model_name = kwargs.get("model")
             if model_name and usage:
-
-
-
-
-
-
-
-
+                in_tok = getattr(usage, 'prompt_tokens', None)
+                out_tok = getattr(usage, 'completion_tokens', None)
+                # Some providers may use 'input_tokens'/'output_tokens'
+                if in_tok is None:
+                    in_tok = getattr(usage, 'input_tokens', 0)
+                if out_tok is None:
+                    out_tok = getattr(usage, 'output_tokens', 0)
+
+                # Try LiteLLM helper (arg names vary across versions)
+                try:
+                    cost_val = litellm.completion_cost(
+                        model=model_name,
+                        prompt_tokens=in_tok,
+                        completion_tokens=out_tok,
+                    )
+                    calculated_cost = cost_val if cost_val is not None else 0.0
+                except TypeError:
+                    # Older/newer versions may require input/output token names
+                    try:
+                        cost_val = litellm.completion_cost(
+                            model=model_name,
+                            input_tokens=in_tok,
+                            output_tokens=out_tok,
+                        )
+                        calculated_cost = cost_val if cost_val is not None else 0.0
+                    except Exception as e3:
+                        # Final fallback: compute using CSV rates
+                        rates = _MODEL_RATE_MAP.get(str(model_name))
+                        if rates is not None:
+                            in_rate, out_rate = rates
+                            calculated_cost = (float(in_tok or 0) * in_rate + float(out_tok or 0) * out_rate) / 1_000_000.0
+                        else:
+                            calculated_cost = 0.0
+                        logger.debug(f"Cost calculation failed with LiteLLM token API; used CSV rates if available. Detail: {e3}")
             else:
-                # If we can't get model name or usage, fallback to 0
                 calculated_cost = 0.0
-            # Optional: Log the original error e1 if needed
-            # logger.warning(f"[Callback WARN] Failed to calculate cost with response object ({e1}) and fallback failed.")
         except Exception as e2:
-            # Optional: Log secondary error e2 if needed
-            # logger.warning(f"[Callback WARN] Failed to calculate cost with fallback method: {e2}")
             calculated_cost = 0.0 # Default to 0 on any error
             logger.debug(f"Cost calculation failed with fallback method: {e2}")
 
@@ -354,14 +475,108 @@ def _litellm_success_callback(
 # Register the callback with LiteLLM
 litellm.success_callback = [_litellm_success_callback]
 
+# --- Cost Mapping Support (CSV Rates) ---
+# Populate from CSV inside llm_invoke; used by callback fallback
+_MODEL_RATE_MAP: Dict[str, Tuple[float, float]] = {}
+
+def _set_model_rate_map(df: pd.DataFrame) -> None:
+    global _MODEL_RATE_MAP
+    try:
+        _MODEL_RATE_MAP = {
+            str(row['model']): (
+                float(row['input']) if pd.notna(row['input']) else 0.0,
+                float(row['output']) if pd.notna(row['output']) else 0.0,
+            )
+            for _, row in df.iterrows()
+        }
+    except Exception:
+        _MODEL_RATE_MAP = {}
+
 # --- Helper Functions ---
 
-def
-    """
-    if
-
+def _is_malformed_json_response(content: str, threshold: int = 100) -> bool:
+    """
+    Detect if a JSON response appears malformed due to excessive trailing newlines.
+
+    This can happen when Gemini generates thousands of \n characters in a JSON string value,
+    causing the response to be truncated and missing closing braces.
+
+    Args:
+        content: The raw response content string
+        threshold: Number of consecutive trailing \n sequences to consider malformed
+
+    Returns:
+        True if the response appears malformed, False otherwise
+    """
+    if not content or not isinstance(content, str):
+        return False
+
+    # Check if it starts like JSON but doesn't end properly
+    stripped = content.strip()
+    if not stripped.startswith('{'):
+        return False
+
+    # If it ends with }, it's probably fine
+    if stripped.endswith('}'):
+        return False
+
+    # Count trailing \n sequences (escaped newlines in JSON strings)
+    # The pattern \n in a JSON string appears as \\n in the raw content
+    trailing_newline_count = 0
+    check_content = stripped
+    while check_content.endswith('\\n'):
+        trailing_newline_count += 1
+        check_content = check_content[:-2]
+
+    # If there are many trailing \n sequences, it's likely malformed
+    if trailing_newline_count >= threshold:
+        return True
+
+    # Also check for response that looks truncated mid-string
+    # (ends with characters that suggest we're inside a JSON string value)
+    if not stripped.endswith('}') and not stripped.endswith(']') and not stripped.endswith('"'):
+        # Could be truncated in the middle of an escaped sequence
+        if stripped.endswith('\\'):
+            return True
+
+    return False
+
+
+def _load_model_data(csv_path: Optional[Path]) -> pd.DataFrame:
+    """Loads and preprocesses the LLM model data from CSV.
+
+    Args:
+        csv_path: Path to CSV file, or None to use package default
+
+    Returns:
+        DataFrame with model configuration data
+    """
+    # If csv_path is provided, try to load from it
+    if csv_path is not None:
+        if not csv_path.exists():
+            logger.warning(f"Specified LLM model CSV not found at {csv_path}, trying package default")
+            csv_path = None
+        else:
+            try:
+                df = pd.read_csv(csv_path)
+                logger.debug(f"Loaded model data from {csv_path}")
+                # Continue with the rest of the function...
+            except Exception as e:
+                logger.warning(f"Failed to load CSV from {csv_path}: {e}, trying package default")
+                csv_path = None
+
+    # If csv_path is None or loading failed, use package default
+    if csv_path is None:
+        try:
+            # Use importlib.resources to load the packaged CSV
+            csv_data = importlib.resources.files('pdd').joinpath('data/llm_model.csv').read_text()
+            import io
+            df = pd.read_csv(io.StringIO(csv_data))
+            logger.info("Loaded model data from package default")
+        except Exception as e:
+            raise FileNotFoundError(f"Failed to load default LLM model CSV from package: {e}")
+
     try:
-        df = pd.read_csv(csv_path)
         # Basic validation and type conversion
         required_cols = ['provider', 'model', 'input', 'output', 'coding_arena_elo', 'api_key', 'structured_output', 'reasoning_type']
         for col in required_cols:
@@ -434,11 +649,26 @@ def _select_model_candidates(
         # Try finding base model in the *original* df in case it was filtered out
         original_base = model_df[model_df['model'] == base_model_name]
         if not original_base.empty:
-
-
-
-
-
+            # Base exists but may be misconfigured (e.g., missing API key). Keep erroring loudly.
+            raise ValueError(
+                f"Base model '{base_model_name}' found in CSV but requires API key '{original_base.iloc[0]['api_key']}' which might be missing or invalid configuration."
+            )
+        # Option A': Soft fallback – choose a reasonable surrogate base and continue
+        # Strategy (simplified and deterministic): pick the first available model
+        # from the CSV as the surrogate base. This mirrors typical CSV ordering
+        # expectations and keeps behavior predictable across environments.
+        try:
+            base_model = available_df.iloc[0]
+            logger.warning(
+                f"Base model '{base_model_name}' not found in CSV. Falling back to surrogate base '{base_model['model']}' (Option A')."
+            )
+        except Exception:
+            # If any unexpected error occurs during fallback, raise a clear error
+            raise ValueError(
+                f"Specified base model '{base_model_name}' not found and fallback selection failed. Check your LLM model CSV."
+            )
+    else:
+        base_model = base_model_row.iloc[0]
 
     # 3. Determine Target and Sort
     candidates = []
@@ -449,9 +679,10 @@ def _select_model_candidates(
         # Sort remaining by ELO descending as fallback
         available_df['sort_metric'] = -available_df['coding_arena_elo'] # Negative for descending sort
         candidates = available_df.sort_values(by='sort_metric').to_dict('records')
-        # Ensure base model is first if it exists
-
-
+        # Ensure effective base model is first if it exists (supports surrogate base)
+        effective_base_name = str(base_model['model']) if isinstance(base_model, pd.Series) else base_model_name
+        if any(c['model'] == effective_base_name for c in candidates):
+            candidates.sort(key=lambda x: 0 if x['model'] == effective_base_name else 1)
         target_metric_value = f"Base Model ELO: {base_model['coding_arena_elo']}"
 
     elif strength < 0.5:
@@ -668,6 +899,378 @@ def _format_messages(prompt: str, input_data: Union[Dict[str, Any], List[Dict[st
     except Exception as e:
         raise ValueError(f"Error formatting prompt: {e}") from e
 
+# --- JSON Extraction Helpers ---
+import re
+
+def _extract_fenced_json_block(text: str) -> Optional[str]:
+    try:
+        m = re.search(r"```json\s*(\{[\s\S]*?\})\s*```", text, flags=re.IGNORECASE)
+        if m:
+            return m.group(1)
+        return None
+    except Exception:
+        return None
+
+def _extract_balanced_json_objects(text: str) -> List[str]:
+    results: List[str] = []
+    brace_stack = 0
+    start_idx = -1
+    in_string = False
+    escape = False
+    for i, ch in enumerate(text):
+        if in_string:
+            if escape:
+                escape = False
+            elif ch == '\\':
+                escape = True
+            elif ch == '"':
+                in_string = False
+            continue
+        else:
+            if ch == '"':
+                in_string = True
+                continue
+            if ch == '{':
+                if brace_stack == 0:
+                    start_idx = i
+                brace_stack += 1
+            elif ch == '}':
+                if brace_stack > 0:
+                    brace_stack -= 1
+                    if brace_stack == 0 and start_idx != -1:
+                        results.append(text[start_idx:i+1])
+                        start_idx = -1
+    return results
+
+
+def _looks_like_python_code(s: str) -> bool:
+    """
+    Heuristic check if a string looks like Python code.
+
+    Used to determine if we should attempt Python syntax repair on a string field.
+    """
+    if not s or len(s) < 10:
+        return False
+    # Check for common Python patterns
+    code_indicators = ('def ', 'class ', 'import ', 'from ', 'if __name__', 'return ', 'print(')
+    return any(indicator in s for indicator in code_indicators)
+
+
+def _repair_python_syntax(code: str) -> str:
+    """
+    Validate Python code syntax and attempt repairs if invalid.
+
+    Sometimes LLMs include spurious characters at string boundaries,
+    especially when the code contains quotes. This function attempts
+    to detect and repair such issues.
+
+    Args:
+        code: Python code string to validate/repair
+
+    Returns:
+        Repaired code if a fix was found, otherwise original code
+    """
+    import ast
+
+    if not code or not code.strip():
+        return code
+
+    # First, try to parse as-is
+    try:
+        ast.parse(code)
+        return code # Valid, no repair needed
+    except SyntaxError:
+        pass
+
+    # Try common repairs
+    repaired = code
+
+    # Repair 1: Trailing spurious quote (the specific issue we've seen)
+    for quote in ['"', "'"]:
+        if repaired.rstrip().endswith(quote):
+            candidate = repaired.rstrip()[:-1]
+            try:
+                ast.parse(candidate)
+                logger.info(f"[INFO] Repaired code by removing trailing {quote!r}")
+                return candidate
+            except SyntaxError:
+                pass
+
+    # Repair 2: Leading spurious quote
+    for quote in ['"', "'"]:
+        if repaired.lstrip().startswith(quote):
+            candidate = repaired.lstrip()[1:]
+            try:
+                ast.parse(candidate)
+                logger.info(f"[INFO] Repaired code by removing leading {quote!r}")
+                return candidate
+            except SyntaxError:
+                pass
+
+    # Repair 3: Both leading and trailing spurious quotes
+    for quote in ['"', "'"]:
+        stripped = repaired.strip()
+        if stripped.startswith(quote) and stripped.endswith(quote):
+            candidate = stripped[1:-1]
+            try:
+                ast.parse(candidate)
+                logger.info(f"[INFO] Repaired code by removing surrounding {quote!r}")
+                return candidate
+            except SyntaxError:
+                pass
+
+    # If no repair worked, return original (let it fail downstream)
+    return code
+
+
+def _smart_unescape_code(code: str) -> str:
+    """
+    Unescape literal \\n sequences in code while preserving them inside string literals.
+
+    When LLMs return code as JSON, newlines get double-escaped. After JSON parsing,
+    we have literal backslash-n (2 chars) that should be actual newlines for code
+    structure, BUT escape sequences inside Python strings (like print("\\n")) should
+    remain as escape sequences.
+
+    Args:
+        code: Python code that may have literal \\n sequences
+
+    Returns:
+        Code with structural newlines unescaped but string literals preserved
+    """
+    LITERAL_BACKSLASH_N = '\\' + 'n' # Literal \n (2 chars)
+
+    if LITERAL_BACKSLASH_N not in code:
+        return code
+
+    # First, check if the code already has actual newlines (mixed state)
+    # If it does, we need to be more careful
+    has_actual_newlines = '\n' in code
+
+    if not has_actual_newlines:
+        # All newlines are escaped - this is the double-escaped case
+        # We need to unescape them but preserve \n inside string literals
+
+        # Strategy: Use a placeholder for \n inside strings, unescape all, then restore
+        # We detect string literals by tracking quote state
+
+        result = []
+        i = 0
+        in_string = False
+        string_char = None
+        in_fstring = False
+
+        # Placeholder that won't appear in code
+        PLACEHOLDER = '\x00NEWLINE_ESCAPE\x00'
+
+        while i < len(code):
+            # Check for escape sequences (both actual and literal)
+            if i + 1 < len(code) and code[i] == '\\':
+                next_char = code[i + 1]
+
+                if in_string:
+                    # Inside a string - preserve escape sequences
+                    if next_char == 'n':
+                        result.append(PLACEHOLDER)
+                        i += 2
+                        continue
+                    elif next_char == 't':
+                        result.append('\\' + 't') # Keep \t as-is in strings
+                        i += 2
+                        continue
+                    elif next_char == 'r':
+                        result.append('\\' + 'r') # Keep \r as-is in strings
+                        i += 2
+                        continue
+                    elif next_char in ('"', "'", '\\'):
+                        # Keep escaped quotes and backslashes
+                        result.append(code[i:i+2])
+                        i += 2
+                        continue
+
+            # Check for string delimiters
+            if not in_string:
+                # Check for triple quotes first
+                if i + 2 < len(code) and code[i:i+3] in ('"""', "'''"):
+                    in_string = True
+                    string_char = code[i:i+3]
+                    # Check if preceded by 'f' for f-string
+                    in_fstring = i > 0 and code[i-1] == 'f'
+                    result.append(code[i:i+3])
+                    i += 3
+                    continue
+                elif code[i] in ('"', "'"):
+                    in_string = True
+                    string_char = code[i]
+                    in_fstring = i > 0 and code[i-1] == 'f'
+                    result.append(code[i])
+                    i += 1
+                    continue
+            else:
+                # Check for end of string
+                if len(string_char) == 3: # Triple quote
+                    if i + 2 < len(code) and code[i:i+3] == string_char:
+                        in_string = False
+                        in_fstring = False
+                        result.append(code[i:i+3])
+                        i += 3
+                        continue
+                else: # Single quote
+                    if code[i] == string_char:
+                        in_string = False
+                        in_fstring = False
+                        result.append(code[i])
+                        i += 1
+                        continue
+
+            result.append(code[i])
+            i += 1
+
+        intermediate = ''.join(result)
+
+        # Now unescape all remaining \n (these are structural)
+        LITERAL_BACKSLASH_R_N = '\\' + 'r' + '\\' + 'n'
+        LITERAL_BACKSLASH_T = '\\' + 't'
+
+        intermediate = intermediate.replace(LITERAL_BACKSLASH_R_N, '\r\n')
+        intermediate = intermediate.replace(LITERAL_BACKSLASH_N, '\n')
+        intermediate = intermediate.replace(LITERAL_BACKSLASH_T, '\t')
+
+        # Restore placeholders to \n (as escape sequences in strings)
+        result_code = intermediate.replace(PLACEHOLDER, '\\n')
+
+        return result_code
+    else:
+        # Mixed state - some actual newlines, some literal \n
+        # This means the JSON parsing already converted some, but not all
+        # The literal \n remaining are likely in strings, so leave them alone
+        return code
+
+
+def _unescape_code_newlines(obj: Any) -> Any:
+    """
+    Fix double-escaped newlines in Pydantic model string fields.
+
+    Some models (e.g., Gemini) return JSON with \\\\n instead of \\n in code strings,
+    resulting in literal backslash-n text instead of actual newlines after JSON parsing.
+    This function recursively unescapes these in string fields of Pydantic models.
+
+    Also repairs Python syntax errors in code-like string fields (e.g., trailing quotes).
+
+    The check uses literal backslash-n (2 chars) vs actual newline (1 char):
+    - '\\\\n' in Python source = literal backslash + n (2 chars) - needs fixing
+    - '\\n' in Python source = newline character (1 char) - already correct
+
+    Args:
+        obj: A Pydantic model, dict, list, or primitive value
+
+    Returns:
+        The same object with string fields unescaped and code fields repaired
+    """
+    if obj is None:
+        return obj
+
+    def _process_string(s: str) -> str:
+        """Process a string: unescape newlines and repair Python syntax if needed."""
+        result = s
+        # Smart unescape that preserves \n inside string literals
+        if _looks_like_python_code(result):
+            result = _smart_unescape_code(result)
+            result = _repair_python_syntax(result)
+        else:
+            # For non-code strings, do simple unescape
+            LITERAL_BACKSLASH_N = '\\' + 'n'
+            LITERAL_BACKSLASH_R_N = '\\' + 'r' + '\\' + 'n'
+            LITERAL_BACKSLASH_T = '\\' + 't'
+            if LITERAL_BACKSLASH_N in result:
+                result = result.replace(LITERAL_BACKSLASH_R_N, '\r\n')
+                result = result.replace(LITERAL_BACKSLASH_N, '\n')
+                result = result.replace(LITERAL_BACKSLASH_T, '\t')
+        return result
+
+    # Handle Pydantic models
+    if isinstance(obj, BaseModel):
+        # Get all field values and process strings
+        for field_name in obj.model_fields:
+            value = getattr(obj, field_name)
+            if isinstance(value, str):
+                processed = _process_string(value)
+                if processed != value:
+                    object.__setattr__(obj, field_name, processed)
+            elif isinstance(value, (dict, list, BaseModel)):
+                _unescape_code_newlines(value)
+        return obj
+
+    # Handle dicts
+    if isinstance(obj, dict):
+        for key, value in obj.items():
+            if isinstance(value, str):
+                obj[key] = _process_string(value)
+            elif isinstance(value, (dict, list)):
+                _unescape_code_newlines(value)
+        return obj
+
+    # Handle lists
+    if isinstance(obj, list):
+        for i, item in enumerate(obj):
+            if isinstance(item, str):
+                obj[i] = _process_string(item)
+            elif isinstance(item, (dict, list, BaseModel)):
+                _unescape_code_newlines(item)
+        return obj
+
+    return obj
+
+
+def _has_invalid_python_code(obj: Any) -> bool:
+    """
+    Check if any code-like string fields have invalid Python syntax.
+
+    This is used after _unescape_code_newlines to detect if repair failed
+    and we should retry with cache disabled.
+
+    Args:
+        obj: A Pydantic model, dict, list, or primitive value
+
+    Returns:
+        True if there are invalid code fields that couldn't be repaired
+    """
+    import ast
+
+    if obj is None:
+        return False
+
+    if isinstance(obj, str):
+        if _looks_like_python_code(obj):
+            try:
+                ast.parse(obj)
+                return False # Valid
+            except SyntaxError:
+                return True # Invalid
+        return False
+
+    if isinstance(obj, BaseModel):
+        for field_name in obj.model_fields:
+            value = getattr(obj, field_name)
+            if _has_invalid_python_code(value):
+                return True
+        return False
+
+    if isinstance(obj, dict):
+        for value in obj.values():
+            if _has_invalid_python_code(value):
+                return True
+        return False
+
+    if isinstance(obj, list):
+        for item in obj:
+            if _has_invalid_python_code(item):
+                return True
+        return False
+
+    return False
+
+
 # --- Main Function ---
 
 def llm_invoke(
@@ -677,6 +1280,7 @@ def llm_invoke(
     temperature: float = 0.1,
     verbose: bool = False,
     output_pydantic: Optional[Type[BaseModel]] = None,
+    output_schema: Optional[Dict[str, Any]] = None,
     time: float = 0.25,
     use_batch_mode: bool = False,
     messages: Optional[Union[List[Dict[str, str]], List[List[Dict[str, str]]]]] = None,
@@ -693,6 +1297,7 @@ def llm_invoke(
         temperature: LLM temperature.
         verbose: Print detailed logs.
         output_pydantic: Optional Pydantic model for structured output.
+        output_schema: Optional raw JSON schema dictionary for structured output (alternative to output_pydantic).
         time: Relative thinking time (0-1, default 0.25).
         use_batch_mode: Use batch completion if True.
         messages: Pre-formatted list of messages (or list of lists for batch). If provided, ignores prompt and input_json.
@@ -810,6 +1415,16 @@ def llm_invoke(
     # --- 3. Iterate Through Candidates and Invoke LLM ---
     last_exception = None
     newly_acquired_keys: Dict[str, bool] = {} # Track keys obtained in this run
+
+    # Initialize variables for retry section
+    response_format = None
+    time_kwargs = {}
+
+    # Update global rate map for callback cost fallback
+    try:
+        _set_model_rate_map(model_df)
+    except Exception:
+        pass
 
     for model_info in candidate_models:
         model_name_litellm = model_info['model']
@@ -820,6 +1435,9 @@ def llm_invoke(
         logger.info(f"\n[ATTEMPT] Trying model: {model_name_litellm} (Provider: {provider})")
 
         retry_with_same_model = True
+        # Track per-model temperature adjustment attempt (avoid infinite loop)
+        current_temperature = temperature
+        temp_adjustment_done = False
         while retry_with_same_model:
             retry_with_same_model = False # Assume success unless auth error on new key
 
@@ -834,7 +1452,8 @@ def llm_invoke(
             litellm_kwargs: Dict[str, Any] = {
                 "model": model_name_litellm,
                 "messages": formatted_messages,
-
+                # Use a local adjustable temperature to allow provider-specific fallbacks
+                "temperature": current_temperature,
             }
 
             api_key_name_from_csv = model_info.get('api_key') # From CSV
@@ -847,7 +1466,14 @@ def llm_invoke(
             if is_vertex_model and api_key_name_from_csv == 'VERTEX_CREDENTIALS':
                 credentials_file_path = os.getenv("VERTEX_CREDENTIALS") # Path from env var
                 vertex_project_env = os.getenv("VERTEX_PROJECT")
-
+                # Check for per-model location override, fall back to env var
+                model_location = model_info.get('location')
+                if pd.notna(model_location) and str(model_location).strip():
+                    vertex_location_env = str(model_location).strip()
+                    if verbose:
+                        logger.info(f"[INFO] Using per-model location override: '{vertex_location_env}' for model '{model_name_litellm}'")
+                else:
+                    vertex_location_env = os.getenv("VERTEX_LOCATION")
 
                 if credentials_file_path and vertex_project_env and vertex_location_env:
                     try:
@@ -861,14 +1487,23 @@ def llm_invoke(
                         if verbose:
                             logger.info(f"[INFO] For Vertex AI: using vertex_credentials from '{credentials_file_path}', project '{vertex_project_env}', location '{vertex_location_env}'.")
                     except FileNotFoundError:
+                        # Still pass project and location so ADC can work
+                        litellm_kwargs["vertex_project"] = vertex_project_env
+                        litellm_kwargs["vertex_location"] = vertex_location_env
                         if verbose:
-                            logger.
+                            logger.warning(f"[WARN] Vertex credentials file not found at '{credentials_file_path}'. Using ADC with project '{vertex_project_env}', location '{vertex_location_env}'.")
                     except json.JSONDecodeError:
+                        # Still pass project and location so ADC can work
+                        litellm_kwargs["vertex_project"] = vertex_project_env
+                        litellm_kwargs["vertex_location"] = vertex_location_env
                         if verbose:
-                            logger.error(f"[ERROR] Failed to decode JSON from Vertex credentials file: '{credentials_file_path}'.
+                            logger.error(f"[ERROR] Failed to decode JSON from Vertex credentials file: '{credentials_file_path}'. Using ADC with project '{vertex_project_env}', location '{vertex_location_env}'.")
                     except Exception as e:
+                        # Still pass project and location so ADC can work
+                        litellm_kwargs["vertex_project"] = vertex_project_env
+                        litellm_kwargs["vertex_location"] = vertex_location_env
                         if verbose:
-                            logger.error(f"[ERROR] Failed to load
+                            logger.error(f"[ERROR] Failed to load Vertex credentials from '{credentials_file_path}': {e}. Using ADC with project '{vertex_project_env}', location '{vertex_location_env}'.")
                 else:
                     if verbose:
                         logger.warning(f"[WARN] For Vertex AI (using '{api_key_name_from_csv}'): One or more required environment variables (VERTEX_CREDENTIALS, VERTEX_PROJECT, VERTEX_LOCATION) are missing.")
@@ -887,9 +1522,16 @@ def llm_invoke(
 
             # If this model is Vertex AI AND uses a direct API key string (not VERTEX_CREDENTIALS from CSV),
             # also pass project and location from env vars.
-            if is_vertex_model:
+            if is_vertex_model:
                 vertex_project_env = os.getenv("VERTEX_PROJECT")
-
+                # Check for per-model location override, fall back to env var
+                model_location = model_info.get('location')
+                if pd.notna(model_location) and str(model_location).strip():
+                    vertex_location_env = str(model_location).strip()
+                    if verbose:
+                        logger.info(f"[INFO] Using per-model location override: '{vertex_location_env}' for model '{model_name_litellm}'")
+                else:
+                    vertex_location_env = os.getenv("VERTEX_LOCATION")
                 if vertex_project_env and vertex_location_env:
                     litellm_kwargs["vertex_project"] = vertex_project_env
                     litellm_kwargs["vertex_location"] = vertex_location_env
@@ -903,13 +1545,36 @@ def llm_invoke(
             elif verbose: # No api_key_name_from_csv in CSV for this model
                 logger.info(f"[INFO] No API key name specified in CSV for model '{model_name_litellm}'. LiteLLM will use its default authentication mechanisms (e.g., standard provider env vars or ADC for Vertex AI).")
 
-            # Add api_base if present in CSV
+            # Add base_url/api_base override if present in CSV
             api_base = model_info.get('base_url')
             if pd.notna(api_base) and api_base:
+                # LiteLLM prefers `base_url`; some older paths accept `api_base`.
+                litellm_kwargs["base_url"] = str(api_base)
                 litellm_kwargs["api_base"] = str(api_base)
 
-            #
-
+            # Provider-specific defaults (e.g., LM Studio)
+            model_name_lower = str(model_name_litellm).lower()
+            provider_lower_for_model = provider.lower()
+            is_lm_studio = model_name_lower.startswith('lm_studio/') or provider_lower_for_model == 'lm_studio'
+            is_groq = model_name_lower.startswith('groq/') or provider_lower_for_model == 'groq'
+            if is_lm_studio:
+                # Ensure base_url is set (fallback to env LM_STUDIO_API_BASE or localhost)
+                if not litellm_kwargs.get("base_url"):
+                    lm_studio_base = os.getenv("LM_STUDIO_API_BASE", "http://localhost:1234/v1")
+                    litellm_kwargs["base_url"] = lm_studio_base
+                    litellm_kwargs["api_base"] = lm_studio_base
+                    if verbose:
+                        logger.info(f"[INFO] Using LM Studio base_url: {lm_studio_base}")
+
+                # Ensure a non-empty api_key; LM Studio accepts any non-empty token (e.g., 'lm-studio')
+                if not litellm_kwargs.get("api_key"):
+                    lm_studio_key = os.getenv("LM_STUDIO_API_KEY") or "lm-studio"
+                    litellm_kwargs["api_key"] = lm_studio_key
+                    if verbose:
+                        logger.info("[INFO] Using LM Studio api_key placeholder (set LM_STUDIO_API_KEY to customize).")
+
+            # Handle Structured Output (JSON Mode / Pydantic / JSON Schema)
+            if output_pydantic or output_schema:
                 # Check if model supports structured output based on CSV flag or LiteLLM check
                 supports_structured = model_info.get('structured_output', False)
                 # Optional: Add litellm.supports_response_schema check if CSV flag is unreliable
@@ -918,18 +1583,87 @@ def llm_invoke(
                 # except: pass # Ignore errors in supports_response_schema check
 
                 if supports_structured:
-                    if
-
-
-
-
+                    if output_pydantic:
+                        if verbose:
+                            logger.info(f"[INFO] Requesting structured output (Pydantic: {output_pydantic.__name__}) for {model_name_litellm}")
+                        # Use explicit json_object format with response_schema for better Gemini/Vertex AI compatibility
+                        # Passing Pydantic class directly may not trigger native structured output for all providers
+                        response_format = {
+                            "type": "json_object",
+                            "response_schema": output_pydantic.model_json_schema()
+                        }
+                    else: # output_schema is set
+                        if verbose:
+                            logger.info(f"[INFO] Requesting structured output (JSON Schema) for {model_name_litellm}")
+                        # LiteLLM expects {"type": "json_schema", "json_schema": {"name": "response", "schema": schema_dict, "strict": true}}
+                        # OR for some providers just the schema dict if type is json_object.
+                        # Best practice for broad compatibility via LiteLLM is usually the dict directly or wrapped.
+                        # For now, let's assume we pass the schema dict as 'response_format' which LiteLLM handles for many providers
+                        # or wrap it if needed. LiteLLM 1.40+ supports passing the dict directly for many.
+                        response_format = {
+                            "type": "json_schema",
+                            "json_schema": {
+                                "name": "response",
+                                "schema": output_schema,
+                                "strict": False
+                            }
+                        }
+
+                    litellm_kwargs["response_format"] = response_format
+
+                    # LM Studio requires "json_schema" format, not "json_object"
+                    # Use extra_body to bypass litellm.drop_params stripping the schema
+                    if is_lm_studio and response_format and response_format.get("type") == "json_object":
+                        schema = response_format.get("response_schema", {})
+                        lm_studio_response_format = {
+                            "type": "json_schema",
+                            "json_schema": {
+                                "name": "response",
+                                "strict": True,
+                                "schema": schema
+                            }
+                        }
+                        # Use extra_body to bypass drop_params - passes directly to API
+                        litellm_kwargs["extra_body"] = {"response_format": lm_studio_response_format}
+                        # Remove from regular response_format to avoid conflicts
+                        if "response_format" in litellm_kwargs:
+                            del litellm_kwargs["response_format"]
+                        if verbose:
+                            logger.info(f"[INFO] Using extra_body for LM Studio response_format to bypass drop_params")
+
+                    # Groq has issues with tool-based structured output - use JSON mode with schema in prompt
+                    if is_groq and response_format:
+                        # Get the schema to include in system prompt
+                        if output_pydantic:
+                            schema = output_pydantic.model_json_schema()
+                        else:
+                            schema = output_schema
+
+                        # Use simple json_object mode (Groq's tool_use often fails)
+                        litellm_kwargs["response_format"] = {"type": "json_object"}
+
+                        # Prepend schema instruction to messages (json module is imported at top of file)
+                        schema_instruction = f"You must respond with valid JSON matching this schema:\n```json\n{json.dumps(schema, indent=2)}\n```\nRespond ONLY with the JSON object, no other text."
+
+                        # Find or create system message to prepend schema
+                        messages_list = litellm_kwargs.get("messages", [])
+                        if messages_list and messages_list[0].get("role") == "system":
+                            messages_list[0]["content"] = schema_instruction + "\n\n" + messages_list[0]["content"]
+                        else:
+                            messages_list.insert(0, {"role": "system", "content": schema_instruction})
+                        litellm_kwargs["messages"] = messages_list
+
+                        if verbose:
+                            logger.info(f"[INFO] Using JSON object mode with schema in prompt for Groq (avoiding tool_use issues)")
+
                     # As a fallback, one could use:
                     # litellm_kwargs["response_format"] = {"type": "json_object"}
                     # And potentially enable client-side validation:
                     # litellm.enable_json_schema_validation = True # Enable globally if needed
                 else:
+                    schema_name = output_pydantic.__name__ if output_pydantic else "JSON Schema"
                     if verbose:
-                        logger.warning(f"[WARN] Model {model_name_litellm} does not support structured output via CSV flag. Output might not be valid {
+                        logger.warning(f"[WARN] Model {model_name_litellm} does not support structured output via CSV flag. Output might not be valid {schema_name}.")
                     # Proceed without forcing JSON mode, parsing will be attempted later
 
             # --- NEW REASONING LOGIC ---
@@ -944,7 +1678,9 @@ def llm_invoke(
                         # Currently known: Anthropic uses 'thinking'
                         # Model name comparison is more robust than provider string
                         if provider == 'anthropic': # Check provider column instead of model prefix
-
+                            thinking_param = {"type": "enabled", "budget_tokens": budget}
+                            litellm_kwargs["thinking"] = thinking_param
+                            time_kwargs["thinking"] = thinking_param
                             if verbose:
                                 logger.info(f"[INFO] Requesting Anthropic thinking (budget type) with budget: {budget} tokens for {model_name_litellm}")
                         else:
@@ -962,10 +1698,32 @@ def llm_invoke(
                             effort = "high"
                         elif time > 0.3:
                             effort = "medium"
-
-
-
-
+
+                        # Map effort parameter per-provider/model family
+                        model_lower = str(model_name_litellm).lower()
+                        provider_lower = str(provider).lower()
+
+                        if provider_lower == 'openai' and model_lower.startswith('gpt-5'):
+                            # OpenAI 5-series uses Responses API with nested 'reasoning'
+                            reasoning_obj = {"effort": effort, "summary": "auto"}
+                            litellm_kwargs["reasoning"] = reasoning_obj
+                            time_kwargs["reasoning"] = reasoning_obj
+                            if verbose:
+                                logger.info(f"[INFO] Requesting OpenAI reasoning.effort='{effort}' for {model_name_litellm} (Responses API)")
+
+                        elif provider_lower == 'openai' and model_lower.startswith('o') and 'mini' not in model_lower:
+                            # Historical o* models may use LiteLLM's generic reasoning_effort param
+                            litellm_kwargs["reasoning_effort"] = effort
+                            time_kwargs["reasoning_effort"] = effort
+                            if verbose:
+                                logger.info(f"[INFO] Requesting reasoning_effort='{effort}' for {model_name_litellm}")
+
+                        else:
+                            # Fallback to LiteLLM generic param when supported by provider adapter
+                            litellm_kwargs["reasoning_effort"] = effort
+                            time_kwargs["reasoning_effort"] = effort
+                            if verbose:
+                                logger.info(f"[INFO] Requesting generic reasoning_effort='{effort}' for {model_name_litellm}")
 
                     elif reasoning_type == 'none':
                         if verbose:
@@ -997,6 +1755,166 @@ def llm_invoke(
                     logger.debug("NOT ENABLING CACHING: litellm.cache is None at call time")


+            # Route OpenAI gpt-5* models through Responses API to support 'reasoning'
+            model_lower_for_call = str(model_name_litellm).lower()
+            provider_lower_for_call = str(provider).lower()
+
+            if (
+                not use_batch_mode
+                and provider_lower_for_call == 'openai'
+                and model_lower_for_call.startswith('gpt-5')
+            ):
+                if verbose:
+                    logger.info(f"[INFO] Calling LiteLLM Responses API for {model_name_litellm}...")
+                try:
+                    # Build input text from messages
+                    if isinstance(formatted_messages, list) and formatted_messages and isinstance(formatted_messages[0], dict):
+                        input_text = "\n\n".join(f"{m.get('role','user')}: {m.get('content','')}" for m in formatted_messages)
+                    else:
+                        # Fallback: string cast
+                        input_text = str(formatted_messages)
+
+                    # Derive effort mapping already computed in time_kwargs
+                    reasoning_param = time_kwargs.get("reasoning")
+
+                    # Build text.format block for structured output
+                    # Default to plain text format
+                    text_block = {"format": {"type": "text"}}
+
+                    # If structured output requested, use text.format with json_schema
+                    # This is the correct way to enforce structured output via litellm.responses()
+                    if output_pydantic or output_schema:
+                        try:
+                            if output_pydantic:
+                                schema = output_pydantic.model_json_schema()
+                                name = output_pydantic.__name__
+                            else:
+                                schema = output_schema
+                                name = "response"
+
+                            # Add additionalProperties: false for strict mode (required by OpenAI)
+                            schema['additionalProperties'] = False
+
+                            # Use text.format with json_schema for structured output
+                            text_block = {
+                                "format": {
+                                    "type": "json_schema",
+                                    "name": name,
+                                    "strict": True,
+                                    "schema": schema,
+                                }
+                            }
+                            if verbose:
+                                logger.info(f"[INFO] Using structured output via text.format for Responses API")
+                        except Exception as schema_e:
+                            logger.warning(f"[WARN] Failed to derive JSON schema: {schema_e}. Proceeding with plain text format.")
+
+                    # Build kwargs for litellm.responses()
+                    responses_kwargs = {
+                        "model": model_name_litellm,
+                        "input": input_text,
+                        "text": text_block,
+                    }
+                    if verbose and temperature not in (None, 0, 0.0):
+                        logger.info("[INFO] Skipping 'temperature' for OpenAI GPT-5 Responses call (unsupported by API).")
+                    if reasoning_param is not None:
+                        responses_kwargs["reasoning"] = reasoning_param
+
+                    # Call litellm.responses() which handles the API interaction
+                    resp = litellm.responses(**responses_kwargs)
+
+                    # Extract text result from response
+                    result_text = None
+                    try:
+                        # LiteLLM responses return output as a list of items
+                        for item in resp.output:
+                            if getattr(item, 'type', None) == 'message' and hasattr(item, 'content') and item.content:
+                                for content_item in item.content:
+                                    if hasattr(content_item, 'text'):
+                                        result_text = content_item.text
+                                        break
+                                if result_text:
+                                    break
+                    except Exception:
+                        result_text = None
+
+                    # Calculate cost using usage + CSV rates
+                    total_cost = 0.0
+                    usage = getattr(resp, "usage", None)
+                    if usage is not None:
+                        in_tok = getattr(usage, "input_tokens", 0) or 0
+                        out_tok = getattr(usage, "output_tokens", 0) or 0
+                        in_rate = model_info.get('input', 0.0) or 0.0
+                        out_rate = model_info.get('output', 0.0) or 0.0
+                        total_cost = (in_tok * in_rate + out_tok * out_rate) / 1_000_000.0
+
+                    # Parse result if Pydantic output requested
+                    final_result = None
+                    if output_pydantic and result_text:
+                        try:
+                            final_result = output_pydantic.model_validate_json(result_text)
+                        except Exception as e:
+                            # With structured output, parsing should succeed
+                            # But if it fails, try JSON repair as fallback
+                            logger.warning(f"[WARN] Pydantic parse failed on Responses output: {e}. Attempting JSON repair...")
+
+                            # Try extracting from fenced JSON blocks first
+                            fenced = _extract_fenced_json_block(result_text)
+                            candidates: List[str] = []
+                            if fenced:
+                                candidates.append(fenced)
+                            else:
+                                candidates.extend(_extract_balanced_json_objects(result_text))
+
+                            # Also try the raw text as-is after stripping fences
+                            cleaned = result_text.strip()
+                            if cleaned.startswith("```json"):
+                                cleaned = cleaned[7:]
+                            elif cleaned.startswith("```"):
+                                cleaned = cleaned[3:]
+                            if cleaned.endswith("```"):
+                                cleaned = cleaned[:-3]
+                            cleaned = cleaned.strip()
+                            if cleaned and cleaned not in candidates:
+                                candidates.append(cleaned)
+
+                            parse_succeeded = False
+                            for cand in candidates:
+                                try:
+                                    final_result = output_pydantic.model_validate_json(cand)
+                                    parse_succeeded = True
+                                    logger.info(f"[SUCCESS] JSON repair succeeded for Responses output")
+                                    break
+                                except Exception:
+                                    continue
+
+                            if not parse_succeeded:
+                                logger.error(f"[ERROR] All JSON repair attempts failed for Responses output. Original error: {e}")
+                                final_result = f"ERROR: Failed to parse structured output from Responses API. Raw: {repr(result_text)[:200]}"
+                    else:
+                        final_result = result_text
+
+                    if verbose:
+                        logger.info(f"[RESULT] Model Used: {model_name_litellm}")
+                        logger.info(f"[RESULT] Total Cost (estimated): ${total_cost:.6g}")
+
+                    return {
+                        'result': final_result,
+                        'cost': total_cost,
+                        'model_name': model_name_litellm,
+                        'thinking_output': None,
+                    }
+                except Exception as e:
+                    last_exception = e
+                    logger.error(f"[ERROR] OpenAI Responses call failed for {model_name_litellm}: {e}")
+                    # Remove 'reasoning' key to avoid OpenAI Chat API unknown param errors
+                    if "reasoning" in litellm_kwargs:
+                        try:
+                            litellm_kwargs.pop("reasoning", None)
+                        except Exception:
+                            pass
+                    # Fall through to LiteLLM path as a fallback
+
             if use_batch_mode:
                 if verbose:
                     logger.info(f"[INFO] Calling litellm.batch_completion for {model_name_litellm}...")
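
For GPT-5 models the call now goes through `litellm.responses()` instead of `litellm.completion()`, with structured output requested via `text.format` and the effort passed as a nested `reasoning` object. A rough, self-contained sketch of that request shape, under stated assumptions: the `Joke` model and the model id are illustrative stand-ins, not part of the package.

```python
import litellm
from pydantic import BaseModel

class Joke(BaseModel):          # illustrative stand-in for the caller's output_pydantic model
    setup: str
    punchline: str

schema = Joke.model_json_schema()
schema["additionalProperties"] = False   # strict structured output requires this

resp = litellm.responses(
    model="gpt-5-nano",                  # illustrative model id
    input="user: Tell me a short joke about programmers.",
    text={"format": {"type": "json_schema", "name": "Joke", "strict": True, "schema": schema}},
    reasoning={"effort": "medium", "summary": "auto"},
)

# Walk the output items for the first message text, the same way the hunk does
text = next(
    c.text
    for item in resp.output if getattr(item, "type", None) == "message"
    for c in item.content if hasattr(c, "text")
)
print(Joke.model_validate_json(text))
```
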
@@ -1004,6 +1922,16 @@ def llm_invoke(


             else:
+                # Anthropic requirement: when 'thinking' is enabled, temperature must be 1
+                try:
+                    if provider.lower() == 'anthropic' and 'thinking' in litellm_kwargs:
+                        if litellm_kwargs.get('temperature') != 1:
+                            if verbose:
+                                logger.info("[INFO] Anthropic thinking enabled: forcing temperature=1 for compliance.")
+                            litellm_kwargs['temperature'] = 1
+                            current_temperature = 1
+                except Exception:
+                    pass
                 if verbose:
                     logger.info(f"[INFO] Calling litellm.completion for {model_name_litellm}...")
                 response = litellm.completion(**litellm_kwargs)
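
The small hunk above encodes the Anthropic rule that the later error-handling hunk also relies on: once extended `thinking` is requested, the request must run at `temperature=1`. A hedged sketch of the constraint in isolation (the model id is illustrative):

```python
import litellm

kwargs = {
    "model": "anthropic/claude-sonnet-4",            # illustrative model id
    "messages": [{"role": "user", "content": "Summarize the change."}],
    "thinking": {"type": "enabled", "budget_tokens": 2048},
    "temperature": 0.2,
}
# Anthropic rejects thinking-enabled calls at any temperature other than 1,
# so the wrapper forces it before dispatch.
if "thinking" in kwargs and kwargs.get("temperature") != 1:
    kwargs["temperature"] = 1
response = litellm.completion(**kwargs)
```
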
@@ -1061,13 +1989,12 @@ def llm_invoke(
                         retry_response = litellm.completion(
                             model=model_name_litellm,
                             messages=retry_messages,
-                            temperature=
+                            temperature=current_temperature,
                             response_format=response_format,
-                            max_completion_tokens=max_tokens,
                             **time_kwargs
                         )
-                        # Re-enable cache
-                        litellm.cache =
+                        # Re-enable cache - restore original configured cache (restore to original state, even if None)
+                        litellm.cache = configured_cache
                         # Extract result from retry
                         retry_raw_result = retry_response.choices[0].message.content
                         if retry_raw_result is not None:
@@ -1085,21 +2012,66 @@ def llm_invoke(
                        logger.error(f"[ERROR] Cannot retry - batch mode or missing prompt/input_json")
                        results.append("ERROR: LLM returned None content and cannot retry")
                        continue
-
-
+
+                # Check for malformed JSON response (excessive trailing newlines causing truncation)
+                # This can happen when Gemini generates thousands of \n in JSON string values
+                if isinstance(raw_result, str) and _is_malformed_json_response(raw_result):
+                    logger.warning(f"[WARNING] Detected malformed JSON response with excessive trailing newlines for item {i}. Retrying with cache bypass...")
+                    if not use_batch_mode and prompt and input_json is not None:
+                        # Add a small space to bypass cache
+                        modified_prompt = prompt + " "
+                        try:
+                            retry_messages = _format_messages(modified_prompt, input_json, use_batch_mode)
+                            # Disable cache for retry
+                            original_cache = litellm.cache
+                            litellm.cache = None
+                            retry_response = litellm.completion(
+                                model=model_name_litellm,
+                                messages=retry_messages,
+                                temperature=current_temperature,
+                                response_format=response_format,
+                                **time_kwargs
+                            )
+                            # Re-enable cache
+                            litellm.cache = original_cache
+                            # Extract result from retry
+                            retry_raw_result = retry_response.choices[0].message.content
+                            if retry_raw_result is not None and not _is_malformed_json_response(retry_raw_result):
+                                logger.info(f"[SUCCESS] Cache bypass retry for malformed JSON succeeded for item {i}")
+                                raw_result = retry_raw_result
+                            else:
+                                # Retry also failed, but we'll continue with repair logic below
+                                logger.warning(f"[WARNING] Cache bypass retry also returned malformed JSON for item {i}, attempting repair...")
+                        except Exception as retry_e:
+                            logger.warning(f"[WARNING] Cache bypass retry for malformed JSON failed for item {i}: {retry_e}, attempting repair...")
+                    else:
+                        logger.warning(f"[WARNING] Cannot retry malformed JSON - batch mode or missing prompt/input_json, attempting repair...")
+
+                if output_pydantic or output_schema:
                    parsed_result = None
                    json_string_to_parse = None

                    try:
-                        # Attempt 1: Check if LiteLLM already parsed it
-                        if isinstance(raw_result, output_pydantic):
+                        # Attempt 1: Check if LiteLLM already parsed it (only for Pydantic)
+                        if output_pydantic and isinstance(raw_result, output_pydantic):
                            parsed_result = raw_result
                            if verbose:
                                logger.debug("[DEBUG] Pydantic object received directly from LiteLLM.")

                        # Attempt 2: Check if raw_result is dict-like and validate
                        elif isinstance(raw_result, dict):
-
+                            if output_pydantic:
+                                parsed_result = output_pydantic.model_validate(raw_result)
+                            else:
+                                # Validate against JSON schema
+                                try:
+                                    import jsonschema
+                                    jsonschema.validate(instance=raw_result, schema=output_schema)
+                                    parsed_result = json.dumps(raw_result) # Return as JSON string for consistency
+                                except ImportError:
+                                    logger.warning("jsonschema not installed, skipping validation")
+                                    parsed_result = json.dumps(raw_result)
+
                            if verbose:
                                logger.debug("[DEBUG] Validated dictionary-like object directly.")

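
The cache-bypass retry introduced above works by perturbing the prompt with a trailing space and temporarily clearing `litellm.cache`, so the follow-up call cannot be served from the same cache entry. The same pattern in isolation, as a sketch only (the wrapper name and model id are placeholders, not part of the package):

```python
import litellm

def retry_without_cache(prompt: str, model: str = "gpt-4o-mini"):
    original_cache = litellm.cache           # remember whatever cache is configured
    litellm.cache = None                     # disable caching for this one call
    try:
        return litellm.completion(
            model=model,
            messages=[{"role": "user", "content": prompt + " "}],  # trailing space busts the cache key
        )
    finally:
        litellm.cache = original_cache       # always restore the configured cache
```
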
@@ -1107,26 +2079,59 @@ def llm_invoke(
                        elif isinstance(raw_result, str):
                            json_string_to_parse = raw_result # Start with the raw string
                            try:
-                                #
-
-
-                                if
-
-                                # Basic check if it looks like JSON
-                                if potential_json.strip().startswith('{') and potential_json.strip().endswith('}'):
-                                    if verbose:
-                                        logger.debug(f"[DEBUG] Attempting to parse extracted JSON block: '{potential_json}'")
-                                    parsed_result = output_pydantic.model_validate_json(potential_json)
-                                else:
-                                    # If block extraction fails, try cleaning markdown next
-                                    raise ValueError("Extracted block doesn't look like JSON")
+                                # 1) Prefer fenced ```json blocks
+                                fenced = _extract_fenced_json_block(raw_result)
+                                candidates: List[str] = []
+                                if fenced:
+                                    candidates.append(fenced)
                                else:
-
-
-
+                                    # 2) Fall back to scanning for balanced JSON objects
+                                    candidates.extend(_extract_balanced_json_objects(raw_result))
+
+                                if not candidates:
+                                    raise ValueError("No JSON-like content found")
+
+                                parse_err: Optional[Exception] = None
+                                for cand in candidates:
+                                    try:
+                                        if verbose:
+                                            logger.debug(f"[DEBUG] Attempting to parse candidate JSON block: {cand}")
+
+                                        if output_pydantic:
+                                            parsed_result = output_pydantic.model_validate_json(cand)
+                                        else:
+                                            # Parse JSON and validate against schema
+                                            loaded = json.loads(cand)
+                                            try:
+                                                import jsonschema
+                                                jsonschema.validate(instance=loaded, schema=output_schema)
+                                            except ImportError:
+                                                pass # Skip validation if lib missing
+                                            parsed_result = cand # Return string if valid
+
+                                        json_string_to_parse = cand
+                                        parse_err = None
+                                        break
+                                    except (json.JSONDecodeError, ValidationError, ValueError) as pe:
+                                        # Also catch jsonschema.ValidationError if imported
+                                        parse_err = pe
+                                        try:
+                                            import jsonschema
+                                            if isinstance(pe, jsonschema.ValidationError):
+                                                parse_err = pe
+                                        except ImportError:
+                                            pass
+
+                                if parsed_result is None:
+                                    # If none of the candidates parsed, raise last error
+                                    if parse_err is not None:
+                                        raise parse_err
+                                    raise ValueError("Unable to parse any JSON candidates")
+                            except (json.JSONDecodeError, ValidationError, ValueError, Exception) as extraction_error:
+                                # Catch generic Exception to handle jsonschema errors without explicit import here
                                if verbose:
-                                    logger.debug(f"[DEBUG] JSON
-                                #
+                                    logger.debug(f"[DEBUG] JSON extraction/validation failed ('{extraction_error}'). Trying fence cleaning.")
+                                # Last resort: strip any leading/trailing code fences and retry
                                cleaned_result_str = raw_result.strip()
                                if cleaned_result_str.startswith("```json"):
                                    cleaned_result_str = cleaned_result_str[7:]
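
The extraction path above leans on two helpers defined elsewhere in `llm_invoke.py`, `_extract_fenced_json_block` and `_extract_balanced_json_objects`, whose bodies do not appear in this hunk. The following is only a sketch of what helpers of that kind could look like, not the packaged implementation:

```python
import re
from typing import List, Optional

def extract_fenced_json_block(text: str) -> Optional[str]:
    """Return the body of the first fenced ```json block, if any (illustrative helper)."""
    match = re.search(r"```json\s*(.*?)```", text, re.DOTALL)
    return match.group(1).strip() if match else None

def extract_balanced_json_objects(text: str) -> List[str]:
    """Scan for top-level {...} spans with balanced braces, ignoring braces inside strings."""
    candidates, depth, start, in_str, escape = [], 0, None, False, False
    for i, ch in enumerate(text):
        if in_str:
            if escape:
                escape = False
            elif ch == "\\":
                escape = True
            elif ch == '"':
                in_str = False
        elif ch == '"':
            in_str = True
        elif ch == "{":
            if depth == 0:
                start = i
            depth += 1
        elif ch == "}" and depth:
            depth -= 1
            if depth == 0 and start is not None:
                candidates.append(text[start:i + 1])
                start = None
    return candidates

print(extract_balanced_json_objects('noise {"a": 1} more {"b": {"c": 2}} tail'))
```
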
@@ -1135,35 +2140,166 @@ def llm_invoke(
                                if cleaned_result_str.endswith("```"):
                                    cleaned_result_str = cleaned_result_str[:-3]
                                cleaned_result_str = cleaned_result_str.strip()
-                                # Check
-
+                                # Check for complete JSON object or array
+                                is_complete_object = cleaned_result_str.startswith('{') and cleaned_result_str.endswith('}')
+                                is_complete_array = cleaned_result_str.startswith('[') and cleaned_result_str.endswith(']')
+                                if is_complete_object or is_complete_array:
                                    if verbose:
-                                        logger.debug(f"[DEBUG] Attempting parse after
-                                    json_string_to_parse = cleaned_result_str
-
+                                        logger.debug(f"[DEBUG] Attempting parse after generic fence cleaning. Cleaned string: '{cleaned_result_str}'")
+                                    json_string_to_parse = cleaned_result_str
+
+                                    if output_pydantic:
+                                        parsed_result = output_pydantic.model_validate_json(json_string_to_parse)
+                                    else:
+                                        loaded = json.loads(json_string_to_parse)
+                                        try:
+                                            import jsonschema
+                                            jsonschema.validate(instance=loaded, schema=output_schema)
+                                        except ImportError:
+                                            pass
+                                        parsed_result = json_string_to_parse
+                                elif cleaned_result_str.startswith('{') or cleaned_result_str.startswith('['):
+                                    # Attempt to repair truncated JSON (e.g., missing closing braces)
+                                    # This can happen when Gemini generates excessive trailing content
+                                    # that causes token limit truncation
+                                    if verbose:
+                                        logger.debug(f"[DEBUG] JSON appears truncated (missing closing brace). Attempting repair.")
+
+                                    # Try to find the last valid JSON structure
+                                    # For simple schemas like {"extracted_code": "..."}, we can try to close it
+                                    repaired = cleaned_result_str.rstrip()
+
+                                    # Strip trailing escaped newline sequences (\\n in the JSON string)
+                                    # These appear as literal backslash-n when Gemini generates excessive newlines
+                                    while repaired.endswith('\\n'):
+                                        repaired = repaired[:-2]
+                                    # Also strip trailing literal backslashes that might be orphaned
+                                    repaired = repaired.rstrip('\\')
+
+                                    # If we're in the middle of a string value, try to close it
+                                    # Count unescaped quotes to determine if we're inside a string
+                                    # Simple heuristic: if it ends without proper closure, add closing
+                                    is_array = cleaned_result_str.startswith('[')
+                                    expected_end = ']' if is_array else '}'
+                                    if not repaired.endswith(expected_end):
+                                        # Try adding various closures to repair
+                                        if is_array:
+                                            repair_attempts = [
+                                                repaired + '}]', # Close object and array
+                                                repaired + '"}]', # Close string, object and array
+                                                repaired + '"}}]', # Close string, nested object and array
+                                                repaired.rstrip(',') + ']', # Remove trailing comma and close array
+                                                repaired.rstrip('"') + '"}]', # Handle partial string end
+                                            ]
+                                        else:
+                                            repair_attempts = [
+                                                repaired + '"}', # Close string and object
+                                                repaired + '"}\n}', # Close string and nested object
+                                                repaired + '"}}}', # Deeper nesting
+                                                repaired.rstrip(',') + '}', # Remove trailing comma
+                                                repaired.rstrip('"') + '"}', # Handle partial string end
+                                            ]
+
+                                        for attempt in repair_attempts:
+                                            try:
+                                                if output_pydantic:
+                                                    parsed_result = output_pydantic.model_validate_json(attempt)
+                                                else:
+                                                    loaded = json.loads(attempt)
+                                                    try:
+                                                        import jsonschema
+                                                        jsonschema.validate(instance=loaded, schema=output_schema)
+                                                    except ImportError:
+                                                        pass
+                                                    parsed_result = attempt
+
+                                                if verbose:
+                                                    logger.info(f"[INFO] Successfully repaired truncated JSON response")
+                                                json_string_to_parse = attempt
+                                                break
+                                            except (json.JSONDecodeError, ValidationError, ValueError):
+                                                continue
+
+                                    if parsed_result is None:
+                                        raise ValueError("Content after cleaning doesn't look like JSON (and repair attempts failed)")
                                else:
-
-                                    raise ValueError("Content after cleaning markdown doesn't look like JSON")
+                                    raise ValueError("Content after cleaning doesn't look like JSON")


                        # Check if any parsing attempt succeeded
                        if parsed_result is None:
+                            target_name = output_pydantic.__name__ if output_pydantic else "JSON Schema"
                            # This case should ideally be caught by exceptions above, but as a safeguard:
-                            raise TypeError(f"Raw result type {type(raw_result)} or content could not be validated/parsed against {
+                            raise TypeError(f"Raw result type {type(raw_result)} or content could not be validated/parsed against {target_name}.")

-                    except (ValidationError, json.JSONDecodeError, TypeError, ValueError) as parse_error:
-
+                    except (ValidationError, json.JSONDecodeError, TypeError, ValueError, Exception) as parse_error:
+                        target_name = output_pydantic.__name__ if output_pydantic else "JSON Schema"
+                        logger.error(f"[ERROR] Failed to parse response into {target_name} for item {i}: {parse_error}")
                        # Use the string that was last attempted for parsing in the error message
                        error_content = json_string_to_parse if json_string_to_parse is not None else raw_result
                        logger.error("[ERROR] Content attempted for parsing: %s", repr(error_content)) # CORRECTED (or use f-string)
-                        results.append(f"ERROR: Failed to parse
+                        results.append(f"ERROR: Failed to parse structured output. Raw: {repr(raw_result)}")
                        continue # Skip appending result below if parsing failed

-                    #
+                    # Post-process: unescape newlines and repair Python syntax
+                    _unescape_code_newlines(parsed_result)
+
+                    # Check if code fields still have invalid Python syntax after repair
+                    # If so, retry without cache to get a fresh response
+                    if _has_invalid_python_code(parsed_result):
+                        logger.warning(f"[WARNING] Detected invalid Python syntax in code fields for item {i} after repair. Retrying with cache bypass...")
+                        if not use_batch_mode and prompt and input_json is not None:
+                            # Add a small variation to bypass cache
+                            modified_prompt = prompt + "  " # Two spaces to differentiate from other retries
+                            try:
+                                retry_messages = _format_messages(modified_prompt, input_json, use_batch_mode)
+                                # Disable cache for retry
+                                original_cache = litellm.cache
+                                litellm.cache = None
+                                retry_response = litellm.completion(
+                                    model=model_name_litellm,
+                                    messages=retry_messages,
+                                    temperature=current_temperature,
+                                    response_format=response_format,
+                                    **time_kwargs
+                                )
+                                # Re-enable cache
+                                litellm.cache = original_cache
+                                # Extract and re-parse the retry result
+                                retry_raw_result = retry_response.choices[0].message.content
+                                if retry_raw_result is not None:
+                                    # Re-parse the retry result
+                                    retry_parsed = None
+                                    if output_pydantic:
+                                        if isinstance(retry_raw_result, output_pydantic):
+                                            retry_parsed = retry_raw_result
+                                        elif isinstance(retry_raw_result, dict):
+                                            retry_parsed = output_pydantic.model_validate(retry_raw_result)
+                                        elif isinstance(retry_raw_result, str):
+                                            retry_parsed = output_pydantic.model_validate_json(retry_raw_result)
+                                    elif output_schema and isinstance(retry_raw_result, str):
+                                        retry_parsed = retry_raw_result # Keep as string for schema validation
+
+                                    if retry_parsed is not None:
+                                        _unescape_code_newlines(retry_parsed)
+                                        if not _has_invalid_python_code(retry_parsed):
+                                            logger.info(f"[SUCCESS] Cache bypass retry for invalid Python code succeeded for item {i}")
+                                            parsed_result = retry_parsed
+                                        else:
+                                            logger.warning(f"[WARNING] Cache bypass retry still has invalid Python code for item {i}, using original")
+                                    else:
+                                        logger.warning(f"[WARNING] Cache bypass retry returned unparseable result for item {i}")
+                                else:
+                                    logger.warning(f"[WARNING] Cache bypass retry returned None for item {i}")
+                            except Exception as retry_e:
+                                logger.warning(f"[WARNING] Cache bypass retry for invalid Python code failed for item {i}: {retry_e}")
+                        else:
+                            logger.warning(f"[WARNING] Cannot retry invalid Python code - batch mode or missing prompt/input_json")
+
                    results.append(parsed_result)

                else:
-                    # If output_pydantic was not requested, append the raw result
+                    # If output_pydantic/schema was not requested, append the raw result
                    results.append(raw_result)

            except (AttributeError, IndexError) as e:
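
The repair loop above tries a small set of plausible closures for JSON that was cut off mid string value, the failure mode the comments attribute to runaway newline generation. A compact walk-through of the same heuristic on a made-up truncated payload (the `extracted_code` key mirrors the example mentioned in the hunk's comments):

```python
import json

truncated = '{"extracted_code": "print(1)\\n\\n'   # response cut off inside the string value
repaired = truncated.rstrip()
while repaired.endswith("\\n"):                    # drop trailing escaped-newline sequences
    repaired = repaired[:-2]
repaired = repaired.rstrip("\\")                   # drop any orphaned trailing backslash

for attempt in (repaired + '"}', repaired + '"}\n}', repaired.rstrip(",") + "}"):
    try:
        print(json.loads(attempt))                 # first closure that parses wins
        break
    except json.JSONDecodeError:
        continue
```
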
@@ -1246,10 +2382,40 @@ def llm_invoke(
                    Exception) as e: # Catch generic Exception last
                last_exception = e
                error_type = type(e).__name__
+                error_str = str(e)
+
+                # Provider-specific handling for Anthropic temperature + thinking rules.
+                # Two scenarios we auto-correct:
+                # 1) temperature==1 without thinking -> retry with 0.99
+                # 2) thinking enabled but temperature!=1 -> retry with 1
+                lower_err = error_str.lower()
+                if (not temp_adjustment_done) and ("temperature" in lower_err) and ("thinking" in lower_err):
+                    anthropic_thinking_sent = ('thinking' in litellm_kwargs) and (provider.lower() == 'anthropic')
+                    # Decide direction of adjustment based on whether thinking was enabled in the call
+                    if anthropic_thinking_sent:
+                        # thinking enabled -> force temperature=1
+                        adjusted_temp = 1
+                        logger.warning(
+                            f"[WARN] {model_name_litellm}: Anthropic with thinking requires temperature=1. "
+                            f"Retrying with temperature={adjusted_temp}."
+                        )
+                    else:
+                        # thinking not enabled -> avoid temperature=1
+                        adjusted_temp = 0.99
+                        logger.warning(
+                            f"[WARN] {model_name_litellm}: Provider rejected temperature=1 without thinking. "
+                            f"Retrying with temperature={adjusted_temp}."
+                        )
+                    current_temperature = adjusted_temp
+                    temp_adjustment_done = True
+                    retry_with_same_model = True
+                    if verbose:
+                        logger.debug(f"Retrying {model_name_litellm} with adjusted temperature {current_temperature}")
+                    continue
+
                logger.error(f"[ERROR] Invocation failed for {model_name_litellm} ({error_type}): {e}. Trying next model.")
                # Log more details in verbose mode
                if verbose:
-                    # import traceback # Not needed if using exc_info=True
                    logger.debug(f"Detailed exception traceback for {model_name_litellm}:", exc_info=True)
                break # Break inner loop, try next model candidate

@@ -1277,7 +2443,7 @@ if __name__ == "__main__":
        response = llm_invoke(
            prompt="Tell me a short joke about {topic}.",
            input_json={"topic": "programmers"},
-            strength=0.5, # Use base model (gpt-
+            strength=0.5, # Use base model (gpt-5-nano)
            temperature=0.7,
            verbose=True
        )
@@ -1358,7 +2524,7 @@ if __name__ == "__main__":
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "What is the capital of France?"}
        ]
-        # Strength 0.5 should select gpt-
+        # Strength 0.5 should select gpt-5-nano
        response_messages = llm_invoke(
            messages=custom_messages,
            strength=0.5,