pdd-cli 0.0.42__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdd/__init__.py +4 -4
- pdd/agentic_common.py +863 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_fix.py +1179 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +370 -0
- pdd/agentic_verify.py +183 -0
- pdd/auto_deps_main.py +15 -5
- pdd/auto_include.py +63 -5
- pdd/bug_main.py +3 -2
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +80 -19
- pdd/code_generator.py +58 -18
- pdd/code_generator_main.py +672 -25
- pdd/commands/__init__.py +42 -0
- pdd/commands/analysis.py +248 -0
- pdd/commands/fix.py +140 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +174 -0
- pdd/commands/misc.py +79 -0
- pdd/commands/modify.py +230 -0
- pdd/commands/report.py +144 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +281 -81
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +113 -11
- pdd/continue_generation.py +47 -7
- pdd/core/__init__.py +0 -0
- pdd/core/cli.py +503 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +63 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +44 -11
- pdd/data/language_format.csv +71 -62
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/fix_code_loop.py +331 -77
- pdd/fix_error_loop.py +209 -60
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +75 -18
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +319 -272
- pdd/fix_verification_main.py +57 -17
- pdd/generate_output_paths.py +93 -10
- pdd/generate_test.py +16 -5
- pdd/get_jwt_token.py +48 -9
- pdd/get_run_command.py +73 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/increase_tests.py +7 -0
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +11 -3
- pdd/llm_invoke.py +1278 -110
- pdd/load_prompt_template.py +36 -10
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +10 -3
- pdd/preprocess.py +228 -15
- pdd/preprocess_main.py +8 -5
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +1071 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +98 -101
- pdd/prompts/change_LLM.prompt +1 -3
- pdd/prompts/detect_change_LLM.prompt +562 -3
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +14 -2
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +16 -4
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +6 -41
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +21 -6
- pdd/prompts/increase_tests_LLM.prompt +1 -2
- pdd/prompts/insert_includes_LLM.prompt +1181 -6
- pdd/prompts/split_LLM.prompt +1 -62
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/prompts/xml_convertor_LLM.prompt +3246 -7
- pdd/pytest_output.py +188 -21
- pdd/python_env_detector.py +151 -0
- pdd/render_mermaid.py +236 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +56 -7
- pdd/sync_determine_operation.py +918 -186
- pdd/sync_main.py +82 -32
- pdd/sync_orchestration.py +1456 -453
- pdd/sync_tui.py +848 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +242 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +151 -61
- pdd/unfinished_prompt.py +49 -3
- pdd/update_main.py +549 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/METADATA +20 -7
- pdd_cli-0.0.90.dist-info/RECORD +153 -0
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.42.dist-info/RECORD +0 -115
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.42.dist-info → pdd_cli-0.0.90.dist-info}/top_level.txt +0 -0
pdd/llm_invoke.py
CHANGED
@@ -5,6 +5,8 @@ import os
 import pandas as pd
 import litellm
 import logging  # ADDED FOR DETAILED LOGGING
+import importlib.resources
+from litellm.caching.caching import Cache  # Fix for LiteLLM v1.75.5+
 
 # --- Configure Standard Python Logging ---
 logger = logging.getLogger("pdd.llm_invoke")
@@ -24,6 +26,16 @@ litellm_logger = logging.getLogger("litellm")
 litellm_log_level = os.getenv("LITELLM_LOG_LEVEL", "WARNING" if PRODUCTION_MODE else "INFO")
 litellm_logger.setLevel(getattr(logging, litellm_log_level, logging.WARNING))
 
+# Ensure LiteLLM drops provider-unsupported params instead of erroring
+# This prevents failures like UnsupportedParamsError for OpenAI gpt-5-* when
+# passing generic params (e.g., reasoning_effort) not accepted by that API path.
+try:
+    _drop_params_env = os.getenv("LITELLM_DROP_PARAMS", "true")
+    litellm.drop_params = str(_drop_params_env).lower() in ("1", "true", "yes", "on")
+except Exception:
+    # Be conservative: default to True even if env parsing fails
+    litellm.drop_params = True
+
 # Add a console handler if none exists
 if not logger.handlers:
     console_handler = logging.StreamHandler()
@@ -69,7 +81,7 @@ import json
 # from rich import print as rprint  # Replaced with logger
 from dotenv import load_dotenv
 from pathlib import Path
-from typing import Optional, Dict, List, Any, Type, Union
+from typing import Optional, Dict, List, Any, Type, Union, Tuple
 from pydantic import BaseModel, ValidationError
 import openai  # Import openai for exception handling as LiteLLM maps to its types
 from langchain_core.prompts import PromptTemplate
@@ -79,7 +91,11 @@ import time as time_module  # Alias to avoid conflict with 'time' parameter
 from pdd import DEFAULT_LLM_MODEL
 
 # Opt-in to future pandas behavior regarding downcasting
-
+try:
+    pd.set_option('future.no_silent_downcasting', True)
+except pd._config.config.OptionError:
+    # Skip if option doesn't exist in older pandas versions
+    pass
 
 
 def _is_wsl_environment() -> bool:
@@ -108,6 +124,22 @@ def _is_wsl_environment() -> bool:
     return False
 
 
+def _openai_responses_supports_response_format() -> bool:
+    """Detect if current OpenAI Python SDK supports `response_format` on Responses.create.
+
+    Returns True if the installed SDK exposes a `response_format` parameter on
+    `openai.resources.responses.Responses.create`, else False. This avoids
+    sending unsupported kwargs and triggering TypeError at runtime.
+    """
+    try:
+        import inspect
+        from openai.resources.responses import Responses
+        sig = inspect.signature(Responses.create)
+        return "response_format" in sig.parameters
+    except Exception:
+        return False
+
+
 def _get_environment_info() -> Dict[str, str]:
     """
     Get environment information for debugging and error reporting.
@@ -152,8 +184,8 @@ if PDD_PATH_ENV:
 
 if PROJECT_ROOT is None:  # If PDD_PATH wasn't set or was invalid
     try:
-        # Start from the directory
-        current_dir = Path(
+        # Start from the current working directory (where user is running PDD)
+        current_dir = Path.cwd().resolve()
         # Look for project markers (e.g., .git, pyproject.toml, data/, .env)
         # Go up a maximum of 5 levels to prevent infinite loops
         for _ in range(5):
@@ -164,7 +196,7 @@ if PROJECT_ROOT is None: # If PDD_PATH wasn't set or was invalid
 
             if has_git or has_pyproject or has_data or has_dotenv:
                 PROJECT_ROOT = current_dir
-                logger.debug(f"Determined PROJECT_ROOT by marker search: {PROJECT_ROOT}")
+                logger.debug(f"Determined PROJECT_ROOT by marker search from CWD: {PROJECT_ROOT}")
                 break
 
             parent_dir = current_dir.parent
@@ -172,10 +204,8 @@ if PROJECT_ROOT is None: # If PDD_PATH wasn't set or was invalid
                 break
             current_dir = parent_dir
 
-    except NameError:  # __file__ might not be defined (e.g., interactive session)
-        warnings.warn("__file__ not defined. Cannot automatically detect project root from script location.")
     except Exception as e:  # Catch potential permission errors etc.
-        warnings.warn(f"Error during project root auto-detection: {e}")
+        warnings.warn(f"Error during project root auto-detection from current working directory: {e}")
 
 if PROJECT_ROOT is None:  # Fallback to CWD if no method succeeded
     PROJECT_ROOT = Path.cwd().resolve()
@@ -184,16 +214,80 @@ if PROJECT_ROOT is None: # Fallback to CWD if no method succeeded
 
 ENV_PATH = PROJECT_ROOT / ".env"
 # --- Determine LLM_MODEL_CSV_PATH ---
-# Prioritize ~/.pdd/llm_model.csv
+# Prioritize ~/.pdd/llm_model.csv, then a project .pdd from the current CWD,
+# then PROJECT_ROOT (which may be set from PDD_PATH), else fall back to package.
 user_pdd_dir = Path.home() / ".pdd"
 user_model_csv_path = user_pdd_dir / "llm_model.csv"
 
+def _detect_project_root_from_cwd(max_levels: int = 5) -> Path:
+    """Search upwards from the current working directory for common project markers.
+
+    This intentionally ignores PDD_PATH to support CLI invocations that set
+    PDD_PATH to the installed package location. We want to honor a real project
+    checkout's .pdd/llm_model.csv when running inside it.
+    """
+    try:
+        current_dir = Path.cwd().resolve()
+        for _ in range(max_levels):
+            if (
+                (current_dir / ".git").exists()
+                or (current_dir / "pyproject.toml").exists()
+                or (current_dir / "data").is_dir()
+                or (current_dir / ".env").exists()
+            ):
+                return current_dir
+            parent = current_dir.parent
+            if parent == current_dir:
+                break
+            current_dir = parent
+    except Exception:
+        pass
+    return Path.cwd().resolve()
+
+# Resolve candidates
+project_root_from_cwd = _detect_project_root_from_cwd()
+project_csv_from_cwd = project_root_from_cwd / ".pdd" / "llm_model.csv"
+project_csv_from_env = PROJECT_ROOT / ".pdd" / "llm_model.csv"
+
+# Detect whether PDD_PATH points to the installed package directory. If so,
+# don't prioritize it over the real project from CWD.
+try:
+    _installed_pkg_root = importlib.resources.files('pdd')
+    # importlib.resources.files returns a Traversable; get a FS path string if possible
+    try:
+        _installed_pkg_root_path = Path(str(_installed_pkg_root))
+    except Exception:
+        _installed_pkg_root_path = None
+except Exception:
+    _installed_pkg_root_path = None
+
+def _is_env_path_package_dir(env_path: Path) -> bool:
+    try:
+        if _installed_pkg_root_path is None:
+            return False
+        env_path = env_path.resolve()
+        pkg_path = _installed_pkg_root_path.resolve()
+        # Treat equal or subpath as package dir
+        return env_path == pkg_path or str(env_path).startswith(str(pkg_path))
+    except Exception:
+        return False
+
+# Selection order
 if user_model_csv_path.is_file():
     LLM_MODEL_CSV_PATH = user_model_csv_path
     logger.info(f"Using user-specific LLM model CSV: {LLM_MODEL_CSV_PATH}")
+elif (not _is_env_path_package_dir(PROJECT_ROOT)) and project_csv_from_env.is_file():
+    # Honor an explicitly-set PDD_PATH pointing to a real project directory
+    LLM_MODEL_CSV_PATH = project_csv_from_env
+    logger.info(f"Using project-specific LLM model CSV (from PDD_PATH): {LLM_MODEL_CSV_PATH}")
+elif project_csv_from_cwd.is_file():
+    # Otherwise, prefer the project relative to the current working directory
+    LLM_MODEL_CSV_PATH = project_csv_from_cwd
+    logger.info(f"Using project-specific LLM model CSV (from CWD): {LLM_MODEL_CSV_PATH}")
 else:
-
-
+    # Neither exists, we'll use a marker path that _load_model_data will handle
+    LLM_MODEL_CSV_PATH = None
+    logger.info("No local LLM model CSV found, will use package default")
 # ---------------------------------
 
 # Load environment variables from .env file
@@ -223,6 +317,7 @@ if GCS_HMAC_SECRET_ACCESS_KEY:
     GCS_HMAC_SECRET_ACCESS_KEY = GCS_HMAC_SECRET_ACCESS_KEY.strip()
 
 cache_configured = False
+configured_cache = None  # Store the configured cache instance for restoration
 
 if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
     # Store original AWS credentials before overwriting for GCS cache setup
@@ -236,12 +331,13 @@ if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
     os.environ['AWS_SECRET_ACCESS_KEY'] = GCS_HMAC_SECRET_ACCESS_KEY
     # os.environ['AWS_REGION_NAME'] = GCS_REGION_NAME  # Uncomment if needed
 
-
+    configured_cache = Cache(
         type="s3",
         s3_bucket_name=GCS_BUCKET_NAME,
         s3_region_name=GCS_REGION_NAME,  # Pass region explicitly to cache
         s3_endpoint_url=GCS_ENDPOINT_URL,
     )
+    litellm.cache = configured_cache
     logger.info(f"LiteLLM cache configured for GCS bucket (S3 compatible): {GCS_BUCKET_NAME}")
     cache_configured = True
 
@@ -266,15 +362,22 @@ if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
     elif 'AWS_REGION_NAME' in os.environ:
         pass  # Or just leave it if the temporary setting wasn't done/needed
 
+# Check if caching is disabled via environment variable
+if os.getenv("LITELLM_CACHE_DISABLE") == "1":
+    logger.info("LiteLLM caching disabled via LITELLM_CACHE_DISABLE=1")
+    litellm.cache = None
+    cache_configured = True
+
 if not cache_configured:
     try:
-        # Try
+        # Try disk-based cache as a fallback
         sqlite_cache_path = PROJECT_ROOT / "litellm_cache.sqlite"
-
-
+        configured_cache = Cache(type="disk", disk_cache_dir=str(sqlite_cache_path))
+        litellm.cache = configured_cache
+        logger.info(f"LiteLLM disk cache configured at {sqlite_cache_path}")
        cache_configured = True
     except Exception as e2:
-        warnings.warn(f"Failed to configure LiteLLM
+        warnings.warn(f"Failed to configure LiteLLM disk cache: {e2}. Caching is disabled.")
         litellm.cache = None
 
 if not cache_configured:
@@ -312,29 +415,49 @@ def _litellm_success_callback(
         cost_val = litellm.completion_cost(completion_response=completion_response)
         calculated_cost = cost_val if cost_val is not None else 0.0
     except Exception as e1:
-        # Attempt 2:
-        #
-        # This is often needed for batch completion items.
+        # Attempt 2: Compute via tokens and model mapping. If LiteLLM mapping is
+        # missing or API differs, fall back to CSV rates in _MODEL_RATE_MAP.
         logger.debug(f"Attempting cost calculation with fallback method: {e1}")
         try:
-            model_name = kwargs.get("model")
+            model_name = kwargs.get("model")
             if model_name and usage:
-
-
-
-
-
-
-
-
+                in_tok = getattr(usage, 'prompt_tokens', None)
+                out_tok = getattr(usage, 'completion_tokens', None)
+                # Some providers may use 'input_tokens'/'output_tokens'
+                if in_tok is None:
+                    in_tok = getattr(usage, 'input_tokens', 0)
+                if out_tok is None:
+                    out_tok = getattr(usage, 'output_tokens', 0)
+
+                # Try LiteLLM helper (arg names vary across versions)
+                try:
+                    cost_val = litellm.completion_cost(
+                        model=model_name,
+                        prompt_tokens=in_tok,
+                        completion_tokens=out_tok,
+                    )
+                    calculated_cost = cost_val if cost_val is not None else 0.0
+                except TypeError:
+                    # Older/newer versions may require input/output token names
+                    try:
+                        cost_val = litellm.completion_cost(
+                            model=model_name,
+                            input_tokens=in_tok,
+                            output_tokens=out_tok,
+                        )
+                        calculated_cost = cost_val if cost_val is not None else 0.0
+                    except Exception as e3:
+                        # Final fallback: compute using CSV rates
+                        rates = _MODEL_RATE_MAP.get(str(model_name))
+                        if rates is not None:
+                            in_rate, out_rate = rates
+                            calculated_cost = (float(in_tok or 0) * in_rate + float(out_tok or 0) * out_rate) / 1_000_000.0
+                        else:
+                            calculated_cost = 0.0
+                        logger.debug(f"Cost calculation failed with LiteLLM token API; used CSV rates if available. Detail: {e3}")
             else:
-                # If we can't get model name or usage, fallback to 0
                 calculated_cost = 0.0
-            # Optional: Log the original error e1 if needed
-            # logger.warning(f"[Callback WARN] Failed to calculate cost with response object ({e1}) and fallback failed.")
         except Exception as e2:
-            # Optional: Log secondary error e2 if needed
-            # logger.warning(f"[Callback WARN] Failed to calculate cost with fallback method: {e2}")
             calculated_cost = 0.0  # Default to 0 on any error
             logger.debug(f"Cost calculation failed with fallback method: {e2}")
 
@@ -352,14 +475,108 @@ def _litellm_success_callback(
 # Register the callback with LiteLLM
 litellm.success_callback = [_litellm_success_callback]
 
+# --- Cost Mapping Support (CSV Rates) ---
+# Populate from CSV inside llm_invoke; used by callback fallback
+_MODEL_RATE_MAP: Dict[str, Tuple[float, float]] = {}
+
+def _set_model_rate_map(df: pd.DataFrame) -> None:
+    global _MODEL_RATE_MAP
+    try:
+        _MODEL_RATE_MAP = {
+            str(row['model']): (
+                float(row['input']) if pd.notna(row['input']) else 0.0,
+                float(row['output']) if pd.notna(row['output']) else 0.0,
+            )
+            for _, row in df.iterrows()
+        }
+    except Exception:
+        _MODEL_RATE_MAP = {}
+
 # --- Helper Functions ---
 
-def
-"""
-if
-
+def _is_malformed_json_response(content: str, threshold: int = 100) -> bool:
+    """
+    Detect if a JSON response appears malformed due to excessive trailing newlines.
+
+    This can happen when Gemini generates thousands of \n characters in a JSON string value,
+    causing the response to be truncated and missing closing braces.
+
+    Args:
+        content: The raw response content string
+        threshold: Number of consecutive trailing \n sequences to consider malformed
+
+    Returns:
+        True if the response appears malformed, False otherwise
+    """
+    if not content or not isinstance(content, str):
+        return False
+
+    # Check if it starts like JSON but doesn't end properly
+    stripped = content.strip()
+    if not stripped.startswith('{'):
+        return False
+
+    # If it ends with }, it's probably fine
+    if stripped.endswith('}'):
+        return False
+
+    # Count trailing \n sequences (escaped newlines in JSON strings)
+    # The pattern \n in a JSON string appears as \\n in the raw content
+    trailing_newline_count = 0
+    check_content = stripped
+    while check_content.endswith('\\n'):
+        trailing_newline_count += 1
+        check_content = check_content[:-2]
+
+    # If there are many trailing \n sequences, it's likely malformed
+    if trailing_newline_count >= threshold:
+        return True
+
+    # Also check for response that looks truncated mid-string
+    # (ends with characters that suggest we're inside a JSON string value)
+    if not stripped.endswith('}') and not stripped.endswith(']') and not stripped.endswith('"'):
+        # Could be truncated in the middle of an escaped sequence
+        if stripped.endswith('\\'):
+            return True
+
+    return False
+
+
+def _load_model_data(csv_path: Optional[Path]) -> pd.DataFrame:
+    """Loads and preprocesses the LLM model data from CSV.
+
+    Args:
+        csv_path: Path to CSV file, or None to use package default
+
+    Returns:
+        DataFrame with model configuration data
+    """
+    # If csv_path is provided, try to load from it
+    if csv_path is not None:
+        if not csv_path.exists():
+            logger.warning(f"Specified LLM model CSV not found at {csv_path}, trying package default")
+            csv_path = None
+        else:
+            try:
+                df = pd.read_csv(csv_path)
+                logger.debug(f"Loaded model data from {csv_path}")
+                # Continue with the rest of the function...
+            except Exception as e:
+                logger.warning(f"Failed to load CSV from {csv_path}: {e}, trying package default")
+                csv_path = None
+
+    # If csv_path is None or loading failed, use package default
+    if csv_path is None:
+        try:
+            # Use importlib.resources to load the packaged CSV
+            csv_data = importlib.resources.files('pdd').joinpath('data/llm_model.csv').read_text()
+            import io
+            df = pd.read_csv(io.StringIO(csv_data))
+            logger.info("Loaded model data from package default")
+        except Exception as e:
+            raise FileNotFoundError(f"Failed to load default LLM model CSV from package: {e}")
+
     try:
-        df = pd.read_csv(csv_path)
         # Basic validation and type conversion
         required_cols = ['provider', 'model', 'input', 'output', 'coding_arena_elo', 'api_key', 'structured_output', 'reasoning_type']
         for col in required_cols:
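For orientation, the CSV-rate fallback above amounts to a simple per-million-token calculation. A minimal illustrative sketch (not part of the diff; the model name, rates, and token counts are hypothetical, and the per-million-token unit is inferred from the 1_000_000.0 divisor in the callback):

# Hypothetical rate map entry: (input_rate, output_rate) in USD per million tokens
rates = {"example-model": (2.5, 10.0)}
in_tok, out_tok = 1200, 350  # token counts taken from a provider usage object
in_rate, out_rate = rates["example-model"]
cost = (in_tok * in_rate + out_tok * out_rate) / 1_000_000.0
print(f"estimated cost: ${cost:.6f}")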
@@ -432,11 +649,26 @@ def _select_model_candidates(
         # Try finding base model in the *original* df in case it was filtered out
         original_base = model_df[model_df['model'] == base_model_name]
         if not original_base.empty:
-
-
-
-
-
+            # Base exists but may be misconfigured (e.g., missing API key). Keep erroring loudly.
+            raise ValueError(
+                f"Base model '{base_model_name}' found in CSV but requires API key '{original_base.iloc[0]['api_key']}' which might be missing or invalid configuration."
+            )
+        # Option A': Soft fallback – choose a reasonable surrogate base and continue
+        # Strategy (simplified and deterministic): pick the first available model
+        # from the CSV as the surrogate base. This mirrors typical CSV ordering
+        # expectations and keeps behavior predictable across environments.
+        try:
+            base_model = available_df.iloc[0]
+            logger.warning(
+                f"Base model '{base_model_name}' not found in CSV. Falling back to surrogate base '{base_model['model']}' (Option A')."
+            )
+        except Exception:
+            # If any unexpected error occurs during fallback, raise a clear error
+            raise ValueError(
+                f"Specified base model '{base_model_name}' not found and fallback selection failed. Check your LLM model CSV."
+            )
+    else:
+        base_model = base_model_row.iloc[0]
 
     # 3. Determine Target and Sort
     candidates = []
@@ -447,9 +679,10 @@
         # Sort remaining by ELO descending as fallback
         available_df['sort_metric'] = -available_df['coding_arena_elo']  # Negative for descending sort
         candidates = available_df.sort_values(by='sort_metric').to_dict('records')
-        # Ensure base model is first if it exists
-
-
+        # Ensure effective base model is first if it exists (supports surrogate base)
+        effective_base_name = str(base_model['model']) if isinstance(base_model, pd.Series) else base_model_name
+        if any(c['model'] == effective_base_name for c in candidates):
+            candidates.sort(key=lambda x: 0 if x['model'] == effective_base_name else 1)
         target_metric_value = f"Base Model ELO: {base_model['coding_arena_elo']}"
 
     elif strength < 0.5:
@@ -666,6 +899,378 @@ def _format_messages(prompt: str, input_data: Union[Dict[str, Any], List[Dict[st
     except Exception as e:
         raise ValueError(f"Error formatting prompt: {e}") from e
 
+# --- JSON Extraction Helpers ---
+import re
+
+def _extract_fenced_json_block(text: str) -> Optional[str]:
+    try:
+        m = re.search(r"```json\s*(\{[\s\S]*?\})\s*```", text, flags=re.IGNORECASE)
+        if m:
+            return m.group(1)
+        return None
+    except Exception:
+        return None
+
+def _extract_balanced_json_objects(text: str) -> List[str]:
+    results: List[str] = []
+    brace_stack = 0
+    start_idx = -1
+    in_string = False
+    escape = False
+    for i, ch in enumerate(text):
+        if in_string:
+            if escape:
+                escape = False
+            elif ch == '\\':
+                escape = True
+            elif ch == '"':
+                in_string = False
+            continue
+        else:
+            if ch == '"':
+                in_string = True
+                continue
+            if ch == '{':
+                if brace_stack == 0:
+                    start_idx = i
+                brace_stack += 1
+            elif ch == '}':
+                if brace_stack > 0:
+                    brace_stack -= 1
+                    if brace_stack == 0 and start_idx != -1:
+                        results.append(text[start_idx:i+1])
+                        start_idx = -1
+    return results
+
+
+def _looks_like_python_code(s: str) -> bool:
+    """
+    Heuristic check if a string looks like Python code.
+
+    Used to determine if we should attempt Python syntax repair on a string field.
+    """
+    if not s or len(s) < 10:
+        return False
+    # Check for common Python patterns
+    code_indicators = ('def ', 'class ', 'import ', 'from ', 'if __name__', 'return ', 'print(')
+    return any(indicator in s for indicator in code_indicators)
+
+
+def _repair_python_syntax(code: str) -> str:
+    """
+    Validate Python code syntax and attempt repairs if invalid.
+
+    Sometimes LLMs include spurious characters at string boundaries,
+    especially when the code contains quotes. This function attempts
+    to detect and repair such issues.
+
+    Args:
+        code: Python code string to validate/repair
+
+    Returns:
+        Repaired code if a fix was found, otherwise original code
+    """
+    import ast
+
+    if not code or not code.strip():
+        return code
+
+    # First, try to parse as-is
+    try:
+        ast.parse(code)
+        return code  # Valid, no repair needed
+    except SyntaxError:
+        pass
+
+    # Try common repairs
+    repaired = code
+
+    # Repair 1: Trailing spurious quote (the specific issue we've seen)
+    for quote in ['"', "'"]:
+        if repaired.rstrip().endswith(quote):
+            candidate = repaired.rstrip()[:-1]
+            try:
+                ast.parse(candidate)
+                logger.info(f"[INFO] Repaired code by removing trailing {quote!r}")
+                return candidate
+            except SyntaxError:
+                pass
+
+    # Repair 2: Leading spurious quote
+    for quote in ['"', "'"]:
+        if repaired.lstrip().startswith(quote):
+            candidate = repaired.lstrip()[1:]
+            try:
+                ast.parse(candidate)
+                logger.info(f"[INFO] Repaired code by removing leading {quote!r}")
+                return candidate
+            except SyntaxError:
+                pass
+
+    # Repair 3: Both leading and trailing spurious quotes
+    for quote in ['"', "'"]:
+        stripped = repaired.strip()
+        if stripped.startswith(quote) and stripped.endswith(quote):
+            candidate = stripped[1:-1]
+            try:
+                ast.parse(candidate)
+                logger.info(f"[INFO] Repaired code by removing surrounding {quote!r}")
+                return candidate
+            except SyntaxError:
+                pass
+
+    # If no repair worked, return original (let it fail downstream)
+    return code
+
+
+def _smart_unescape_code(code: str) -> str:
+    """
+    Unescape literal \\n sequences in code while preserving them inside string literals.
+
+    When LLMs return code as JSON, newlines get double-escaped. After JSON parsing,
+    we have literal backslash-n (2 chars) that should be actual newlines for code
+    structure, BUT escape sequences inside Python strings (like print("\\n")) should
+    remain as escape sequences.
+
+    Args:
+        code: Python code that may have literal \\n sequences
+
+    Returns:
+        Code with structural newlines unescaped but string literals preserved
+    """
+    LITERAL_BACKSLASH_N = '\\' + 'n'  # Literal \n (2 chars)
+
+    if LITERAL_BACKSLASH_N not in code:
+        return code
+
+    # First, check if the code already has actual newlines (mixed state)
+    # If it does, we need to be more careful
+    has_actual_newlines = '\n' in code
+
+    if not has_actual_newlines:
+        # All newlines are escaped - this is the double-escaped case
+        # We need to unescape them but preserve \n inside string literals
+
+        # Strategy: Use a placeholder for \n inside strings, unescape all, then restore
+        # We detect string literals by tracking quote state
+
+        result = []
+        i = 0
+        in_string = False
+        string_char = None
+        in_fstring = False
+
+        # Placeholder that won't appear in code
+        PLACEHOLDER = '\x00NEWLINE_ESCAPE\x00'
+
+        while i < len(code):
+            # Check for escape sequences (both actual and literal)
+            if i + 1 < len(code) and code[i] == '\\':
+                next_char = code[i + 1]
+
+                if in_string:
+                    # Inside a string - preserve escape sequences
+                    if next_char == 'n':
+                        result.append(PLACEHOLDER)
+                        i += 2
+                        continue
+                    elif next_char == 't':
+                        result.append('\\' + 't')  # Keep \t as-is in strings
+                        i += 2
+                        continue
+                    elif next_char == 'r':
+                        result.append('\\' + 'r')  # Keep \r as-is in strings
+                        i += 2
+                        continue
+                    elif next_char in ('"', "'", '\\'):
+                        # Keep escaped quotes and backslashes
+                        result.append(code[i:i+2])
+                        i += 2
+                        continue
+
+            # Check for string delimiters
+            if not in_string:
+                # Check for triple quotes first
+                if i + 2 < len(code) and code[i:i+3] in ('"""', "'''"):
+                    in_string = True
+                    string_char = code[i:i+3]
+                    # Check if preceded by 'f' for f-string
+                    in_fstring = i > 0 and code[i-1] == 'f'
+                    result.append(code[i:i+3])
+                    i += 3
+                    continue
+                elif code[i] in ('"', "'"):
+                    in_string = True
+                    string_char = code[i]
+                    in_fstring = i > 0 and code[i-1] == 'f'
+                    result.append(code[i])
+                    i += 1
+                    continue
+            else:
+                # Check for end of string
+                if len(string_char) == 3:  # Triple quote
+                    if i + 2 < len(code) and code[i:i+3] == string_char:
+                        in_string = False
+                        in_fstring = False
+                        result.append(code[i:i+3])
+                        i += 3
+                        continue
+                else:  # Single quote
+                    if code[i] == string_char:
+                        in_string = False
+                        in_fstring = False
+                        result.append(code[i])
+                        i += 1
+                        continue
+
+            result.append(code[i])
+            i += 1
+
+        intermediate = ''.join(result)
+
+        # Now unescape all remaining \n (these are structural)
+        LITERAL_BACKSLASH_R_N = '\\' + 'r' + '\\' + 'n'
+        LITERAL_BACKSLASH_T = '\\' + 't'
+
+        intermediate = intermediate.replace(LITERAL_BACKSLASH_R_N, '\r\n')
+        intermediate = intermediate.replace(LITERAL_BACKSLASH_N, '\n')
+        intermediate = intermediate.replace(LITERAL_BACKSLASH_T, '\t')
+
+        # Restore placeholders to \n (as escape sequences in strings)
+        result_code = intermediate.replace(PLACEHOLDER, '\\n')
+
+        return result_code
+    else:
+        # Mixed state - some actual newlines, some literal \n
+        # This means the JSON parsing already converted some, but not all
+        # The literal \n remaining are likely in strings, so leave them alone
+        return code
+
+
+def _unescape_code_newlines(obj: Any) -> Any:
+    """
+    Fix double-escaped newlines in Pydantic model string fields.
+
+    Some models (e.g., Gemini) return JSON with \\\\n instead of \\n in code strings,
+    resulting in literal backslash-n text instead of actual newlines after JSON parsing.
+    This function recursively unescapes these in string fields of Pydantic models.
+
+    Also repairs Python syntax errors in code-like string fields (e.g., trailing quotes).
+
+    The check uses literal backslash-n (2 chars) vs actual newline (1 char):
+    - '\\\\n' in Python source = literal backslash + n (2 chars) - needs fixing
+    - '\\n' in Python source = newline character (1 char) - already correct
+
+    Args:
+        obj: A Pydantic model, dict, list, or primitive value
+
+    Returns:
+        The same object with string fields unescaped and code fields repaired
+    """
+    if obj is None:
+        return obj
+
+    def _process_string(s: str) -> str:
+        """Process a string: unescape newlines and repair Python syntax if needed."""
+        result = s
+        # Smart unescape that preserves \n inside string literals
+        if _looks_like_python_code(result):
+            result = _smart_unescape_code(result)
+            result = _repair_python_syntax(result)
+        else:
+            # For non-code strings, do simple unescape
+            LITERAL_BACKSLASH_N = '\\' + 'n'
+            LITERAL_BACKSLASH_R_N = '\\' + 'r' + '\\' + 'n'
+            LITERAL_BACKSLASH_T = '\\' + 't'
+            if LITERAL_BACKSLASH_N in result:
+                result = result.replace(LITERAL_BACKSLASH_R_N, '\r\n')
+                result = result.replace(LITERAL_BACKSLASH_N, '\n')
+                result = result.replace(LITERAL_BACKSLASH_T, '\t')
+        return result
+
+    # Handle Pydantic models
+    if isinstance(obj, BaseModel):
+        # Get all field values and process strings
+        for field_name in obj.model_fields:
+            value = getattr(obj, field_name)
+            if isinstance(value, str):
+                processed = _process_string(value)
+                if processed != value:
+                    object.__setattr__(obj, field_name, processed)
+            elif isinstance(value, (dict, list, BaseModel)):
+                _unescape_code_newlines(value)
+        return obj
+
+    # Handle dicts
+    if isinstance(obj, dict):
+        for key, value in obj.items():
+            if isinstance(value, str):
+                obj[key] = _process_string(value)
+            elif isinstance(value, (dict, list)):
+                _unescape_code_newlines(value)
+        return obj
+
+    # Handle lists
+    if isinstance(obj, list):
+        for i, item in enumerate(obj):
+            if isinstance(item, str):
+                obj[i] = _process_string(item)
+            elif isinstance(item, (dict, list, BaseModel)):
+                _unescape_code_newlines(item)
+        return obj
+
+    return obj
+
+
+def _has_invalid_python_code(obj: Any) -> bool:
+    """
+    Check if any code-like string fields have invalid Python syntax.
+
+    This is used after _unescape_code_newlines to detect if repair failed
+    and we should retry with cache disabled.
+
+    Args:
+        obj: A Pydantic model, dict, list, or primitive value
+
+    Returns:
+        True if there are invalid code fields that couldn't be repaired
+    """
+    import ast
+
+    if obj is None:
+        return False
+
+    if isinstance(obj, str):
+        if _looks_like_python_code(obj):
+            try:
+                ast.parse(obj)
+                return False  # Valid
+            except SyntaxError:
+                return True  # Invalid
+        return False
+
+    if isinstance(obj, BaseModel):
+        for field_name in obj.model_fields:
+            value = getattr(obj, field_name)
+            if _has_invalid_python_code(value):
+                return True
+        return False
+
+    if isinstance(obj, dict):
+        for value in obj.values():
+            if _has_invalid_python_code(value):
+                return True
+        return False
+
+    if isinstance(obj, list):
+        for item in obj:
+            if _has_invalid_python_code(item):
+                return True
+        return False
+
+    return False
+
+
 # --- Main Function ---
 
 def llm_invoke(
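As context for the helpers added in the hunk above, a minimal usage sketch (not part of the diff) of the balanced-brace extractor on a response that mixes prose and JSON:

sample = 'Here is the result: {"code": "print(1)", "note": "ok"} trailing text'
objects = _extract_balanced_json_objects(sample)
# objects == ['{"code": "print(1)", "note": "ok"}']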
@@ -675,6 +1280,7 @@ def llm_invoke(
     temperature: float = 0.1,
     verbose: bool = False,
     output_pydantic: Optional[Type[BaseModel]] = None,
+    output_schema: Optional[Dict[str, Any]] = None,
     time: float = 0.25,
     use_batch_mode: bool = False,
     messages: Optional[Union[List[Dict[str, str]], List[List[Dict[str, str]]]]] = None,
@@ -691,6 +1297,7 @@
         temperature: LLM temperature.
         verbose: Print detailed logs.
         output_pydantic: Optional Pydantic model for structured output.
+        output_schema: Optional raw JSON schema dictionary for structured output (alternative to output_pydantic).
         time: Relative thinking time (0-1, default 0.25).
         use_batch_mode: Use batch completion if True.
         messages: Pre-formatted list of messages (or list of lists for batch). If provided, ignores prompt and input_json.
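To illustrate the new parameter, a hedged usage sketch (not part of the diff; the prompt text and schema contents are made up, and only the prompt/input_json/output_schema arguments are taken from the docstring above):

schema = {"type": "object", "properties": {"answer": {"type": "string"}}, "required": ["answer"]}
result = llm_invoke(
    prompt="Answer the question: {question}",
    input_json={"question": "What is PDD?"},
    output_schema=schema,  # alternative to output_pydantic
)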
@@ -808,6 +1415,16 @@
     # --- 3. Iterate Through Candidates and Invoke LLM ---
     last_exception = None
     newly_acquired_keys: Dict[str, bool] = {}  # Track keys obtained in this run
+
+    # Initialize variables for retry section
+    response_format = None
+    time_kwargs = {}
+
+    # Update global rate map for callback cost fallback
+    try:
+        _set_model_rate_map(model_df)
+    except Exception:
+        pass
 
     for model_info in candidate_models:
         model_name_litellm = model_info['model']
@@ -818,6 +1435,9 @@
         logger.info(f"\n[ATTEMPT] Trying model: {model_name_litellm} (Provider: {provider})")
 
         retry_with_same_model = True
+        # Track per-model temperature adjustment attempt (avoid infinite loop)
+        current_temperature = temperature
+        temp_adjustment_done = False
         while retry_with_same_model:
             retry_with_same_model = False  # Assume success unless auth error on new key
 
@@ -832,7 +1452,8 @@
             litellm_kwargs: Dict[str, Any] = {
                 "model": model_name_litellm,
                 "messages": formatted_messages,
-
+                # Use a local adjustable temperature to allow provider-specific fallbacks
+                "temperature": current_temperature,
             }
 
             api_key_name_from_csv = model_info.get('api_key')  # From CSV
@@ -845,7 +1466,14 @@
             if is_vertex_model and api_key_name_from_csv == 'VERTEX_CREDENTIALS':
                 credentials_file_path = os.getenv("VERTEX_CREDENTIALS")  # Path from env var
                 vertex_project_env = os.getenv("VERTEX_PROJECT")
-
+                # Check for per-model location override, fall back to env var
+                model_location = model_info.get('location')
+                if pd.notna(model_location) and str(model_location).strip():
+                    vertex_location_env = str(model_location).strip()
+                    if verbose:
+                        logger.info(f"[INFO] Using per-model location override: '{vertex_location_env}' for model '{model_name_litellm}'")
+                else:
+                    vertex_location_env = os.getenv("VERTEX_LOCATION")
 
                 if credentials_file_path and vertex_project_env and vertex_location_env:
                     try:
@@ -859,14 +1487,23 @@
                         if verbose:
                             logger.info(f"[INFO] For Vertex AI: using vertex_credentials from '{credentials_file_path}', project '{vertex_project_env}', location '{vertex_location_env}'.")
                     except FileNotFoundError:
+                        # Still pass project and location so ADC can work
+                        litellm_kwargs["vertex_project"] = vertex_project_env
+                        litellm_kwargs["vertex_location"] = vertex_location_env
                         if verbose:
-                            logger.
+                            logger.warning(f"[WARN] Vertex credentials file not found at '{credentials_file_path}'. Using ADC with project '{vertex_project_env}', location '{vertex_location_env}'.")
                     except json.JSONDecodeError:
+                        # Still pass project and location so ADC can work
+                        litellm_kwargs["vertex_project"] = vertex_project_env
+                        litellm_kwargs["vertex_location"] = vertex_location_env
                         if verbose:
-                            logger.error(f"[ERROR] Failed to decode JSON from Vertex credentials file: '{credentials_file_path}'.
+                            logger.error(f"[ERROR] Failed to decode JSON from Vertex credentials file: '{credentials_file_path}'. Using ADC with project '{vertex_project_env}', location '{vertex_location_env}'.")
                     except Exception as e:
+                        # Still pass project and location so ADC can work
+                        litellm_kwargs["vertex_project"] = vertex_project_env
+                        litellm_kwargs["vertex_location"] = vertex_location_env
                         if verbose:
-                            logger.error(f"[ERROR] Failed to load
+                            logger.error(f"[ERROR] Failed to load Vertex credentials from '{credentials_file_path}': {e}. Using ADC with project '{vertex_project_env}', location '{vertex_location_env}'.")
                 else:
                     if verbose:
                         logger.warning(f"[WARN] For Vertex AI (using '{api_key_name_from_csv}'): One or more required environment variables (VERTEX_CREDENTIALS, VERTEX_PROJECT, VERTEX_LOCATION) are missing.")
@@ -885,9 +1522,16 @@
 
             # If this model is Vertex AI AND uses a direct API key string (not VERTEX_CREDENTIALS from CSV),
             # also pass project and location from env vars.
-            if is_vertex_model:
+            if is_vertex_model:
                 vertex_project_env = os.getenv("VERTEX_PROJECT")
-
+                # Check for per-model location override, fall back to env var
+                model_location = model_info.get('location')
+                if pd.notna(model_location) and str(model_location).strip():
+                    vertex_location_env = str(model_location).strip()
+                    if verbose:
+                        logger.info(f"[INFO] Using per-model location override: '{vertex_location_env}' for model '{model_name_litellm}'")
+                else:
+                    vertex_location_env = os.getenv("VERTEX_LOCATION")
                 if vertex_project_env and vertex_location_env:
                     litellm_kwargs["vertex_project"] = vertex_project_env
                     litellm_kwargs["vertex_location"] = vertex_location_env
@@ -901,13 +1545,36 @@
             elif verbose:  # No api_key_name_from_csv in CSV for this model
                 logger.info(f"[INFO] No API key name specified in CSV for model '{model_name_litellm}'. LiteLLM will use its default authentication mechanisms (e.g., standard provider env vars or ADC for Vertex AI).")
 
-            # Add api_base if present in CSV
+            # Add base_url/api_base override if present in CSV
             api_base = model_info.get('base_url')
             if pd.notna(api_base) and api_base:
+                # LiteLLM prefers `base_url`; some older paths accept `api_base`.
+                litellm_kwargs["base_url"] = str(api_base)
                 litellm_kwargs["api_base"] = str(api_base)
 
-            #
-
+            # Provider-specific defaults (e.g., LM Studio)
+            model_name_lower = str(model_name_litellm).lower()
+            provider_lower_for_model = provider.lower()
+            is_lm_studio = model_name_lower.startswith('lm_studio/') or provider_lower_for_model == 'lm_studio'
+            is_groq = model_name_lower.startswith('groq/') or provider_lower_for_model == 'groq'
+            if is_lm_studio:
+                # Ensure base_url is set (fallback to env LM_STUDIO_API_BASE or localhost)
+                if not litellm_kwargs.get("base_url"):
+                    lm_studio_base = os.getenv("LM_STUDIO_API_BASE", "http://localhost:1234/v1")
+                    litellm_kwargs["base_url"] = lm_studio_base
+                    litellm_kwargs["api_base"] = lm_studio_base
+                    if verbose:
+                        logger.info(f"[INFO] Using LM Studio base_url: {lm_studio_base}")
+
+                # Ensure a non-empty api_key; LM Studio accepts any non-empty token (e.g., 'lm-studio')
+                if not litellm_kwargs.get("api_key"):
+                    lm_studio_key = os.getenv("LM_STUDIO_API_KEY") or "lm-studio"
+                    litellm_kwargs["api_key"] = lm_studio_key
+                    if verbose:
+                        logger.info("[INFO] Using LM Studio api_key placeholder (set LM_STUDIO_API_KEY to customize).")
+
+            # Handle Structured Output (JSON Mode / Pydantic / JSON Schema)
+            if output_pydantic or output_schema:
                 # Check if model supports structured output based on CSV flag or LiteLLM check
                 supports_structured = model_info.get('structured_output', False)
                 # Optional: Add litellm.supports_response_schema check if CSV flag is unreliable
@@ -916,18 +1583,87 @@
                 # except: pass  # Ignore errors in supports_response_schema check
 
                 if supports_structured:
-                    if
-
-
-
-
+                    if output_pydantic:
+                        if verbose:
+                            logger.info(f"[INFO] Requesting structured output (Pydantic: {output_pydantic.__name__}) for {model_name_litellm}")
+                        # Use explicit json_object format with response_schema for better Gemini/Vertex AI compatibility
+                        # Passing Pydantic class directly may not trigger native structured output for all providers
+                        response_format = {
+                            "type": "json_object",
+                            "response_schema": output_pydantic.model_json_schema()
+                        }
+                    else:  # output_schema is set
+                        if verbose:
+                            logger.info(f"[INFO] Requesting structured output (JSON Schema) for {model_name_litellm}")
+                        # LiteLLM expects {"type": "json_schema", "json_schema": {"name": "response", "schema": schema_dict, "strict": true}}
+                        # OR for some providers just the schema dict if type is json_object.
+                        # Best practice for broad compatibility via LiteLLM is usually the dict directly or wrapped.
+                        # For now, let's assume we pass the schema dict as 'response_format' which LiteLLM handles for many providers
+                        # or wrap it if needed. LiteLLM 1.40+ supports passing the dict directly for many.
+                        response_format = {
+                            "type": "json_schema",
+                            "json_schema": {
+                                "name": "response",
+                                "schema": output_schema,
+                                "strict": False
+                            }
+                        }
+
+                    litellm_kwargs["response_format"] = response_format
+
+                    # LM Studio requires "json_schema" format, not "json_object"
+                    # Use extra_body to bypass litellm.drop_params stripping the schema
+                    if is_lm_studio and response_format and response_format.get("type") == "json_object":
+                        schema = response_format.get("response_schema", {})
+                        lm_studio_response_format = {
+                            "type": "json_schema",
+                            "json_schema": {
+                                "name": "response",
+                                "strict": True,
+                                "schema": schema
+                            }
+                        }
+                        # Use extra_body to bypass drop_params - passes directly to API
+                        litellm_kwargs["extra_body"] = {"response_format": lm_studio_response_format}
+                        # Remove from regular response_format to avoid conflicts
+                        if "response_format" in litellm_kwargs:
+                            del litellm_kwargs["response_format"]
+                        if verbose:
+                            logger.info(f"[INFO] Using extra_body for LM Studio response_format to bypass drop_params")
+
+                    # Groq has issues with tool-based structured output - use JSON mode with schema in prompt
+                    if is_groq and response_format:
+                        # Get the schema to include in system prompt
+                        if output_pydantic:
+                            schema = output_pydantic.model_json_schema()
+                        else:
+                            schema = output_schema
+
+                        # Use simple json_object mode (Groq's tool_use often fails)
+                        litellm_kwargs["response_format"] = {"type": "json_object"}
+
+                        # Prepend schema instruction to messages (json module is imported at top of file)
+                        schema_instruction = f"You must respond with valid JSON matching this schema:\n```json\n{json.dumps(schema, indent=2)}\n```\nRespond ONLY with the JSON object, no other text."
+
+                        # Find or create system message to prepend schema
+                        messages_list = litellm_kwargs.get("messages", [])
+                        if messages_list and messages_list[0].get("role") == "system":
+                            messages_list[0]["content"] = schema_instruction + "\n\n" + messages_list[0]["content"]
+                        else:
+                            messages_list.insert(0, {"role": "system", "content": schema_instruction})
+                        litellm_kwargs["messages"] = messages_list
+
+                        if verbose:
+                            logger.info(f"[INFO] Using JSON object mode with schema in prompt for Groq (avoiding tool_use issues)")
+
                     # As a fallback, one could use:
                     # litellm_kwargs["response_format"] = {"type": "json_object"}
                     # And potentially enable client-side validation:
                     # litellm.enable_json_schema_validation = True  # Enable globally if needed
                 else:
+                    schema_name = output_pydantic.__name__ if output_pydantic else "JSON Schema"
                     if verbose:
-                        logger.warning(f"[WARN] Model {model_name_litellm} does not support structured output via CSV flag. Output might not be valid {
+                        logger.warning(f"[WARN] Model {model_name_litellm} does not support structured output via CSV flag. Output might not be valid {schema_name}.")
                     # Proceed without forcing JSON mode, parsing will be attempted later
 
             # --- NEW REASONING LOGIC ---
@@ -942,7 +1678,9 @@
                 # Currently known: Anthropic uses 'thinking'
                 # Model name comparison is more robust than provider string
                 if provider == 'anthropic':  # Check provider column instead of model prefix
-
+                    thinking_param = {"type": "enabled", "budget_tokens": budget}
+                    litellm_kwargs["thinking"] = thinking_param
+                    time_kwargs["thinking"] = thinking_param
                     if verbose:
                         logger.info(f"[INFO] Requesting Anthropic thinking (budget type) with budget: {budget} tokens for {model_name_litellm}")
                 else:
@@ -960,10 +1698,32 @@ def llm_invoke(
                 effort = "high"
             elif time > 0.3:
                 effort = "medium"
-
-
-
-
+
+            # Map effort parameter per-provider/model family
+            model_lower = str(model_name_litellm).lower()
+            provider_lower = str(provider).lower()
+
+            if provider_lower == 'openai' and model_lower.startswith('gpt-5'):
+                # OpenAI 5-series uses Responses API with nested 'reasoning'
+                reasoning_obj = {"effort": effort, "summary": "auto"}
+                litellm_kwargs["reasoning"] = reasoning_obj
+                time_kwargs["reasoning"] = reasoning_obj
+                if verbose:
+                    logger.info(f"[INFO] Requesting OpenAI reasoning.effort='{effort}' for {model_name_litellm} (Responses API)")
+
+            elif provider_lower == 'openai' and model_lower.startswith('o') and 'mini' not in model_lower:
+                # Historical o* models may use LiteLLM's generic reasoning_effort param
+                litellm_kwargs["reasoning_effort"] = effort
+                time_kwargs["reasoning_effort"] = effort
+                if verbose:
+                    logger.info(f"[INFO] Requesting reasoning_effort='{effort}' for {model_name_litellm}")
+
+            else:
+                # Fallback to LiteLLM generic param when supported by provider adapter
+                litellm_kwargs["reasoning_effort"] = effort
+                time_kwargs["reasoning_effort"] = effort
+                if verbose:
+                    logger.info(f"[INFO] Requesting generic reasoning_effort='{effort}' for {model_name_litellm}")
 
         elif reasoning_type == 'none':
             if verbose:
@@ -995,6 +1755,166 @@ def llm_invoke(
                 logger.debug("NOT ENABLING CACHING: litellm.cache is None at call time")
 
 
+        # Route OpenAI gpt-5* models through Responses API to support 'reasoning'
+        model_lower_for_call = str(model_name_litellm).lower()
+        provider_lower_for_call = str(provider).lower()
+
+        if (
+            not use_batch_mode
+            and provider_lower_for_call == 'openai'
+            and model_lower_for_call.startswith('gpt-5')
+        ):
+            if verbose:
+                logger.info(f"[INFO] Calling LiteLLM Responses API for {model_name_litellm}...")
+            try:
+                # Build input text from messages
+                if isinstance(formatted_messages, list) and formatted_messages and isinstance(formatted_messages[0], dict):
+                    input_text = "\n\n".join(f"{m.get('role','user')}: {m.get('content','')}" for m in formatted_messages)
+                else:
+                    # Fallback: string cast
+                    input_text = str(formatted_messages)
+
+                # Derive effort mapping already computed in time_kwargs
+                reasoning_param = time_kwargs.get("reasoning")
+
+                # Build text.format block for structured output
+                # Default to plain text format
+                text_block = {"format": {"type": "text"}}
+
+                # If structured output requested, use text.format with json_schema
+                # This is the correct way to enforce structured output via litellm.responses()
+                if output_pydantic or output_schema:
+                    try:
+                        if output_pydantic:
+                            schema = output_pydantic.model_json_schema()
+                            name = output_pydantic.__name__
+                        else:
+                            schema = output_schema
+                            name = "response"
+
+                        # Add additionalProperties: false for strict mode (required by OpenAI)
+                        schema['additionalProperties'] = False
+
+                        # Use text.format with json_schema for structured output
+                        text_block = {
+                            "format": {
+                                "type": "json_schema",
+                                "name": name,
+                                "strict": True,
+                                "schema": schema,
+                            }
+                        }
+                        if verbose:
+                            logger.info(f"[INFO] Using structured output via text.format for Responses API")
+                    except Exception as schema_e:
+                        logger.warning(f"[WARN] Failed to derive JSON schema: {schema_e}. Proceeding with plain text format.")
+
+                # Build kwargs for litellm.responses()
+                responses_kwargs = {
+                    "model": model_name_litellm,
+                    "input": input_text,
+                    "text": text_block,
+                }
+                if verbose and temperature not in (None, 0, 0.0):
+                    logger.info("[INFO] Skipping 'temperature' for OpenAI GPT-5 Responses call (unsupported by API).")
+                if reasoning_param is not None:
+                    responses_kwargs["reasoning"] = reasoning_param
+
+                # Call litellm.responses() which handles the API interaction
+                resp = litellm.responses(**responses_kwargs)
+
+                # Extract text result from response
+                result_text = None
+                try:
+                    # LiteLLM responses return output as a list of items
+                    for item in resp.output:
+                        if getattr(item, 'type', None) == 'message' and hasattr(item, 'content') and item.content:
+                            for content_item in item.content:
+                                if hasattr(content_item, 'text'):
+                                    result_text = content_item.text
+                                    break
+                        if result_text:
+                            break
+                except Exception:
+                    result_text = None
+
+                # Calculate cost using usage + CSV rates
+                total_cost = 0.0
+                usage = getattr(resp, "usage", None)
+                if usage is not None:
+                    in_tok = getattr(usage, "input_tokens", 0) or 0
+                    out_tok = getattr(usage, "output_tokens", 0) or 0
+                    in_rate = model_info.get('input', 0.0) or 0.0
+                    out_rate = model_info.get('output', 0.0) or 0.0
+                    total_cost = (in_tok * in_rate + out_tok * out_rate) / 1_000_000.0
+
+                # Parse result if Pydantic output requested
+                final_result = None
+                if output_pydantic and result_text:
+                    try:
+                        final_result = output_pydantic.model_validate_json(result_text)
+                    except Exception as e:
+                        # With structured output, parsing should succeed
+                        # But if it fails, try JSON repair as fallback
+                        logger.warning(f"[WARN] Pydantic parse failed on Responses output: {e}. Attempting JSON repair...")
+
+                        # Try extracting from fenced JSON blocks first
+                        fenced = _extract_fenced_json_block(result_text)
+                        candidates: List[str] = []
+                        if fenced:
+                            candidates.append(fenced)
+                        else:
+                            candidates.extend(_extract_balanced_json_objects(result_text))
+
+                        # Also try the raw text as-is after stripping fences
+                        cleaned = result_text.strip()
+                        if cleaned.startswith("```json"):
+                            cleaned = cleaned[7:]
+                        elif cleaned.startswith("```"):
+                            cleaned = cleaned[3:]
+                        if cleaned.endswith("```"):
+                            cleaned = cleaned[:-3]
+                        cleaned = cleaned.strip()
+                        if cleaned and cleaned not in candidates:
+                            candidates.append(cleaned)
+
+                        parse_succeeded = False
+                        for cand in candidates:
+                            try:
+                                final_result = output_pydantic.model_validate_json(cand)
+                                parse_succeeded = True
+                                logger.info(f"[SUCCESS] JSON repair succeeded for Responses output")
+                                break
+                            except Exception:
+                                continue
+
+                        if not parse_succeeded:
+                            logger.error(f"[ERROR] All JSON repair attempts failed for Responses output. Original error: {e}")
+                            final_result = f"ERROR: Failed to parse structured output from Responses API. Raw: {repr(result_text)[:200]}"
+                else:
+                    final_result = result_text
+
+                if verbose:
+                    logger.info(f"[RESULT] Model Used: {model_name_litellm}")
+                    logger.info(f"[RESULT] Total Cost (estimated): ${total_cost:.6g}")
+
+                return {
+                    'result': final_result,
+                    'cost': total_cost,
+                    'model_name': model_name_litellm,
+                    'thinking_output': None,
+                }
+            except Exception as e:
+                last_exception = e
+                logger.error(f"[ERROR] OpenAI Responses call failed for {model_name_litellm}: {e}")
+                # Remove 'reasoning' key to avoid OpenAI Chat API unknown param errors
+                if "reasoning" in litellm_kwargs:
+                    try:
+                        litellm_kwargs.pop("reasoning", None)
+                    except Exception:
+                        pass
+                # Fall through to LiteLLM path as a fallback
+
         if use_batch_mode:
             if verbose:
                 logger.info(f"[INFO] Calling litellm.batch_completion for {model_name_litellm}...")
@@ -1002,6 +1922,16 @@ def llm_invoke(
 
 
         else:
+            # Anthropic requirement: when 'thinking' is enabled, temperature must be 1
+            try:
+                if provider.lower() == 'anthropic' and 'thinking' in litellm_kwargs:
+                    if litellm_kwargs.get('temperature') != 1:
+                        if verbose:
+                            logger.info("[INFO] Anthropic thinking enabled: forcing temperature=1 for compliance.")
+                        litellm_kwargs['temperature'] = 1
+                        current_temperature = 1
+            except Exception:
+                pass
             if verbose:
                 logger.info(f"[INFO] Calling litellm.completion for {model_name_litellm}...")
             response = litellm.completion(**litellm_kwargs)
@@ -1059,13 +1989,12 @@ def llm_invoke(
                         retry_response = litellm.completion(
                             model=model_name_litellm,
                             messages=retry_messages,
-                            temperature=
+                            temperature=current_temperature,
                             response_format=response_format,
-                            max_completion_tokens=max_tokens,
                             **time_kwargs
                         )
-                        # Re-enable cache
-                        litellm.cache =
+                        # Re-enable cache - restore original configured cache (restore to original state, even if None)
+                        litellm.cache = configured_cache
                         # Extract result from retry
                         retry_raw_result = retry_response.choices[0].message.content
                         if retry_raw_result is not None:
@@ -1083,21 +2012,66 @@ def llm_invoke(
                     logger.error(f"[ERROR] Cannot retry - batch mode or missing prompt/input_json")
                     results.append("ERROR: LLM returned None content and cannot retry")
                     continue
-
-
+
+            # Check for malformed JSON response (excessive trailing newlines causing truncation)
+            # This can happen when Gemini generates thousands of \n in JSON string values
+            if isinstance(raw_result, str) and _is_malformed_json_response(raw_result):
+                logger.warning(f"[WARNING] Detected malformed JSON response with excessive trailing newlines for item {i}. Retrying with cache bypass...")
+                if not use_batch_mode and prompt and input_json is not None:
+                    # Add a small space to bypass cache
+                    modified_prompt = prompt + " "
+                    try:
+                        retry_messages = _format_messages(modified_prompt, input_json, use_batch_mode)
+                        # Disable cache for retry
+                        original_cache = litellm.cache
+                        litellm.cache = None
+                        retry_response = litellm.completion(
+                            model=model_name_litellm,
+                            messages=retry_messages,
+                            temperature=current_temperature,
+                            response_format=response_format,
+                            **time_kwargs
+                        )
+                        # Re-enable cache
+                        litellm.cache = original_cache
+                        # Extract result from retry
+                        retry_raw_result = retry_response.choices[0].message.content
+                        if retry_raw_result is not None and not _is_malformed_json_response(retry_raw_result):
+                            logger.info(f"[SUCCESS] Cache bypass retry for malformed JSON succeeded for item {i}")
+                            raw_result = retry_raw_result
+                        else:
+                            # Retry also failed, but we'll continue with repair logic below
+                            logger.warning(f"[WARNING] Cache bypass retry also returned malformed JSON for item {i}, attempting repair...")
+                    except Exception as retry_e:
+                        logger.warning(f"[WARNING] Cache bypass retry for malformed JSON failed for item {i}: {retry_e}, attempting repair...")
+                else:
+                    logger.warning(f"[WARNING] Cannot retry malformed JSON - batch mode or missing prompt/input_json, attempting repair...")
+
+            if output_pydantic or output_schema:
                 parsed_result = None
                 json_string_to_parse = None
 
                 try:
-                    # Attempt 1: Check if LiteLLM already parsed it
-                    if isinstance(raw_result, output_pydantic):
+                    # Attempt 1: Check if LiteLLM already parsed it (only for Pydantic)
+                    if output_pydantic and isinstance(raw_result, output_pydantic):
                         parsed_result = raw_result
                         if verbose:
                             logger.debug("[DEBUG] Pydantic object received directly from LiteLLM.")
 
                     # Attempt 2: Check if raw_result is dict-like and validate
                     elif isinstance(raw_result, dict):
-
+                        if output_pydantic:
+                            parsed_result = output_pydantic.model_validate(raw_result)
+                        else:
+                            # Validate against JSON schema
+                            try:
+                                import jsonschema
+                                jsonschema.validate(instance=raw_result, schema=output_schema)
+                                parsed_result = json.dumps(raw_result) # Return as JSON string for consistency
+                            except ImportError:
+                                logger.warning("jsonschema not installed, skipping validation")
+                                parsed_result = json.dumps(raw_result)
+
                         if verbose:
                             logger.debug("[DEBUG] Validated dictionary-like object directly.")
 
@@ -1105,26 +2079,59 @@ def llm_invoke(
                     elif isinstance(raw_result, str):
                         json_string_to_parse = raw_result # Start with the raw string
                         try:
-                            #
-
-
-                            if
-
-                            # Basic check if it looks like JSON
-                            if potential_json.strip().startswith('{') and potential_json.strip().endswith('}'):
-                                if verbose:
-                                    logger.debug(f"[DEBUG] Attempting to parse extracted JSON block: '{potential_json}'")
-                                parsed_result = output_pydantic.model_validate_json(potential_json)
-                            else:
-                                # If block extraction fails, try cleaning markdown next
-                                raise ValueError("Extracted block doesn't look like JSON")
+                            # 1) Prefer fenced ```json blocks
+                            fenced = _extract_fenced_json_block(raw_result)
+                            candidates: List[str] = []
+                            if fenced:
+                                candidates.append(fenced)
                             else:
-
-
-
+                                # 2) Fall back to scanning for balanced JSON objects
+                                candidates.extend(_extract_balanced_json_objects(raw_result))
+
+                            if not candidates:
+                                raise ValueError("No JSON-like content found")
+
+                            parse_err: Optional[Exception] = None
+                            for cand in candidates:
+                                try:
+                                    if verbose:
+                                        logger.debug(f"[DEBUG] Attempting to parse candidate JSON block: {cand}")
+
+                                    if output_pydantic:
+                                        parsed_result = output_pydantic.model_validate_json(cand)
+                                    else:
+                                        # Parse JSON and validate against schema
+                                        loaded = json.loads(cand)
+                                        try:
+                                            import jsonschema
+                                            jsonschema.validate(instance=loaded, schema=output_schema)
+                                        except ImportError:
+                                            pass # Skip validation if lib missing
+                                        parsed_result = cand # Return string if valid
+
+                                    json_string_to_parse = cand
+                                    parse_err = None
+                                    break
+                                except (json.JSONDecodeError, ValidationError, ValueError) as pe:
+                                    # Also catch jsonschema.ValidationError if imported
+                                    parse_err = pe
+                                    try:
+                                        import jsonschema
+                                        if isinstance(pe, jsonschema.ValidationError):
+                                            parse_err = pe
+                                    except ImportError:
+                                        pass
+
+                            if parsed_result is None:
+                                # If none of the candidates parsed, raise last error
+                                if parse_err is not None:
+                                    raise parse_err
+                                raise ValueError("Unable to parse any JSON candidates")
+                        except (json.JSONDecodeError, ValidationError, ValueError, Exception) as extraction_error:
+                            # Catch generic Exception to handle jsonschema errors without explicit import here
                             if verbose:
-                                logger.debug(f"[DEBUG] JSON
-                                #
+                                logger.debug(f"[DEBUG] JSON extraction/validation failed ('{extraction_error}'). Trying fence cleaning.")
+                            # Last resort: strip any leading/trailing code fences and retry
                             cleaned_result_str = raw_result.strip()
                             if cleaned_result_str.startswith("```json"):
                                 cleaned_result_str = cleaned_result_str[7:]
@@ -1133,35 +2140,166 @@ def llm_invoke(
                             if cleaned_result_str.endswith("```"):
                                 cleaned_result_str = cleaned_result_str[:-3]
                             cleaned_result_str = cleaned_result_str.strip()
-                            # Check
-
+                            # Check for complete JSON object or array
+                            is_complete_object = cleaned_result_str.startswith('{') and cleaned_result_str.endswith('}')
+                            is_complete_array = cleaned_result_str.startswith('[') and cleaned_result_str.endswith(']')
+                            if is_complete_object or is_complete_array:
                                 if verbose:
-                                    logger.debug(f"[DEBUG] Attempting parse after
-                                json_string_to_parse = cleaned_result_str
-
+                                    logger.debug(f"[DEBUG] Attempting parse after generic fence cleaning. Cleaned string: '{cleaned_result_str}'")
+                                json_string_to_parse = cleaned_result_str
+
+                                if output_pydantic:
+                                    parsed_result = output_pydantic.model_validate_json(json_string_to_parse)
+                                else:
+                                    loaded = json.loads(json_string_to_parse)
+                                    try:
+                                        import jsonschema
+                                        jsonschema.validate(instance=loaded, schema=output_schema)
+                                    except ImportError:
+                                        pass
+                                    parsed_result = json_string_to_parse
+                            elif cleaned_result_str.startswith('{') or cleaned_result_str.startswith('['):
+                                # Attempt to repair truncated JSON (e.g., missing closing braces)
+                                # This can happen when Gemini generates excessive trailing content
+                                # that causes token limit truncation
+                                if verbose:
+                                    logger.debug(f"[DEBUG] JSON appears truncated (missing closing brace). Attempting repair.")
+
+                                # Try to find the last valid JSON structure
+                                # For simple schemas like {"extracted_code": "..."}, we can try to close it
+                                repaired = cleaned_result_str.rstrip()
+
+                                # Strip trailing escaped newline sequences (\\n in the JSON string)
+                                # These appear as literal backslash-n when Gemini generates excessive newlines
+                                while repaired.endswith('\\n'):
+                                    repaired = repaired[:-2]
+                                # Also strip trailing literal backslashes that might be orphaned
+                                repaired = repaired.rstrip('\\')
+
+                                # If we're in the middle of a string value, try to close it
+                                # Count unescaped quotes to determine if we're inside a string
+                                # Simple heuristic: if it ends without proper closure, add closing
+                                is_array = cleaned_result_str.startswith('[')
+                                expected_end = ']' if is_array else '}'
+                                if not repaired.endswith(expected_end):
+                                    # Try adding various closures to repair
+                                    if is_array:
+                                        repair_attempts = [
+                                            repaired + '}]', # Close object and array
+                                            repaired + '"}]', # Close string, object and array
+                                            repaired + '"}}]', # Close string, nested object and array
+                                            repaired.rstrip(',') + ']', # Remove trailing comma and close array
+                                            repaired.rstrip('"') + '"}]', # Handle partial string end
+                                        ]
+                                    else:
+                                        repair_attempts = [
+                                            repaired + '"}', # Close string and object
+                                            repaired + '"}\n}', # Close string and nested object
+                                            repaired + '"}}}', # Deeper nesting
+                                            repaired.rstrip(',') + '}', # Remove trailing comma
+                                            repaired.rstrip('"') + '"}', # Handle partial string end
+                                        ]
+
+                                    for attempt in repair_attempts:
+                                        try:
+                                            if output_pydantic:
+                                                parsed_result = output_pydantic.model_validate_json(attempt)
+                                            else:
+                                                loaded = json.loads(attempt)
+                                                try:
+                                                    import jsonschema
+                                                    jsonschema.validate(instance=loaded, schema=output_schema)
+                                                except ImportError:
+                                                    pass
+                                                parsed_result = attempt
+
+                                            if verbose:
+                                                logger.info(f"[INFO] Successfully repaired truncated JSON response")
+                                            json_string_to_parse = attempt
+                                            break
+                                        except (json.JSONDecodeError, ValidationError, ValueError):
+                                            continue
+
+                                if parsed_result is None:
+                                    raise ValueError("Content after cleaning doesn't look like JSON (and repair attempts failed)")
                             else:
-
-                                raise ValueError("Content after cleaning markdown doesn't look like JSON")
+                                raise ValueError("Content after cleaning doesn't look like JSON")
 
 
                     # Check if any parsing attempt succeeded
                     if parsed_result is None:
+                        target_name = output_pydantic.__name__ if output_pydantic else "JSON Schema"
                         # This case should ideally be caught by exceptions above, but as a safeguard:
-                        raise TypeError(f"Raw result type {type(raw_result)} or content could not be validated/parsed against {
+                        raise TypeError(f"Raw result type {type(raw_result)} or content could not be validated/parsed against {target_name}.")
 
-                except (ValidationError, json.JSONDecodeError, TypeError, ValueError) as parse_error:
-
+                except (ValidationError, json.JSONDecodeError, TypeError, ValueError, Exception) as parse_error:
+                    target_name = output_pydantic.__name__ if output_pydantic else "JSON Schema"
+                    logger.error(f"[ERROR] Failed to parse response into {target_name} for item {i}: {parse_error}")
                     # Use the string that was last attempted for parsing in the error message
                     error_content = json_string_to_parse if json_string_to_parse is not None else raw_result
                     logger.error("[ERROR] Content attempted for parsing: %s", repr(error_content)) # CORRECTED (or use f-string)
-                    results.append(f"ERROR: Failed to parse
+                    results.append(f"ERROR: Failed to parse structured output. Raw: {repr(raw_result)}")
                     continue # Skip appending result below if parsing failed
 
-                #
+                # Post-process: unescape newlines and repair Python syntax
+                _unescape_code_newlines(parsed_result)
+
+                # Check if code fields still have invalid Python syntax after repair
+                # If so, retry without cache to get a fresh response
+                if _has_invalid_python_code(parsed_result):
+                    logger.warning(f"[WARNING] Detected invalid Python syntax in code fields for item {i} after repair. Retrying with cache bypass...")
+                    if not use_batch_mode and prompt and input_json is not None:
+                        # Add a small variation to bypass cache
+                        modified_prompt = prompt + "  " # Two spaces to differentiate from other retries
+                        try:
+                            retry_messages = _format_messages(modified_prompt, input_json, use_batch_mode)
+                            # Disable cache for retry
+                            original_cache = litellm.cache
+                            litellm.cache = None
+                            retry_response = litellm.completion(
+                                model=model_name_litellm,
+                                messages=retry_messages,
+                                temperature=current_temperature,
+                                response_format=response_format,
+                                **time_kwargs
+                            )
+                            # Re-enable cache
+                            litellm.cache = original_cache
+                            # Extract and re-parse the retry result
+                            retry_raw_result = retry_response.choices[0].message.content
+                            if retry_raw_result is not None:
+                                # Re-parse the retry result
+                                retry_parsed = None
+                                if output_pydantic:
+                                    if isinstance(retry_raw_result, output_pydantic):
+                                        retry_parsed = retry_raw_result
+                                    elif isinstance(retry_raw_result, dict):
+                                        retry_parsed = output_pydantic.model_validate(retry_raw_result)
+                                    elif isinstance(retry_raw_result, str):
+                                        retry_parsed = output_pydantic.model_validate_json(retry_raw_result)
+                                elif output_schema and isinstance(retry_raw_result, str):
+                                    retry_parsed = retry_raw_result # Keep as string for schema validation
+
+                                if retry_parsed is not None:
+                                    _unescape_code_newlines(retry_parsed)
+                                    if not _has_invalid_python_code(retry_parsed):
+                                        logger.info(f"[SUCCESS] Cache bypass retry for invalid Python code succeeded for item {i}")
+                                        parsed_result = retry_parsed
+                                    else:
+                                        logger.warning(f"[WARNING] Cache bypass retry still has invalid Python code for item {i}, using original")
+                                else:
+                                    logger.warning(f"[WARNING] Cache bypass retry returned unparseable result for item {i}")
+                            else:
+                                logger.warning(f"[WARNING] Cache bypass retry returned None for item {i}")
+                        except Exception as retry_e:
+                            logger.warning(f"[WARNING] Cache bypass retry for invalid Python code failed for item {i}: {retry_e}")
+                    else:
+                        logger.warning(f"[WARNING] Cannot retry invalid Python code - batch mode or missing prompt/input_json")
+
                 results.append(parsed_result)
 
             else:
-                # If output_pydantic was not requested, append the raw result
+                # If output_pydantic/schema was not requested, append the raw result
                 results.append(raw_result)
 
         except (AttributeError, IndexError) as e:
@@ -1244,10 +2382,40 @@ def llm_invoke(
                 Exception) as e: # Catch generic Exception last
             last_exception = e
             error_type = type(e).__name__
+            error_str = str(e)
+
+            # Provider-specific handling for Anthropic temperature + thinking rules.
+            # Two scenarios we auto-correct:
+            # 1) temperature==1 without thinking -> retry with 0.99
+            # 2) thinking enabled but temperature!=1 -> retry with 1
+            lower_err = error_str.lower()
+            if (not temp_adjustment_done) and ("temperature" in lower_err) and ("thinking" in lower_err):
+                anthropic_thinking_sent = ('thinking' in litellm_kwargs) and (provider.lower() == 'anthropic')
+                # Decide direction of adjustment based on whether thinking was enabled in the call
+                if anthropic_thinking_sent:
+                    # thinking enabled -> force temperature=1
+                    adjusted_temp = 1
+                    logger.warning(
+                        f"[WARN] {model_name_litellm}: Anthropic with thinking requires temperature=1. "
+                        f"Retrying with temperature={adjusted_temp}."
+                    )
+                else:
+                    # thinking not enabled -> avoid temperature=1
+                    adjusted_temp = 0.99
+                    logger.warning(
+                        f"[WARN] {model_name_litellm}: Provider rejected temperature=1 without thinking. "
+                        f"Retrying with temperature={adjusted_temp}."
+                    )
+                current_temperature = adjusted_temp
+                temp_adjustment_done = True
+                retry_with_same_model = True
+                if verbose:
+                    logger.debug(f"Retrying {model_name_litellm} with adjusted temperature {current_temperature}")
+                continue
+
             logger.error(f"[ERROR] Invocation failed for {model_name_litellm} ({error_type}): {e}. Trying next model.")
             # Log more details in verbose mode
             if verbose:
-                # import traceback # Not needed if using exc_info=True
                 logger.debug(f"Detailed exception traceback for {model_name_litellm}:", exc_info=True)
             break # Break inner loop, try next model candidate
 
@@ -1275,7 +2443,7 @@ if __name__ == "__main__":
         response = llm_invoke(
             prompt="Tell me a short joke about {topic}.",
             input_json={"topic": "programmers"},
-            strength=0.5, # Use base model (gpt-
+            strength=0.5, # Use base model (gpt-5-nano)
             temperature=0.7,
             verbose=True
         )
@@ -1356,7 +2524,7 @@ if __name__ == "__main__":
             {"role": "system", "content": "You are a helpful assistant."},
             {"role": "user", "content": "What is the capital of France?"}
         ]
-        # Strength 0.5 should select gpt-
+        # Strength 0.5 should select gpt-5-nano
         response_messages = llm_invoke(
             messages=custom_messages,
             strength=0.5,