pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdd/__init__.py +40 -8
- pdd/agentic_bug.py +323 -0
- pdd/agentic_bug_orchestrator.py +497 -0
- pdd/agentic_change.py +231 -0
- pdd/agentic_change_orchestrator.py +526 -0
- pdd/agentic_common.py +598 -0
- pdd/agentic_crash.py +534 -0
- pdd/agentic_e2e_fix.py +319 -0
- pdd/agentic_e2e_fix_orchestrator.py +426 -0
- pdd/agentic_fix.py +1294 -0
- pdd/agentic_langtest.py +162 -0
- pdd/agentic_update.py +387 -0
- pdd/agentic_verify.py +183 -0
- pdd/architecture_sync.py +565 -0
- pdd/auth_service.py +210 -0
- pdd/auto_deps_main.py +71 -51
- pdd/auto_include.py +245 -5
- pdd/auto_update.py +125 -47
- pdd/bug_main.py +196 -23
- pdd/bug_to_unit_test.py +2 -0
- pdd/change_main.py +11 -4
- pdd/cli.py +22 -1181
- pdd/cmd_test_main.py +350 -150
- pdd/code_generator.py +60 -18
- pdd/code_generator_main.py +790 -57
- pdd/commands/__init__.py +48 -0
- pdd/commands/analysis.py +306 -0
- pdd/commands/auth.py +309 -0
- pdd/commands/connect.py +290 -0
- pdd/commands/fix.py +163 -0
- pdd/commands/generate.py +257 -0
- pdd/commands/maintenance.py +175 -0
- pdd/commands/misc.py +87 -0
- pdd/commands/modify.py +256 -0
- pdd/commands/report.py +144 -0
- pdd/commands/sessions.py +284 -0
- pdd/commands/templates.py +215 -0
- pdd/commands/utility.py +110 -0
- pdd/config_resolution.py +58 -0
- pdd/conflicts_main.py +8 -3
- pdd/construct_paths.py +589 -111
- pdd/context_generator.py +10 -2
- pdd/context_generator_main.py +175 -76
- pdd/continue_generation.py +53 -10
- pdd/core/__init__.py +33 -0
- pdd/core/cli.py +527 -0
- pdd/core/cloud.py +237 -0
- pdd/core/dump.py +554 -0
- pdd/core/errors.py +67 -0
- pdd/core/remote_session.py +61 -0
- pdd/core/utils.py +90 -0
- pdd/crash_main.py +262 -33
- pdd/data/language_format.csv +71 -63
- pdd/data/llm_model.csv +20 -18
- pdd/detect_change_main.py +5 -4
- pdd/docs/prompting_guide.md +864 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
- pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
- pdd/fix_code_loop.py +523 -95
- pdd/fix_code_module_errors.py +6 -2
- pdd/fix_error_loop.py +491 -92
- pdd/fix_errors_from_unit_tests.py +4 -3
- pdd/fix_main.py +278 -21
- pdd/fix_verification_errors.py +12 -100
- pdd/fix_verification_errors_loop.py +529 -286
- pdd/fix_verification_main.py +294 -89
- pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
- pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
- pdd/frontend/dist/index.html +376 -0
- pdd/frontend/dist/logo.svg +33 -0
- pdd/generate_output_paths.py +139 -15
- pdd/generate_test.py +218 -146
- pdd/get_comment.py +19 -44
- pdd/get_extension.py +8 -9
- pdd/get_jwt_token.py +318 -22
- pdd/get_language.py +8 -7
- pdd/get_run_command.py +75 -0
- pdd/get_test_command.py +68 -0
- pdd/git_update.py +70 -19
- pdd/incremental_code_generator.py +2 -2
- pdd/insert_includes.py +13 -4
- pdd/llm_invoke.py +1711 -181
- pdd/load_prompt_template.py +19 -12
- pdd/path_resolution.py +140 -0
- pdd/pdd_completion.fish +25 -2
- pdd/pdd_completion.sh +30 -4
- pdd/pdd_completion.zsh +79 -4
- pdd/postprocess.py +14 -4
- pdd/preprocess.py +293 -24
- pdd/preprocess_main.py +41 -6
- pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
- pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
- pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
- pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
- pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
- pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
- pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
- pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
- pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
- pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
- pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
- pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
- pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
- pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
- pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
- pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
- pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
- pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
- pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
- pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
- pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
- pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
- pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
- pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
- pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
- pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
- pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
- pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
- pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
- pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
- pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
- pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
- pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
- pdd/prompts/agentic_update_LLM.prompt +925 -0
- pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
- pdd/prompts/auto_include_LLM.prompt +122 -905
- pdd/prompts/change_LLM.prompt +3093 -1
- pdd/prompts/detect_change_LLM.prompt +686 -27
- pdd/prompts/example_generator_LLM.prompt +22 -1
- pdd/prompts/extract_code_LLM.prompt +5 -1
- pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
- pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
- pdd/prompts/extract_promptline_LLM.prompt +17 -11
- pdd/prompts/find_verification_errors_LLM.prompt +6 -0
- pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
- pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
- pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
- pdd/prompts/generate_test_LLM.prompt +41 -7
- pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
- pdd/prompts/increase_tests_LLM.prompt +1 -5
- pdd/prompts/insert_includes_LLM.prompt +316 -186
- pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
- pdd/prompts/prompt_diff_LLM.prompt +82 -0
- pdd/prompts/trace_LLM.prompt +25 -22
- pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
- pdd/prompts/update_prompt_LLM.prompt +22 -1
- pdd/pytest_output.py +127 -12
- pdd/remote_session.py +876 -0
- pdd/render_mermaid.py +236 -0
- pdd/server/__init__.py +52 -0
- pdd/server/app.py +335 -0
- pdd/server/click_executor.py +587 -0
- pdd/server/executor.py +338 -0
- pdd/server/jobs.py +661 -0
- pdd/server/models.py +241 -0
- pdd/server/routes/__init__.py +31 -0
- pdd/server/routes/architecture.py +451 -0
- pdd/server/routes/auth.py +364 -0
- pdd/server/routes/commands.py +929 -0
- pdd/server/routes/config.py +42 -0
- pdd/server/routes/files.py +603 -0
- pdd/server/routes/prompts.py +1322 -0
- pdd/server/routes/websocket.py +473 -0
- pdd/server/security.py +243 -0
- pdd/server/terminal_spawner.py +209 -0
- pdd/server/token_counter.py +222 -0
- pdd/setup_tool.py +648 -0
- pdd/simple_math.py +2 -0
- pdd/split_main.py +3 -2
- pdd/summarize_directory.py +237 -195
- pdd/sync_animation.py +8 -4
- pdd/sync_determine_operation.py +839 -112
- pdd/sync_main.py +351 -57
- pdd/sync_orchestration.py +1400 -756
- pdd/sync_tui.py +848 -0
- pdd/template_expander.py +161 -0
- pdd/template_registry.py +264 -0
- pdd/templates/architecture/architecture_json.prompt +237 -0
- pdd/templates/generic/generate_prompt.prompt +174 -0
- pdd/trace.py +168 -12
- pdd/trace_main.py +4 -3
- pdd/track_cost.py +140 -63
- pdd/unfinished_prompt.py +51 -4
- pdd/update_main.py +567 -67
- pdd/update_model_costs.py +2 -2
- pdd/update_prompt.py +19 -4
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
- pdd_cli-0.0.118.dist-info/RECORD +227 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
- pdd_cli-0.0.45.dist-info/RECORD +0 -116
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
- {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
pdd/llm_invoke.py
CHANGED
@@ -5,6 +5,8 @@ import os
 import pandas as pd
 import litellm
 import logging # ADDED FOR DETAILED LOGGING
+import importlib.resources
+from litellm.caching.caching import Cache # Fix for LiteLLM v1.75.5+
 
 # --- Configure Standard Python Logging ---
 logger = logging.getLogger("pdd.llm_invoke")
@@ -24,6 +26,16 @@ litellm_logger = logging.getLogger("litellm")
 litellm_log_level = os.getenv("LITELLM_LOG_LEVEL", "WARNING" if PRODUCTION_MODE else "INFO")
 litellm_logger.setLevel(getattr(logging, litellm_log_level, logging.WARNING))
 
+# Ensure LiteLLM drops provider-unsupported params instead of erroring
+# This prevents failures like UnsupportedParamsError for OpenAI gpt-5-* when
+# passing generic params (e.g., reasoning_effort) not accepted by that API path.
+try:
+    _drop_params_env = os.getenv("LITELLM_DROP_PARAMS", "true")
+    litellm.drop_params = str(_drop_params_env).lower() in ("1", "true", "yes", "on")
+except Exception:
+    # Be conservative: default to True even if env parsing fails
+    litellm.drop_params = True
+
 # Add a console handler if none exists
 if not logger.handlers:
     console_handler = logging.StreamHandler()
@@ -69,14 +81,14 @@ import json
 # from rich import print as rprint # Replaced with logger
 from dotenv import load_dotenv
 from pathlib import Path
-from typing import Optional, Dict, List, Any, Type, Union
+from typing import Optional, Dict, List, Any, Type, Union, Tuple
 from pydantic import BaseModel, ValidationError
 import openai # Import openai for exception handling as LiteLLM maps to its types
-from langchain_core.prompts import PromptTemplate
 import warnings
 import time as time_module # Alias to avoid conflict with 'time' parameter
 # Import the default model constant
 from pdd import DEFAULT_LLM_MODEL
+from pdd.path_resolution import get_default_resolver
 
 # Opt-in to future pandas behavior regarding downcasting
 try:
@@ -86,6 +98,242 @@ except pd._config.config.OptionError:
     pass
 
 
+# --- Custom Exceptions ---
+
+class SchemaValidationError(Exception):
+    """Raised when LLM response fails Pydantic/JSON schema validation.
+
+    This exception triggers model fallback when caught at the outer exception
+    handler level, allowing the next candidate model to be tried.
+
+    Issue #168: Previously, validation errors only logged an error and continued
+    to the next batch item, never triggering model fallback.
+    """
+
+    def __init__(self, message: str, raw_response: Any = None, item_index: int = 0):
+        super().__init__(message)
+        self.raw_response = raw_response
+        self.item_index = item_index
+
+
+class CloudFallbackError(Exception):
+    """Raised when cloud execution fails and should fall back to local.
+
+    This exception is caught internally and triggers fallback to local execution
+    when cloud is unavailable (network errors, timeouts, auth failures).
+    """
+    pass
+
+
+class CloudInvocationError(Exception):
+    """Raised when cloud invocation fails with a non-recoverable error.
+
+    This exception indicates a cloud error that should not fall back to local,
+    such as validation errors returned by the cloud endpoint.
+    """
+    pass
+
+
+class InsufficientCreditsError(Exception):
+    """Raised when user has insufficient credits for cloud execution.
+
+    This exception is raised when the cloud returns 402 (Payment Required)
+    and should NOT fall back to local execution - the user needs to know.
+    """
+    pass
+
+
+# --- Cloud Execution Helpers ---
+
+def _ensure_all_properties_required(schema: Dict[str, Any]) -> Dict[str, Any]:
+    """Ensure ALL properties are in the required array (OpenAI strict mode requirement).
+
+    OpenAI's strict mode requires that all properties in a JSON schema are listed
+    in the 'required' array. Pydantic's model_json_schema() only includes fields
+    without default values in 'required', which causes OpenAI to reject the schema.
+
+    Args:
+        schema: A JSON schema dictionary
+
+    Returns:
+        The schema with all properties added to 'required'
+    """
+    if 'properties' in schema:
+        schema['required'] = list(schema['properties'].keys())
+    return schema
+
+
+def _pydantic_to_json_schema(pydantic_class: Type[BaseModel]) -> Dict[str, Any]:
+    """Convert a Pydantic model class to JSON Schema for cloud transport.
+
+    Args:
+        pydantic_class: A Pydantic BaseModel subclass
+
+    Returns:
+        JSON Schema dictionary that can be serialized and sent to cloud
+    """
+    schema = pydantic_class.model_json_schema()
+    # Ensure all properties are in required array (OpenAI strict mode requirement)
+    _ensure_all_properties_required(schema)
+    # Include class name for debugging/logging purposes
+    schema['__pydantic_class_name__'] = pydantic_class.__name__
+    return schema
+
+
+def _validate_with_pydantic(
+    result: Any,
+    pydantic_class: Type[BaseModel]
+) -> BaseModel:
+    """Validate cloud response using original Pydantic class.
+
+    Args:
+        result: The result from cloud (dict or JSON string)
+        pydantic_class: The Pydantic model to validate against
+
+    Returns:
+        Validated Pydantic model instance
+
+    Raises:
+        ValidationError: If validation fails
+    """
+    if isinstance(result, dict):
+        return pydantic_class.model_validate(result)
+    elif isinstance(result, str):
+        return pydantic_class.model_validate_json(result)
+    elif isinstance(result, pydantic_class):
+        # Already validated
+        return result
+    raise ValueError(f"Cannot validate result type {type(result)} with Pydantic model")
+
+
+def _llm_invoke_cloud(
+    prompt: Optional[str],
+    input_json: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]],
+    strength: float,
+    temperature: float,
+    verbose: bool,
+    output_pydantic: Optional[Type[BaseModel]],
+    output_schema: Optional[Dict[str, Any]],
+    time: float,
+    use_batch_mode: bool,
+    messages: Optional[Union[List[Dict[str, str]], List[List[Dict[str, str]]]]],
+    language: Optional[str],
+) -> Dict[str, Any]:
+    """Execute llm_invoke via cloud endpoint.
+
+    Args:
+        All parameters match llm_invoke signature
+
+    Returns:
+        Dictionary with 'result', 'cost', 'model_name', 'thinking_output'
+
+    Raises:
+        CloudFallbackError: For recoverable errors (network, timeout, auth)
+        InsufficientCreditsError: For 402 Payment Required
+        CloudInvocationError: For non-recoverable cloud errors
+    """
+    import requests
+    from rich.console import Console
+
+    # Lazy import to avoid circular dependency
+    from pdd.core.cloud import CloudConfig
+
+    console = Console()
+    CLOUD_TIMEOUT = 300  # 5 minutes
+
+    # Get JWT token
+    jwt_token = CloudConfig.get_jwt_token(verbose=verbose)
+    if not jwt_token:
+        raise CloudFallbackError("Could not authenticate with cloud")
+
+    # Prepare payload
+    payload: Dict[str, Any] = {
+        "strength": strength,
+        "temperature": temperature,
+        "time": time,
+        "verbose": verbose,
+        "useBatchMode": use_batch_mode,
+    }
+
+    if language:
+        payload["language"] = language
+
+    # Add prompt/messages
+    if messages:
+        payload["messages"] = messages
+    else:
+        payload["prompt"] = prompt
+        payload["inputJson"] = input_json
+
+    # Handle output schema
+    if output_pydantic:
+        payload["outputSchema"] = _pydantic_to_json_schema(output_pydantic)
+    elif output_schema:
+        payload["outputSchema"] = output_schema
+
+    # Make request
+    headers = {
+        "Authorization": f"Bearer {jwt_token}",
+        "Content-Type": "application/json"
+    }
+
+    cloud_url = CloudConfig.get_endpoint_url("llmInvoke")
+
+    if verbose:
+        logger.debug(f"Cloud llm_invoke request to: {cloud_url}")
+
+    try:
+        response = requests.post(
+            cloud_url,
+            json=payload,
+            headers=headers,
+            timeout=CLOUD_TIMEOUT
+        )
+
+        if response.status_code == 200:
+            data = response.json()
+            result = data.get("result")
+
+            # Validate with Pydantic if specified
+            if output_pydantic and result:
+                try:
+                    result = _validate_with_pydantic(result, output_pydantic)
+                except (ValidationError, ValueError) as e:
+                    logger.warning(f"Cloud response validation failed: {e}")
+                    # Return raw result if validation fails
+                    pass
+
+            return {
+                "result": result,
+                "cost": data.get("totalCost", 0.0),
+                "model_name": data.get("modelName", "cloud_model"),
+                "thinking_output": data.get("thinkingOutput"),
+            }
+
+        elif response.status_code == 402:
+            error_msg = response.json().get("error", "Insufficient credits")
+            raise InsufficientCreditsError(error_msg)
+
+        elif response.status_code in (401, 403):
+            error_msg = response.json().get("error", f"Authentication failed ({response.status_code})")
+            raise CloudFallbackError(error_msg)
+
+        elif response.status_code >= 500:
+            error_msg = response.json().get("error", f"Server error ({response.status_code})")
+            raise CloudFallbackError(error_msg)
+
+        else:
+            error_msg = response.json().get("error", f"HTTP {response.status_code}")
+            raise CloudInvocationError(f"Cloud llm_invoke failed: {error_msg}")
+
+    except requests.exceptions.Timeout:
+        raise CloudFallbackError("Cloud request timed out")
+    except requests.exceptions.ConnectionError as e:
+        raise CloudFallbackError(f"Cloud connection failed: {e}")
+    except requests.exceptions.RequestException as e:
+        raise CloudFallbackError(f"Cloud request failed: {e}")
+
+
 def _is_wsl_environment() -> bool:
     """
     Detect if we're running in WSL (Windows Subsystem for Linux) environment.
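The exception classes above define the cloud-to-local fallback contract. A minimal caller-side sketch (not part of the diff; `run_local` and `Answer` are hypothetical stand-ins for the local LiteLLM path and an output model) of how the hunk's docstrings say these exceptions are meant to be handled:

    # Illustrative only: fallback handling implied by the exception docstrings above.
    from pydantic import BaseModel

    class Answer(BaseModel):
        text: str

    def invoke_with_fallback(prompt, input_json, run_local):
        try:
            return _llm_invoke_cloud(
                prompt=prompt, input_json=input_json, strength=0.5, temperature=0.0,
                verbose=False, output_pydantic=Answer, output_schema=None,
                time=0.25, use_batch_mode=False, messages=None, language=None,
            )
        except InsufficientCreditsError:
            raise  # 402: surface to the user, never silently fall back
        except CloudFallbackError:
            return run_local()  # recoverable (network/timeout/auth): retry locally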
@@ -112,6 +360,22 @@ def _is_wsl_environment() -> bool:
     return False
 
 
+def _openai_responses_supports_response_format() -> bool:
+    """Detect if current OpenAI Python SDK supports `response_format` on Responses.create.
+
+    Returns True if the installed SDK exposes a `response_format` parameter on
+    `openai.resources.responses.Responses.create`, else False. This avoids
+    sending unsupported kwargs and triggering TypeError at runtime.
+    """
+    try:
+        import inspect
+        from openai.resources.responses import Responses
+        sig = inspect.signature(Responses.create)
+        return "response_format" in sig.parameters
+    except Exception:
+        return False
+
+
 def _get_environment_info() -> Dict[str, str]:
     """
     Get environment information for debugging and error reporting.
@@ -142,60 +406,109 @@ def _get_environment_info() -> Dict[str, str]:
 
 # --- Constants and Configuration ---
 
-# Determine project root:
-PROJECT_ROOT = None
+# Determine project root: use PathResolver to ignore package-root PDD_PATH values.
 PDD_PATH_ENV = os.getenv("PDD_PATH")
-
 if PDD_PATH_ENV:
-    _path_from_env = Path(PDD_PATH_ENV)
-    if _path_from_env.is_dir():
-        PROJECT_ROOT = _path_from_env.resolve()
-        logger.debug(f"Using PROJECT_ROOT from PDD_PATH: {PROJECT_ROOT}")
-    else:
-        warnings.warn(f"PDD_PATH environment variable ('{PDD_PATH_ENV}') is set but not a valid directory. Attempting auto-detection.")
-
-if PROJECT_ROOT is None: # If PDD_PATH wasn't set or was invalid
     try:
-
-
-
-
-
-
-
-        has_data = (current_dir / "data").is_dir()
-        has_dotenv = (current_dir / ".env").exists()
-
-        if has_git or has_pyproject or has_data or has_dotenv:
-            PROJECT_ROOT = current_dir
-            logger.debug(f"Determined PROJECT_ROOT by marker search from CWD: {PROJECT_ROOT}")
-            break
-
-        parent_dir = current_dir.parent
-        if parent_dir == current_dir: # Reached filesystem root
-            break
-        current_dir = parent_dir
+        _path_from_env = Path(PDD_PATH_ENV).expanduser().resolve()
+        if not _path_from_env.is_dir():
+            warnings.warn(
+                f"PDD_PATH environment variable ('{PDD_PATH_ENV}') is set but not a valid directory. Attempting auto-detection."
+            )
+    except Exception as e:
+        warnings.warn(f"Error validating PDD_PATH environment variable: {e}")
 
-
-
+resolver = get_default_resolver()
+PROJECT_ROOT = resolver.resolve_project_root()
+PROJECT_ROOT_FROM_ENV = resolver.pdd_path_env is not None and PROJECT_ROOT == resolver.pdd_path_env
+logger.debug(f"Using PROJECT_ROOT: {PROJECT_ROOT}")
 
-if PROJECT_ROOT is None: # Fallback to CWD if no method succeeded
-    PROJECT_ROOT = Path.cwd().resolve()
-    warnings.warn(f"Could not determine project root automatically. Using current working directory: {PROJECT_ROOT}. Ensure this is the intended root or set the PDD_PATH environment variable.")
 
+# ENV_PATH is set after _is_env_path_package_dir is defined (see below)
 
-ENV_PATH = PROJECT_ROOT / ".env"
 # --- Determine LLM_MODEL_CSV_PATH ---
-# Prioritize ~/.pdd/llm_model.csv
+# Prioritize ~/.pdd/llm_model.csv, then a project .pdd from the current CWD,
+# then PROJECT_ROOT (which may be set from PDD_PATH), else fall back to package.
 user_pdd_dir = Path.home() / ".pdd"
 user_model_csv_path = user_pdd_dir / "llm_model.csv"
 
+def _detect_project_root_from_cwd(max_levels: int = 5) -> Path:
+    """Search upwards from the current working directory for common project markers.
+
+    This intentionally ignores PDD_PATH to support CLI invocations that set
+    PDD_PATH to the installed package location. We want to honor a real project
+    checkout's .pdd/llm_model.csv when running inside it.
+    """
+    try:
+        current_dir = Path.cwd().resolve()
+        for _ in range(max_levels):
+            if (
+                (current_dir / ".git").exists()
+                or (current_dir / "pyproject.toml").exists()
+                or (current_dir / "data").is_dir()
+                or (current_dir / ".env").exists()
+            ):
+                return current_dir
+            parent = current_dir.parent
+            if parent == current_dir:
+                break
+            current_dir = parent
+    except Exception:
+        pass
+    return Path.cwd().resolve()
+
+# Resolve candidates
+project_root_from_cwd = _detect_project_root_from_cwd()
+project_csv_from_cwd = project_root_from_cwd / ".pdd" / "llm_model.csv"
+project_csv_from_env = PROJECT_ROOT / ".pdd" / "llm_model.csv"
+
+# Detect whether PDD_PATH points to the installed package directory. If so,
+# don't prioritize it over the real project from CWD.
+try:
+    _installed_pkg_root = importlib.resources.files('pdd')
+    # importlib.resources.files returns a Traversable; get a FS path string if possible
+    try:
+        _installed_pkg_root_path = Path(str(_installed_pkg_root))
+    except Exception:
+        _installed_pkg_root_path = None
+except Exception:
+    _installed_pkg_root_path = None
+
+def _is_env_path_package_dir(env_path: Path) -> bool:
+    try:
+        if _installed_pkg_root_path is None:
+            return False
+        env_path = env_path.resolve()
+        pkg_path = _installed_pkg_root_path.resolve()
+        # Treat equal or subpath as package dir
+        return env_path == pkg_path or str(env_path).startswith(str(pkg_path))
+    except Exception:
+        return False
+
+# ENV_PATH: Use CWD-based project root when PDD_PATH points to package directory
+# This ensures .env is written to the user's project, not the installed package location
+if _is_env_path_package_dir(PROJECT_ROOT):
+    ENV_PATH = project_root_from_cwd / ".env"
+    logger.debug(f"PDD_PATH points to package; using ENV_PATH from CWD: {ENV_PATH}")
+else:
+    ENV_PATH = PROJECT_ROOT / ".env"
+
+# Selection order
 if user_model_csv_path.is_file():
     LLM_MODEL_CSV_PATH = user_model_csv_path
     logger.info(f"Using user-specific LLM model CSV: {LLM_MODEL_CSV_PATH}")
+elif PROJECT_ROOT_FROM_ENV and project_csv_from_env.is_file():
+    # Honor an explicitly-set PDD_PATH pointing to a real project directory
+    LLM_MODEL_CSV_PATH = project_csv_from_env
+    logger.info(f"Using project-specific LLM model CSV (from PDD_PATH): {LLM_MODEL_CSV_PATH}")
+elif project_csv_from_cwd.is_file():
+    # Otherwise, prefer the project relative to the current working directory
+    LLM_MODEL_CSV_PATH = project_csv_from_cwd
    logger.info(f"Using project-specific LLM model CSV (from CWD): {LLM_MODEL_CSV_PATH}")
 else:
-
-
+    # Neither exists, we'll use a marker path that _load_model_data will handle
+    LLM_MODEL_CSV_PATH = None
+    logger.info("No local LLM model CSV found, will use package default")
 # ---------------------------------
 
 # Load environment variables from .env file
@@ -225,6 +538,7 @@ if GCS_HMAC_SECRET_ACCESS_KEY:
     GCS_HMAC_SECRET_ACCESS_KEY = GCS_HMAC_SECRET_ACCESS_KEY.strip()
 
 cache_configured = False
+configured_cache = None  # Store the configured cache instance for restoration
 
 if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
     # Store original AWS credentials before overwriting for GCS cache setup
@@ -238,12 +552,13 @@ if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
     os.environ['AWS_SECRET_ACCESS_KEY'] = GCS_HMAC_SECRET_ACCESS_KEY
     # os.environ['AWS_REGION_NAME'] = GCS_REGION_NAME # Uncomment if needed
 
-
+    configured_cache = Cache(
         type="s3",
         s3_bucket_name=GCS_BUCKET_NAME,
         s3_region_name=GCS_REGION_NAME, # Pass region explicitly to cache
         s3_endpoint_url=GCS_ENDPOINT_URL,
     )
+    litellm.cache = configured_cache
     logger.info(f"LiteLLM cache configured for GCS bucket (S3 compatible): {GCS_BUCKET_NAME}")
     cache_configured = True
 
@@ -268,15 +583,22 @@ if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
     elif 'AWS_REGION_NAME' in os.environ:
         pass # Or just leave it if the temporary setting wasn't done/needed
 
+# Check if caching is disabled via environment variable
+if os.getenv("LITELLM_CACHE_DISABLE") == "1":
+    logger.info("LiteLLM caching disabled via LITELLM_CACHE_DISABLE=1")
+    litellm.cache = None
+    cache_configured = True
+
 if not cache_configured:
     try:
-        # Try
+        # Try disk-based cache as a fallback
         sqlite_cache_path = PROJECT_ROOT / "litellm_cache.sqlite"
-
-
+        configured_cache = Cache(type="disk", disk_cache_dir=str(sqlite_cache_path))
+        litellm.cache = configured_cache
+        logger.info(f"LiteLLM disk cache configured at {sqlite_cache_path}")
         cache_configured = True
     except Exception as e2:
-        warnings.warn(f"Failed to configure LiteLLM
+        warnings.warn(f"Failed to configure LiteLLM disk cache: {e2}. Caching is disabled.")
         litellm.cache = None
 
 if not cache_configured:
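As a usage note (a sketch based on the toggles added in this release; the variable names come straight from the hunks above), caching and parameter-dropping can be controlled from the environment before `pdd` imports this module:

    import os

    # Disable LiteLLM caching entirely (checked before the disk-cache fallback above).
    os.environ["LITELLM_CACHE_DISABLE"] = "1"

    # Let LiteLLM drop provider-unsupported params such as reasoning_effort (default "true").
    os.environ["LITELLM_DROP_PARAMS"] = "true"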
@@ -314,29 +636,49 @@ def _litellm_success_callback(
         cost_val = litellm.completion_cost(completion_response=completion_response)
         calculated_cost = cost_val if cost_val is not None else 0.0
     except Exception as e1:
-        # Attempt 2:
-        #
-        # This is often needed for batch completion items.
+        # Attempt 2: Compute via tokens and model mapping. If LiteLLM mapping is
+        # missing or API differs, fall back to CSV rates in _MODEL_RATE_MAP.
         logger.debug(f"Attempting cost calculation with fallback method: {e1}")
         try:
-            model_name = kwargs.get("model")
+            model_name = kwargs.get("model")
             if model_name and usage:
-
-
-
-
-
-
-
-
+                in_tok = getattr(usage, 'prompt_tokens', None)
+                out_tok = getattr(usage, 'completion_tokens', None)
+                # Some providers may use 'input_tokens'/'output_tokens'
+                if in_tok is None:
+                    in_tok = getattr(usage, 'input_tokens', 0)
+                if out_tok is None:
+                    out_tok = getattr(usage, 'output_tokens', 0)
+
+                # Try LiteLLM helper (arg names vary across versions)
+                try:
+                    cost_val = litellm.completion_cost(
+                        model=model_name,
+                        prompt_tokens=in_tok,
+                        completion_tokens=out_tok,
+                    )
+                    calculated_cost = cost_val if cost_val is not None else 0.0
+                except TypeError:
+                    # Older/newer versions may require input/output token names
+                    try:
+                        cost_val = litellm.completion_cost(
+                            model=model_name,
+                            input_tokens=in_tok,
+                            output_tokens=out_tok,
+                        )
+                        calculated_cost = cost_val if cost_val is not None else 0.0
+                    except Exception as e3:
+                        # Final fallback: compute using CSV rates
+                        rates = _MODEL_RATE_MAP.get(str(model_name))
+                        if rates is not None:
+                            in_rate, out_rate = rates
+                            calculated_cost = (float(in_tok or 0) * in_rate + float(out_tok or 0) * out_rate) / 1_000_000.0
+                        else:
+                            calculated_cost = 0.0
+                        logger.debug(f"Cost calculation failed with LiteLLM token API; used CSV rates if available. Detail: {e3}")
             else:
-                # If we can't get model name or usage, fallback to 0
                 calculated_cost = 0.0
-            # Optional: Log the original error e1 if needed
-            # logger.warning(f"[Callback WARN] Failed to calculate cost with response object ({e1}) and fallback failed.")
         except Exception as e2:
-            # Optional: Log secondary error e2 if needed
-            # logger.warning(f"[Callback WARN] Failed to calculate cost with fallback method: {e2}")
             calculated_cost = 0.0 # Default to 0 on any error
             logger.debug(f"Cost calculation failed with fallback method: {e2}")
 
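The CSV rates stored in `_MODEL_RATE_MAP` are per million tokens, so the final fallback above reduces to a simple weighted sum. A worked example with made-up rates (not taken from `llm_model.csv`):

    # cost = (prompt_tokens * input_rate + completion_tokens * output_rate) / 1_000_000
    in_rate, out_rate = 3.00, 15.00              # $ per 1M input / output tokens (illustrative)
    prompt_tokens, completion_tokens = 1200, 350
    cost = (1200 * 3.00 + 350 * 15.00) / 1_000_000   # = 0.00885 USD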
@@ -354,14 +696,108 @@ def _litellm_success_callback(
 # Register the callback with LiteLLM
 litellm.success_callback = [_litellm_success_callback]
 
+# --- Cost Mapping Support (CSV Rates) ---
+# Populate from CSV inside llm_invoke; used by callback fallback
+_MODEL_RATE_MAP: Dict[str, Tuple[float, float]] = {}
+
+def _set_model_rate_map(df: pd.DataFrame) -> None:
+    global _MODEL_RATE_MAP
+    try:
+        _MODEL_RATE_MAP = {
+            str(row['model']): (
+                float(row['input']) if pd.notna(row['input']) else 0.0,
+                float(row['output']) if pd.notna(row['output']) else 0.0,
+            )
+            for _, row in df.iterrows()
+        }
+    except Exception:
+        _MODEL_RATE_MAP = {}
+
 # --- Helper Functions ---
 
-def
-    """
-    if
-
+def _is_malformed_json_response(content: str, threshold: int = 100) -> bool:
+    """
+    Detect if a JSON response appears malformed due to excessive trailing newlines.
+
+    This can happen when Gemini generates thousands of \n characters in a JSON string value,
+    causing the response to be truncated and missing closing braces.
+
+    Args:
+        content: The raw response content string
+        threshold: Number of consecutive trailing \n sequences to consider malformed
+
+    Returns:
+        True if the response appears malformed, False otherwise
+    """
+    if not content or not isinstance(content, str):
+        return False
+
+    # Check if it starts like JSON but doesn't end properly
+    stripped = content.strip()
+    if not stripped.startswith('{'):
+        return False
+
+    # If it ends with }, it's probably fine
+    if stripped.endswith('}'):
+        return False
+
+    # Count trailing \n sequences (escaped newlines in JSON strings)
+    # The pattern \n in a JSON string appears as \\n in the raw content
+    trailing_newline_count = 0
+    check_content = stripped
+    while check_content.endswith('\\n'):
+        trailing_newline_count += 1
+        check_content = check_content[:-2]
+
+    # If there are many trailing \n sequences, it's likely malformed
+    if trailing_newline_count >= threshold:
+        return True
+
+    # Also check for response that looks truncated mid-string
+    # (ends with characters that suggest we're inside a JSON string value)
+    if not stripped.endswith('}') and not stripped.endswith(']') and not stripped.endswith('"'):
+        # Could be truncated in the middle of an escaped sequence
+        if stripped.endswith('\\'):
+            return True
+
+    return False
+
+
+def _load_model_data(csv_path: Optional[Path]) -> pd.DataFrame:
+    """Loads and preprocesses the LLM model data from CSV.
+
+    Args:
+        csv_path: Path to CSV file, or None to use package default
+
+    Returns:
+        DataFrame with model configuration data
+    """
+    # If csv_path is provided, try to load from it
+    if csv_path is not None:
+        if not csv_path.exists():
+            logger.warning(f"Specified LLM model CSV not found at {csv_path}, trying package default")
+            csv_path = None
+        else:
+            try:
+                df = pd.read_csv(csv_path)
+                logger.debug(f"Loaded model data from {csv_path}")
+                # Continue with the rest of the function...
+            except Exception as e:
+                logger.warning(f"Failed to load CSV from {csv_path}: {e}, trying package default")
+                csv_path = None
+
+    # If csv_path is None or loading failed, use package default
+    if csv_path is None:
+        try:
+            # Use importlib.resources to load the packaged CSV
+            csv_data = importlib.resources.files('pdd').joinpath('data/llm_model.csv').read_text()
+            import io
+            df = pd.read_csv(io.StringIO(csv_data))
+            logger.info("Loaded model data from package default")
+        except Exception as e:
+            raise FileNotFoundError(f"Failed to load default LLM model CSV from package: {e}")
+
     try:
-        df = pd.read_csv(csv_path)
         # Basic validation and type conversion
         required_cols = ['provider', 'model', 'input', 'output', 'coding_arena_elo', 'api_key', 'structured_output', 'reasoning_type']
         for col in required_cols:
@@ -434,11 +870,26 @@ def _select_model_candidates(
         # Try finding base model in the *original* df in case it was filtered out
         original_base = model_df[model_df['model'] == base_model_name]
         if not original_base.empty:
-
-
-
-
-
+            # Base exists but may be misconfigured (e.g., missing API key). Keep erroring loudly.
+            raise ValueError(
+                f"Base model '{base_model_name}' found in CSV but requires API key '{original_base.iloc[0]['api_key']}' which might be missing or invalid configuration."
+            )
+        # Option A': Soft fallback – choose a reasonable surrogate base and continue
+        # Strategy (simplified and deterministic): pick the first available model
+        # from the CSV as the surrogate base. This mirrors typical CSV ordering
+        # expectations and keeps behavior predictable across environments.
+        try:
+            base_model = available_df.iloc[0]
+            logger.warning(
+                f"Base model '{base_model_name}' not found in CSV. Falling back to surrogate base '{base_model['model']}' (Option A')."
+            )
+        except Exception:
+            # If any unexpected error occurs during fallback, raise a clear error
+            raise ValueError(
+                f"Specified base model '{base_model_name}' not found and fallback selection failed. Check your LLM model CSV."
+            )
+    else:
+        base_model = base_model_row.iloc[0]
 
     # 3. Determine Target and Sort
     candidates = []
@@ -449,9 +900,10 @@ def _select_model_candidates(
         # Sort remaining by ELO descending as fallback
         available_df['sort_metric'] = -available_df['coding_arena_elo'] # Negative for descending sort
         candidates = available_df.sort_values(by='sort_metric').to_dict('records')
-        # Ensure base model is first if it exists
-
-
+        # Ensure effective base model is first if it exists (supports surrogate base)
+        effective_base_name = str(base_model['model']) if isinstance(base_model, pd.Series) else base_model_name
+        if any(c['model'] == effective_base_name for c in candidates):
+            candidates.sort(key=lambda x: 0 if x['model'] == effective_base_name else 1)
         target_metric_value = f"Base Model ELO: {base_model['coding_arena_elo']}"
 
     elif strength < 0.5:
@@ -556,6 +1008,45 @@ def _sanitize_api_key(key_value: str) -> str:
     return sanitized
 
 
+def _save_key_to_env_file(key_name: str, value: str, env_path: Path) -> None:
+    """Save or update a key in the .env file.
+
+    - Replaces existing key in-place (no comment + append)
+    - Removes old commented versions of the same key (Issue #183)
+    - Preserves all other content
+    """
+    lines = []
+    if env_path.exists():
+        with open(env_path, 'r') as f:
+            lines = f.readlines()
+
+    new_lines = []
+    key_replaced = False
+    prefix = f"{key_name}="
+    prefix_spaced = f"{key_name} ="
+
+    for line in lines:
+        stripped = line.strip()
+        # Skip old commented versions of this key (cleanup accumulation)
+        if stripped.startswith(f"# {prefix}") or stripped.startswith(f"# {prefix_spaced}"):
+            continue
+        elif stripped.startswith(prefix) or stripped.startswith(prefix_spaced):
+            # Replace in-place
+            new_lines.append(f'{key_name}="{value}"\n')
+            key_replaced = True
+        else:
+            new_lines.append(line)
+
+    # Add key if not found
+    if not key_replaced:
+        if new_lines and not new_lines[-1].endswith('\n'):
+            new_lines.append('\n')
+        new_lines.append(f'{key_name}="{value}"\n')
+
+    with open(env_path, 'w') as f:
+        f.writelines(new_lines)
+
+
 def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, bool], verbose: bool) -> bool:
     """Checks for API key in env, prompts user if missing, and updates .env."""
     key_name = model_info.get('api_key')
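To make the in-place replacement concrete, a small sketch of what `_save_key_to_env_file` does to an existing `.env` (hypothetical file contents, run from a scratch directory):

    from pathlib import Path

    env = Path(".env")
    env.write_text("# OPENAI_API_KEY=old-commented\nOPENAI_API_KEY=old-value\nOTHER=keep\n")
    _save_key_to_env_file("OPENAI_API_KEY", "new-value", env)
    print(env.read_text())
    # OPENAI_API_KEY="new-value"   <- replaced in place, old commented copy removed
    # OTHER=keep                    <- untouched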
@@ -576,6 +1067,12 @@ def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, b
         return True
     else:
         logger.warning(f"API key environment variable '{key_name}' for model '{model_info.get('model')}' is not set.")
+
+        # Skip prompting if --force flag is set (non-interactive mode)
+        if os.environ.get('PDD_FORCE'):
+            logger.error(f"API key '{key_name}' not set. In --force mode, skipping interactive prompt.")
+            return False
+
         try:
             # Interactive prompt
             user_provided_key = input(f"Please enter the API key for {key_name}: ").strip()
@@ -593,39 +1090,7 @@ def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, b
 
             # Update .env file
             try:
-
-                if ENV_PATH.exists():
-                    with open(ENV_PATH, 'r') as f:
-                        lines = f.readlines()
-
-                new_lines = []
-                # key_updated = False
-                prefix = f"{key_name}="
-                prefix_spaced = f"{key_name} =" # Handle potential spaces
-
-                for line in lines:
-                    stripped_line = line.strip()
-                    if stripped_line.startswith(prefix) or stripped_line.startswith(prefix_spaced):
-                        # Comment out the old key
-                        new_lines.append(f"# {line}")
-                        # key_updated = True # Indicates we found an old line to comment
-                    elif stripped_line.startswith(f"# {prefix}") or stripped_line.startswith(f"# {prefix_spaced}"):
-                        # Keep already commented lines as they are
-                        new_lines.append(line)
-                    else:
-                        new_lines.append(line)
-
-                # Append the new key, ensuring quotes for robustness
-                new_key_line = f'{key_name}="{user_provided_key}"\n'
-                # Add newline before if file not empty and doesn't end with newline
-                if new_lines and not new_lines[-1].endswith('\n'):
-                    new_lines.append('\n')
-                new_lines.append(new_key_line)
-
-
-                with open(ENV_PATH, 'w') as f:
-                    f.writelines(new_lines)
-
+                _save_key_to_env_file(key_name, user_provided_key, ENV_PATH)
                 logger.info(f"API key '{key_name}' saved to {ENV_PATH}.")
                 logger.warning("SECURITY WARNING: The API key has been saved to your .env file. "
                                "Ensure this file is kept secure and is included in your .gitignore.")
@@ -647,7 +1112,6 @@ def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, b
 def _format_messages(prompt: str, input_data: Union[Dict[str, Any], List[Dict[str, Any]]], use_batch_mode: bool) -> Union[List[Dict[str, str]], List[List[Dict[str, str]]]]:
     """Formats prompt and input into LiteLLM message format."""
     try:
-        prompt_template = PromptTemplate.from_template(prompt)
         if use_batch_mode:
             if not isinstance(input_data, list):
                 raise ValueError("input_json must be a list of dictionaries when use_batch_mode is True.")
@@ -655,19 +1119,424 @@ def _format_messages(prompt: str, input_data: Union[Dict[str, Any], List[Dict[st
|
|
|
655
1119
|
for item in input_data:
|
|
656
1120
|
if not isinstance(item, dict):
|
|
657
1121
|
raise ValueError("Each item in input_json list must be a dictionary for batch mode.")
|
|
658
|
-
formatted_prompt =
|
|
1122
|
+
formatted_prompt = prompt.format(**item)
|
|
659
1123
|
all_messages.append([{"role": "user", "content": formatted_prompt}])
|
|
660
1124
|
return all_messages
|
|
661
1125
|
else:
|
|
662
1126
|
if not isinstance(input_data, dict):
|
|
663
1127
|
raise ValueError("input_json must be a dictionary when use_batch_mode is False.")
|
|
664
|
-
formatted_prompt =
|
|
1128
|
+
formatted_prompt = prompt.format(**input_data)
|
|
665
1129
|
return [{"role": "user", "content": formatted_prompt}]
|
|
666
1130
|
except KeyError as e:
|
|
667
|
-
raise ValueError(f"Prompt formatting error: Missing key {e} in input_json for prompt
|
|
1131
|
+
raise ValueError(f"Prompt formatting error: Missing key {e} in input_json for prompt string.") from e
|
|
668
1132
|
except Exception as e:
|
|
669
1133
|
raise ValueError(f"Error formatting prompt: {e}") from e
|
|
670
1134
|
|
|
1135
|
+
# --- JSON Extraction Helpers ---
|
|
1136
|
+
import re
|
|
1137
|
+
|
|
1138
|
+
def _extract_fenced_json_block(text: str) -> Optional[str]:
|
|
1139
|
+
try:
|
|
1140
|
+
m = re.search(r"```json\s*(\{[\s\S]*?\})\s*```", text, flags=re.IGNORECASE)
|
|
1141
|
+
if m:
|
|
1142
|
+
return m.group(1)
|
|
1143
|
+
return None
|
|
1144
|
+
except Exception:
|
|
1145
|
+
return None
|
|
1146
|
+
|
|
1147
|
+
def _extract_balanced_json_objects(text: str) -> List[str]:
|
|
1148
|
+
results: List[str] = []
|
|
1149
|
+
brace_stack = 0
|
|
1150
|
+
start_idx = -1
|
|
1151
|
+
in_string = False
|
|
1152
|
+
escape = False
|
|
1153
|
+
for i, ch in enumerate(text):
|
|
1154
|
+
if in_string:
|
|
1155
|
+
if escape:
|
|
1156
|
+
escape = False
|
|
1157
|
+
elif ch == '\\':
|
|
1158
|
+
escape = True
|
|
1159
|
+
elif ch == '"':
|
|
1160
|
+
in_string = False
|
|
1161
|
+
continue
|
|
1162
|
+
else:
|
|
1163
|
+
if ch == '"':
|
|
1164
|
+
in_string = True
|
|
1165
|
+
continue
|
|
1166
|
+
if ch == '{':
|
|
1167
|
+
if brace_stack == 0:
|
|
1168
|
+
start_idx = i
|
|
1169
|
+
brace_stack += 1
|
|
1170
|
+
elif ch == '}':
|
|
1171
|
+
if brace_stack > 0:
|
|
1172
|
+
brace_stack -= 1
|
|
1173
|
+
if brace_stack == 0 and start_idx != -1:
|
|
1174
|
+
results.append(text[start_idx:i+1])
|
|
1175
|
+
start_idx = -1
|
|
1176
|
+
return results
|
|
1177
|
+
|
|
1178
|
+
|
|
1179
|
+
def _looks_like_python_code(s: str) -> bool:
|
|
1180
|
+
"""
|
|
1181
|
+
Heuristic check if a string looks like Python code.
|
|
1182
|
+
|
|
1183
|
+
Used to determine if we should attempt Python syntax repair on a string field.
|
|
1184
|
+
"""
|
|
1185
|
+
if not s or len(s) < 10:
|
|
1186
|
+
return False
|
|
1187
|
+
# Check for common Python patterns
|
|
1188
|
+
code_indicators = ('def ', 'class ', 'import ', 'from ', 'if __name__', 'return ', 'print(')
|
|
1189
|
+
return any(indicator in s for indicator in code_indicators)
|
|
1190
|
+
|
|
1191
|
+
|
|
1192
|
+
# Field names known to contain prose text, not Python code
|
|
1193
|
+
# These are skipped during syntax validation to avoid false positives
|
|
1194
|
+
_PROSE_FIELD_NAMES = frozenset({
|
|
1195
|
+
'reasoning', # PromptAnalysis - completeness reasoning
|
|
1196
|
+
'explanation', # TrimResultsOutput, FixerOutput - prose explanations
|
|
1197
|
+
'analysis', # DiffAnalysis, CodePatchResult - analysis text
|
|
1198
|
+
'change_instructions', # ChangeInstruction, ConflictChange - instructions
|
|
1199
|
+
'change_description', # DiffAnalysis - description of changes
|
|
1200
|
+
'planned_modifications', # CodePatchResult - modification plans
|
|
1201
|
+
'details', # VerificationOutput - issue details
|
|
1202
|
+
'description', # General prose descriptions
|
|
1203
|
+
'focus', # Focus descriptions
|
|
1204
|
+
'file_summary', # FileSummary - prose summaries of file contents
|
|
1205
|
+
})
|
|
1206
|
+
|
|
1207
|
+
|
|
1208
|
+
def _is_prose_field_name(field_name: str) -> bool:
|
|
1209
|
+
"""Check if a field name indicates it contains prose, not code.
|
|
1210
|
+
|
|
1211
|
+
Used to skip syntax validation on prose fields that may contain
|
|
1212
|
+
Python keywords (like 'return' or 'import') but are not actual code.
|
|
1213
|
+
"""
|
|
1214
|
+
return field_name.lower() in _PROSE_FIELD_NAMES
|
|
1215
|
+
|
|
1216
|
+
|
|
1217
|
+
def _repair_python_syntax(code: str) -> str:
|
|
1218
|
+
"""
|
|
1219
|
+
Validate Python code syntax and attempt repairs if invalid.
|
|
1220
|
+
|
|
1221
|
+
Sometimes LLMs include spurious characters at string boundaries,
|
|
1222
|
+
especially when the code contains quotes. This function attempts
|
|
1223
|
+
to detect and repair such issues.
|
|
1224
|
+
|
|
1225
|
+
Args:
|
|
1226
|
+
code: Python code string to validate/repair
|
|
1227
|
+
|
|
1228
|
+
Returns:
|
|
1229
|
+
Repaired code if a fix was found, otherwise original code
|
|
1230
|
+
"""
|
|
1231
|
+
import ast
|
|
1232
|
+
|
|
1233
|
+
if not code or not code.strip():
|
|
1234
|
+
return code
|
|
1235
|
+
|
|
1236
|
+
# First, try to parse as-is
|
|
1237
|
+
try:
|
|
1238
|
+
ast.parse(code)
|
|
1239
|
+
return code # Valid, no repair needed
|
|
1240
|
+
except SyntaxError:
|
|
1241
|
+
pass
|
|
1242
|
+
|
|
1243
|
+
# Try common repairs
|
|
1244
|
+
repaired = code
|
|
1245
|
+
|
|
1246
|
+
# Repair 1: Trailing spurious quote (the specific issue we've seen)
|
|
1247
|
+
for quote in ['"', "'"]:
|
|
1248
|
+
if repaired.rstrip().endswith(quote):
|
|
1249
|
+
candidate = repaired.rstrip()[:-1]
|
|
1250
|
+
try:
|
|
1251
|
+
ast.parse(candidate)
|
|
1252
|
+
logger.info(f"[INFO] Repaired code by removing trailing {quote!r}")
|
|
1253
|
+
return candidate
|
|
1254
|
+
except SyntaxError:
|
|
1255
|
+
pass
|
|
1256
|
+
|
|
1257
|
+
# Repair 2: Leading spurious quote
|
|
1258
|
+
for quote in ['"', "'"]:
|
|
1259
|
+
if repaired.lstrip().startswith(quote):
|
|
1260
|
+
candidate = repaired.lstrip()[1:]
|
|
1261
|
+
try:
|
|
1262
|
+
ast.parse(candidate)
|
|
1263
|
+
logger.info(f"[INFO] Repaired code by removing leading {quote!r}")
|
|
1264
|
+
return candidate
|
|
1265
|
+
except SyntaxError:
|
|
1266
|
+
pass
|
|
1267
|
+
|
|
1268
|
+
# Repair 3: Both leading and trailing spurious quotes
|
|
1269
|
+
for quote in ['"', "'"]:
|
|
1270
|
+
stripped = repaired.strip()
|
|
1271
|
+
if stripped.startswith(quote) and stripped.endswith(quote):
|
|
1272
|
+
candidate = stripped[1:-1]
|
|
1273
|
+
try:
|
|
1274
|
+
ast.parse(candidate)
|
|
1275
|
+
logger.info(f"[INFO] Repaired code by removing surrounding {quote!r}")
|
|
1276
|
+
return candidate
|
|
1277
|
+
except SyntaxError:
|
|
1278
|
+
pass
|
|
1279
|
+
|
|
1280
|
+
# If no repair worked, return original (let it fail downstream)
|
|
1281
|
+
return code
|
|
1282
|
+
|
|
1283
|
+
|
|
1284
|
+
def _smart_unescape_code(code: str) -> str:
|
|
1285
|
+
"""
|
|
1286
|
+
Unescape literal \\n sequences in code while preserving them inside string literals.
|
|
1287
|
+
|
|
1288
|
+
When LLMs return code as JSON, newlines get double-escaped. After JSON parsing,
|
|
1289
|
+
we have literal backslash-n (2 chars) that should be actual newlines for code
|
|
1290
|
+
structure, BUT escape sequences inside Python strings (like print("\\n")) should
|
|
1291
|
+
remain as escape sequences.
|
|
1292
|
+
|
|
1293
|
+
Args:
|
|
1294
|
+
code: Python code that may have literal \\n sequences
|
|
1295
|
+
|
|
1296
|
+
Returns:
|
|
1297
|
+
Code with structural newlines unescaped but string literals preserved
|
|
1298
|
+
"""
|
|
1299
|
+
LITERAL_BACKSLASH_N = '\\' + 'n' # Literal \n (2 chars)
|
|
1300
|
+
|
|
1301
|
+
if LITERAL_BACKSLASH_N not in code:
|
|
1302
|
+
return code
|
|
1303
|
+
|
|
1304
|
+
# First, check if the code already has actual newlines (mixed state)
|
|
1305
|
+
# If it does, we need to be more careful
|
|
1306
|
+
has_actual_newlines = '\n' in code
|
|
1307
|
+
|
|
1308
|
+
if not has_actual_newlines:
|
|
1309
|
+
# All newlines are escaped - this is the double-escaped case
|
|
1310
|
+
# We need to unescape them but preserve \n inside string literals
|
|
1311
|
+
|
|
1312
|
+
# Strategy: Use a placeholder for \n inside strings, unescape all, then restore
|
|
1313
|
+
# We detect string literals by tracking quote state
|
|
1314
|
+
|
|
1315
|
+
result = []
|
|
1316
|
+
i = 0
|
|
1317
|
+
in_string = False
|
|
1318
|
+
string_char = None
|
|
1319
|
+
in_fstring = False
|
|
1320
|
+
|
|
1321
|
+
# Placeholder that won't appear in code
|
|
1322
|
+
PLACEHOLDER = '\x00NEWLINE_ESCAPE\x00'
|
|
1323
|
+
|
|
1324
|
+
while i < len(code):
|
|
1325
|
+
# Check for escape sequences (both actual and literal)
|
|
1326
|
+
if i + 1 < len(code) and code[i] == '\\':
|
|
1327
|
+
next_char = code[i + 1]
|
|
1328
|
+
|
|
1329
|
+
if in_string:
|
|
1330
|
+
# Inside a string - preserve escape sequences
|
|
1331
|
+
if next_char == 'n':
|
|
1332
|
+
result.append(PLACEHOLDER)
|
|
1333
|
+
i += 2
|
|
1334
|
+
continue
|
|
1335
|
+
elif next_char == 't':
|
|
1336
|
+
result.append('\\' + 't') # Keep \t as-is in strings
|
|
1337
|
+
i += 2
|
|
1338
|
+
continue
|
|
1339
|
+
elif next_char == 'r':
|
|
1340
|
+
result.append('\\' + 'r') # Keep \r as-is in strings
|
|
1341
|
+
i += 2
|
|
1342
|
+
continue
|
|
1343
|
+
elif next_char in ('"', "'", '\\'):
|
|
1344
|
+
# Keep escaped quotes and backslashes
|
|
1345
|
+
result.append(code[i:i+2])
|
|
1346
|
+
i += 2
|
|
1347
|
+
continue
|
|
1348
|
+
|
|
1349
|
+
# Check for string delimiters
|
|
1350
|
+
if not in_string:
|
|
1351
|
+
# Check for triple quotes first
|
|
1352
|
+
if i + 2 < len(code) and code[i:i+3] in ('"""', "'''"):
|
|
1353
|
+
in_string = True
|
|
1354
|
+
string_char = code[i:i+3]
|
|
1355
|
+
# Check if preceded by 'f' for f-string
|
|
1356
|
+
in_fstring = i > 0 and code[i-1] == 'f'
|
|
1357
|
+
result.append(code[i:i+3])
|
|
1358
|
+
i += 3
|
|
1359
|
+
continue
|
|
1360
|
+
elif code[i] in ('"', "'"):
|
|
1361
|
+
in_string = True
|
|
1362
|
+
string_char = code[i]
|
|
1363
|
+
in_fstring = i > 0 and code[i-1] == 'f'
|
|
1364
|
+
result.append(code[i])
|
|
1365
|
+
i += 1
|
|
1366
|
+
continue
|
|
1367
|
+
else:
|
|
1368
|
+
# Check for end of string
|
|
1369
|
+
if len(string_char) == 3: # Triple quote
|
|
1370
|
+
if i + 2 < len(code) and code[i:i+3] == string_char:
|
|
1371
|
+
in_string = False
|
|
1372
|
+
in_fstring = False
|
|
1373
|
+
result.append(code[i:i+3])
|
|
1374
|
+
i += 3
|
|
1375
|
+
continue
|
|
1376
|
+
else: # Single quote
|
|
1377
|
+
if code[i] == string_char:
|
|
1378
|
+
in_string = False
|
|
1379
|
+
in_fstring = False
|
|
1380
|
+
result.append(code[i])
|
|
1381
|
+
i += 1
|
|
1382
|
+
continue
|
|
1383
|
+
|
|
1384
|
+
result.append(code[i])
|
|
1385
|
+
i += 1
|
|
1386
|
+
|
|
1387
|
+
intermediate = ''.join(result)
|
|
1388
|
+
|
|
1389
|
+
# Now unescape all remaining \n (these are structural)
|
|
1390
|
+
LITERAL_BACKSLASH_R_N = '\\' + 'r' + '\\' + 'n'
|
|
1391
|
+
LITERAL_BACKSLASH_T = '\\' + 't'
|
|
1392
|
+
|
|
1393
|
+
intermediate = intermediate.replace(LITERAL_BACKSLASH_R_N, '\r\n')
|
|
1394
|
+
intermediate = intermediate.replace(LITERAL_BACKSLASH_N, '\n')
|
|
1395
|
+
intermediate = intermediate.replace(LITERAL_BACKSLASH_T, '\t')
|
|
1396
|
+
|
|
1397
|
+
# Restore placeholders to \n (as escape sequences in strings)
|
|
1398
|
+
result_code = intermediate.replace(PLACEHOLDER, '\\n')
|
|
1399
|
+
|
|
1400
|
+
return result_code
|
|
1401
|
+
else:
|
|
1402
|
+
# Mixed state - some actual newlines, some literal \n
|
|
1403
|
+
# This means the JSON parsing already converted some, but not all
|
|
1404
|
+
# The literal \n remaining are likely in strings, so leave them alone
|
|
1405
|
+
return code
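A minimal usage sketch (not part of the diff) of the helper added above, assuming `pdd.llm_invoke` exposes the private function under the name shown:

```python
# Illustrative only: demonstrates the double-escaped input this helper targets.
from pdd.llm_invoke import _smart_unescape_code  # private helper added in this diff

# After JSON parsing, structural newlines arrive as the two characters backslash+n,
# while the escape inside print("hi\n") should stay an escape sequence.
double_escaped = 'def greet():\\n    print("hi\\n")\\n'
repaired = _smart_unescape_code(double_escaped)
print(repaired)  # structural \n become real newlines; the one inside the string does not
```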
+
+
+def _unescape_code_newlines(obj: Any) -> Any:
+    """
+    Fix double-escaped newlines in Pydantic model string fields.
+
+    Some models (e.g., Gemini) return JSON with \\\\n instead of \\n in code strings,
+    resulting in literal backslash-n text instead of actual newlines after JSON parsing.
+    This function recursively unescapes these in string fields of Pydantic models.
+
+    Also repairs Python syntax errors in code-like string fields (e.g., trailing quotes).
+
+    The check uses literal backslash-n (2 chars) vs actual newline (1 char):
+    - '\\\\n' in Python source = literal backslash + n (2 chars) - needs fixing
+    - '\\n' in Python source = newline character (1 char) - already correct
+
+    Args:
+        obj: A Pydantic model, dict, list, or primitive value
+
+    Returns:
+        The same object with string fields unescaped and code fields repaired
+    """
+    if obj is None:
+        return obj
+
+    def _process_string(s: str) -> str:
+        """Process a string: unescape newlines and repair Python syntax if needed."""
+        result = s
+        # Smart unescape that preserves \n inside string literals
+        if _looks_like_python_code(result):
+            result = _smart_unescape_code(result)
+            result = _repair_python_syntax(result)
+        else:
+            # For non-code strings, do simple unescape
+            LITERAL_BACKSLASH_N = '\\' + 'n'
+            LITERAL_BACKSLASH_R_N = '\\' + 'r' + '\\' + 'n'
+            LITERAL_BACKSLASH_T = '\\' + 't'
+            if LITERAL_BACKSLASH_N in result:
+                result = result.replace(LITERAL_BACKSLASH_R_N, '\r\n')
+                result = result.replace(LITERAL_BACKSLASH_N, '\n')
+                result = result.replace(LITERAL_BACKSLASH_T, '\t')
+        return result
+
+    # Handle Pydantic models
+    if isinstance(obj, BaseModel):
+        # Get all field values and process strings
+        for field_name in obj.model_fields:
+            value = getattr(obj, field_name)
+            if isinstance(value, str):
+                processed = _process_string(value)
+                if processed != value:
+                    object.__setattr__(obj, field_name, processed)
+            elif isinstance(value, (dict, list, BaseModel)):
+                _unescape_code_newlines(value)
+        return obj
+
+    # Handle dicts
+    if isinstance(obj, dict):
+        for key, value in obj.items():
+            if isinstance(value, str):
+                obj[key] = _process_string(value)
+            elif isinstance(value, (dict, list)):
+                _unescape_code_newlines(value)
+        return obj
+
+    # Handle lists
+    if isinstance(obj, list):
+        for i, item in enumerate(obj):
+            if isinstance(item, str):
+                obj[i] = _process_string(item)
+            elif isinstance(item, (dict, list, BaseModel)):
+                _unescape_code_newlines(item)
+        return obj
+
+    return obj
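A hedged sketch of how the recursive unescape walks a plain dict; the field names here are hypothetical and assume they are not treated as code-like by the heuristics above:

```python
# Minimal sketch, not part of the diff: dicts and lists are mutated in place.
from pdd.llm_invoke import _unescape_code_newlines  # private helper added in this diff

payload = {"explanation": "First line.\\nSecond line.", "nested": ["a\\nb"]}
_unescape_code_newlines(payload)        # mutates in place and also returns the object
print(payload["explanation"])           # literal backslash-n becomes a real newline
```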
+
+
+def _has_invalid_python_code(obj: Any, field_name: str = "") -> bool:
+    """
+    Check if any code-like string fields have invalid Python syntax.
+
+    This is used after _unescape_code_newlines to detect if repair failed
+    and we should retry with cache disabled.
+
+    Skips fields in _PROSE_FIELD_NAMES to avoid false positives on prose
+    text that mentions code patterns (e.g., "ends on a return statement").
+
+    Args:
+        obj: A Pydantic model, dict, list, or primitive value
+        field_name: The name of the field being validated (used to skip prose)
+
+    Returns:
+        True if there are invalid code fields that couldn't be repaired
+    """
+    import ast
+
+    if obj is None:
+        return False
+
+    if isinstance(obj, str):
+        # Skip validation for known prose fields
+        if _is_prose_field_name(field_name):
+            return False
+        if _looks_like_python_code(obj):
+            try:
+                ast.parse(obj)
+                return False  # Valid
+            except SyntaxError:
+                return True  # Invalid
+        return False
+
+    if isinstance(obj, BaseModel):
+        for name in obj.model_fields:
+            value = getattr(obj, name)
+            if _has_invalid_python_code(value, field_name=name):
+                return True
+        return False
+
+    if isinstance(obj, dict):
+        for key, value in obj.items():
+            fname = key if isinstance(key, str) else ""
+            if _has_invalid_python_code(value, field_name=fname):
+                return True
+        return False
+
+    if isinstance(obj, list):
+        for item in obj:
+            if _has_invalid_python_code(item, field_name=field_name):
+                return True
+        return False
+
+    return False
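An illustrative check, assuming the `code` key is recognized as code-like (and not listed in `_PROSE_FIELD_NAMES`) by the heuristics this helper relies on:

```python
# Sketch only: exact behavior depends on _looks_like_python_code / _is_prose_field_name.
from pdd.llm_invoke import _has_invalid_python_code  # private helper added in this diff

print(_has_invalid_python_code({"code": "def f(:\n    pass"}))   # True - broken syntax
print(_has_invalid_python_code({"code": "def f():\n    pass"}))  # False - parses cleanly
```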
+
+
 # --- Main Function ---

 def llm_invoke(
@@ -677,9 +1546,12 @@ def llm_invoke(
     temperature: float = 0.1,
     verbose: bool = False,
     output_pydantic: Optional[Type[BaseModel]] = None,
-
+    output_schema: Optional[Dict[str, Any]] = None,
+    time: Optional[float] = 0.25,
     use_batch_mode: bool = False,
     messages: Optional[Union[List[Dict[str, str]], List[List[Dict[str, str]]]]] = None,
+    language: Optional[str] = None,
+    use_cloud: Optional[bool] = None,
 ) -> Dict[str, Any]:
     """
     Runs a prompt with given input using LiteLLM, handling model selection,
@@ -693,9 +1565,11 @@ def llm_invoke(
         temperature: LLM temperature.
         verbose: Print detailed logs.
         output_pydantic: Optional Pydantic model for structured output.
+        output_schema: Optional raw JSON schema dictionary for structured output (alternative to output_pydantic).
         time: Relative thinking time (0-1, default 0.25).
         use_batch_mode: Use batch completion if True.
         messages: Pre-formatted list of messages (or list of lists for batch). If provided, ignores prompt and input_json.
+        use_cloud: None=auto-detect (cloud if enabled, local if PDD_FORCE_LOCAL=1), True=force cloud, False=force local.

     Returns:
         Dictionary containing 'result', 'cost', 'model_name', 'thinking_output'.
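A sketch of a call using the new parameters documented above; the model actually chosen and the cost depend on the local `llm_model.csv`, so treat this as illustrative rather than canonical:

```python
from pdd.llm_invoke import llm_invoke

result = llm_invoke(
    prompt="Summarize {text} as JSON.",
    input_json={"text": "PDD turns prompts into code."},
    strength=0.5,
    output_schema={"type": "object", "properties": {"summary": {"type": "string"}}},
    use_cloud=False,   # force the local LiteLLM path
)
print(result["model_name"], result["cost"])
```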
@@ -704,6 +1578,7 @@ def llm_invoke(
         ValueError: For invalid inputs or prompt formatting errors.
         FileNotFoundError: If llm_model.csv is missing.
         RuntimeError: If all candidate models fail.
+        InsufficientCreditsError: If cloud execution fails due to insufficient credits.
         openai.*Error: If LiteLLM encounters API errors after retries.
     """
     # Set verbose logging if requested
@@ -720,6 +1595,58 @@ def llm_invoke(
         logger.debug(f" time: {time}")
         logger.debug(f" use_batch_mode: {use_batch_mode}")
         logger.debug(f" messages: {'provided' if messages else 'None'}")
+        logger.debug(f" use_cloud: {use_cloud}")
+
+    # --- 0. Cloud Execution Path ---
+    # Determine cloud usage: explicit param > environment > default (local)
+    if use_cloud is None:
+        # Check environment for cloud preference
+        # PDD_FORCE_LOCAL=1 forces local execution
+        force_local = os.environ.get("PDD_FORCE_LOCAL", "").lower() in ("1", "true", "yes")
+        if force_local:
+            use_cloud = False
+        else:
+            # Try to use cloud if credentials are configured
+            try:
+                from pdd.core.cloud import CloudConfig
+                use_cloud = CloudConfig.is_cloud_enabled()
+            except ImportError:
+                use_cloud = False
+
+    if use_cloud:
+        from rich.console import Console
+        console = Console()
+
+        if verbose:
+            logger.debug("Attempting cloud execution...")
+
+        try:
+            return _llm_invoke_cloud(
+                prompt=prompt,
+                input_json=input_json,
+                strength=strength,
+                temperature=temperature,
+                verbose=verbose,
+                output_pydantic=output_pydantic,
+                output_schema=output_schema,
+                time=time,
+                use_batch_mode=use_batch_mode,
+                messages=messages,
+                language=language,
+            )
+        except CloudFallbackError as e:
+            # Notify user and fall back to local execution
+            console.print(f"[yellow]Cloud execution failed ({e}), falling back to local execution...[/yellow]")
+            logger.warning(f"Cloud fallback: {e}")
+            # Continue to local execution below
+        except InsufficientCreditsError:
+            # Re-raise credit errors - user needs to know
+            raise
+        except CloudInvocationError as e:
+            # Non-recoverable cloud error - notify and fall back
+            console.print(f"[yellow]Cloud error ({e}), falling back to local execution...[/yellow]")
+            logger.warning(f"Cloud invocation error: {e}")
+            # Continue to local execution below
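A hedged sketch of the resolution order implemented above: an explicit `use_cloud` wins, then `PDD_FORCE_LOCAL`, then `CloudConfig.is_cloud_enabled()`; anything else runs locally:

```python
import os
os.environ["PDD_FORCE_LOCAL"] = "1"   # forces the local path when use_cloud is None

from pdd.llm_invoke import llm_invoke
out = llm_invoke(prompt="Say hi to {name}.", input_json={"name": "PDD"})  # runs locally
```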

 # --- 1. Load Environment & Validate Inputs ---
 # .env loading happens at module level
@@ -744,6 +1671,10 @@ def llm_invoke(
     else:
         raise ValueError("Either 'messages' or both 'prompt' and 'input_json' must be provided.")

+    # Handle None time (means "no reasoning requested")
+    if time is None:
+        time = 0.0
+
     if not (0.0 <= strength <= 1.0):
         raise ValueError("'strength' must be between 0.0 and 1.0.")
     if not (0.0 <= temperature <= 2.0):  # Common range for temperature
@@ -810,6 +1741,16 @@ def llm_invoke(
     # --- 3. Iterate Through Candidates and Invoke LLM ---
     last_exception = None
     newly_acquired_keys: Dict[str, bool] = {}  # Track keys obtained in this run
+
+    # Initialize variables for retry section
+    response_format = None
+    time_kwargs = {}
+
+    # Update global rate map for callback cost fallback
+    try:
+        _set_model_rate_map(model_df)
+    except Exception:
+        pass

     for model_info in candidate_models:
         model_name_litellm = model_info['model']
@@ -820,6 +1761,9 @@ def llm_invoke(
         logger.info(f"\n[ATTEMPT] Trying model: {model_name_litellm} (Provider: {provider})")

         retry_with_same_model = True
+        # Track per-model temperature adjustment attempt (avoid infinite loop)
+        current_temperature = temperature
+        temp_adjustment_done = False
         while retry_with_same_model:
             retry_with_same_model = False  # Assume success unless auth error on new key

@@ -834,7 +1778,10 @@ def llm_invoke(
             litellm_kwargs: Dict[str, Any] = {
                 "model": model_name_litellm,
                 "messages": formatted_messages,
-
+                # Use a local adjustable temperature to allow provider-specific fallbacks
+                "temperature": current_temperature,
+                # Retry on transient network errors (APIError, TimeoutError, ServiceUnavailableError)
+                "num_retries": 2,
             }

             api_key_name_from_csv = model_info.get('api_key')  # From CSV
@@ -847,7 +1794,14 @@ def llm_invoke(
             if is_vertex_model and api_key_name_from_csv == 'VERTEX_CREDENTIALS':
                 credentials_file_path = os.getenv("VERTEX_CREDENTIALS")  # Path from env var
                 vertex_project_env = os.getenv("VERTEX_PROJECT")
-
+                # Check for per-model location override, fall back to env var
+                model_location = model_info.get('location')
+                if pd.notna(model_location) and str(model_location).strip():
+                    vertex_location_env = str(model_location).strip()
+                    if verbose:
+                        logger.info(f"[INFO] Using per-model location override: '{vertex_location_env}' for model '{model_name_litellm}'")
+                else:
+                    vertex_location_env = os.getenv("VERTEX_LOCATION")

                 if credentials_file_path and vertex_project_env and vertex_location_env:
                     try:
@@ -861,14 +1815,23 @@ def llm_invoke(
                         if verbose:
                             logger.info(f"[INFO] For Vertex AI: using vertex_credentials from '{credentials_file_path}', project '{vertex_project_env}', location '{vertex_location_env}'.")
                     except FileNotFoundError:
+                        # Still pass project and location so ADC can work
+                        litellm_kwargs["vertex_project"] = vertex_project_env
+                        litellm_kwargs["vertex_location"] = vertex_location_env
                         if verbose:
-                            logger.
+                            logger.warning(f"[WARN] Vertex credentials file not found at '{credentials_file_path}'. Using ADC with project '{vertex_project_env}', location '{vertex_location_env}'.")
                     except json.JSONDecodeError:
+                        # Still pass project and location so ADC can work
+                        litellm_kwargs["vertex_project"] = vertex_project_env
+                        litellm_kwargs["vertex_location"] = vertex_location_env
                         if verbose:
-                            logger.error(f"[ERROR] Failed to decode JSON from Vertex credentials file: '{credentials_file_path}'.
+                            logger.error(f"[ERROR] Failed to decode JSON from Vertex credentials file: '{credentials_file_path}'. Using ADC with project '{vertex_project_env}', location '{vertex_location_env}'.")
                     except Exception as e:
+                        # Still pass project and location so ADC can work
+                        litellm_kwargs["vertex_project"] = vertex_project_env
+                        litellm_kwargs["vertex_location"] = vertex_location_env
                         if verbose:
-                            logger.error(f"[ERROR] Failed to load
+                            logger.error(f"[ERROR] Failed to load Vertex credentials from '{credentials_file_path}': {e}. Using ADC with project '{vertex_project_env}', location '{vertex_location_env}'.")
                 else:
                     if verbose:
                         logger.warning(f"[WARN] For Vertex AI (using '{api_key_name_from_csv}'): One or more required environment variables (VERTEX_CREDENTIALS, VERTEX_PROJECT, VERTEX_LOCATION) are missing.")
@@ -887,9 +1850,16 @@ def llm_invoke(

             # If this model is Vertex AI AND uses a direct API key string (not VERTEX_CREDENTIALS from CSV),
             # also pass project and location from env vars.
-            if is_vertex_model:
+            if is_vertex_model:
                 vertex_project_env = os.getenv("VERTEX_PROJECT")
-
+                # Check for per-model location override, fall back to env var
+                model_location = model_info.get('location')
+                if pd.notna(model_location) and str(model_location).strip():
+                    vertex_location_env = str(model_location).strip()
+                    if verbose:
+                        logger.info(f"[INFO] Using per-model location override: '{vertex_location_env}' for model '{model_name_litellm}'")
+                else:
+                    vertex_location_env = os.getenv("VERTEX_LOCATION")
                 if vertex_project_env and vertex_location_env:
                     litellm_kwargs["vertex_project"] = vertex_project_env
                     litellm_kwargs["vertex_location"] = vertex_location_env
@@ -903,13 +1873,36 @@ def llm_invoke(
             elif verbose:  # No api_key_name_from_csv in CSV for this model
                 logger.info(f"[INFO] No API key name specified in CSV for model '{model_name_litellm}'. LiteLLM will use its default authentication mechanisms (e.g., standard provider env vars or ADC for Vertex AI).")

-            # Add api_base if present in CSV
+            # Add base_url/api_base override if present in CSV
             api_base = model_info.get('base_url')
             if pd.notna(api_base) and api_base:
+                # LiteLLM prefers `base_url`; some older paths accept `api_base`.
+                litellm_kwargs["base_url"] = str(api_base)
                 litellm_kwargs["api_base"] = str(api_base)

-            #
-
+            # Provider-specific defaults (e.g., LM Studio)
+            model_name_lower = str(model_name_litellm).lower()
+            provider_lower_for_model = provider.lower()
+            is_lm_studio = model_name_lower.startswith('lm_studio/') or provider_lower_for_model == 'lm_studio'
+            is_groq = model_name_lower.startswith('groq/') or provider_lower_for_model == 'groq'
+            if is_lm_studio:
+                # Ensure base_url is set (fallback to env LM_STUDIO_API_BASE or localhost)
+                if not litellm_kwargs.get("base_url"):
+                    lm_studio_base = os.getenv("LM_STUDIO_API_BASE", "http://localhost:1234/v1")
+                    litellm_kwargs["base_url"] = lm_studio_base
+                    litellm_kwargs["api_base"] = lm_studio_base
+                    if verbose:
+                        logger.info(f"[INFO] Using LM Studio base_url: {lm_studio_base}")
+
+                # Ensure a non-empty api_key; LM Studio accepts any non-empty token (e.g., 'lm-studio')
+                if not litellm_kwargs.get("api_key"):
+                    lm_studio_key = os.getenv("LM_STUDIO_API_KEY") or "lm-studio"
+                    litellm_kwargs["api_key"] = lm_studio_key
+                    if verbose:
+                        logger.info("[INFO] Using LM Studio api_key placeholder (set LM_STUDIO_API_KEY to customize).")
+
+            # Handle Structured Output (JSON Mode / Pydantic / JSON Schema)
+            if output_pydantic or output_schema:
                 # Check if model supports structured output based on CSV flag or LiteLLM check
                 supports_structured = model_info.get('structured_output', False)
                 # Optional: Add litellm.supports_response_schema check if CSV flag is unreliable
@@ -918,18 +1911,98 @@ def llm_invoke(
                 # except: pass # Ignore errors in supports_response_schema check

                 if supports_structured:
-                    if
-
-
-
-
+                    if output_pydantic:
+                        if verbose:
+                            logger.info(f"[INFO] Requesting structured output (Pydantic: {output_pydantic.__name__}) for {model_name_litellm}")
+                        # Use json_schema with strict=True to enforce ALL required fields are present
+                        # This prevents LLMs from omitting required fields when they think they're not needed
+                        schema = output_pydantic.model_json_schema()
+                        # Ensure all properties are in required array (OpenAI strict mode requirement)
+                        _ensure_all_properties_required(schema)
+                        # Add additionalProperties: false for strict mode (required by OpenAI)
+                        schema["additionalProperties"] = False
+                        response_format = {
+                            "type": "json_schema",
+                            "json_schema": {
+                                "name": output_pydantic.__name__,
+                                "schema": schema,
+                                "strict": True
+                            }
+                        }
+                    else:  # output_schema is set
+                        if verbose:
+                            logger.info(f"[INFO] Requesting structured output (JSON Schema) for {model_name_litellm}")
+                        # LiteLLM expects {"type": "json_schema", "json_schema": {"name": "response", "schema": schema_dict, "strict": true}}
+                        # OR for some providers just the schema dict if type is json_object.
+                        # Best practice for broad compatibility via LiteLLM is usually the dict directly or wrapped.
+                        # For now, let's assume we pass the schema dict as 'response_format' which LiteLLM handles for many providers
+                        # or wrap it if needed. LiteLLM 1.40+ supports passing the dict directly for many.
+                        response_format = {
+                            "type": "json_schema",
+                            "json_schema": {
+                                "name": "response",
+                                "schema": output_schema,
+                                "strict": False
+                            }
+                        }
+                        # Add additionalProperties: false for strict mode (required by OpenAI)
+                        response_format["json_schema"]["schema"]["additionalProperties"] = False
+
+                    litellm_kwargs["response_format"] = response_format
+
+                    # LM Studio requires "json_schema" format, not "json_object"
+                    # Use extra_body to bypass litellm.drop_params stripping the schema
+                    if is_lm_studio and response_format and response_format.get("type") == "json_object":
+                        schema = response_format.get("response_schema", {})
+                        lm_studio_response_format = {
+                            "type": "json_schema",
+                            "json_schema": {
+                                "name": "response",
+                                "strict": True,
+                                "schema": schema
+                            }
+                        }
+                        # Use extra_body to bypass drop_params - passes directly to API
+                        litellm_kwargs["extra_body"] = {"response_format": lm_studio_response_format}
+                        # Remove from regular response_format to avoid conflicts
+                        if "response_format" in litellm_kwargs:
+                            del litellm_kwargs["response_format"]
+                        if verbose:
+                            logger.info(f"[INFO] Using extra_body for LM Studio response_format to bypass drop_params")
+
+                    # Groq has issues with tool-based structured output - use JSON mode with schema in prompt
+                    if is_groq and response_format:
+                        # Get the schema to include in system prompt
+                        if output_pydantic:
+                            schema = output_pydantic.model_json_schema()
+                        else:
+                            schema = output_schema
+
+                        # Use simple json_object mode (Groq's tool_use often fails)
+                        litellm_kwargs["response_format"] = {"type": "json_object"}
+
+                        # Prepend schema instruction to messages (json module is imported at top of file)
+                        schema_instruction = f"You must respond with valid JSON matching this schema:\n```json\n{json.dumps(schema, indent=2)}\n```\nRespond ONLY with the JSON object, no other text."
+
+                        # Find or create system message to prepend schema
+                        messages_list = litellm_kwargs.get("messages", [])
+                        if messages_list and messages_list[0].get("role") == "system":
+                            messages_list[0]["content"] = schema_instruction + "\n\n" + messages_list[0]["content"]
+                        else:
+                            messages_list.insert(0, {"role": "system", "content": schema_instruction})
+                        litellm_kwargs["messages"] = messages_list
+
+                        if verbose:
+                            logger.info(f"[INFO] Using JSON object mode with schema in prompt for Groq (avoiding tool_use issues)")
+
                     # As a fallback, one could use:
                     # litellm_kwargs["response_format"] = {"type": "json_object"}
                     # And potentially enable client-side validation:
                     # litellm.enable_json_schema_validation = True # Enable globally if needed
                 else:
+                    schema_name = output_pydantic.__name__ if output_pydantic else "JSON Schema"
                     if verbose:
-                        logger.warning(f"[WARN] Model {model_name_litellm} does not support structured output via CSV flag. Output might not be valid {
+                        logger.warning(f"[WARN] Model {model_name_litellm} does not support structured output via CSV flag. Output might not be valid {schema_name}.")
                     # Proceed without forcing JSON mode, parsing will be attempted later

             # --- NEW REASONING LOGIC ---
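For reference, this is the shape of the strict `response_format` the hunk above builds for Pydantic outputs; a sketch only, with the real schema coming from `output_pydantic.model_json_schema()`:

```python
from pydantic import BaseModel

class Joke(BaseModel):
    setup: str
    punchline: str

schema = Joke.model_json_schema()
schema["additionalProperties"] = False   # strict mode requirement noted in the diff
response_format = {
    "type": "json_schema",
    "json_schema": {"name": "Joke", "schema": schema, "strict": True},
}
```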
@@ -944,7 +2017,9 @@ def llm_invoke(
             # Currently known: Anthropic uses 'thinking'
             # Model name comparison is more robust than provider string
             if provider == 'anthropic':  # Check provider column instead of model prefix
-
+                thinking_param = {"type": "enabled", "budget_tokens": budget}
+                litellm_kwargs["thinking"] = thinking_param
+                time_kwargs["thinking"] = thinking_param
                 if verbose:
                     logger.info(f"[INFO] Requesting Anthropic thinking (budget type) with budget: {budget} tokens for {model_name_litellm}")
             else:
@@ -962,10 +2037,32 @@ def llm_invoke(
                     effort = "high"
                 elif time > 0.3:
                     effort = "medium"
-
-
-
-
+
+                # Map effort parameter per-provider/model family
+                model_lower = str(model_name_litellm).lower()
+                provider_lower = str(provider).lower()
+
+                if provider_lower == 'openai' and model_lower.startswith('gpt-5'):
+                    # OpenAI 5-series uses Responses API with nested 'reasoning'
+                    reasoning_obj = {"effort": effort, "summary": "auto"}
+                    litellm_kwargs["reasoning"] = reasoning_obj
+                    time_kwargs["reasoning"] = reasoning_obj
+                    if verbose:
+                        logger.info(f"[INFO] Requesting OpenAI reasoning.effort='{effort}' for {model_name_litellm} (Responses API)")
+
+                elif provider_lower == 'openai' and model_lower.startswith('o') and 'mini' not in model_lower:
+                    # Historical o* models may use LiteLLM's generic reasoning_effort param
+                    litellm_kwargs["reasoning_effort"] = effort
+                    time_kwargs["reasoning_effort"] = effort
+                    if verbose:
+                        logger.info(f"[INFO] Requesting reasoning_effort='{effort}' for {model_name_litellm}")
+
+                else:
+                    # Fallback to LiteLLM generic param when supported by provider adapter
+                    litellm_kwargs["reasoning_effort"] = effort
+                    time_kwargs["reasoning_effort"] = effort
+                    if verbose:
+                        logger.info(f"[INFO] Requesting generic reasoning_effort='{effort}' for {model_name_litellm}")

             elif reasoning_type == 'none':
                 if verbose:
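A worked sketch of the `time` → effort mapping used above; the diff only shows the `> 0.3` cut-off, so the upper threshold and the "low" default here are assumptions for illustration:

```python
def effort_for(time: float) -> str:
    if time > 0.6:      # assumed upper threshold; not shown in the diff
        return "high"
    if time > 0.3:      # matches the diff
        return "medium"
    return "low"        # assumed default

print(effort_for(0.25), effort_for(0.5), effort_for(0.9))  # low medium high
```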
@@ -997,6 +2094,168 @@ def llm_invoke(
                     logger.debug("NOT ENABLING CACHING: litellm.cache is None at call time")


+            # Route OpenAI gpt-5* models through Responses API to support 'reasoning'
+            model_lower_for_call = str(model_name_litellm).lower()
+            provider_lower_for_call = str(provider).lower()
+
+            if (
+                not use_batch_mode
+                and provider_lower_for_call == 'openai'
+                and model_lower_for_call.startswith('gpt-5')
+            ):
+                if verbose:
+                    logger.info(f"[INFO] Calling LiteLLM Responses API for {model_name_litellm}...")
+                try:
+                    # Build input text from messages
+                    if isinstance(formatted_messages, list) and formatted_messages and isinstance(formatted_messages[0], dict):
+                        input_text = "\n\n".join(f"{m.get('role','user')}: {m.get('content','')}" for m in formatted_messages)
+                    else:
+                        # Fallback: string cast
+                        input_text = str(formatted_messages)
+
+                    # Derive effort mapping already computed in time_kwargs
+                    reasoning_param = time_kwargs.get("reasoning")
+
+                    # Build text.format block for structured output
+                    # Default to plain text format
+                    text_block = {"format": {"type": "text"}}
+
+                    # If structured output requested, use text.format with json_schema
+                    # This is the correct way to enforce structured output via litellm.responses()
+                    if output_pydantic or output_schema:
+                        try:
+                            if output_pydantic:
+                                schema = output_pydantic.model_json_schema()
+                                name = output_pydantic.__name__
+                            else:
+                                schema = output_schema
+                                name = "response"
+
+                            # Ensure all properties are in required array (OpenAI strict mode requirement)
+                            _ensure_all_properties_required(schema)
+                            # Add additionalProperties: false for strict mode (required by OpenAI)
+                            schema['additionalProperties'] = False
+
+                            # Use text.format with json_schema for structured output
+                            text_block = {
+                                "format": {
+                                    "type": "json_schema",
+                                    "name": name,
+                                    "strict": True,
+                                    "schema": schema,
+                                }
+                            }
+                            if verbose:
+                                logger.info(f"[INFO] Using structured output via text.format for Responses API")
+                        except Exception as schema_e:
+                            logger.warning(f"[WARN] Failed to derive JSON schema: {schema_e}. Proceeding with plain text format.")
+
+                    # Build kwargs for litellm.responses()
+                    responses_kwargs = {
+                        "model": model_name_litellm,
+                        "input": input_text,
+                        "text": text_block,
+                    }
+                    if verbose and temperature not in (None, 0, 0.0):
+                        logger.info("[INFO] Skipping 'temperature' for OpenAI GPT-5 Responses call (unsupported by API).")
+                    if reasoning_param is not None:
+                        responses_kwargs["reasoning"] = reasoning_param
+
+                    # Call litellm.responses() which handles the API interaction
+                    resp = litellm.responses(**responses_kwargs)
+
+                    # Extract text result from response
+                    result_text = None
+                    try:
+                        # LiteLLM responses return output as a list of items
+                        for item in resp.output:
+                            if getattr(item, 'type', None) == 'message' and hasattr(item, 'content') and item.content:
+                                for content_item in item.content:
+                                    if hasattr(content_item, 'text'):
+                                        result_text = content_item.text
+                                        break
+                            if result_text:
+                                break
+                    except Exception:
+                        result_text = None
+
+                    # Calculate cost using usage + CSV rates
+                    total_cost = 0.0
+                    usage = getattr(resp, "usage", None)
+                    if usage is not None:
+                        in_tok = getattr(usage, "input_tokens", 0) or 0
+                        out_tok = getattr(usage, "output_tokens", 0) or 0
+                        in_rate = model_info.get('input', 0.0) or 0.0
+                        out_rate = model_info.get('output', 0.0) or 0.0
+                        total_cost = (in_tok * in_rate + out_tok * out_rate) / 1_000_000.0
+
+                    # Parse result if Pydantic output requested
+                    final_result = None
+                    if output_pydantic and result_text:
+                        try:
+                            final_result = output_pydantic.model_validate_json(result_text)
+                        except Exception as e:
+                            # With structured output, parsing should succeed
+                            # But if it fails, try JSON repair as fallback
+                            logger.warning(f"[WARN] Pydantic parse failed on Responses output: {e}. Attempting JSON repair...")
+
+                            # Try extracting from fenced JSON blocks first
+                            fenced = _extract_fenced_json_block(result_text)
+                            candidates: List[str] = []
+                            if fenced:
+                                candidates.append(fenced)
+                            else:
+                                candidates.extend(_extract_balanced_json_objects(result_text))
+
+                            # Also try the raw text as-is after stripping fences
+                            cleaned = result_text.strip()
+                            if cleaned.startswith("```json"):
+                                cleaned = cleaned[7:]
+                            elif cleaned.startswith("```"):
+                                cleaned = cleaned[3:]
+                            if cleaned.endswith("```"):
+                                cleaned = cleaned[:-3]
+                            cleaned = cleaned.strip()
+                            if cleaned and cleaned not in candidates:
+                                candidates.append(cleaned)
+
+                            parse_succeeded = False
+                            for cand in candidates:
+                                try:
+                                    final_result = output_pydantic.model_validate_json(cand)
+                                    parse_succeeded = True
+                                    logger.info(f"[SUCCESS] JSON repair succeeded for Responses output")
+                                    break
+                                except Exception:
+                                    continue
+
+                            if not parse_succeeded:
+                                logger.error(f"[ERROR] All JSON repair attempts failed for Responses output. Original error: {e}")
+                                final_result = f"ERROR: Failed to parse structured output from Responses API. Raw: {repr(result_text)[:200]}"
+                    else:
+                        final_result = result_text
+
+                    if verbose:
+                        logger.info(f"[RESULT] Model Used: {model_name_litellm}")
+                        logger.info(f"[RESULT] Total Cost (estimated): ${total_cost:.6g}")
+
+                    return {
+                        'result': final_result,
+                        'cost': total_cost,
+                        'model_name': model_name_litellm,
+                        'thinking_output': None,
+                    }
+                except Exception as e:
+                    last_exception = e
+                    logger.error(f"[ERROR] OpenAI Responses call failed for {model_name_litellm}: {e}")
+                    # Remove 'reasoning' key to avoid OpenAI Chat API unknown param errors
+                    if "reasoning" in litellm_kwargs:
+                        try:
+                            litellm_kwargs.pop("reasoning", None)
+                        except Exception:
+                            pass
+                    # Fall through to LiteLLM path as a fallback
+
             if use_batch_mode:
                 if verbose:
                     logger.info(f"[INFO] Calling litellm.batch_completion for {model_name_litellm}...")
@@ -1004,6 +2263,16 @@ def llm_invoke(


             else:
+                # Anthropic requirement: when 'thinking' is enabled, temperature must be 1
+                try:
+                    if provider.lower() == 'anthropic' and 'thinking' in litellm_kwargs:
+                        if litellm_kwargs.get('temperature') != 1:
+                            if verbose:
+                                logger.info("[INFO] Anthropic thinking enabled: forcing temperature=1 for compliance.")
+                            litellm_kwargs['temperature'] = 1
+                            current_temperature = 1
+                except Exception:
+                    pass
                 if verbose:
                     logger.info(f"[INFO] Calling litellm.completion for {model_name_litellm}...")
                 response = litellm.completion(**litellm_kwargs)
@@ -1013,6 +2282,12 @@ def llm_invoke(
             if verbose:
                 logger.info(f"[SUCCESS] Invocation successful for {model_name_litellm} (took {end_time - start_time:.2f}s)")

+            # Build retry kwargs with provider credentials from litellm_kwargs
+            # Issue #185: Retry calls were missing vertex_location, vertex_project, etc.
+            retry_provider_kwargs = {k: v for k, v in litellm_kwargs.items()
+                                     if k in ('vertex_credentials', 'vertex_project', 'vertex_location',
+                                              'api_key', 'base_url', 'api_base')}
+
             # --- 7. Process Response ---
             results = []
             thinking_outputs = []
@@ -1061,13 +2336,13 @@ def llm_invoke(
                         retry_response = litellm.completion(
                             model=model_name_litellm,
                             messages=retry_messages,
-                            temperature=
+                            temperature=current_temperature,
                             response_format=response_format,
-
-                            **
+                            **time_kwargs,
+                            **retry_provider_kwargs  # Issue #185: Pass Vertex AI credentials
                         )
-                        # Re-enable cache
-                        litellm.cache =
+                        # Re-enable cache - restore original configured cache (restore to original state, even if None)
+                        litellm.cache = configured_cache
                         # Extract result from retry
                         retry_raw_result = retry_response.choices[0].message.content
                        if retry_raw_result is not None:
@@ -1085,21 +2360,67 @@ def llm_invoke(
                        logger.error(f"[ERROR] Cannot retry - batch mode or missing prompt/input_json")
                        results.append("ERROR: LLM returned None content and cannot retry")
                        continue
-
-
+
+                # Check for malformed JSON response (excessive trailing newlines causing truncation)
+                # This can happen when Gemini generates thousands of \n in JSON string values
+                if isinstance(raw_result, str) and _is_malformed_json_response(raw_result):
+                    logger.warning(f"[WARNING] Detected malformed JSON response with excessive trailing newlines for item {i}. Retrying with cache bypass...")
+                    if not use_batch_mode and prompt and input_json is not None:
+                        # Add a small space to bypass cache
+                        modified_prompt = prompt + " "
+                        try:
+                            retry_messages = _format_messages(modified_prompt, input_json, use_batch_mode)
+                            # Disable cache for retry
+                            original_cache = litellm.cache
+                            litellm.cache = None
+                            retry_response = litellm.completion(
+                                model=model_name_litellm,
+                                messages=retry_messages,
+                                temperature=current_temperature,
+                                response_format=response_format,
+                                **time_kwargs,
+                                **retry_provider_kwargs  # Issue #185: Pass Vertex AI credentials
+                            )
+                            # Re-enable cache
+                            litellm.cache = original_cache
+                            # Extract result from retry
+                            retry_raw_result = retry_response.choices[0].message.content
+                            if retry_raw_result is not None and not _is_malformed_json_response(retry_raw_result):
+                                logger.info(f"[SUCCESS] Cache bypass retry for malformed JSON succeeded for item {i}")
+                                raw_result = retry_raw_result
+                            else:
+                                # Retry also failed, but we'll continue with repair logic below
+                                logger.warning(f"[WARNING] Cache bypass retry also returned malformed JSON for item {i}, attempting repair...")
+                        except Exception as retry_e:
+                            logger.warning(f"[WARNING] Cache bypass retry for malformed JSON failed for item {i}: {retry_e}, attempting repair...")
+                    else:
+                        logger.warning(f"[WARNING] Cannot retry malformed JSON - batch mode or missing prompt/input_json, attempting repair...")
+
+                if output_pydantic or output_schema:
                    parsed_result = None
                    json_string_to_parse = None

                    try:
-                        # Attempt 1: Check if LiteLLM already parsed it
-                        if isinstance(raw_result, output_pydantic):
+                        # Attempt 1: Check if LiteLLM already parsed it (only for Pydantic)
+                        if output_pydantic and isinstance(raw_result, output_pydantic):
                            parsed_result = raw_result
                            if verbose:
                                logger.debug("[DEBUG] Pydantic object received directly from LiteLLM.")

                        # Attempt 2: Check if raw_result is dict-like and validate
                        elif isinstance(raw_result, dict):
-
+                            if output_pydantic:
+                                parsed_result = output_pydantic.model_validate(raw_result)
+                            else:
+                                # Validate against JSON schema
+                                try:
+                                    import jsonschema
+                                    jsonschema.validate(instance=raw_result, schema=output_schema)
+                                    parsed_result = json.dumps(raw_result)  # Return as JSON string for consistency
+                                except ImportError:
+                                    logger.warning("jsonschema not installed, skipping validation")
+                                    parsed_result = json.dumps(raw_result)
+
                            if verbose:
                                logger.debug("[DEBUG] Validated dictionary-like object directly.")

@@ -1107,26 +2428,59 @@ def llm_invoke(
                        elif isinstance(raw_result, str):
                            json_string_to_parse = raw_result  # Start with the raw string
                            try:
-                                #
-
-
-                                if
-
-                                # Basic check if it looks like JSON
-                                if potential_json.strip().startswith('{') and potential_json.strip().endswith('}'):
-                                    if verbose:
-                                        logger.debug(f"[DEBUG] Attempting to parse extracted JSON block: '{potential_json}'")
-                                    parsed_result = output_pydantic.model_validate_json(potential_json)
-                                else:
-                                    # If block extraction fails, try cleaning markdown next
-                                    raise ValueError("Extracted block doesn't look like JSON")
+                                # 1) Prefer fenced ```json blocks
+                                fenced = _extract_fenced_json_block(raw_result)
+                                candidates: List[str] = []
+                                if fenced:
+                                    candidates.append(fenced)
                                else:
-
-
-
+                                    # 2) Fall back to scanning for balanced JSON objects
+                                    candidates.extend(_extract_balanced_json_objects(raw_result))
+
+                                if not candidates:
+                                    raise ValueError("No JSON-like content found")
+
+                                parse_err: Optional[Exception] = None
+                                for cand in candidates:
+                                    try:
+                                        if verbose:
+                                            logger.debug(f"[DEBUG] Attempting to parse candidate JSON block: {cand}")
+
+                                        if output_pydantic:
+                                            parsed_result = output_pydantic.model_validate_json(cand)
+                                        else:
+                                            # Parse JSON and validate against schema
+                                            loaded = json.loads(cand)
+                                            try:
+                                                import jsonschema
+                                                jsonschema.validate(instance=loaded, schema=output_schema)
+                                            except ImportError:
+                                                pass  # Skip validation if lib missing
+                                            parsed_result = cand  # Return string if valid
+
+                                        json_string_to_parse = cand
+                                        parse_err = None
+                                        break
+                                    except (json.JSONDecodeError, ValidationError, ValueError) as pe:
+                                        # Also catch jsonschema.ValidationError if imported
+                                        parse_err = pe
+                                        try:
+                                            import jsonschema
+                                            if isinstance(pe, jsonschema.ValidationError):
+                                                parse_err = pe
+                                        except ImportError:
+                                            pass
+
+                                if parsed_result is None:
+                                    # If none of the candidates parsed, raise last error
+                                    if parse_err is not None:
+                                        raise parse_err
+                                    raise ValueError("Unable to parse any JSON candidates")
+                            except (json.JSONDecodeError, ValidationError, ValueError, Exception) as extraction_error:
+                                # Catch generic Exception to handle jsonschema errors without explicit import here
                                if verbose:
-                                    logger.debug(f"[DEBUG] JSON
-                                #
+                                    logger.debug(f"[DEBUG] JSON extraction/validation failed ('{extraction_error}'). Trying fence cleaning.")
+                                # Last resort: strip any leading/trailing code fences and retry
                                cleaned_result_str = raw_result.strip()
                                if cleaned_result_str.startswith("```json"):
                                    cleaned_result_str = cleaned_result_str[7:]
@@ -1135,35 +2489,173 @@ def llm_invoke(
                                if cleaned_result_str.endswith("```"):
                                    cleaned_result_str = cleaned_result_str[:-3]
                                cleaned_result_str = cleaned_result_str.strip()
-                                # Check
-
+                                # Check for complete JSON object or array
+                                is_complete_object = cleaned_result_str.startswith('{') and cleaned_result_str.endswith('}')
+                                is_complete_array = cleaned_result_str.startswith('[') and cleaned_result_str.endswith(']')
+                                if is_complete_object or is_complete_array:
+                                    if verbose:
+                                        logger.debug(f"[DEBUG] Attempting parse after generic fence cleaning. Cleaned string: '{cleaned_result_str}'")
+                                    json_string_to_parse = cleaned_result_str
+
+                                    if output_pydantic:
+                                        parsed_result = output_pydantic.model_validate_json(json_string_to_parse)
+                                    else:
+                                        loaded = json.loads(json_string_to_parse)
+                                        try:
+                                            import jsonschema
+                                            jsonschema.validate(instance=loaded, schema=output_schema)
+                                        except ImportError:
+                                            pass
+                                        parsed_result = json_string_to_parse
+                                elif cleaned_result_str.startswith('{') or cleaned_result_str.startswith('['):
+                                    # Attempt to repair truncated JSON (e.g., missing closing braces)
+                                    # This can happen when Gemini generates excessive trailing content
+                                    # that causes token limit truncation
                                    if verbose:
-                                        logger.debug(f"[DEBUG]
-
-
+                                        logger.debug(f"[DEBUG] JSON appears truncated (missing closing brace). Attempting repair.")
+
+                                    # Try to find the last valid JSON structure
+                                    # For simple schemas like {"extracted_code": "..."}, we can try to close it
+                                    repaired = cleaned_result_str.rstrip()
+
+                                    # Strip trailing escaped newline sequences (\\n in the JSON string)
+                                    # These appear as literal backslash-n when Gemini generates excessive newlines
+                                    while repaired.endswith('\\n'):
+                                        repaired = repaired[:-2]
+                                    # Also strip trailing literal backslashes that might be orphaned
+                                    repaired = repaired.rstrip('\\')
+
+                                    # If we're in the middle of a string value, try to close it
+                                    # Count unescaped quotes to determine if we're inside a string
+                                    # Simple heuristic: if it ends without proper closure, add closing
+                                    is_array = cleaned_result_str.startswith('[')
+                                    expected_end = ']' if is_array else '}'
+                                    if not repaired.endswith(expected_end):
+                                        # Try adding various closures to repair
+                                        if is_array:
+                                            repair_attempts = [
+                                                repaired + '}]',  # Close object and array
+                                                repaired + '"}]',  # Close string, object and array
+                                                repaired + '"}}]',  # Close string, nested object and array
+                                                repaired.rstrip(',') + ']',  # Remove trailing comma and close array
+                                                repaired.rstrip('"') + '"}]',  # Handle partial string end
+                                            ]
+                                        else:
+                                            repair_attempts = [
+                                                repaired + '"}',  # Close string and object
+                                                repaired + '"}\n}',  # Close string and nested object
+                                                repaired + '"}}}',  # Deeper nesting
+                                                repaired.rstrip(',') + '}',  # Remove trailing comma
+                                                repaired.rstrip('"') + '"}',  # Handle partial string end
+                                            ]
+
+                                        for attempt in repair_attempts:
+                                            try:
+                                                if output_pydantic:
+                                                    parsed_result = output_pydantic.model_validate_json(attempt)
+                                                else:
+                                                    loaded = json.loads(attempt)
+                                                    try:
+                                                        import jsonschema
+                                                        jsonschema.validate(instance=loaded, schema=output_schema)
+                                                    except ImportError:
+                                                        pass
+                                                    parsed_result = attempt
+
+                                                if verbose:
+                                                    logger.info(f"[INFO] Successfully repaired truncated JSON response")
+                                                json_string_to_parse = attempt
+                                                break
+                                            except (json.JSONDecodeError, ValidationError, ValueError):
+                                                continue
+
+                                    if parsed_result is None:
+                                        raise ValueError("Content after cleaning doesn't look like JSON (and repair attempts failed)")
                                else:
-
-                                    raise ValueError("Content after cleaning markdown doesn't look like JSON")
+                                    raise ValueError("Content after cleaning doesn't look like JSON")


                        # Check if any parsing attempt succeeded
                        if parsed_result is None:
+                            target_name = output_pydantic.__name__ if output_pydantic else "JSON Schema"
                            # This case should ideally be caught by exceptions above, but as a safeguard:
-                            raise TypeError(f"Raw result type {type(raw_result)} or content could not be validated/parsed against {
+                            raise TypeError(f"Raw result type {type(raw_result)} or content could not be validated/parsed against {target_name}.")

-                    except (ValidationError, json.JSONDecodeError, TypeError, ValueError) as parse_error:
-
+                    except (ValidationError, json.JSONDecodeError, TypeError, ValueError, Exception) as parse_error:
+                        target_name = output_pydantic.__name__ if output_pydantic else "JSON Schema"
+                        logger.error(f"[ERROR] Failed to parse response into {target_name} for item {i}: {parse_error}")
                        # Use the string that was last attempted for parsing in the error message
                        error_content = json_string_to_parse if json_string_to_parse is not None else raw_result
-                        logger.error("[ERROR] Content attempted for parsing: %s", repr(error_content))
-
-                        continue
+                        logger.error("[ERROR] Content attempted for parsing: %s", repr(error_content))
+                        # Issue #168: Raise SchemaValidationError to trigger model fallback
+                        # Previously this used `continue` which only skipped to the next batch item
+                        raise SchemaValidationError(
+                            f"Failed to parse response into {target_name}: {parse_error}",
+                            raw_response=raw_result,
+                            item_index=i
+                        ) from parse_error
+
+                    # Post-process: unescape newlines and repair Python syntax
+                    _unescape_code_newlines(parsed_result)
+
+                    # Check if code fields still have invalid Python syntax after repair
+                    # If so, retry without cache to get a fresh response
+                    # Skip validation for non-Python languages to avoid false positives
+                    if language in (None, "python") and _has_invalid_python_code(parsed_result):
+                        logger.warning(f"[WARNING] Detected invalid Python syntax in code fields for item {i} after repair. Retrying with cache bypass...")
+                        if not use_batch_mode and prompt and input_json is not None:
+                            # Add a small variation to bypass cache
+                            modified_prompt = prompt + "  "  # Two spaces to differentiate from other retries
+                            try:
+                                retry_messages = _format_messages(modified_prompt, input_json, use_batch_mode)
+                                # Disable cache for retry
+                                original_cache = litellm.cache
+                                litellm.cache = None
+                                retry_response = litellm.completion(
+                                    model=model_name_litellm,
+                                    messages=retry_messages,
+                                    temperature=current_temperature,
+                                    response_format=response_format,
+                                    **time_kwargs,
+                                    **retry_provider_kwargs  # Issue #185: Pass Vertex AI credentials
+                                )
+                                # Re-enable cache
+                                litellm.cache = original_cache
+                                # Extract and re-parse the retry result
+                                retry_raw_result = retry_response.choices[0].message.content
+                                if retry_raw_result is not None:
+                                    # Re-parse the retry result
+                                    retry_parsed = None
+                                    if output_pydantic:
+                                        if isinstance(retry_raw_result, output_pydantic):
+                                            retry_parsed = retry_raw_result
+                                        elif isinstance(retry_raw_result, dict):
+                                            retry_parsed = output_pydantic.model_validate(retry_raw_result)
+                                        elif isinstance(retry_raw_result, str):
+                                            retry_parsed = output_pydantic.model_validate_json(retry_raw_result)
+                                    elif output_schema and isinstance(retry_raw_result, str):
+                                        retry_parsed = retry_raw_result  # Keep as string for schema validation
+
+                                    if retry_parsed is not None:
+                                        _unescape_code_newlines(retry_parsed)
+                                        if not _has_invalid_python_code(retry_parsed):
+                                            logger.info(f"[SUCCESS] Cache bypass retry for invalid Python code succeeded for item {i}")
+                                            parsed_result = retry_parsed
+                                        else:
+                                            logger.warning(f"[WARNING] Cache bypass retry still has invalid Python code for item {i}, using original")
+                                    else:
+                                        logger.warning(f"[WARNING] Cache bypass retry returned unparseable result for item {i}")
+                                else:
+                                    logger.warning(f"[WARNING] Cache bypass retry returned None for item {i}")
+                            except Exception as retry_e:
+                                logger.warning(f"[WARNING] Cache bypass retry for invalid Python code failed for item {i}: {retry_e}")
+                        else:
+                            logger.warning(f"[WARNING] Cannot retry invalid Python code - batch mode or missing prompt/input_json")

-                    # If parsing succeeded, append the parsed_result
                    results.append(parsed_result)

                else:
-                    # If output_pydantic was not requested, append the raw result
+                    # If output_pydantic/schema was not requested, append the raw result
                    results.append(raw_result)

            except (AttributeError, IndexError) as e:
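A minimal sketch of the truncation-repair idea used above (append plausible closers until one parses); the key name is hypothetical:

```python
import json

truncated = '{"extracted_code": "print(1)'   # response cut off mid-string
for attempt in (truncated + '"}', truncated + '"}}', truncated.rstrip(',') + '}'):
    try:
        repaired = json.loads(attempt)
        break
    except json.JSONDecodeError:
        continue
print(repaired)   # {'extracted_code': 'print(1)'}
```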
|
|
@@ -1241,15 +2733,53 @@ def llm_invoke(
                 logger.warning(f"[AUTH ERROR] Authentication failed for {model_name_litellm} using existing key '{api_key_name}'. Trying next model.")
                 break # Break inner loop, try next model candidate
 
+            except SchemaValidationError as e:
+                # Issue #168: Schema validation failures now trigger model fallback
+                last_exception = e
+                logger.warning(f"[SCHEMA ERROR] Validation failed for {model_name_litellm}: {e}. Trying next model.")
+                if verbose:
+                    logger.debug(f"Raw response that failed validation: {repr(e.raw_response)}")
+                break # Break inner loop, try next model candidate
+
             except (openai.RateLimitError, openai.APITimeoutError, openai.APIConnectionError,
                     openai.APIStatusError, openai.BadRequestError, openai.InternalServerError,
                     Exception) as e: # Catch generic Exception last
                 last_exception = e
                 error_type = type(e).__name__
+                error_str = str(e)
+
+                # Provider-specific handling for Anthropic temperature + thinking rules.
+                # Two scenarios we auto-correct:
+                # 1) temperature==1 without thinking -> retry with 0.99
+                # 2) thinking enabled but temperature!=1 -> retry with 1
+                lower_err = error_str.lower()
+                if (not temp_adjustment_done) and ("temperature" in lower_err) and ("thinking" in lower_err):
+                    anthropic_thinking_sent = ('thinking' in litellm_kwargs) and (provider.lower() == 'anthropic')
+                    # Decide direction of adjustment based on whether thinking was enabled in the call
+                    if anthropic_thinking_sent:
+                        # thinking enabled -> force temperature=1
+                        adjusted_temp = 1
+                        logger.warning(
+                            f"[WARN] {model_name_litellm}: Anthropic with thinking requires temperature=1. "
+                            f"Retrying with temperature={adjusted_temp}."
+                        )
+                    else:
+                        # thinking not enabled -> avoid temperature=1
+                        adjusted_temp = 0.99
+                        logger.warning(
+                            f"[WARN] {model_name_litellm}: Provider rejected temperature=1 without thinking. "
+                            f"Retrying with temperature={adjusted_temp}."
+                        )
+                    current_temperature = adjusted_temp
+                    temp_adjustment_done = True
+                    retry_with_same_model = True
+                    if verbose:
+                        logger.debug(f"Retrying {model_name_litellm} with adjusted temperature {current_temperature}")
+                    continue
+
                 logger.error(f"[ERROR] Invocation failed for {model_name_litellm} ({error_type}): {e}. Trying next model.")
                 # Log more details in verbose mode
                 if verbose:
-                    # import traceback # Not needed if using exc_info=True
                     logger.debug(f"Detailed exception traceback for {model_name_litellm}:", exc_info=True)
                 break # Break inner loop, try next model candidate
 
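Two behaviors introduced in this hunk are worth calling out. First, the `SchemaValidationError` handler reads `e.raw_response`, so the exception is expected to carry the offending model output. A minimal sketch of an exception with that shape (the real class is defined elsewhere in the package and may carry more fields):

```python
from typing import Optional


class SchemaValidationError(Exception):
    """Raised when a model response fails output-schema validation.

    Sketch of the shape the handler above relies on; only raw_response
    is assumed here.
    """

    def __init__(self, message: str, raw_response: Optional[str] = None):
        super().__init__(message)
        self.raw_response = raw_response
```

Second, the Anthropic temperature/thinking auto-correction reduces to a small rule: when the provider error mentions both "temperature" and "thinking" and no adjustment has been made yet, retry with temperature 1 if thinking was sent, otherwise 0.99. A sketch of that rule factored into a pure helper (names are illustrative, not the module's API):

```python
from typing import Optional


def adjusted_temperature(error_message: str,
                         thinking_enabled: bool,
                         already_adjusted: bool) -> Optional[float]:
    """Return the corrected temperature, or None if no retry should happen."""
    lower_err = error_message.lower()
    if already_adjusted or "temperature" not in lower_err or "thinking" not in lower_err:
        return None
    # Anthropic requires temperature=1 with extended thinking; without
    # thinking, a rejected temperature=1 is nudged down to 0.99.
    return 1.0 if thinking_enabled else 0.99
```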
@@ -1277,7 +2807,7 @@ if __name__ == "__main__":
     response = llm_invoke(
         prompt="Tell me a short joke about {topic}.",
         input_json={"topic": "programmers"},
-        strength=0.5, # Use base model (gpt-
+        strength=0.5, # Use base model (gpt-5-nano)
         temperature=0.7,
         verbose=True
     )
@@ -1358,7 +2888,7 @@ if __name__ == "__main__":
         {"role": "system", "content": "You are a helpful assistant."},
         {"role": "user", "content": "What is the capital of France?"}
     ]
-    # Strength 0.5 should select gpt-
+    # Strength 0.5 should select gpt-5-nano
     response_messages = llm_invoke(
         messages=custom_messages,
         strength=0.5,