pdd_cli-0.0.45-py3-none-any.whl → pdd_cli-0.0.118-py3-none-any.whl

This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Files changed (195)
  1. pdd/__init__.py +40 -8
  2. pdd/agentic_bug.py +323 -0
  3. pdd/agentic_bug_orchestrator.py +497 -0
  4. pdd/agentic_change.py +231 -0
  5. pdd/agentic_change_orchestrator.py +526 -0
  6. pdd/agentic_common.py +598 -0
  7. pdd/agentic_crash.py +534 -0
  8. pdd/agentic_e2e_fix.py +319 -0
  9. pdd/agentic_e2e_fix_orchestrator.py +426 -0
  10. pdd/agentic_fix.py +1294 -0
  11. pdd/agentic_langtest.py +162 -0
  12. pdd/agentic_update.py +387 -0
  13. pdd/agentic_verify.py +183 -0
  14. pdd/architecture_sync.py +565 -0
  15. pdd/auth_service.py +210 -0
  16. pdd/auto_deps_main.py +71 -51
  17. pdd/auto_include.py +245 -5
  18. pdd/auto_update.py +125 -47
  19. pdd/bug_main.py +196 -23
  20. pdd/bug_to_unit_test.py +2 -0
  21. pdd/change_main.py +11 -4
  22. pdd/cli.py +22 -1181
  23. pdd/cmd_test_main.py +350 -150
  24. pdd/code_generator.py +60 -18
  25. pdd/code_generator_main.py +790 -57
  26. pdd/commands/__init__.py +48 -0
  27. pdd/commands/analysis.py +306 -0
  28. pdd/commands/auth.py +309 -0
  29. pdd/commands/connect.py +290 -0
  30. pdd/commands/fix.py +163 -0
  31. pdd/commands/generate.py +257 -0
  32. pdd/commands/maintenance.py +175 -0
  33. pdd/commands/misc.py +87 -0
  34. pdd/commands/modify.py +256 -0
  35. pdd/commands/report.py +144 -0
  36. pdd/commands/sessions.py +284 -0
  37. pdd/commands/templates.py +215 -0
  38. pdd/commands/utility.py +110 -0
  39. pdd/config_resolution.py +58 -0
  40. pdd/conflicts_main.py +8 -3
  41. pdd/construct_paths.py +589 -111
  42. pdd/context_generator.py +10 -2
  43. pdd/context_generator_main.py +175 -76
  44. pdd/continue_generation.py +53 -10
  45. pdd/core/__init__.py +33 -0
  46. pdd/core/cli.py +527 -0
  47. pdd/core/cloud.py +237 -0
  48. pdd/core/dump.py +554 -0
  49. pdd/core/errors.py +67 -0
  50. pdd/core/remote_session.py +61 -0
  51. pdd/core/utils.py +90 -0
  52. pdd/crash_main.py +262 -33
  53. pdd/data/language_format.csv +71 -63
  54. pdd/data/llm_model.csv +20 -18
  55. pdd/detect_change_main.py +5 -4
  56. pdd/docs/prompting_guide.md +864 -0
  57. pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
  58. pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
  59. pdd/fix_code_loop.py +523 -95
  60. pdd/fix_code_module_errors.py +6 -2
  61. pdd/fix_error_loop.py +491 -92
  62. pdd/fix_errors_from_unit_tests.py +4 -3
  63. pdd/fix_main.py +278 -21
  64. pdd/fix_verification_errors.py +12 -100
  65. pdd/fix_verification_errors_loop.py +529 -286
  66. pdd/fix_verification_main.py +294 -89
  67. pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
  68. pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
  69. pdd/frontend/dist/index.html +376 -0
  70. pdd/frontend/dist/logo.svg +33 -0
  71. pdd/generate_output_paths.py +139 -15
  72. pdd/generate_test.py +218 -146
  73. pdd/get_comment.py +19 -44
  74. pdd/get_extension.py +8 -9
  75. pdd/get_jwt_token.py +318 -22
  76. pdd/get_language.py +8 -7
  77. pdd/get_run_command.py +75 -0
  78. pdd/get_test_command.py +68 -0
  79. pdd/git_update.py +70 -19
  80. pdd/incremental_code_generator.py +2 -2
  81. pdd/insert_includes.py +13 -4
  82. pdd/llm_invoke.py +1711 -181
  83. pdd/load_prompt_template.py +19 -12
  84. pdd/path_resolution.py +140 -0
  85. pdd/pdd_completion.fish +25 -2
  86. pdd/pdd_completion.sh +30 -4
  87. pdd/pdd_completion.zsh +79 -4
  88. pdd/postprocess.py +14 -4
  89. pdd/preprocess.py +293 -24
  90. pdd/preprocess_main.py +41 -6
  91. pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
  92. pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
  93. pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
  94. pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
  95. pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
  96. pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
  97. pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
  98. pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
  99. pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
  100. pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
  101. pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
  102. pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
  103. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
  104. pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
  105. pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
  106. pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
  107. pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
  108. pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
  109. pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
  110. pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
  111. pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
  112. pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
  113. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  114. pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
  115. pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
  116. pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
  117. pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
  118. pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
  119. pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
  120. pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
  121. pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
  122. pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
  123. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  124. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  125. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  126. pdd/prompts/agentic_update_LLM.prompt +925 -0
  127. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  128. pdd/prompts/auto_include_LLM.prompt +122 -905
  129. pdd/prompts/change_LLM.prompt +3093 -1
  130. pdd/prompts/detect_change_LLM.prompt +686 -27
  131. pdd/prompts/example_generator_LLM.prompt +22 -1
  132. pdd/prompts/extract_code_LLM.prompt +5 -1
  133. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  134. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  135. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  136. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  137. pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
  138. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
  139. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  140. pdd/prompts/generate_test_LLM.prompt +41 -7
  141. pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
  142. pdd/prompts/increase_tests_LLM.prompt +1 -5
  143. pdd/prompts/insert_includes_LLM.prompt +316 -186
  144. pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
  145. pdd/prompts/prompt_diff_LLM.prompt +82 -0
  146. pdd/prompts/trace_LLM.prompt +25 -22
  147. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  148. pdd/prompts/update_prompt_LLM.prompt +22 -1
  149. pdd/pytest_output.py +127 -12
  150. pdd/remote_session.py +876 -0
  151. pdd/render_mermaid.py +236 -0
  152. pdd/server/__init__.py +52 -0
  153. pdd/server/app.py +335 -0
  154. pdd/server/click_executor.py +587 -0
  155. pdd/server/executor.py +338 -0
  156. pdd/server/jobs.py +661 -0
  157. pdd/server/models.py +241 -0
  158. pdd/server/routes/__init__.py +31 -0
  159. pdd/server/routes/architecture.py +451 -0
  160. pdd/server/routes/auth.py +364 -0
  161. pdd/server/routes/commands.py +929 -0
  162. pdd/server/routes/config.py +42 -0
  163. pdd/server/routes/files.py +603 -0
  164. pdd/server/routes/prompts.py +1322 -0
  165. pdd/server/routes/websocket.py +473 -0
  166. pdd/server/security.py +243 -0
  167. pdd/server/terminal_spawner.py +209 -0
  168. pdd/server/token_counter.py +222 -0
  169. pdd/setup_tool.py +648 -0
  170. pdd/simple_math.py +2 -0
  171. pdd/split_main.py +3 -2
  172. pdd/summarize_directory.py +237 -195
  173. pdd/sync_animation.py +8 -4
  174. pdd/sync_determine_operation.py +839 -112
  175. pdd/sync_main.py +351 -57
  176. pdd/sync_orchestration.py +1400 -756
  177. pdd/sync_tui.py +848 -0
  178. pdd/template_expander.py +161 -0
  179. pdd/template_registry.py +264 -0
  180. pdd/templates/architecture/architecture_json.prompt +237 -0
  181. pdd/templates/generic/generate_prompt.prompt +174 -0
  182. pdd/trace.py +168 -12
  183. pdd/trace_main.py +4 -3
  184. pdd/track_cost.py +140 -63
  185. pdd/unfinished_prompt.py +51 -4
  186. pdd/update_main.py +567 -67
  187. pdd/update_model_costs.py +2 -2
  188. pdd/update_prompt.py +19 -4
  189. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
  190. pdd_cli-0.0.118.dist-info/RECORD +227 -0
  191. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
  192. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  193. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
  194. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
  195. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
pdd/llm_invoke.py CHANGED
@@ -5,6 +5,8 @@ import os
5
5
  import pandas as pd
6
6
  import litellm
7
7
  import logging # ADDED FOR DETAILED LOGGING
8
+ import importlib.resources
9
+ from litellm.caching.caching import Cache # Fix for LiteLLM v1.75.5+
8
10
 
9
11
  # --- Configure Standard Python Logging ---
10
12
  logger = logging.getLogger("pdd.llm_invoke")
@@ -24,6 +26,16 @@ litellm_logger = logging.getLogger("litellm")
24
26
  litellm_log_level = os.getenv("LITELLM_LOG_LEVEL", "WARNING" if PRODUCTION_MODE else "INFO")
25
27
  litellm_logger.setLevel(getattr(logging, litellm_log_level, logging.WARNING))
26
28
 
29
+ # Ensure LiteLLM drops provider-unsupported params instead of erroring
30
+ # This prevents failures like UnsupportedParamsError for OpenAI gpt-5-* when
31
+ # passing generic params (e.g., reasoning_effort) not accepted by that API path.
32
+ try:
33
+ _drop_params_env = os.getenv("LITELLM_DROP_PARAMS", "true")
34
+ litellm.drop_params = str(_drop_params_env).lower() in ("1", "true", "yes", "on")
35
+ except Exception:
36
+ # Be conservative: default to True even if env parsing fails
37
+ litellm.drop_params = True
38
+
27
39
  # Add a console handler if none exists
28
40
  if not logger.handlers:
29
41
  console_handler = logging.StreamHandler()
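Illustrative sketch (not part of the diff): how the LITELLM_DROP_PARAMS toggle added above is interpreted. The env var name and the accepted truthy values come from the hunk; the print is only for demonstration.

import os
import litellm

raw = os.getenv("LITELLM_DROP_PARAMS", "true")          # default: drop unsupported params
litellm.drop_params = str(raw).lower() in ("1", "true", "yes", "on")
print(litellm.drop_params)                               # True unless set to a falsy value like "0"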
@@ -69,14 +81,14 @@ import json
69
81
  # from rich import print as rprint # Replaced with logger
70
82
  from dotenv import load_dotenv
71
83
  from pathlib import Path
72
- from typing import Optional, Dict, List, Any, Type, Union
84
+ from typing import Optional, Dict, List, Any, Type, Union, Tuple
73
85
  from pydantic import BaseModel, ValidationError
74
86
  import openai # Import openai for exception handling as LiteLLM maps to its types
75
- from langchain_core.prompts import PromptTemplate
76
87
  import warnings
77
88
  import time as time_module # Alias to avoid conflict with 'time' parameter
78
89
  # Import the default model constant
79
90
  from pdd import DEFAULT_LLM_MODEL
91
+ from pdd.path_resolution import get_default_resolver
80
92
 
81
93
  # Opt-in to future pandas behavior regarding downcasting
82
94
  try:
@@ -86,6 +98,242 @@ except pd._config.config.OptionError:
86
98
  pass
87
99
 
88
100
 
101
+ # --- Custom Exceptions ---
102
+
103
+ class SchemaValidationError(Exception):
104
+ """Raised when LLM response fails Pydantic/JSON schema validation.
105
+
106
+ This exception triggers model fallback when caught at the outer exception
107
+ handler level, allowing the next candidate model to be tried.
108
+
109
+ Issue #168: Previously, validation errors only logged an error and continued
110
+ to the next batch item, never triggering model fallback.
111
+ """
112
+
113
+ def __init__(self, message: str, raw_response: Any = None, item_index: int = 0):
114
+ super().__init__(message)
115
+ self.raw_response = raw_response
116
+ self.item_index = item_index
117
+
118
+
119
+ class CloudFallbackError(Exception):
120
+ """Raised when cloud execution fails and should fall back to local.
121
+
122
+ This exception is caught internally and triggers fallback to local execution
123
+ when cloud is unavailable (network errors, timeouts, auth failures).
124
+ """
125
+ pass
126
+
127
+
128
+ class CloudInvocationError(Exception):
129
+ """Raised when cloud invocation fails with a non-recoverable error.
130
+
131
+ This exception indicates a cloud error that should not fall back to local,
132
+ such as validation errors returned by the cloud endpoint.
133
+ """
134
+ pass
135
+
136
+
137
+ class InsufficientCreditsError(Exception):
138
+ """Raised when user has insufficient credits for cloud execution.
139
+
140
+ This exception is raised when the cloud returns 402 (Payment Required)
141
+ and should NOT fall back to local execution - the user needs to know.
142
+ """
143
+ pass
144
+
145
+
146
+ # --- Cloud Execution Helpers ---
147
+
148
+ def _ensure_all_properties_required(schema: Dict[str, Any]) -> Dict[str, Any]:
149
+ """Ensure ALL properties are in the required array (OpenAI strict mode requirement).
150
+
151
+ OpenAI's strict mode requires that all properties in a JSON schema are listed
152
+ in the 'required' array. Pydantic's model_json_schema() only includes fields
153
+ without default values in 'required', which causes OpenAI to reject the schema.
154
+
155
+ Args:
156
+ schema: A JSON schema dictionary
157
+
158
+ Returns:
159
+ The schema with all properties added to 'required'
160
+ """
161
+ if 'properties' in schema:
162
+ schema['required'] = list(schema['properties'].keys())
163
+ return schema
164
+
165
+
166
+ def _pydantic_to_json_schema(pydantic_class: Type[BaseModel]) -> Dict[str, Any]:
167
+ """Convert a Pydantic model class to JSON Schema for cloud transport.
168
+
169
+ Args:
170
+ pydantic_class: A Pydantic BaseModel subclass
171
+
172
+ Returns:
173
+ JSON Schema dictionary that can be serialized and sent to cloud
174
+ """
175
+ schema = pydantic_class.model_json_schema()
176
+ # Ensure all properties are in required array (OpenAI strict mode requirement)
177
+ _ensure_all_properties_required(schema)
178
+ # Include class name for debugging/logging purposes
179
+ schema['__pydantic_class_name__'] = pydantic_class.__name__
180
+ return schema
181
+
182
+
183
+ def _validate_with_pydantic(
184
+ result: Any,
185
+ pydantic_class: Type[BaseModel]
186
+ ) -> BaseModel:
187
+ """Validate cloud response using original Pydantic class.
188
+
189
+ Args:
190
+ result: The result from cloud (dict or JSON string)
191
+ pydantic_class: The Pydantic model to validate against
192
+
193
+ Returns:
194
+ Validated Pydantic model instance
195
+
196
+ Raises:
197
+ ValidationError: If validation fails
198
+ """
199
+ if isinstance(result, dict):
200
+ return pydantic_class.model_validate(result)
201
+ elif isinstance(result, str):
202
+ return pydantic_class.model_validate_json(result)
203
+ elif isinstance(result, pydantic_class):
204
+ # Already validated
205
+ return result
206
+ raise ValueError(f"Cannot validate result type {type(result)} with Pydantic model")
207
+
208
+
209
+ def _llm_invoke_cloud(
210
+ prompt: Optional[str],
211
+ input_json: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]],
212
+ strength: float,
213
+ temperature: float,
214
+ verbose: bool,
215
+ output_pydantic: Optional[Type[BaseModel]],
216
+ output_schema: Optional[Dict[str, Any]],
217
+ time: float,
218
+ use_batch_mode: bool,
219
+ messages: Optional[Union[List[Dict[str, str]], List[List[Dict[str, str]]]]],
220
+ language: Optional[str],
221
+ ) -> Dict[str, Any]:
222
+ """Execute llm_invoke via cloud endpoint.
223
+
224
+ Args:
225
+ All parameters match llm_invoke signature
226
+
227
+ Returns:
228
+ Dictionary with 'result', 'cost', 'model_name', 'thinking_output'
229
+
230
+ Raises:
231
+ CloudFallbackError: For recoverable errors (network, timeout, auth)
232
+ InsufficientCreditsError: For 402 Payment Required
233
+ CloudInvocationError: For non-recoverable cloud errors
234
+ """
235
+ import requests
236
+ from rich.console import Console
237
+
238
+ # Lazy import to avoid circular dependency
239
+ from pdd.core.cloud import CloudConfig
240
+
241
+ console = Console()
242
+ CLOUD_TIMEOUT = 300 # 5 minutes
243
+
244
+ # Get JWT token
245
+ jwt_token = CloudConfig.get_jwt_token(verbose=verbose)
246
+ if not jwt_token:
247
+ raise CloudFallbackError("Could not authenticate with cloud")
248
+
249
+ # Prepare payload
250
+ payload: Dict[str, Any] = {
251
+ "strength": strength,
252
+ "temperature": temperature,
253
+ "time": time,
254
+ "verbose": verbose,
255
+ "useBatchMode": use_batch_mode,
256
+ }
257
+
258
+ if language:
259
+ payload["language"] = language
260
+
261
+ # Add prompt/messages
262
+ if messages:
263
+ payload["messages"] = messages
264
+ else:
265
+ payload["prompt"] = prompt
266
+ payload["inputJson"] = input_json
267
+
268
+ # Handle output schema
269
+ if output_pydantic:
270
+ payload["outputSchema"] = _pydantic_to_json_schema(output_pydantic)
271
+ elif output_schema:
272
+ payload["outputSchema"] = output_schema
273
+
274
+ # Make request
275
+ headers = {
276
+ "Authorization": f"Bearer {jwt_token}",
277
+ "Content-Type": "application/json"
278
+ }
279
+
280
+ cloud_url = CloudConfig.get_endpoint_url("llmInvoke")
281
+
282
+ if verbose:
283
+ logger.debug(f"Cloud llm_invoke request to: {cloud_url}")
284
+
285
+ try:
286
+ response = requests.post(
287
+ cloud_url,
288
+ json=payload,
289
+ headers=headers,
290
+ timeout=CLOUD_TIMEOUT
291
+ )
292
+
293
+ if response.status_code == 200:
294
+ data = response.json()
295
+ result = data.get("result")
296
+
297
+ # Validate with Pydantic if specified
298
+ if output_pydantic and result:
299
+ try:
300
+ result = _validate_with_pydantic(result, output_pydantic)
301
+ except (ValidationError, ValueError) as e:
302
+ logger.warning(f"Cloud response validation failed: {e}")
303
+ # Return raw result if validation fails
304
+ pass
305
+
306
+ return {
307
+ "result": result,
308
+ "cost": data.get("totalCost", 0.0),
309
+ "model_name": data.get("modelName", "cloud_model"),
310
+ "thinking_output": data.get("thinkingOutput"),
311
+ }
312
+
313
+ elif response.status_code == 402:
314
+ error_msg = response.json().get("error", "Insufficient credits")
315
+ raise InsufficientCreditsError(error_msg)
316
+
317
+ elif response.status_code in (401, 403):
318
+ error_msg = response.json().get("error", f"Authentication failed ({response.status_code})")
319
+ raise CloudFallbackError(error_msg)
320
+
321
+ elif response.status_code >= 500:
322
+ error_msg = response.json().get("error", f"Server error ({response.status_code})")
323
+ raise CloudFallbackError(error_msg)
324
+
325
+ else:
326
+ error_msg = response.json().get("error", f"HTTP {response.status_code}")
327
+ raise CloudInvocationError(f"Cloud llm_invoke failed: {error_msg}")
328
+
329
+ except requests.exceptions.Timeout:
330
+ raise CloudFallbackError("Cloud request timed out")
331
+ except requests.exceptions.ConnectionError as e:
332
+ raise CloudFallbackError(f"Cloud connection failed: {e}")
333
+ except requests.exceptions.RequestException as e:
334
+ raise CloudFallbackError(f"Cloud request failed: {e}")
335
+
336
+
89
337
  def _is_wsl_environment() -> bool:
90
338
  """
91
339
  Detect if we're running in WSL (Windows Subsystem for Linux) environment.
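A caller-side sketch of the fallback policy implied by the exception classes above; invoke_with_fallback and its arguments are hypothetical names, only the exception types come from the diff.

def invoke_with_fallback(run_cloud, run_local):
    try:
        return run_cloud()
    except InsufficientCreditsError:
        raise                  # 402 Payment Required: surface to the user, never fall back silently
    except CloudFallbackError:
        return run_local()     # network, auth, timeout, or 5xx problems: retry locally
    except CloudInvocationError:
        raise                  # non-recoverable cloud error: do not fall back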
@@ -112,6 +360,22 @@ def _is_wsl_environment() -> bool:
112
360
  return False
113
361
 
114
362
 
363
+ def _openai_responses_supports_response_format() -> bool:
364
+ """Detect if current OpenAI Python SDK supports `response_format` on Responses.create.
365
+
366
+ Returns True if the installed SDK exposes a `response_format` parameter on
367
+ `openai.resources.responses.Responses.create`, else False. This avoids
368
+ sending unsupported kwargs and triggering TypeError at runtime.
369
+ """
370
+ try:
371
+ import inspect
372
+ from openai.resources.responses import Responses
373
+ sig = inspect.signature(Responses.create)
374
+ return "response_format" in sig.parameters
375
+ except Exception:
376
+ return False
377
+
378
+
115
379
  def _get_environment_info() -> Dict[str, str]:
116
380
  """
117
381
  Get environment information for debugging and error reporting.
@@ -142,60 +406,109 @@ def _get_environment_info() -> Dict[str, str]:
142
406
 
143
407
  # --- Constants and Configuration ---
144
408
 
145
- # Determine project root: 1. PDD_PATH env var, 2. Search upwards from script, 3. CWD
146
- PROJECT_ROOT = None
409
+ # Determine project root: use PathResolver to ignore package-root PDD_PATH values.
147
410
  PDD_PATH_ENV = os.getenv("PDD_PATH")
148
-
149
411
  if PDD_PATH_ENV:
150
- _path_from_env = Path(PDD_PATH_ENV)
151
- if _path_from_env.is_dir():
152
- PROJECT_ROOT = _path_from_env.resolve()
153
- logger.debug(f"Using PROJECT_ROOT from PDD_PATH: {PROJECT_ROOT}")
154
- else:
155
- warnings.warn(f"PDD_PATH environment variable ('{PDD_PATH_ENV}') is set but not a valid directory. Attempting auto-detection.")
156
-
157
- if PROJECT_ROOT is None: # If PDD_PATH wasn't set or was invalid
158
412
  try:
159
- # Start from the current working directory (where user is running PDD)
160
- current_dir = Path.cwd().resolve()
161
- # Look for project markers (e.g., .git, pyproject.toml, data/, .env)
162
- # Go up a maximum of 5 levels to prevent infinite loops
163
- for _ in range(5):
164
- has_git = (current_dir / ".git").exists()
165
- has_pyproject = (current_dir / "pyproject.toml").exists()
166
- has_data = (current_dir / "data").is_dir()
167
- has_dotenv = (current_dir / ".env").exists()
168
-
169
- if has_git or has_pyproject or has_data or has_dotenv:
170
- PROJECT_ROOT = current_dir
171
- logger.debug(f"Determined PROJECT_ROOT by marker search from CWD: {PROJECT_ROOT}")
172
- break
173
-
174
- parent_dir = current_dir.parent
175
- if parent_dir == current_dir: # Reached filesystem root
176
- break
177
- current_dir = parent_dir
413
+ _path_from_env = Path(PDD_PATH_ENV).expanduser().resolve()
414
+ if not _path_from_env.is_dir():
415
+ warnings.warn(
416
+ f"PDD_PATH environment variable ('{PDD_PATH_ENV}') is set but not a valid directory. Attempting auto-detection."
417
+ )
418
+ except Exception as e:
419
+ warnings.warn(f"Error validating PDD_PATH environment variable: {e}")
178
420
 
179
- except Exception as e: # Catch potential permission errors etc.
180
- warnings.warn(f"Error during project root auto-detection from current working directory: {e}")
421
+ resolver = get_default_resolver()
422
+ PROJECT_ROOT = resolver.resolve_project_root()
423
+ PROJECT_ROOT_FROM_ENV = resolver.pdd_path_env is not None and PROJECT_ROOT == resolver.pdd_path_env
424
+ logger.debug(f"Using PROJECT_ROOT: {PROJECT_ROOT}")
181
425
 
182
- if PROJECT_ROOT is None: # Fallback to CWD if no method succeeded
183
- PROJECT_ROOT = Path.cwd().resolve()
184
- warnings.warn(f"Could not determine project root automatically. Using current working directory: {PROJECT_ROOT}. Ensure this is the intended root or set the PDD_PATH environment variable.")
185
426
 
427
+ # ENV_PATH is set after _is_env_path_package_dir is defined (see below)
186
428
 
187
- ENV_PATH = PROJECT_ROOT / ".env"
188
429
  # --- Determine LLM_MODEL_CSV_PATH ---
189
- # Prioritize ~/.pdd/llm_model.csv
430
+ # Prioritize ~/.pdd/llm_model.csv, then a project .pdd from the current CWD,
431
+ # then PROJECT_ROOT (which may be set from PDD_PATH), else fall back to package.
190
432
  user_pdd_dir = Path.home() / ".pdd"
191
433
  user_model_csv_path = user_pdd_dir / "llm_model.csv"
192
434
 
435
+ def _detect_project_root_from_cwd(max_levels: int = 5) -> Path:
436
+ """Search upwards from the current working directory for common project markers.
437
+
438
+ This intentionally ignores PDD_PATH to support CLI invocations that set
439
+ PDD_PATH to the installed package location. We want to honor a real project
440
+ checkout's .pdd/llm_model.csv when running inside it.
441
+ """
442
+ try:
443
+ current_dir = Path.cwd().resolve()
444
+ for _ in range(max_levels):
445
+ if (
446
+ (current_dir / ".git").exists()
447
+ or (current_dir / "pyproject.toml").exists()
448
+ or (current_dir / "data").is_dir()
449
+ or (current_dir / ".env").exists()
450
+ ):
451
+ return current_dir
452
+ parent = current_dir.parent
453
+ if parent == current_dir:
454
+ break
455
+ current_dir = parent
456
+ except Exception:
457
+ pass
458
+ return Path.cwd().resolve()
459
+
460
+ # Resolve candidates
461
+ project_root_from_cwd = _detect_project_root_from_cwd()
462
+ project_csv_from_cwd = project_root_from_cwd / ".pdd" / "llm_model.csv"
463
+ project_csv_from_env = PROJECT_ROOT / ".pdd" / "llm_model.csv"
464
+
465
+ # Detect whether PDD_PATH points to the installed package directory. If so,
466
+ # don't prioritize it over the real project from CWD.
467
+ try:
468
+ _installed_pkg_root = importlib.resources.files('pdd')
469
+ # importlib.resources.files returns a Traversable; get a FS path string if possible
470
+ try:
471
+ _installed_pkg_root_path = Path(str(_installed_pkg_root))
472
+ except Exception:
473
+ _installed_pkg_root_path = None
474
+ except Exception:
475
+ _installed_pkg_root_path = None
476
+
477
+ def _is_env_path_package_dir(env_path: Path) -> bool:
478
+ try:
479
+ if _installed_pkg_root_path is None:
480
+ return False
481
+ env_path = env_path.resolve()
482
+ pkg_path = _installed_pkg_root_path.resolve()
483
+ # Treat equal or subpath as package dir
484
+ return env_path == pkg_path or str(env_path).startswith(str(pkg_path))
485
+ except Exception:
486
+ return False
487
+
488
+ # ENV_PATH: Use CWD-based project root when PDD_PATH points to package directory
489
+ # This ensures .env is written to the user's project, not the installed package location
490
+ if _is_env_path_package_dir(PROJECT_ROOT):
491
+ ENV_PATH = project_root_from_cwd / ".env"
492
+ logger.debug(f"PDD_PATH points to package; using ENV_PATH from CWD: {ENV_PATH}")
493
+ else:
494
+ ENV_PATH = PROJECT_ROOT / ".env"
495
+
496
+ # Selection order
193
497
  if user_model_csv_path.is_file():
194
498
  LLM_MODEL_CSV_PATH = user_model_csv_path
195
499
  logger.info(f"Using user-specific LLM model CSV: {LLM_MODEL_CSV_PATH}")
500
+ elif PROJECT_ROOT_FROM_ENV and project_csv_from_env.is_file():
501
+ # Honor an explicitly-set PDD_PATH pointing to a real project directory
502
+ LLM_MODEL_CSV_PATH = project_csv_from_env
503
+ logger.info(f"Using project-specific LLM model CSV (from PDD_PATH): {LLM_MODEL_CSV_PATH}")
504
+ elif project_csv_from_cwd.is_file():
505
+ # Otherwise, prefer the project relative to the current working directory
506
+ LLM_MODEL_CSV_PATH = project_csv_from_cwd
507
+ logger.info(f"Using project-specific LLM model CSV (from CWD): {LLM_MODEL_CSV_PATH}")
196
508
  else:
197
- LLM_MODEL_CSV_PATH = PROJECT_ROOT / "data" / "llm_model.csv"
198
- logger.info(f"Using project LLM model CSV: {LLM_MODEL_CSV_PATH}")
509
+ # Neither exists, we'll use a marker path that _load_model_data will handle
510
+ LLM_MODEL_CSV_PATH = None
511
+ logger.info("No local LLM model CSV found, will use package default")
199
512
  # ---------------------------------
200
513
 
201
514
  # Load environment variables from .env file
@@ -225,6 +538,7 @@ if GCS_HMAC_SECRET_ACCESS_KEY:
225
538
  GCS_HMAC_SECRET_ACCESS_KEY = GCS_HMAC_SECRET_ACCESS_KEY.strip()
226
539
 
227
540
  cache_configured = False
541
+ configured_cache = None # Store the configured cache instance for restoration
228
542
 
229
543
  if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
230
544
  # Store original AWS credentials before overwriting for GCS cache setup
@@ -238,12 +552,13 @@ if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
238
552
  os.environ['AWS_SECRET_ACCESS_KEY'] = GCS_HMAC_SECRET_ACCESS_KEY
239
553
  # os.environ['AWS_REGION_NAME'] = GCS_REGION_NAME # Uncomment if needed
240
554
 
241
- litellm.cache = litellm.Cache(
555
+ configured_cache = Cache(
242
556
  type="s3",
243
557
  s3_bucket_name=GCS_BUCKET_NAME,
244
558
  s3_region_name=GCS_REGION_NAME, # Pass region explicitly to cache
245
559
  s3_endpoint_url=GCS_ENDPOINT_URL,
246
560
  )
561
+ litellm.cache = configured_cache
247
562
  logger.info(f"LiteLLM cache configured for GCS bucket (S3 compatible): {GCS_BUCKET_NAME}")
248
563
  cache_configured = True
249
564
 
@@ -268,15 +583,22 @@ if GCS_BUCKET_NAME and GCS_HMAC_ACCESS_KEY_ID and GCS_HMAC_SECRET_ACCESS_KEY:
268
583
  elif 'AWS_REGION_NAME' in os.environ:
269
584
  pass # Or just leave it if the temporary setting wasn't done/needed
270
585
 
586
+ # Check if caching is disabled via environment variable
587
+ if os.getenv("LITELLM_CACHE_DISABLE") == "1":
588
+ logger.info("LiteLLM caching disabled via LITELLM_CACHE_DISABLE=1")
589
+ litellm.cache = None
590
+ cache_configured = True
591
+
271
592
  if not cache_configured:
272
593
  try:
273
- # Try SQLite-based cache as a fallback
594
+ # Try disk-based cache as a fallback
274
595
  sqlite_cache_path = PROJECT_ROOT / "litellm_cache.sqlite"
275
- litellm.cache = litellm.Cache(type="sqlite", cache_path=str(sqlite_cache_path))
276
- logger.info(f"LiteLLM SQLite cache configured at {sqlite_cache_path}")
596
+ configured_cache = Cache(type="disk", disk_cache_dir=str(sqlite_cache_path))
597
+ litellm.cache = configured_cache
598
+ logger.info(f"LiteLLM disk cache configured at {sqlite_cache_path}")
277
599
  cache_configured = True
278
600
  except Exception as e2:
279
- warnings.warn(f"Failed to configure LiteLLM SQLite cache: {e2}. Caching is disabled.")
601
+ warnings.warn(f"Failed to configure LiteLLM disk cache: {e2}. Caching is disabled.")
280
602
  litellm.cache = None
281
603
 
282
604
  if not cache_configured:
@@ -314,29 +636,49 @@ def _litellm_success_callback(
314
636
  cost_val = litellm.completion_cost(completion_response=completion_response)
315
637
  calculated_cost = cost_val if cost_val is not None else 0.0
316
638
  except Exception as e1:
317
- # Attempt 2: If response object failed (e.g., missing provider in model name),
318
- # try again using explicit model from kwargs and tokens from usage.
319
- # This is often needed for batch completion items.
639
+ # Attempt 2: Compute via tokens and model mapping. If LiteLLM mapping is
640
+ # missing or API differs, fall back to CSV rates in _MODEL_RATE_MAP.
320
641
  logger.debug(f"Attempting cost calculation with fallback method: {e1}")
321
642
  try:
322
- model_name = kwargs.get("model") # Get original model name from input kwargs
643
+ model_name = kwargs.get("model")
323
644
  if model_name and usage:
324
- prompt_tokens = getattr(usage, 'prompt_tokens', 0)
325
- completion_tokens = getattr(usage, 'completion_tokens', 0)
326
- cost_val = litellm.completion_cost(
327
- model=model_name,
328
- prompt_tokens=prompt_tokens,
329
- completion_tokens=completion_tokens
330
- )
331
- calculated_cost = cost_val if cost_val is not None else 0.0
645
+ in_tok = getattr(usage, 'prompt_tokens', None)
646
+ out_tok = getattr(usage, 'completion_tokens', None)
647
+ # Some providers may use 'input_tokens'/'output_tokens'
648
+ if in_tok is None:
649
+ in_tok = getattr(usage, 'input_tokens', 0)
650
+ if out_tok is None:
651
+ out_tok = getattr(usage, 'output_tokens', 0)
652
+
653
+ # Try LiteLLM helper (arg names vary across versions)
654
+ try:
655
+ cost_val = litellm.completion_cost(
656
+ model=model_name,
657
+ prompt_tokens=in_tok,
658
+ completion_tokens=out_tok,
659
+ )
660
+ calculated_cost = cost_val if cost_val is not None else 0.0
661
+ except TypeError:
662
+ # Older/newer versions may require input/output token names
663
+ try:
664
+ cost_val = litellm.completion_cost(
665
+ model=model_name,
666
+ input_tokens=in_tok,
667
+ output_tokens=out_tok,
668
+ )
669
+ calculated_cost = cost_val if cost_val is not None else 0.0
670
+ except Exception as e3:
671
+ # Final fallback: compute using CSV rates
672
+ rates = _MODEL_RATE_MAP.get(str(model_name))
673
+ if rates is not None:
674
+ in_rate, out_rate = rates
675
+ calculated_cost = (float(in_tok or 0) * in_rate + float(out_tok or 0) * out_rate) / 1_000_000.0
676
+ else:
677
+ calculated_cost = 0.0
678
+ logger.debug(f"Cost calculation failed with LiteLLM token API; used CSV rates if available. Detail: {e3}")
332
679
  else:
333
- # If we can't get model name or usage, fallback to 0
334
680
  calculated_cost = 0.0
335
- # Optional: Log the original error e1 if needed
336
- # logger.warning(f"[Callback WARN] Failed to calculate cost with response object ({e1}) and fallback failed.")
337
681
  except Exception as e2:
338
- # Optional: Log secondary error e2 if needed
339
- # logger.warning(f"[Callback WARN] Failed to calculate cost with fallback method: {e2}")
340
682
  calculated_cost = 0.0 # Default to 0 on any error
341
683
  logger.debug(f"Cost calculation failed with fallback method: {e2}")
342
684
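A worked example of the CSV-rate fallback used above. The rate values are invented; they are dollars per million tokens, the units assumed by the _MODEL_RATE_MAP populated from the CSV in a later hunk.

rates = {"example-model": (2.50, 10.00)}       # (input $/Mtok, output $/Mtok), illustrative numbers
in_tok, out_tok = 1_200, 350
in_rate, out_rate = rates["example-model"]
cost = (in_tok * in_rate + out_tok * out_rate) / 1_000_000.0
print(f"${cost:.6f}")                          # $0.006500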
 
@@ -354,14 +696,108 @@ def _litellm_success_callback(
354
696
  # Register the callback with LiteLLM
355
697
  litellm.success_callback = [_litellm_success_callback]
356
698
 
699
+ # --- Cost Mapping Support (CSV Rates) ---
700
+ # Populate from CSV inside llm_invoke; used by callback fallback
701
+ _MODEL_RATE_MAP: Dict[str, Tuple[float, float]] = {}
702
+
703
+ def _set_model_rate_map(df: pd.DataFrame) -> None:
704
+ global _MODEL_RATE_MAP
705
+ try:
706
+ _MODEL_RATE_MAP = {
707
+ str(row['model']): (
708
+ float(row['input']) if pd.notna(row['input']) else 0.0,
709
+ float(row['output']) if pd.notna(row['output']) else 0.0,
710
+ )
711
+ for _, row in df.iterrows()
712
+ }
713
+ except Exception:
714
+ _MODEL_RATE_MAP = {}
715
+
357
716
  # --- Helper Functions ---
358
717
 
359
- def _load_model_data(csv_path: Path) -> pd.DataFrame:
360
- """Loads and preprocesses the LLM model data from CSV."""
361
- if not csv_path.exists():
362
- raise FileNotFoundError(f"LLM model CSV not found at {csv_path}")
718
+ def _is_malformed_json_response(content: str, threshold: int = 100) -> bool:
719
+ """
720
+ Detect if a JSON response appears malformed due to excessive trailing newlines.
721
+
722
+ This can happen when Gemini generates thousands of \n characters in a JSON string value,
723
+ causing the response to be truncated and missing closing braces.
724
+
725
+ Args:
726
+ content: The raw response content string
727
+ threshold: Number of consecutive trailing \n sequences to consider malformed
728
+
729
+ Returns:
730
+ True if the response appears malformed, False otherwise
731
+ """
732
+ if not content or not isinstance(content, str):
733
+ return False
734
+
735
+ # Check if it starts like JSON but doesn't end properly
736
+ stripped = content.strip()
737
+ if not stripped.startswith('{'):
738
+ return False
739
+
740
+ # If it ends with }, it's probably fine
741
+ if stripped.endswith('}'):
742
+ return False
743
+
744
+ # Count trailing \n sequences (escaped newlines in JSON strings)
745
+ # The pattern \n in a JSON string appears as \\n in the raw content
746
+ trailing_newline_count = 0
747
+ check_content = stripped
748
+ while check_content.endswith('\\n'):
749
+ trailing_newline_count += 1
750
+ check_content = check_content[:-2]
751
+
752
+ # If there are many trailing \n sequences, it's likely malformed
753
+ if trailing_newline_count >= threshold:
754
+ return True
755
+
756
+ # Also check for response that looks truncated mid-string
757
+ # (ends with characters that suggest we're inside a JSON string value)
758
+ if not stripped.endswith('}') and not stripped.endswith(']') and not stripped.endswith('"'):
759
+ # Could be truncated in the middle of an escaped sequence
760
+ if stripped.endswith('\\'):
761
+ return True
762
+
763
+ return False
764
+
765
+
766
+ def _load_model_data(csv_path: Optional[Path]) -> pd.DataFrame:
767
+ """Loads and preprocesses the LLM model data from CSV.
768
+
769
+ Args:
770
+ csv_path: Path to CSV file, or None to use package default
771
+
772
+ Returns:
773
+ DataFrame with model configuration data
774
+ """
775
+ # If csv_path is provided, try to load from it
776
+ if csv_path is not None:
777
+ if not csv_path.exists():
778
+ logger.warning(f"Specified LLM model CSV not found at {csv_path}, trying package default")
779
+ csv_path = None
780
+ else:
781
+ try:
782
+ df = pd.read_csv(csv_path)
783
+ logger.debug(f"Loaded model data from {csv_path}")
784
+ # Continue with the rest of the function...
785
+ except Exception as e:
786
+ logger.warning(f"Failed to load CSV from {csv_path}: {e}, trying package default")
787
+ csv_path = None
788
+
789
+ # If csv_path is None or loading failed, use package default
790
+ if csv_path is None:
791
+ try:
792
+ # Use importlib.resources to load the packaged CSV
793
+ csv_data = importlib.resources.files('pdd').joinpath('data/llm_model.csv').read_text()
794
+ import io
795
+ df = pd.read_csv(io.StringIO(csv_data))
796
+ logger.info("Loaded model data from package default")
797
+ except Exception as e:
798
+ raise FileNotFoundError(f"Failed to load default LLM model CSV from package: {e}")
799
+
363
800
  try:
364
- df = pd.read_csv(csv_path)
365
801
  # Basic validation and type conversion
366
802
  required_cols = ['provider', 'model', 'input', 'output', 'coding_arena_elo', 'api_key', 'structured_output', 'reasoning_type']
367
803
  for col in required_cols:
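An illustrative check (not part of the diff) of the _is_malformed_json_response heuristic added earlier in this hunk: a payload truncated after a long run of escaped newlines is flagged, a complete object is not.

good = '{"code": "print(1)"}'
bad = '{"code": "line one' + '\\n' * 150       # no closing brace, 150 trailing escaped newlines
print(_is_malformed_json_response(good))       # False
print(_is_malformed_json_response(bad))        # True (at or above the default threshold of 100)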
@@ -434,11 +870,26 @@ def _select_model_candidates(
434
870
  # Try finding base model in the *original* df in case it was filtered out
435
871
  original_base = model_df[model_df['model'] == base_model_name]
436
872
  if not original_base.empty:
437
- raise ValueError(f"Base model '{base_model_name}' found in CSV but requires API key '{original_base.iloc[0]['api_key']}' which might be missing or invalid configuration.")
438
- else:
439
- raise ValueError(f"Specified base model '{base_model_name}' not found in the LLM model CSV.")
440
-
441
- base_model = base_model_row.iloc[0]
873
+ # Base exists but may be misconfigured (e.g., missing API key). Keep erroring loudly.
874
+ raise ValueError(
875
+ f"Base model '{base_model_name}' found in CSV but requires API key '{original_base.iloc[0]['api_key']}' which might be missing or invalid configuration."
876
+ )
877
+ # Option A': Soft fallback – choose a reasonable surrogate base and continue
878
+ # Strategy (simplified and deterministic): pick the first available model
879
+ # from the CSV as the surrogate base. This mirrors typical CSV ordering
880
+ # expectations and keeps behavior predictable across environments.
881
+ try:
882
+ base_model = available_df.iloc[0]
883
+ logger.warning(
884
+ f"Base model '{base_model_name}' not found in CSV. Falling back to surrogate base '{base_model['model']}' (Option A')."
885
+ )
886
+ except Exception:
887
+ # If any unexpected error occurs during fallback, raise a clear error
888
+ raise ValueError(
889
+ f"Specified base model '{base_model_name}' not found and fallback selection failed. Check your LLM model CSV."
890
+ )
891
+ else:
892
+ base_model = base_model_row.iloc[0]
442
893
 
443
894
  # 3. Determine Target and Sort
444
895
  candidates = []
@@ -449,9 +900,10 @@ def _select_model_candidates(
449
900
  # Sort remaining by ELO descending as fallback
450
901
  available_df['sort_metric'] = -available_df['coding_arena_elo'] # Negative for descending sort
451
902
  candidates = available_df.sort_values(by='sort_metric').to_dict('records')
452
- # Ensure base model is first if it exists
453
- if any(c['model'] == base_model_name for c in candidates):
454
- candidates.sort(key=lambda x: 0 if x['model'] == base_model_name else 1)
903
+ # Ensure effective base model is first if it exists (supports surrogate base)
904
+ effective_base_name = str(base_model['model']) if isinstance(base_model, pd.Series) else base_model_name
905
+ if any(c['model'] == effective_base_name for c in candidates):
906
+ candidates.sort(key=lambda x: 0 if x['model'] == effective_base_name else 1)
455
907
  target_metric_value = f"Base Model ELO: {base_model['coding_arena_elo']}"
456
908
 
457
909
  elif strength < 0.5:
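A toy illustration of the ordering above, compressed into one sort key: the effective base model (possibly a surrogate) moves to the front and the rest stay ordered by descending ELO. Model names and ELO values are invented.

candidates = [
    {"model": "model-a", "coding_arena_elo": 1300},
    {"model": "base-model", "coding_arena_elo": 1250},
    {"model": "model-c", "coding_arena_elo": 1200},
]
effective_base_name = "base-model"
candidates.sort(key=lambda c: (c["model"] != effective_base_name, -c["coding_arena_elo"]))
print([c["model"] for c in candidates])        # ['base-model', 'model-a', 'model-c']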
@@ -556,6 +1008,45 @@ def _sanitize_api_key(key_value: str) -> str:
556
1008
  return sanitized
557
1009
 
558
1010
 
1011
+ def _save_key_to_env_file(key_name: str, value: str, env_path: Path) -> None:
1012
+ """Save or update a key in the .env file.
1013
+
1014
+ - Replaces existing key in-place (no comment + append)
1015
+ - Removes old commented versions of the same key (Issue #183)
1016
+ - Preserves all other content
1017
+ """
1018
+ lines = []
1019
+ if env_path.exists():
1020
+ with open(env_path, 'r') as f:
1021
+ lines = f.readlines()
1022
+
1023
+ new_lines = []
1024
+ key_replaced = False
1025
+ prefix = f"{key_name}="
1026
+ prefix_spaced = f"{key_name} ="
1027
+
1028
+ for line in lines:
1029
+ stripped = line.strip()
1030
+ # Skip old commented versions of this key (cleanup accumulation)
1031
+ if stripped.startswith(f"# {prefix}") or stripped.startswith(f"# {prefix_spaced}"):
1032
+ continue
1033
+ elif stripped.startswith(prefix) or stripped.startswith(prefix_spaced):
1034
+ # Replace in-place
1035
+ new_lines.append(f'{key_name}="{value}"\n')
1036
+ key_replaced = True
1037
+ else:
1038
+ new_lines.append(line)
1039
+
1040
+ # Add key if not found
1041
+ if not key_replaced:
1042
+ if new_lines and not new_lines[-1].endswith('\n'):
1043
+ new_lines.append('\n')
1044
+ new_lines.append(f'{key_name}="{value}"\n')
1045
+
1046
+ with open(env_path, 'w') as f:
1047
+ f.writelines(new_lines)
1048
+
1049
+
559
1050
  def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, bool], verbose: bool) -> bool:
560
1051
  """Checks for API key in env, prompts user if missing, and updates .env."""
561
1052
  key_name = model_info.get('api_key')
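A usage sketch of _save_key_to_env_file defined above: an existing key is replaced in place, a stale commented copy is dropped (Issue #183), and unrelated lines are preserved. The temporary directory and key value are for illustration only.

from pathlib import Path
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    env = Path(tmp) / ".env"
    env.write_text('# OPENAI_API_KEY="stale"\nOPENAI_API_KEY="old"\nOTHER_SETTING=1\n')
    _save_key_to_env_file("OPENAI_API_KEY", "new-key", env)
    print(env.read_text())
    # OPENAI_API_KEY="new-key"
    # OTHER_SETTING=1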
@@ -576,6 +1067,12 @@ def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, b
576
1067
  return True
577
1068
  else:
578
1069
  logger.warning(f"API key environment variable '{key_name}' for model '{model_info.get('model')}' is not set.")
1070
+
1071
+ # Skip prompting if --force flag is set (non-interactive mode)
1072
+ if os.environ.get('PDD_FORCE'):
1073
+ logger.error(f"API key '{key_name}' not set. In --force mode, skipping interactive prompt.")
1074
+ return False
1075
+
579
1076
  try:
580
1077
  # Interactive prompt
581
1078
  user_provided_key = input(f"Please enter the API key for {key_name}: ").strip()
@@ -593,39 +1090,7 @@ def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, b
593
1090
 
594
1091
  # Update .env file
595
1092
  try:
596
- lines = []
597
- if ENV_PATH.exists():
598
- with open(ENV_PATH, 'r') as f:
599
- lines = f.readlines()
600
-
601
- new_lines = []
602
- # key_updated = False
603
- prefix = f"{key_name}="
604
- prefix_spaced = f"{key_name} =" # Handle potential spaces
605
-
606
- for line in lines:
607
- stripped_line = line.strip()
608
- if stripped_line.startswith(prefix) or stripped_line.startswith(prefix_spaced):
609
- # Comment out the old key
610
- new_lines.append(f"# {line}")
611
- # key_updated = True # Indicates we found an old line to comment
612
- elif stripped_line.startswith(f"# {prefix}") or stripped_line.startswith(f"# {prefix_spaced}"):
613
- # Keep already commented lines as they are
614
- new_lines.append(line)
615
- else:
616
- new_lines.append(line)
617
-
618
- # Append the new key, ensuring quotes for robustness
619
- new_key_line = f'{key_name}="{user_provided_key}"\n'
620
- # Add newline before if file not empty and doesn't end with newline
621
- if new_lines and not new_lines[-1].endswith('\n'):
622
- new_lines.append('\n')
623
- new_lines.append(new_key_line)
624
-
625
-
626
- with open(ENV_PATH, 'w') as f:
627
- f.writelines(new_lines)
628
-
1093
+ _save_key_to_env_file(key_name, user_provided_key, ENV_PATH)
629
1094
  logger.info(f"API key '{key_name}' saved to {ENV_PATH}.")
630
1095
  logger.warning("SECURITY WARNING: The API key has been saved to your .env file. "
631
1096
  "Ensure this file is kept secure and is included in your .gitignore.")
@@ -647,7 +1112,6 @@ def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, b
647
1112
  def _format_messages(prompt: str, input_data: Union[Dict[str, Any], List[Dict[str, Any]]], use_batch_mode: bool) -> Union[List[Dict[str, str]], List[List[Dict[str, str]]]]:
648
1113
  """Formats prompt and input into LiteLLM message format."""
649
1114
  try:
650
- prompt_template = PromptTemplate.from_template(prompt)
651
1115
  if use_batch_mode:
652
1116
  if not isinstance(input_data, list):
653
1117
  raise ValueError("input_json must be a list of dictionaries when use_batch_mode is True.")
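A short sketch of the message shapes produced by _format_messages after the change in the surrounding hunks (plain str.format replaces LangChain's PromptTemplate; the curly-brace placeholder syntax is unchanged). The prompt and inputs are illustrative.

single = _format_messages("Review {name}", {"name": "cli.py"}, use_batch_mode=False)
# [{"role": "user", "content": "Review cli.py"}]
batch = _format_messages("Review {name}", [{"name": "a.py"}, {"name": "b.py"}], use_batch_mode=True)
# [[{"role": "user", "content": "Review a.py"}],
#  [{"role": "user", "content": "Review b.py"}]]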
@@ -655,19 +1119,424 @@ def _format_messages(prompt: str, input_data: Union[Dict[str, Any], List[Dict[st
655
1119
  for item in input_data:
656
1120
  if not isinstance(item, dict):
657
1121
  raise ValueError("Each item in input_json list must be a dictionary for batch mode.")
658
- formatted_prompt = prompt_template.format(**item)
1122
+ formatted_prompt = prompt.format(**item)
659
1123
  all_messages.append([{"role": "user", "content": formatted_prompt}])
660
1124
  return all_messages
661
1125
  else:
662
1126
  if not isinstance(input_data, dict):
663
1127
  raise ValueError("input_json must be a dictionary when use_batch_mode is False.")
664
- formatted_prompt = prompt_template.format(**input_data)
1128
+ formatted_prompt = prompt.format(**input_data)
665
1129
  return [{"role": "user", "content": formatted_prompt}]
666
1130
  except KeyError as e:
667
- raise ValueError(f"Prompt formatting error: Missing key {e} in input_json for prompt template.") from e
1131
+ raise ValueError(f"Prompt formatting error: Missing key {e} in input_json for prompt string.") from e
668
1132
  except Exception as e:
669
1133
  raise ValueError(f"Error formatting prompt: {e}") from e
670
1134
 
1135
+ # --- JSON Extraction Helpers ---
1136
+ import re
1137
+
1138
+ def _extract_fenced_json_block(text: str) -> Optional[str]:
1139
+ try:
1140
+ m = re.search(r"```json\s*(\{[\s\S]*?\})\s*```", text, flags=re.IGNORECASE)
1141
+ if m:
1142
+ return m.group(1)
1143
+ return None
1144
+ except Exception:
1145
+ return None
1146
+
1147
+ def _extract_balanced_json_objects(text: str) -> List[str]:
1148
+ results: List[str] = []
1149
+ brace_stack = 0
1150
+ start_idx = -1
1151
+ in_string = False
1152
+ escape = False
1153
+ for i, ch in enumerate(text):
1154
+ if in_string:
1155
+ if escape:
1156
+ escape = False
1157
+ elif ch == '\\':
1158
+ escape = True
1159
+ elif ch == '"':
1160
+ in_string = False
1161
+ continue
1162
+ else:
1163
+ if ch == '"':
1164
+ in_string = True
1165
+ continue
1166
+ if ch == '{':
1167
+ if brace_stack == 0:
1168
+ start_idx = i
1169
+ brace_stack += 1
1170
+ elif ch == '}':
1171
+ if brace_stack > 0:
1172
+ brace_stack -= 1
1173
+ if brace_stack == 0 and start_idx != -1:
1174
+ results.append(text[start_idx:i+1])
1175
+ start_idx = -1
1176
+ return results
1177
+
1178
+
1179
+ def _looks_like_python_code(s: str) -> bool:
1180
+ """
1181
+ Heuristic check if a string looks like Python code.
1182
+
1183
+ Used to determine if we should attempt Python syntax repair on a string field.
1184
+ """
1185
+ if not s or len(s) < 10:
1186
+ return False
1187
+ # Check for common Python patterns
1188
+ code_indicators = ('def ', 'class ', 'import ', 'from ', 'if __name__', 'return ', 'print(')
1189
+ return any(indicator in s for indicator in code_indicators)
1190
+
1191
+
1192
+ # Field names known to contain prose text, not Python code
1193
+ # These are skipped during syntax validation to avoid false positives
1194
+ _PROSE_FIELD_NAMES = frozenset({
1195
+ 'reasoning', # PromptAnalysis - completeness reasoning
1196
+ 'explanation', # TrimResultsOutput, FixerOutput - prose explanations
1197
+ 'analysis', # DiffAnalysis, CodePatchResult - analysis text
1198
+ 'change_instructions', # ChangeInstruction, ConflictChange - instructions
1199
+ 'change_description', # DiffAnalysis - description of changes
1200
+ 'planned_modifications', # CodePatchResult - modification plans
1201
+ 'details', # VerificationOutput - issue details
1202
+ 'description', # General prose descriptions
1203
+ 'focus', # Focus descriptions
1204
+ 'file_summary', # FileSummary - prose summaries of file contents
1205
+ })
1206
+
1207
+
1208
+ def _is_prose_field_name(field_name: str) -> bool:
1209
+ """Check if a field name indicates it contains prose, not code.
1210
+
1211
+ Used to skip syntax validation on prose fields that may contain
1212
+ Python keywords (like 'return' or 'import') but are not actual code.
1213
+ """
1214
+ return field_name.lower() in _PROSE_FIELD_NAMES
1215
+
1216
+
1217
+ def _repair_python_syntax(code: str) -> str:
1218
+ """
1219
+ Validate Python code syntax and attempt repairs if invalid.
1220
+
1221
+ Sometimes LLMs include spurious characters at string boundaries,
1222
+ especially when the code contains quotes. This function attempts
1223
+ to detect and repair such issues.
1224
+
1225
+ Args:
1226
+ code: Python code string to validate/repair
1227
+
1228
+ Returns:
1229
+ Repaired code if a fix was found, otherwise original code
1230
+ """
1231
+ import ast
1232
+
1233
+ if not code or not code.strip():
1234
+ return code
1235
+
1236
+ # First, try to parse as-is
1237
+ try:
1238
+ ast.parse(code)
1239
+ return code # Valid, no repair needed
1240
+ except SyntaxError:
1241
+ pass
1242
+
1243
+ # Try common repairs
1244
+ repaired = code
1245
+
1246
+ # Repair 1: Trailing spurious quote (the specific issue we've seen)
1247
+ for quote in ['"', "'"]:
1248
+ if repaired.rstrip().endswith(quote):
1249
+ candidate = repaired.rstrip()[:-1]
1250
+ try:
1251
+ ast.parse(candidate)
1252
+ logger.info(f"[INFO] Repaired code by removing trailing {quote!r}")
1253
+ return candidate
1254
+ except SyntaxError:
1255
+ pass
1256
+
1257
+ # Repair 2: Leading spurious quote
1258
+ for quote in ['"', "'"]:
1259
+ if repaired.lstrip().startswith(quote):
1260
+ candidate = repaired.lstrip()[1:]
1261
+ try:
1262
+ ast.parse(candidate)
1263
+ logger.info(f"[INFO] Repaired code by removing leading {quote!r}")
1264
+ return candidate
1265
+ except SyntaxError:
1266
+ pass
1267
+
1268
+ # Repair 3: Both leading and trailing spurious quotes
1269
+ for quote in ['"', "'"]:
1270
+ stripped = repaired.strip()
1271
+ if stripped.startswith(quote) and stripped.endswith(quote):
1272
+ candidate = stripped[1:-1]
1273
+ try:
1274
+ ast.parse(candidate)
1275
+ logger.info(f"[INFO] Repaired code by removing surrounding {quote!r}")
1276
+ return candidate
1277
+ except SyntaxError:
1278
+ pass
1279
+
1280
+ # If no repair worked, return original (let it fail downstream)
1281
+ return code
1282
+
1283
+
1284
+ def _smart_unescape_code(code: str) -> str:
1285
+ """
1286
+ Unescape literal \\n sequences in code while preserving them inside string literals.
1287
+
1288
+ When LLMs return code as JSON, newlines get double-escaped. After JSON parsing,
1289
+ we have literal backslash-n (2 chars) that should be actual newlines for code
1290
+ structure, BUT escape sequences inside Python strings (like print("\\n")) should
1291
+ remain as escape sequences.
1292
+
1293
+ Args:
1294
+ code: Python code that may have literal \\n sequences
1295
+
1296
+ Returns:
1297
+ Code with structural newlines unescaped but string literals preserved
1298
+ """
1299
+ LITERAL_BACKSLASH_N = '\\' + 'n' # Literal \n (2 chars)
1300
+
1301
+ if LITERAL_BACKSLASH_N not in code:
1302
+ return code
1303
+
1304
+ # First, check if the code already has actual newlines (mixed state)
1305
+ # If it does, we need to be more careful
1306
+ has_actual_newlines = '\n' in code
1307
+
1308
+ if not has_actual_newlines:
1309
+ # All newlines are escaped - this is the double-escaped case
1310
+ # We need to unescape them but preserve \n inside string literals
1311
+
1312
+ # Strategy: Use a placeholder for \n inside strings, unescape all, then restore
1313
+ # We detect string literals by tracking quote state
1314
+
1315
+ result = []
1316
+ i = 0
1317
+ in_string = False
1318
+ string_char = None
1319
+ in_fstring = False
1320
+
1321
+ # Placeholder that won't appear in code
1322
+ PLACEHOLDER = '\x00NEWLINE_ESCAPE\x00'
1323
+
1324
+ while i < len(code):
1325
+ # Check for escape sequences (both actual and literal)
1326
+ if i + 1 < len(code) and code[i] == '\\':
1327
+ next_char = code[i + 1]
1328
+
1329
+ if in_string:
1330
+ # Inside a string - preserve escape sequences
1331
+ if next_char == 'n':
1332
+ result.append(PLACEHOLDER)
1333
+ i += 2
1334
+ continue
1335
+ elif next_char == 't':
1336
+ result.append('\\' + 't') # Keep \t as-is in strings
1337
+ i += 2
1338
+ continue
1339
+ elif next_char == 'r':
1340
+ result.append('\\' + 'r') # Keep \r as-is in strings
1341
+ i += 2
1342
+ continue
1343
+ elif next_char in ('"', "'", '\\'):
1344
+ # Keep escaped quotes and backslashes
1345
+ result.append(code[i:i+2])
1346
+ i += 2
1347
+ continue
1348
+
1349
+ # Check for string delimiters
1350
+ if not in_string:
1351
+ # Check for triple quotes first
1352
+ if i + 2 < len(code) and code[i:i+3] in ('"""', "'''"):
1353
+ in_string = True
1354
+ string_char = code[i:i+3]
1355
+ # Check if preceded by 'f' for f-string
1356
+ in_fstring = i > 0 and code[i-1] == 'f'
1357
+ result.append(code[i:i+3])
1358
+ i += 3
1359
+ continue
1360
+ elif code[i] in ('"', "'"):
1361
+ in_string = True
1362
+ string_char = code[i]
1363
+ in_fstring = i > 0 and code[i-1] == 'f'
1364
+ result.append(code[i])
1365
+ i += 1
1366
+ continue
1367
+ else:
1368
+ # Check for end of string
1369
+ if len(string_char) == 3: # Triple quote
1370
+ if i + 2 < len(code) and code[i:i+3] == string_char:
1371
+ in_string = False
1372
+ in_fstring = False
1373
+ result.append(code[i:i+3])
1374
+ i += 3
1375
+ continue
1376
+ else: # Single quote
1377
+ if code[i] == string_char:
1378
+ in_string = False
1379
+ in_fstring = False
1380
+ result.append(code[i])
1381
+ i += 1
1382
+ continue
1383
+
1384
+ result.append(code[i])
1385
+ i += 1
1386
+
1387
+ intermediate = ''.join(result)
1388
+
1389
+ # Now unescape all remaining \n (these are structural)
1390
+ LITERAL_BACKSLASH_R_N = '\\' + 'r' + '\\' + 'n'
1391
+ LITERAL_BACKSLASH_T = '\\' + 't'
1392
+
1393
+ intermediate = intermediate.replace(LITERAL_BACKSLASH_R_N, '\r\n')
1394
+ intermediate = intermediate.replace(LITERAL_BACKSLASH_N, '\n')
1395
+ intermediate = intermediate.replace(LITERAL_BACKSLASH_T, '\t')
1396
+
1397
+ # Restore placeholders to \n (as escape sequences in strings)
1398
+ result_code = intermediate.replace(PLACEHOLDER, '\\n')
1399
+
1400
+ return result_code
1401
+ else:
1402
+ # Mixed state - some actual newlines, some literal \n
1403
+ # This means the JSON parsing already converted some, but not all
1404
+ # The literal \n remaining are likely in strings, so leave them alone
1405
+ return code
1406
+
1407
+
1408
+ def _unescape_code_newlines(obj: Any) -> Any:
1409
+ """
1410
+ Fix double-escaped newlines in Pydantic model string fields.
1411
+
1412
+ Some models (e.g., Gemini) return JSON with \\\\n instead of \\n in code strings,
1413
+ resulting in literal backslash-n text instead of actual newlines after JSON parsing.
1414
+ This function recursively unescapes these in string fields of Pydantic models.
1415
+
1416
+ Also repairs Python syntax errors in code-like string fields (e.g., trailing quotes).
1417
+
1418
+ The check uses literal backslash-n (2 chars) vs actual newline (1 char):
1419
+ - '\\\\n' in Python source = literal backslash + n (2 chars) - needs fixing
1420
+ - '\\n' in Python source = newline character (1 char) - already correct
1421
+
1422
+ Args:
1423
+ obj: A Pydantic model, dict, list, or primitive value
1424
+
1425
+ Returns:
1426
+ The same object with string fields unescaped and code fields repaired
1427
+ """
1428
+ if obj is None:
1429
+ return obj
1430
+
1431
+ def _process_string(s: str) -> str:
1432
+ """Process a string: unescape newlines and repair Python syntax if needed."""
1433
+ result = s
1434
+ # Smart unescape that preserves \n inside string literals
1435
+ if _looks_like_python_code(result):
1436
+ result = _smart_unescape_code(result)
1437
+ result = _repair_python_syntax(result)
1438
+ else:
1439
+ # For non-code strings, do simple unescape
1440
+ LITERAL_BACKSLASH_N = '\\' + 'n'
1441
+ LITERAL_BACKSLASH_R_N = '\\' + 'r' + '\\' + 'n'
1442
+ LITERAL_BACKSLASH_T = '\\' + 't'
1443
+ if LITERAL_BACKSLASH_N in result:
1444
+ result = result.replace(LITERAL_BACKSLASH_R_N, '\r\n')
1445
+ result = result.replace(LITERAL_BACKSLASH_N, '\n')
1446
+ result = result.replace(LITERAL_BACKSLASH_T, '\t')
1447
+ return result
1448
+
1449
+ # Handle Pydantic models
1450
+ if isinstance(obj, BaseModel):
1451
+ # Get all field values and process strings
1452
+ for field_name in obj.model_fields:
1453
+ value = getattr(obj, field_name)
1454
+ if isinstance(value, str):
1455
+ processed = _process_string(value)
1456
+ if processed != value:
1457
+ object.__setattr__(obj, field_name, processed)
1458
+ elif isinstance(value, (dict, list, BaseModel)):
1459
+ _unescape_code_newlines(value)
1460
+ return obj
1461
+
1462
+ # Handle dicts
1463
+ if isinstance(obj, dict):
1464
+ for key, value in obj.items():
1465
+ if isinstance(value, str):
1466
+ obj[key] = _process_string(value)
1467
+ elif isinstance(value, (dict, list)):
1468
+ _unescape_code_newlines(value)
1469
+ return obj
1470
+
1471
+ # Handle lists
1472
+ if isinstance(obj, list):
1473
+ for i, item in enumerate(obj):
1474
+ if isinstance(item, str):
1475
+ obj[i] = _process_string(item)
1476
+ elif isinstance(item, (dict, list, BaseModel)):
1477
+ _unescape_code_newlines(item)
1478
+ return obj
1479
+
1480
+ return obj
1481
+
1482
+
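As a rough usage sketch (CodeResult below is a hypothetical model, not part of pdd), the recursive walk normalizes a double-escaped field in place:

from pydantic import BaseModel

class CodeResult(BaseModel):                 # hypothetical, for illustration only
    explanation: str
    code: str

res = CodeResult(
    explanation="Adds two numbers.",
    code="def add(a, b):\\n    return a + b",  # arrived double-escaped
)
_unescape_code_newlines(res)                 # mutates string fields in place
# res.code now contains a real newline between the two lines (assuming the
# field is recognized as Python code by _looks_like_python_code).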
1483
+ def _has_invalid_python_code(obj: Any, field_name: str = "") -> bool:
1484
+ """
1485
+ Check if any code-like string fields have invalid Python syntax.
1486
+
1487
+ This is used after _unescape_code_newlines to detect if repair failed
1488
+ and we should retry with cache disabled.
1489
+
1490
+ Skips fields in _PROSE_FIELD_NAMES to avoid false positives on prose
1491
+ text that mentions code patterns (e.g., "ends on a return statement").
1492
+
1493
+ Args:
1494
+ obj: A Pydantic model, dict, list, or primitive value
1495
+ field_name: The name of the field being validated (used to skip prose)
1496
+
1497
+ Returns:
1498
+ True if there are invalid code fields that couldn't be repaired
1499
+ """
1500
+ import ast
1501
+
1502
+ if obj is None:
1503
+ return False
1504
+
1505
+ if isinstance(obj, str):
1506
+ # Skip validation for known prose fields
1507
+ if _is_prose_field_name(field_name):
1508
+ return False
1509
+ if _looks_like_python_code(obj):
1510
+ try:
1511
+ ast.parse(obj)
1512
+ return False # Valid
1513
+ except SyntaxError:
1514
+ return True # Invalid
1515
+ return False
1516
+
1517
+ if isinstance(obj, BaseModel):
1518
+ for name in obj.model_fields:
1519
+ value = getattr(obj, name)
1520
+ if _has_invalid_python_code(value, field_name=name):
1521
+ return True
1522
+ return False
1523
+
1524
+ if isinstance(obj, dict):
1525
+ for key, value in obj.items():
1526
+ fname = key if isinstance(key, str) else ""
1527
+ if _has_invalid_python_code(value, field_name=fname):
1528
+ return True
1529
+ return False
1530
+
1531
+ if isinstance(obj, list):
1532
+ for item in obj:
1533
+ if _has_invalid_python_code(item, field_name=field_name):
1534
+ return True
1535
+ return False
1536
+
1537
+ return False
1538
+
1539
+
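The validity test behind the retry decision is simply ast.parse; a compact sketch of the pass/fail cases:

import ast

def parses(src: str) -> bool:                # same check used above
    try:
        ast.parse(src)
        return True
    except SyntaxError:
        return False

parses("def f():\n    return 1")             # True  -> field considered valid
parses("def f(:\n    return 1")              # False -> reported as invalid, prompting the cache-bypass retry downstream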
671
1540
  # --- Main Function ---
672
1541
 
673
1542
  def llm_invoke(
@@ -677,9 +1546,12 @@ def llm_invoke(
677
1546
  temperature: float = 0.1,
678
1547
  verbose: bool = False,
679
1548
  output_pydantic: Optional[Type[BaseModel]] = None,
680
- time: float = 0.25,
1549
+ output_schema: Optional[Dict[str, Any]] = None,
1550
+ time: Optional[float] = 0.25,
681
1551
  use_batch_mode: bool = False,
682
1552
  messages: Optional[Union[List[Dict[str, str]], List[List[Dict[str, str]]]]] = None,
1553
+ language: Optional[str] = None,
1554
+ use_cloud: Optional[bool] = None,
683
1555
  ) -> Dict[str, Any]:
684
1556
  """
685
1557
  Runs a prompt with given input using LiteLLM, handling model selection,
@@ -693,9 +1565,11 @@ def llm_invoke(
693
1565
  temperature: LLM temperature.
694
1566
  verbose: Print detailed logs.
695
1567
  output_pydantic: Optional Pydantic model for structured output.
1568
+ output_schema: Optional raw JSON schema dictionary for structured output (alternative to output_pydantic).
696
1569
  time: Relative thinking time (0-1, default 0.25).
697
1570
  use_batch_mode: Use batch completion if True.
698
1571
  messages: Pre-formatted list of messages (or list of lists for batch). If provided, ignores prompt and input_json.
1572
+ use_cloud: None=auto-detect (cloud if enabled, local if PDD_FORCE_LOCAL=1), True=force cloud, False=force local.
699
1573
 
700
1574
  Returns:
701
1575
  Dictionary containing 'result', 'cost', 'model_name', 'thinking_output'.
@@ -704,6 +1578,7 @@ def llm_invoke(
704
1578
  ValueError: For invalid inputs or prompt formatting errors.
705
1579
  FileNotFoundError: If llm_model.csv is missing.
706
1580
  RuntimeError: If all candidate models fail.
1581
+ InsufficientCreditsError: If cloud execution fails due to insufficient credits.
707
1582
  openai.*Error: If LiteLLM encounters API errors after retries.
708
1583
  """
709
1584
  # Set verbose logging if requested
@@ -720,6 +1595,58 @@ def llm_invoke(
720
1595
  logger.debug(f" time: {time}")
721
1596
  logger.debug(f" use_batch_mode: {use_batch_mode}")
722
1597
  logger.debug(f" messages: {'provided' if messages else 'None'}")
1598
+ logger.debug(f" use_cloud: {use_cloud}")
1599
+
1600
+ # --- 0. Cloud Execution Path ---
1601
+ # Determine cloud usage: explicit param > environment > default (local)
1602
+ if use_cloud is None:
1603
+ # Check environment for cloud preference
1604
+ # PDD_FORCE_LOCAL=1 forces local execution
1605
+ force_local = os.environ.get("PDD_FORCE_LOCAL", "").lower() in ("1", "true", "yes")
1606
+ if force_local:
1607
+ use_cloud = False
1608
+ else:
1609
+ # Try to use cloud if credentials are configured
1610
+ try:
1611
+ from pdd.core.cloud import CloudConfig
1612
+ use_cloud = CloudConfig.is_cloud_enabled()
1613
+ except ImportError:
1614
+ use_cloud = False
1615
+
1616
+ if use_cloud:
1617
+ from rich.console import Console
1618
+ console = Console()
1619
+
1620
+ if verbose:
1621
+ logger.debug("Attempting cloud execution...")
1622
+
1623
+ try:
1624
+ return _llm_invoke_cloud(
1625
+ prompt=prompt,
1626
+ input_json=input_json,
1627
+ strength=strength,
1628
+ temperature=temperature,
1629
+ verbose=verbose,
1630
+ output_pydantic=output_pydantic,
1631
+ output_schema=output_schema,
1632
+ time=time,
1633
+ use_batch_mode=use_batch_mode,
1634
+ messages=messages,
1635
+ language=language,
1636
+ )
1637
+ except CloudFallbackError as e:
1638
+ # Notify user and fall back to local execution
1639
+ console.print(f"[yellow]Cloud execution failed ({e}), falling back to local execution...[/yellow]")
1640
+ logger.warning(f"Cloud fallback: {e}")
1641
+ # Continue to local execution below
1642
+ except InsufficientCreditsError:
1643
+ # Re-raise credit errors - user needs to know
1644
+ raise
1645
+ except CloudInvocationError as e:
1646
+ # Non-recoverable cloud error - notify and fall back
1647
+ console.print(f"[yellow]Cloud error ({e}), falling back to local execution...[/yellow]")
1648
+ logger.warning(f"Cloud invocation error: {e}")
1649
+ # Continue to local execution below
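A condensed sketch of the precedence implemented above (the result of CloudConfig.is_cloud_enabled() is stood in for by a boolean argument):

import os

def resolve_use_cloud(use_cloud, cloud_enabled):
    if use_cloud is not None:                      # explicit argument wins
        return use_cloud
    if os.environ.get("PDD_FORCE_LOCAL", "").lower() in ("1", "true", "yes"):
        return False                               # environment forces local
    return cloud_enabled                           # else follow CloudConfig

resolve_use_cloud(None, True)     # True  -> cloud enabled, nothing forced local
resolve_use_cloud(False, True)    # False -> caller explicitly forced local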
723
1650
 
724
1651
  # --- 1. Load Environment & Validate Inputs ---
725
1652
  # .env loading happens at module level
@@ -744,6 +1671,10 @@ def llm_invoke(
744
1671
  else:
745
1672
  raise ValueError("Either 'messages' or both 'prompt' and 'input_json' must be provided.")
746
1673
 
1674
+ # Handle None time (means "no reasoning requested")
1675
+ if time is None:
1676
+ time = 0.0
1677
+
747
1678
  if not (0.0 <= strength <= 1.0):
748
1679
  raise ValueError("'strength' must be between 0.0 and 1.0.")
749
1680
  if not (0.0 <= temperature <= 2.0): # Common range for temperature
@@ -810,6 +1741,16 @@ def llm_invoke(
810
1741
  # --- 3. Iterate Through Candidates and Invoke LLM ---
811
1742
  last_exception = None
812
1743
  newly_acquired_keys: Dict[str, bool] = {} # Track keys obtained in this run
1744
+
1745
+ # Initialize variables for retry section
1746
+ response_format = None
1747
+ time_kwargs = {}
1748
+
1749
+ # Update global rate map for callback cost fallback
1750
+ try:
1751
+ _set_model_rate_map(model_df)
1752
+ except Exception:
1753
+ pass
813
1754
 
814
1755
  for model_info in candidate_models:
815
1756
  model_name_litellm = model_info['model']
@@ -820,6 +1761,9 @@ def llm_invoke(
820
1761
  logger.info(f"\n[ATTEMPT] Trying model: {model_name_litellm} (Provider: {provider})")
821
1762
 
822
1763
  retry_with_same_model = True
1764
+ # Track per-model temperature adjustment attempt (avoid infinite loop)
1765
+ current_temperature = temperature
1766
+ temp_adjustment_done = False
823
1767
  while retry_with_same_model:
824
1768
  retry_with_same_model = False # Assume success unless auth error on new key
825
1769
 
@@ -834,7 +1778,10 @@ def llm_invoke(
834
1778
  litellm_kwargs: Dict[str, Any] = {
835
1779
  "model": model_name_litellm,
836
1780
  "messages": formatted_messages,
837
- "temperature": temperature,
1781
+ # Use a local adjustable temperature to allow provider-specific fallbacks
1782
+ "temperature": current_temperature,
1783
+ # Retry on transient network errors (APIError, TimeoutError, ServiceUnavailableError)
1784
+ "num_retries": 2,
838
1785
  }
839
1786
 
840
1787
  api_key_name_from_csv = model_info.get('api_key') # From CSV
@@ -847,7 +1794,14 @@ def llm_invoke(
847
1794
  if is_vertex_model and api_key_name_from_csv == 'VERTEX_CREDENTIALS':
848
1795
  credentials_file_path = os.getenv("VERTEX_CREDENTIALS") # Path from env var
849
1796
  vertex_project_env = os.getenv("VERTEX_PROJECT")
850
- vertex_location_env = os.getenv("VERTEX_LOCATION")
1797
+ # Check for per-model location override, fall back to env var
1798
+ model_location = model_info.get('location')
1799
+ if pd.notna(model_location) and str(model_location).strip():
1800
+ vertex_location_env = str(model_location).strip()
1801
+ if verbose:
1802
+ logger.info(f"[INFO] Using per-model location override: '{vertex_location_env}' for model '{model_name_litellm}'")
1803
+ else:
1804
+ vertex_location_env = os.getenv("VERTEX_LOCATION")
851
1805
 
852
1806
  if credentials_file_path and vertex_project_env and vertex_location_env:
853
1807
  try:
@@ -861,14 +1815,23 @@ def llm_invoke(
861
1815
  if verbose:
862
1816
  logger.info(f"[INFO] For Vertex AI: using vertex_credentials from '{credentials_file_path}', project '{vertex_project_env}', location '{vertex_location_env}'.")
863
1817
  except FileNotFoundError:
1818
+ # Still pass project and location so ADC can work
1819
+ litellm_kwargs["vertex_project"] = vertex_project_env
1820
+ litellm_kwargs["vertex_location"] = vertex_location_env
864
1821
  if verbose:
865
- logger.error(f"[ERROR] Vertex credentials file not found at path specified by VERTEX_CREDENTIALS env var: '{credentials_file_path}'. LiteLLM may try ADC or fail.")
1822
+ logger.warning(f"[WARN] Vertex credentials file not found at '{credentials_file_path}'. Using ADC with project '{vertex_project_env}', location '{vertex_location_env}'.")
866
1823
  except json.JSONDecodeError:
1824
+ # Still pass project and location so ADC can work
1825
+ litellm_kwargs["vertex_project"] = vertex_project_env
1826
+ litellm_kwargs["vertex_location"] = vertex_location_env
867
1827
  if verbose:
868
- logger.error(f"[ERROR] Failed to decode JSON from Vertex credentials file: '{credentials_file_path}'. Check file content. LiteLLM may try ADC or fail.")
1828
+ logger.error(f"[ERROR] Failed to decode JSON from Vertex credentials file: '{credentials_file_path}'. Using ADC with project '{vertex_project_env}', location '{vertex_location_env}'.")
869
1829
  except Exception as e:
1830
+ # Still pass project and location so ADC can work
1831
+ litellm_kwargs["vertex_project"] = vertex_project_env
1832
+ litellm_kwargs["vertex_location"] = vertex_location_env
870
1833
  if verbose:
871
- logger.error(f"[ERROR] Failed to load or process Vertex credentials from '{credentials_file_path}': {e}. LiteLLM may try ADC or fail.")
1834
+ logger.error(f"[ERROR] Failed to load Vertex credentials from '{credentials_file_path}': {e}. Using ADC with project '{vertex_project_env}', location '{vertex_location_env}'.")
872
1835
  else:
873
1836
  if verbose:
874
1837
  logger.warning(f"[WARN] For Vertex AI (using '{api_key_name_from_csv}'): One or more required environment variables (VERTEX_CREDENTIALS, VERTEX_PROJECT, VERTEX_LOCATION) are missing.")
@@ -887,9 +1850,16 @@ def llm_invoke(
887
1850
 
888
1851
  # If this model is Vertex AI AND uses a direct API key string (not VERTEX_CREDENTIALS from CSV),
889
1852
  # also pass project and location from env vars.
890
- if is_vertex_model:
1853
+ if is_vertex_model:
891
1854
  vertex_project_env = os.getenv("VERTEX_PROJECT")
892
- vertex_location_env = os.getenv("VERTEX_LOCATION")
1855
+ # Check for per-model location override, fall back to env var
1856
+ model_location = model_info.get('location')
1857
+ if pd.notna(model_location) and str(model_location).strip():
1858
+ vertex_location_env = str(model_location).strip()
1859
+ if verbose:
1860
+ logger.info(f"[INFO] Using per-model location override: '{vertex_location_env}' for model '{model_name_litellm}'")
1861
+ else:
1862
+ vertex_location_env = os.getenv("VERTEX_LOCATION")
893
1863
  if vertex_project_env and vertex_location_env:
894
1864
  litellm_kwargs["vertex_project"] = vertex_project_env
895
1865
  litellm_kwargs["vertex_location"] = vertex_location_env
@@ -903,13 +1873,36 @@ def llm_invoke(
903
1873
  elif verbose: # No api_key_name_from_csv in CSV for this model
904
1874
  logger.info(f"[INFO] No API key name specified in CSV for model '{model_name_litellm}'. LiteLLM will use its default authentication mechanisms (e.g., standard provider env vars or ADC for Vertex AI).")
905
1875
 
906
- # Add api_base if present in CSV
1876
+ # Add base_url/api_base override if present in CSV
907
1877
  api_base = model_info.get('base_url')
908
1878
  if pd.notna(api_base) and api_base:
1879
+ # LiteLLM prefers `base_url`; some older paths accept `api_base`.
1880
+ litellm_kwargs["base_url"] = str(api_base)
909
1881
  litellm_kwargs["api_base"] = str(api_base)
910
1882
 
911
- # Handle Structured Output (JSON Mode / Pydantic)
912
- if output_pydantic:
1883
+ # Provider-specific defaults (e.g., LM Studio)
1884
+ model_name_lower = str(model_name_litellm).lower()
1885
+ provider_lower_for_model = provider.lower()
1886
+ is_lm_studio = model_name_lower.startswith('lm_studio/') or provider_lower_for_model == 'lm_studio'
1887
+ is_groq = model_name_lower.startswith('groq/') or provider_lower_for_model == 'groq'
1888
+ if is_lm_studio:
1889
+ # Ensure base_url is set (fallback to env LM_STUDIO_API_BASE or localhost)
1890
+ if not litellm_kwargs.get("base_url"):
1891
+ lm_studio_base = os.getenv("LM_STUDIO_API_BASE", "http://localhost:1234/v1")
1892
+ litellm_kwargs["base_url"] = lm_studio_base
1893
+ litellm_kwargs["api_base"] = lm_studio_base
1894
+ if verbose:
1895
+ logger.info(f"[INFO] Using LM Studio base_url: {lm_studio_base}")
1896
+
1897
+ # Ensure a non-empty api_key; LM Studio accepts any non-empty token (e.g., 'lm-studio')
1898
+ if not litellm_kwargs.get("api_key"):
1899
+ lm_studio_key = os.getenv("LM_STUDIO_API_KEY") or "lm-studio"
1900
+ litellm_kwargs["api_key"] = lm_studio_key
1901
+ if verbose:
1902
+ logger.info("[INFO] Using LM Studio api_key placeholder (set LM_STUDIO_API_KEY to customize).")
1903
+
1904
+ # Handle Structured Output (JSON Mode / Pydantic / JSON Schema)
1905
+ if output_pydantic or output_schema:
913
1906
  # Check if model supports structured output based on CSV flag or LiteLLM check
914
1907
  supports_structured = model_info.get('structured_output', False)
915
1908
  # Optional: Add litellm.supports_response_schema check if CSV flag is unreliable
@@ -918,18 +1911,98 @@ def llm_invoke(
918
1911
  # except: pass # Ignore errors in supports_response_schema check
919
1912
 
920
1913
  if supports_structured:
921
- if verbose:
922
- logger.info(f"[INFO] Requesting structured output (Pydantic: {output_pydantic.__name__}) for {model_name_litellm}")
923
- # Pass the Pydantic model directly if supported, else use json_object
924
- # LiteLLM handles passing Pydantic models for supported providers
925
- litellm_kwargs["response_format"] = output_pydantic
1914
+ if output_pydantic:
1915
+ if verbose:
1916
+ logger.info(f"[INFO] Requesting structured output (Pydantic: {output_pydantic.__name__}) for {model_name_litellm}")
1917
+ # Use json_schema with strict=True to enforce ALL required fields are present
1918
+ # This prevents LLMs from omitting required fields when they think they're not needed
1919
+ schema = output_pydantic.model_json_schema()
1920
+ # Ensure all properties are in required array (OpenAI strict mode requirement)
1921
+ _ensure_all_properties_required(schema)
1922
+ # Add additionalProperties: false for strict mode (required by OpenAI)
1923
+ schema["additionalProperties"] = False
1924
+ response_format = {
1925
+ "type": "json_schema",
1926
+ "json_schema": {
1927
+ "name": output_pydantic.__name__,
1928
+ "schema": schema,
1929
+ "strict": True
1930
+ }
1931
+ }
1932
+ else: # output_schema is set
1933
+ if verbose:
1934
+ logger.info(f"[INFO] Requesting structured output (JSON Schema) for {model_name_litellm}")
1935
+ # LiteLLM expects {"type": "json_schema", "json_schema": {"name": "response", "schema": schema_dict, "strict": true}}
1936
+ # OR for some providers just the schema dict if type is json_object.
1937
+ # Best practice for broad compatibility via LiteLLM is usually the dict directly or wrapped.
1938
+ # For now, let's assume we pass the schema dict as 'response_format' which LiteLLM handles for many providers
1939
+ # or wrap it if needed. LiteLLM 1.40+ supports passing the dict directly for many.
1940
+ response_format = {
1941
+ "type": "json_schema",
1942
+ "json_schema": {
1943
+ "name": "response",
1944
+ "schema": output_schema,
1945
+ "strict": False
1946
+ }
1947
+ }
1948
+ # Add additionalProperties: false for strict mode (required by OpenAI)
1949
+ response_format["json_schema"]["schema"]["additionalProperties"] = False
1950
+
1951
+ litellm_kwargs["response_format"] = response_format
1952
+
1953
+ # LM Studio requires "json_schema" format, not "json_object"
1954
+ # Use extra_body to bypass litellm.drop_params stripping the schema
1955
+ if is_lm_studio and response_format and response_format.get("type") == "json_object":
1956
+ schema = response_format.get("response_schema", {})
1957
+ lm_studio_response_format = {
1958
+ "type": "json_schema",
1959
+ "json_schema": {
1960
+ "name": "response",
1961
+ "strict": True,
1962
+ "schema": schema
1963
+ }
1964
+ }
1965
+ # Use extra_body to bypass drop_params - passes directly to API
1966
+ litellm_kwargs["extra_body"] = {"response_format": lm_studio_response_format}
1967
+ # Remove from regular response_format to avoid conflicts
1968
+ if "response_format" in litellm_kwargs:
1969
+ del litellm_kwargs["response_format"]
1970
+ if verbose:
1971
+ logger.info(f"[INFO] Using extra_body for LM Studio response_format to bypass drop_params")
1972
+
1973
+ # Groq has issues with tool-based structured output - use JSON mode with schema in prompt
1974
+ if is_groq and response_format:
1975
+ # Get the schema to include in system prompt
1976
+ if output_pydantic:
1977
+ schema = output_pydantic.model_json_schema()
1978
+ else:
1979
+ schema = output_schema
1980
+
1981
+ # Use simple json_object mode (Groq's tool_use often fails)
1982
+ litellm_kwargs["response_format"] = {"type": "json_object"}
1983
+
1984
+ # Prepend schema instruction to messages (json module is imported at top of file)
1985
+ schema_instruction = f"You must respond with valid JSON matching this schema:\n```json\n{json.dumps(schema, indent=2)}\n```\nRespond ONLY with the JSON object, no other text."
1986
+
1987
+ # Find or create system message to prepend schema
1988
+ messages_list = litellm_kwargs.get("messages", [])
1989
+ if messages_list and messages_list[0].get("role") == "system":
1990
+ messages_list[0]["content"] = schema_instruction + "\n\n" + messages_list[0]["content"]
1991
+ else:
1992
+ messages_list.insert(0, {"role": "system", "content": schema_instruction})
1993
+ litellm_kwargs["messages"] = messages_list
1994
+
1995
+ if verbose:
1996
+ logger.info(f"[INFO] Using JSON object mode with schema in prompt for Groq (avoiding tool_use issues)")
1997
+
926
1998
  # As a fallback, one could use:
927
1999
  # litellm_kwargs["response_format"] = {"type": "json_object"}
928
2000
  # And potentially enable client-side validation:
929
2001
  # litellm.enable_json_schema_validation = True # Enable globally if needed
930
2002
  else:
2003
+ schema_name = output_pydantic.__name__ if output_pydantic else "JSON Schema"
931
2004
  if verbose:
932
- logger.warning(f"[WARN] Model {model_name_litellm} does not support structured output via CSV flag. Output might not be valid {output_pydantic.__name__}.")
2005
+ logger.warning(f"[WARN] Model {model_name_litellm} does not support structured output via CSV flag. Output might not be valid {schema_name}.")
933
2006
  # Proceed without forcing JSON mode, parsing will be attempted later
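For a concrete picture of the payload the strict branch builds, a toy Pydantic model (hypothetical, for illustration) yields roughly this response_format:

from pydantic import BaseModel

class Joke(BaseModel):                       # illustrative model, not part of pdd
    setup: str
    punchline: str

schema = Joke.model_json_schema()
schema["additionalProperties"] = False
response_format = {
    "type": "json_schema",
    "json_schema": {"name": "Joke", "schema": schema, "strict": True},
}
# With every property listed under "required", strict mode makes the provider
# reject responses that omit either field rather than silently dropping them.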
934
2007
 
935
2008
  # --- NEW REASONING LOGIC ---
@@ -944,7 +2017,9 @@ def llm_invoke(
944
2017
  # Currently known: Anthropic uses 'thinking'
945
2018
  # Model name comparison is more robust than provider string
946
2019
  if provider == 'anthropic': # Check provider column instead of model prefix
947
- litellm_kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
2020
+ thinking_param = {"type": "enabled", "budget_tokens": budget}
2021
+ litellm_kwargs["thinking"] = thinking_param
2022
+ time_kwargs["thinking"] = thinking_param
948
2023
  if verbose:
949
2024
  logger.info(f"[INFO] Requesting Anthropic thinking (budget type) with budget: {budget} tokens for {model_name_litellm}")
950
2025
  else:
@@ -962,10 +2037,32 @@ def llm_invoke(
962
2037
  effort = "high"
963
2038
  elif time > 0.3:
964
2039
  effort = "medium"
965
- # Use the common 'reasoning_effort' param LiteLLM provides
966
- litellm_kwargs["reasoning_effort"] = effort
967
- if verbose:
968
- logger.info(f"[INFO] Requesting reasoning_effort='{effort}' (effort type) for {model_name_litellm} based on time={time}")
2040
+
2041
+ # Map effort parameter per-provider/model family
2042
+ model_lower = str(model_name_litellm).lower()
2043
+ provider_lower = str(provider).lower()
2044
+
2045
+ if provider_lower == 'openai' and model_lower.startswith('gpt-5'):
2046
+ # OpenAI 5-series uses Responses API with nested 'reasoning'
2047
+ reasoning_obj = {"effort": effort, "summary": "auto"}
2048
+ litellm_kwargs["reasoning"] = reasoning_obj
2049
+ time_kwargs["reasoning"] = reasoning_obj
2050
+ if verbose:
2051
+ logger.info(f"[INFO] Requesting OpenAI reasoning.effort='{effort}' for {model_name_litellm} (Responses API)")
2052
+
2053
+ elif provider_lower == 'openai' and model_lower.startswith('o') and 'mini' not in model_lower:
2054
+ # Historical o* models may use LiteLLM's generic reasoning_effort param
2055
+ litellm_kwargs["reasoning_effort"] = effort
2056
+ time_kwargs["reasoning_effort"] = effort
2057
+ if verbose:
2058
+ logger.info(f"[INFO] Requesting reasoning_effort='{effort}' for {model_name_litellm}")
2059
+
2060
+ else:
2061
+ # Fallback to LiteLLM generic param when supported by provider adapter
2062
+ litellm_kwargs["reasoning_effort"] = effort
2063
+ time_kwargs["reasoning_effort"] = effort
2064
+ if verbose:
2065
+ logger.info(f"[INFO] Requesting generic reasoning_effort='{effort}' for {model_name_litellm}")
969
2066
 
970
2067
  elif reasoning_type == 'none':
971
2068
  if verbose:
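The time-to-effort mapping above reduces to a small table; a sketch (the default for small time values is assumed, and provider-specific parameter names are simplified):

def effort_from_time(time: float) -> str:
    if time > 0.6:
        return "high"
    if time > 0.3:
        return "medium"
    return "low"                             # assumed default below 0.3

# effort_from_time(0.25) -> "low", effort_from_time(0.5) -> "medium",
# effort_from_time(0.9) -> "high". gpt-5* models receive it nested as
# {"reasoning": {"effort": ..., "summary": "auto"}}; other models get the
# flat reasoning_effort parameter.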
@@ -997,6 +2094,168 @@ def llm_invoke(
997
2094
  logger.debug("NOT ENABLING CACHING: litellm.cache is None at call time")
998
2095
 
999
2096
 
2097
+ # Route OpenAI gpt-5* models through Responses API to support 'reasoning'
2098
+ model_lower_for_call = str(model_name_litellm).lower()
2099
+ provider_lower_for_call = str(provider).lower()
2100
+
2101
+ if (
2102
+ not use_batch_mode
2103
+ and provider_lower_for_call == 'openai'
2104
+ and model_lower_for_call.startswith('gpt-5')
2105
+ ):
2106
+ if verbose:
2107
+ logger.info(f"[INFO] Calling LiteLLM Responses API for {model_name_litellm}...")
2108
+ try:
2109
+ # Build input text from messages
2110
+ if isinstance(formatted_messages, list) and formatted_messages and isinstance(formatted_messages[0], dict):
2111
+ input_text = "\n\n".join(f"{m.get('role','user')}: {m.get('content','')}" for m in formatted_messages)
2112
+ else:
2113
+ # Fallback: string cast
2114
+ input_text = str(formatted_messages)
2115
+
2116
+ # Derive effort mapping already computed in time_kwargs
2117
+ reasoning_param = time_kwargs.get("reasoning")
2118
+
2119
+ # Build text.format block for structured output
2120
+ # Default to plain text format
2121
+ text_block = {"format": {"type": "text"}}
2122
+
2123
+ # If structured output requested, use text.format with json_schema
2124
+ # This is the correct way to enforce structured output via litellm.responses()
2125
+ if output_pydantic or output_schema:
2126
+ try:
2127
+ if output_pydantic:
2128
+ schema = output_pydantic.model_json_schema()
2129
+ name = output_pydantic.__name__
2130
+ else:
2131
+ schema = output_schema
2132
+ name = "response"
2133
+
2134
+ # Ensure all properties are in required array (OpenAI strict mode requirement)
2135
+ _ensure_all_properties_required(schema)
2136
+ # Add additionalProperties: false for strict mode (required by OpenAI)
2137
+ schema['additionalProperties'] = False
2138
+
2139
+ # Use text.format with json_schema for structured output
2140
+ text_block = {
2141
+ "format": {
2142
+ "type": "json_schema",
2143
+ "name": name,
2144
+ "strict": True,
2145
+ "schema": schema,
2146
+ }
2147
+ }
2148
+ if verbose:
2149
+ logger.info(f"[INFO] Using structured output via text.format for Responses API")
2150
+ except Exception as schema_e:
2151
+ logger.warning(f"[WARN] Failed to derive JSON schema: {schema_e}. Proceeding with plain text format.")
2152
+
2153
+ # Build kwargs for litellm.responses()
2154
+ responses_kwargs = {
2155
+ "model": model_name_litellm,
2156
+ "input": input_text,
2157
+ "text": text_block,
2158
+ }
2159
+ if verbose and temperature not in (None, 0, 0.0):
2160
+ logger.info("[INFO] Skipping 'temperature' for OpenAI GPT-5 Responses call (unsupported by API).")
2161
+ if reasoning_param is not None:
2162
+ responses_kwargs["reasoning"] = reasoning_param
2163
+
2164
+ # Call litellm.responses() which handles the API interaction
2165
+ resp = litellm.responses(**responses_kwargs)
2166
+
2167
+ # Extract text result from response
2168
+ result_text = None
2169
+ try:
2170
+ # LiteLLM responses return output as a list of items
2171
+ for item in resp.output:
2172
+ if getattr(item, 'type', None) == 'message' and hasattr(item, 'content') and item.content:
2173
+ for content_item in item.content:
2174
+ if hasattr(content_item, 'text'):
2175
+ result_text = content_item.text
2176
+ break
2177
+ if result_text:
2178
+ break
2179
+ except Exception:
2180
+ result_text = None
2181
+
2182
+ # Calculate cost using usage + CSV rates
2183
+ total_cost = 0.0
2184
+ usage = getattr(resp, "usage", None)
2185
+ if usage is not None:
2186
+ in_tok = getattr(usage, "input_tokens", 0) or 0
2187
+ out_tok = getattr(usage, "output_tokens", 0) or 0
2188
+ in_rate = model_info.get('input', 0.0) or 0.0
2189
+ out_rate = model_info.get('output', 0.0) or 0.0
2190
+ total_cost = (in_tok * in_rate + out_tok * out_rate) / 1_000_000.0
2191
+
2192
+ # Parse result if Pydantic output requested
2193
+ final_result = None
2194
+ if output_pydantic and result_text:
2195
+ try:
2196
+ final_result = output_pydantic.model_validate_json(result_text)
2197
+ except Exception as e:
2198
+ # With structured output, parsing should succeed
2199
+ # But if it fails, try JSON repair as fallback
2200
+ logger.warning(f"[WARN] Pydantic parse failed on Responses output: {e}. Attempting JSON repair...")
2201
+
2202
+ # Try extracting from fenced JSON blocks first
2203
+ fenced = _extract_fenced_json_block(result_text)
2204
+ candidates: List[str] = []
2205
+ if fenced:
2206
+ candidates.append(fenced)
2207
+ else:
2208
+ candidates.extend(_extract_balanced_json_objects(result_text))
2209
+
2210
+ # Also try the raw text as-is after stripping fences
2211
+ cleaned = result_text.strip()
2212
+ if cleaned.startswith("```json"):
2213
+ cleaned = cleaned[7:]
2214
+ elif cleaned.startswith("```"):
2215
+ cleaned = cleaned[3:]
2216
+ if cleaned.endswith("```"):
2217
+ cleaned = cleaned[:-3]
2218
+ cleaned = cleaned.strip()
2219
+ if cleaned and cleaned not in candidates:
2220
+ candidates.append(cleaned)
2221
+
2222
+ parse_succeeded = False
2223
+ for cand in candidates:
2224
+ try:
2225
+ final_result = output_pydantic.model_validate_json(cand)
2226
+ parse_succeeded = True
2227
+ logger.info(f"[SUCCESS] JSON repair succeeded for Responses output")
2228
+ break
2229
+ except Exception:
2230
+ continue
2231
+
2232
+ if not parse_succeeded:
2233
+ logger.error(f"[ERROR] All JSON repair attempts failed for Responses output. Original error: {e}")
2234
+ final_result = f"ERROR: Failed to parse structured output from Responses API. Raw: {repr(result_text)[:200]}"
2235
+ else:
2236
+ final_result = result_text
2237
+
2238
+ if verbose:
2239
+ logger.info(f"[RESULT] Model Used: {model_name_litellm}")
2240
+ logger.info(f"[RESULT] Total Cost (estimated): ${total_cost:.6g}")
2241
+
2242
+ return {
2243
+ 'result': final_result,
2244
+ 'cost': total_cost,
2245
+ 'model_name': model_name_litellm,
2246
+ 'thinking_output': None,
2247
+ }
2248
+ except Exception as e:
2249
+ last_exception = e
2250
+ logger.error(f"[ERROR] OpenAI Responses call failed for {model_name_litellm}: {e}")
2251
+ # Remove 'reasoning' key to avoid OpenAI Chat API unknown param errors
2252
+ if "reasoning" in litellm_kwargs:
2253
+ try:
2254
+ litellm_kwargs.pop("reasoning", None)
2255
+ except Exception:
2256
+ pass
2257
+ # Fall through to LiteLLM path as a fallback
2258
+
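The cost fallback above is plain arithmetic over the CSV per-million-token rates; for example, with assumed rates of $1.25 / $10.00 per million input/output tokens:

in_tok, out_tok = 12_000, 3_500
in_rate, out_rate = 1.25, 10.00              # illustrative CSV rates
total_cost = (in_tok * in_rate + out_tok * out_rate) / 1_000_000.0
# (15_000 + 35_000) / 1e6 = 0.05 -> roughly $0.05 for the call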
1000
2259
  if use_batch_mode:
1001
2260
  if verbose:
1002
2261
  logger.info(f"[INFO] Calling litellm.batch_completion for {model_name_litellm}...")
@@ -1004,6 +2263,16 @@ def llm_invoke(
1004
2263
 
1005
2264
 
1006
2265
  else:
2266
+ # Anthropic requirement: when 'thinking' is enabled, temperature must be 1
2267
+ try:
2268
+ if provider.lower() == 'anthropic' and 'thinking' in litellm_kwargs:
2269
+ if litellm_kwargs.get('temperature') != 1:
2270
+ if verbose:
2271
+ logger.info("[INFO] Anthropic thinking enabled: forcing temperature=1 for compliance.")
2272
+ litellm_kwargs['temperature'] = 1
2273
+ current_temperature = 1
2274
+ except Exception:
2275
+ pass
1007
2276
  if verbose:
1008
2277
  logger.info(f"[INFO] Calling litellm.completion for {model_name_litellm}...")
1009
2278
  response = litellm.completion(**litellm_kwargs)
@@ -1013,6 +2282,12 @@ def llm_invoke(
1013
2282
  if verbose:
1014
2283
  logger.info(f"[SUCCESS] Invocation successful for {model_name_litellm} (took {end_time - start_time:.2f}s)")
1015
2284
 
2285
+ # Build retry kwargs with provider credentials from litellm_kwargs
2286
+ # Issue #185: Retry calls were missing vertex_location, vertex_project, etc.
2287
+ retry_provider_kwargs = {k: v for k, v in litellm_kwargs.items()
2288
+ if k in ('vertex_credentials', 'vertex_project', 'vertex_location',
2289
+ 'api_key', 'base_url', 'api_base')}
2290
+
1016
2291
  # --- 7. Process Response ---
1017
2292
  results = []
1018
2293
  thinking_outputs = []
@@ -1061,13 +2336,13 @@ def llm_invoke(
1061
2336
  retry_response = litellm.completion(
1062
2337
  model=model_name_litellm,
1063
2338
  messages=retry_messages,
1064
- temperature=temperature,
2339
+ temperature=current_temperature,
1065
2340
  response_format=response_format,
1066
- max_completion_tokens=max_tokens,
1067
- **time_kwargs
2341
+ **time_kwargs,
2342
+ **retry_provider_kwargs # Issue #185: Pass Vertex AI credentials
1068
2343
  )
1069
- # Re-enable cache
1070
- litellm.cache = Cache()
2344
+ # Re-enable cache: restore the originally configured cache (even if it was None)
2345
+ litellm.cache = configured_cache
1071
2346
  # Extract result from retry
1072
2347
  retry_raw_result = retry_response.choices[0].message.content
1073
2348
  if retry_raw_result is not None:
@@ -1085,21 +2360,67 @@ def llm_invoke(
1085
2360
  logger.error(f"[ERROR] Cannot retry - batch mode or missing prompt/input_json")
1086
2361
  results.append("ERROR: LLM returned None content and cannot retry")
1087
2362
  continue
1088
-
1089
- if output_pydantic:
2363
+
2364
+ # Check for malformed JSON response (excessive trailing newlines causing truncation)
2365
+ # This can happen when Gemini generates thousands of \n in JSON string values
2366
+ if isinstance(raw_result, str) and _is_malformed_json_response(raw_result):
2367
+ logger.warning(f"[WARNING] Detected malformed JSON response with excessive trailing newlines for item {i}. Retrying with cache bypass...")
2368
+ if not use_batch_mode and prompt and input_json is not None:
2369
+ # Add a small space to bypass cache
2370
+ modified_prompt = prompt + " "
2371
+ try:
2372
+ retry_messages = _format_messages(modified_prompt, input_json, use_batch_mode)
2373
+ # Disable cache for retry
2374
+ original_cache = litellm.cache
2375
+ litellm.cache = None
2376
+ retry_response = litellm.completion(
2377
+ model=model_name_litellm,
2378
+ messages=retry_messages,
2379
+ temperature=current_temperature,
2380
+ response_format=response_format,
2381
+ **time_kwargs,
2382
+ **retry_provider_kwargs # Issue #185: Pass Vertex AI credentials
2383
+ )
2384
+ # Re-enable cache
2385
+ litellm.cache = original_cache
2386
+ # Extract result from retry
2387
+ retry_raw_result = retry_response.choices[0].message.content
2388
+ if retry_raw_result is not None and not _is_malformed_json_response(retry_raw_result):
2389
+ logger.info(f"[SUCCESS] Cache bypass retry for malformed JSON succeeded for item {i}")
2390
+ raw_result = retry_raw_result
2391
+ else:
2392
+ # Retry also failed, but we'll continue with repair logic below
2393
+ logger.warning(f"[WARNING] Cache bypass retry also returned malformed JSON for item {i}, attempting repair...")
2394
+ except Exception as retry_e:
2395
+ logger.warning(f"[WARNING] Cache bypass retry for malformed JSON failed for item {i}: {retry_e}, attempting repair...")
2396
+ else:
2397
+ logger.warning(f"[WARNING] Cannot retry malformed JSON - batch mode or missing prompt/input_json, attempting repair...")
2398
+
2399
+ if output_pydantic or output_schema:
1090
2400
  parsed_result = None
1091
2401
  json_string_to_parse = None
1092
2402
 
1093
2403
  try:
1094
- # Attempt 1: Check if LiteLLM already parsed it
1095
- if isinstance(raw_result, output_pydantic):
2404
+ # Attempt 1: Check if LiteLLM already parsed it (only for Pydantic)
2405
+ if output_pydantic and isinstance(raw_result, output_pydantic):
1096
2406
  parsed_result = raw_result
1097
2407
  if verbose:
1098
2408
  logger.debug("[DEBUG] Pydantic object received directly from LiteLLM.")
1099
2409
 
1100
2410
  # Attempt 2: Check if raw_result is dict-like and validate
1101
2411
  elif isinstance(raw_result, dict):
1102
- parsed_result = output_pydantic.model_validate(raw_result)
2412
+ if output_pydantic:
2413
+ parsed_result = output_pydantic.model_validate(raw_result)
2414
+ else:
2415
+ # Validate against JSON schema
2416
+ try:
2417
+ import jsonschema
2418
+ jsonschema.validate(instance=raw_result, schema=output_schema)
2419
+ parsed_result = json.dumps(raw_result) # Return as JSON string for consistency
2420
+ except ImportError:
2421
+ logger.warning("jsonschema not installed, skipping validation")
2422
+ parsed_result = json.dumps(raw_result)
2423
+
1103
2424
  if verbose:
1104
2425
  logger.debug("[DEBUG] Validated dictionary-like object directly.")
1105
2426
 
@@ -1107,26 +2428,59 @@ def llm_invoke(
1107
2428
  elif isinstance(raw_result, str):
1108
2429
  json_string_to_parse = raw_result # Start with the raw string
1109
2430
  try:
1110
- # Look for first { and last }
1111
- start_brace = json_string_to_parse.find('{')
1112
- end_brace = json_string_to_parse.rfind('}')
1113
- if start_brace != -1 and end_brace != -1 and end_brace > start_brace:
1114
- potential_json = json_string_to_parse[start_brace:end_brace+1]
1115
- # Basic check if it looks like JSON
1116
- if potential_json.strip().startswith('{') and potential_json.strip().endswith('}'):
1117
- if verbose:
1118
- logger.debug(f"[DEBUG] Attempting to parse extracted JSON block: '{potential_json}'")
1119
- parsed_result = output_pydantic.model_validate_json(potential_json)
1120
- else:
1121
- # If block extraction fails, try cleaning markdown next
1122
- raise ValueError("Extracted block doesn't look like JSON")
2431
+ # 1) Prefer fenced ```json blocks
2432
+ fenced = _extract_fenced_json_block(raw_result)
2433
+ candidates: List[str] = []
2434
+ if fenced:
2435
+ candidates.append(fenced)
1123
2436
  else:
1124
- # If no braces found, try cleaning markdown next
1125
- raise ValueError("Could not find enclosing {}")
1126
- except (json.JSONDecodeError, ValidationError, ValueError) as extraction_error:
2437
+ # 2) Fall back to scanning for balanced JSON objects
2438
+ candidates.extend(_extract_balanced_json_objects(raw_result))
2439
+
2440
+ if not candidates:
2441
+ raise ValueError("No JSON-like content found")
2442
+
2443
+ parse_err: Optional[Exception] = None
2444
+ for cand in candidates:
2445
+ try:
2446
+ if verbose:
2447
+ logger.debug(f"[DEBUG] Attempting to parse candidate JSON block: {cand}")
2448
+
2449
+ if output_pydantic:
2450
+ parsed_result = output_pydantic.model_validate_json(cand)
2451
+ else:
2452
+ # Parse JSON and validate against schema
2453
+ loaded = json.loads(cand)
2454
+ try:
2455
+ import jsonschema
2456
+ jsonschema.validate(instance=loaded, schema=output_schema)
2457
+ except ImportError:
2458
+ pass # Skip validation if lib missing
2459
+ parsed_result = cand # Return string if valid
2460
+
2461
+ json_string_to_parse = cand
2462
+ parse_err = None
2463
+ break
2464
+ except (json.JSONDecodeError, ValidationError, ValueError) as pe:
2465
+ # Also catch jsonschema.ValidationError if imported
2466
+ parse_err = pe
2467
+ try:
2468
+ import jsonschema
2469
+ if isinstance(pe, jsonschema.ValidationError):
2470
+ parse_err = pe
2471
+ except ImportError:
2472
+ pass
2473
+
2474
+ if parsed_result is None:
2475
+ # If none of the candidates parsed, raise last error
2476
+ if parse_err is not None:
2477
+ raise parse_err
2478
+ raise ValueError("Unable to parse any JSON candidates")
2479
+ except (json.JSONDecodeError, ValidationError, ValueError, Exception) as extraction_error:
2480
+ # Catch generic Exception to handle jsonschema errors without explicit import here
1127
2481
  if verbose:
1128
- logger.debug(f"[DEBUG] JSON block extraction/validation failed ('{extraction_error}'). Trying markdown cleaning.")
1129
- # Fallback: Clean markdown fences and retry JSON validation
2482
+ logger.debug(f"[DEBUG] JSON extraction/validation failed ('{extraction_error}'). Trying fence cleaning.")
2483
+ # Last resort: strip any leading/trailing code fences and retry
1130
2484
  cleaned_result_str = raw_result.strip()
1131
2485
  if cleaned_result_str.startswith("```json"):
1132
2486
  cleaned_result_str = cleaned_result_str[7:]
@@ -1135,35 +2489,173 @@ def llm_invoke(
1135
2489
  if cleaned_result_str.endswith("```"):
1136
2490
  cleaned_result_str = cleaned_result_str[:-3]
1137
2491
  cleaned_result_str = cleaned_result_str.strip()
1138
- # Check again if it looks like JSON before parsing
1139
- if cleaned_result_str.startswith('{') and cleaned_result_str.endswith('}'):
2492
+ # Check for complete JSON object or array
2493
+ is_complete_object = cleaned_result_str.startswith('{') and cleaned_result_str.endswith('}')
2494
+ is_complete_array = cleaned_result_str.startswith('[') and cleaned_result_str.endswith(']')
2495
+ if is_complete_object or is_complete_array:
2496
+ if verbose:
2497
+ logger.debug(f"[DEBUG] Attempting parse after generic fence cleaning. Cleaned string: '{cleaned_result_str}'")
2498
+ json_string_to_parse = cleaned_result_str
2499
+
2500
+ if output_pydantic:
2501
+ parsed_result = output_pydantic.model_validate_json(json_string_to_parse)
2502
+ else:
2503
+ loaded = json.loads(json_string_to_parse)
2504
+ try:
2505
+ import jsonschema
2506
+ jsonschema.validate(instance=loaded, schema=output_schema)
2507
+ except ImportError:
2508
+ pass
2509
+ parsed_result = json_string_to_parse
2510
+ elif cleaned_result_str.startswith('{') or cleaned_result_str.startswith('['):
2511
+ # Attempt to repair truncated JSON (e.g., missing closing braces)
2512
+ # This can happen when Gemini generates excessive trailing content
2513
+ # that causes token limit truncation
1140
2514
  if verbose:
1141
- logger.debug(f"[DEBUG] Attempting parse after cleaning markdown fences. Cleaned string: '{cleaned_result_str}'")
1142
- json_string_to_parse = cleaned_result_str # Update string for error reporting
1143
- parsed_result = output_pydantic.model_validate_json(json_string_to_parse)
2515
+ logger.debug(f"[DEBUG] JSON appears truncated (missing closing brace). Attempting repair.")
2516
+
2517
+ # Try to find the last valid JSON structure
2518
+ # For simple schemas like {"extracted_code": "..."}, we can try to close it
2519
+ repaired = cleaned_result_str.rstrip()
2520
+
2521
+ # Strip trailing escaped newline sequences (\\n in the JSON string)
2522
+ # These appear as literal backslash-n when Gemini generates excessive newlines
2523
+ while repaired.endswith('\\n'):
2524
+ repaired = repaired[:-2]
2525
+ # Also strip trailing literal backslashes that might be orphaned
2526
+ repaired = repaired.rstrip('\\')
2527
+
2528
+ # If we're in the middle of a string value, try to close it
2529
+ # Count unescaped quotes to determine if we're inside a string
2530
+ # Simple heuristic: if it ends without proper closure, add closing
2531
+ is_array = cleaned_result_str.startswith('[')
2532
+ expected_end = ']' if is_array else '}'
2533
+ if not repaired.endswith(expected_end):
2534
+ # Try adding various closures to repair
2535
+ if is_array:
2536
+ repair_attempts = [
2537
+ repaired + '}]', # Close object and array
2538
+ repaired + '"}]', # Close string, object and array
2539
+ repaired + '"}}]', # Close string, nested object and array
2540
+ repaired.rstrip(',') + ']', # Remove trailing comma and close array
2541
+ repaired.rstrip('"') + '"}]', # Handle partial string end
2542
+ ]
2543
+ else:
2544
+ repair_attempts = [
2545
+ repaired + '"}', # Close string and object
2546
+ repaired + '"}\n}', # Close string and nested object
2547
+ repaired + '"}}}', # Deeper nesting
2548
+ repaired.rstrip(',') + '}', # Remove trailing comma
2549
+ repaired.rstrip('"') + '"}', # Handle partial string end
2550
+ ]
2551
+
2552
+ for attempt in repair_attempts:
2553
+ try:
2554
+ if output_pydantic:
2555
+ parsed_result = output_pydantic.model_validate_json(attempt)
2556
+ else:
2557
+ loaded = json.loads(attempt)
2558
+ try:
2559
+ import jsonschema
2560
+ jsonschema.validate(instance=loaded, schema=output_schema)
2561
+ except ImportError:
2562
+ pass
2563
+ parsed_result = attempt
2564
+
2565
+ if verbose:
2566
+ logger.info(f"[INFO] Successfully repaired truncated JSON response")
2567
+ json_string_to_parse = attempt
2568
+ break
2569
+ except (json.JSONDecodeError, ValidationError, ValueError):
2570
+ continue
2571
+
2572
+ if parsed_result is None:
2573
+ raise ValueError("Content after cleaning doesn't look like JSON (and repair attempts failed)")
1144
2574
  else:
1145
- # If still doesn't look like JSON, raise error
1146
- raise ValueError("Content after cleaning markdown doesn't look like JSON")
2575
+ raise ValueError("Content after cleaning doesn't look like JSON")
1147
2576
 
1148
2577
 
1149
2578
  # Check if any parsing attempt succeeded
1150
2579
  if parsed_result is None:
2580
+ target_name = output_pydantic.__name__ if output_pydantic else "JSON Schema"
1151
2581
  # This case should ideally be caught by exceptions above, but as a safeguard:
1152
- raise TypeError(f"Raw result type {type(raw_result)} or content could not be validated/parsed against {output_pydantic.__name__}.")
2582
+ raise TypeError(f"Raw result type {type(raw_result)} or content could not be validated/parsed against {target_name}.")
1153
2583
 
1154
- except (ValidationError, json.JSONDecodeError, TypeError, ValueError) as parse_error:
1155
- logger.error(f"[ERROR] Failed to parse response into Pydantic model {output_pydantic.__name__} for item {i}: {parse_error}")
2584
+ except (ValidationError, json.JSONDecodeError, TypeError, ValueError, Exception) as parse_error:
2585
+ target_name = output_pydantic.__name__ if output_pydantic else "JSON Schema"
2586
+ logger.error(f"[ERROR] Failed to parse response into {target_name} for item {i}: {parse_error}")
1156
2587
  # Use the string that was last attempted for parsing in the error message
1157
2588
  error_content = json_string_to_parse if json_string_to_parse is not None else raw_result
1158
- logger.error("[ERROR] Content attempted for parsing: %s", repr(error_content)) # CORRECTED (or use f-string)
1159
- results.append(f"ERROR: Failed to parse Pydantic. Raw: {repr(raw_result)}")
1160
- continue # Skip appending result below if parsing failed
2589
+ logger.error("[ERROR] Content attempted for parsing: %s", repr(error_content))
2590
+ # Issue #168: Raise SchemaValidationError to trigger model fallback
2591
+ # Previously this used `continue` which only skipped to the next batch item
2592
+ raise SchemaValidationError(
2593
+ f"Failed to parse response into {target_name}: {parse_error}",
2594
+ raw_response=raw_result,
2595
+ item_index=i
2596
+ ) from parse_error
2597
+
2598
+ # Post-process: unescape newlines and repair Python syntax
2599
+ _unescape_code_newlines(parsed_result)
2600
+
2601
+ # Check if code fields still have invalid Python syntax after repair
2602
+ # If so, retry without cache to get a fresh response
2603
+ # Skip validation for non-Python languages to avoid false positives
2604
+ if language in (None, "python") and _has_invalid_python_code(parsed_result):
2605
+ logger.warning(f"[WARNING] Detected invalid Python syntax in code fields for item {i} after repair. Retrying with cache bypass...")
2606
+ if not use_batch_mode and prompt and input_json is not None:
2607
+ # Add a small variation to bypass cache
2608
+ modified_prompt = prompt + " " # Two spaces to differentiate from other retries
2609
+ try:
2610
+ retry_messages = _format_messages(modified_prompt, input_json, use_batch_mode)
2611
+ # Disable cache for retry
2612
+ original_cache = litellm.cache
2613
+ litellm.cache = None
2614
+ retry_response = litellm.completion(
2615
+ model=model_name_litellm,
2616
+ messages=retry_messages,
2617
+ temperature=current_temperature,
2618
+ response_format=response_format,
2619
+ **time_kwargs,
2620
+ **retry_provider_kwargs # Issue #185: Pass Vertex AI credentials
2621
+ )
2622
+ # Re-enable cache
2623
+ litellm.cache = original_cache
2624
+ # Extract and re-parse the retry result
2625
+ retry_raw_result = retry_response.choices[0].message.content
2626
+ if retry_raw_result is not None:
2627
+ # Re-parse the retry result
2628
+ retry_parsed = None
2629
+ if output_pydantic:
2630
+ if isinstance(retry_raw_result, output_pydantic):
2631
+ retry_parsed = retry_raw_result
2632
+ elif isinstance(retry_raw_result, dict):
2633
+ retry_parsed = output_pydantic.model_validate(retry_raw_result)
2634
+ elif isinstance(retry_raw_result, str):
2635
+ retry_parsed = output_pydantic.model_validate_json(retry_raw_result)
2636
+ elif output_schema and isinstance(retry_raw_result, str):
2637
+ retry_parsed = retry_raw_result # Keep as string for schema validation
2638
+
2639
+ if retry_parsed is not None:
2640
+ _unescape_code_newlines(retry_parsed)
2641
+ if not _has_invalid_python_code(retry_parsed):
2642
+ logger.info(f"[SUCCESS] Cache bypass retry for invalid Python code succeeded for item {i}")
2643
+ parsed_result = retry_parsed
2644
+ else:
2645
+ logger.warning(f"[WARNING] Cache bypass retry still has invalid Python code for item {i}, using original")
2646
+ else:
2647
+ logger.warning(f"[WARNING] Cache bypass retry returned unparseable result for item {i}")
2648
+ else:
2649
+ logger.warning(f"[WARNING] Cache bypass retry returned None for item {i}")
2650
+ except Exception as retry_e:
2651
+ logger.warning(f"[WARNING] Cache bypass retry for invalid Python code failed for item {i}: {retry_e}")
2652
+ else:
2653
+ logger.warning(f"[WARNING] Cannot retry invalid Python code - batch mode or missing prompt/input_json")
1161
2654
 
1162
- # If parsing succeeded, append the parsed_result
1163
2655
  results.append(parsed_result)
1164
2656
 
1165
2657
  else:
1166
- # If output_pydantic was not requested, append the raw result
2658
+ # If output_pydantic/schema was not requested, append the raw result
1167
2659
  results.append(raw_result)
1168
2660
 
1169
2661
  except (AttributeError, IndexError) as e:
@@ -1241,15 +2733,53 @@ def llm_invoke(
1241
2733
  logger.warning(f"[AUTH ERROR] Authentication failed for {model_name_litellm} using existing key '{api_key_name}'. Trying next model.")
1242
2734
  break # Break inner loop, try next model candidate
1243
2735
 
2736
+ except SchemaValidationError as e:
2737
+ # Issue #168: Schema validation failures now trigger model fallback
2738
+ last_exception = e
2739
+ logger.warning(f"[SCHEMA ERROR] Validation failed for {model_name_litellm}: {e}. Trying next model.")
2740
+ if verbose:
2741
+ logger.debug(f"Raw response that failed validation: {repr(e.raw_response)}")
2742
+ break # Break inner loop, try next model candidate
2743
+
1244
2744
  except (openai.RateLimitError, openai.APITimeoutError, openai.APIConnectionError,
1245
2745
  openai.APIStatusError, openai.BadRequestError, openai.InternalServerError,
1246
2746
  Exception) as e: # Catch generic Exception last
1247
2747
  last_exception = e
1248
2748
  error_type = type(e).__name__
2749
+ error_str = str(e)
2750
+
2751
+ # Provider-specific handling for Anthropic temperature + thinking rules.
2752
+ # Two scenarios we auto-correct:
2753
+ # 1) temperature==1 without thinking -> retry with 0.99
2754
+ # 2) thinking enabled but temperature!=1 -> retry with 1
2755
+ lower_err = error_str.lower()
2756
+ if (not temp_adjustment_done) and ("temperature" in lower_err) and ("thinking" in lower_err):
2757
+ anthropic_thinking_sent = ('thinking' in litellm_kwargs) and (provider.lower() == 'anthropic')
2758
+ # Decide direction of adjustment based on whether thinking was enabled in the call
2759
+ if anthropic_thinking_sent:
2760
+ # thinking enabled -> force temperature=1
2761
+ adjusted_temp = 1
2762
+ logger.warning(
2763
+ f"[WARN] {model_name_litellm}: Anthropic with thinking requires temperature=1. "
2764
+ f"Retrying with temperature={adjusted_temp}."
2765
+ )
2766
+ else:
2767
+ # thinking not enabled -> avoid temperature=1
2768
+ adjusted_temp = 0.99
2769
+ logger.warning(
2770
+ f"[WARN] {model_name_litellm}: Provider rejected temperature=1 without thinking. "
2771
+ f"Retrying with temperature={adjusted_temp}."
2772
+ )
2773
+ current_temperature = adjusted_temp
2774
+ temp_adjustment_done = True
2775
+ retry_with_same_model = True
2776
+ if verbose:
2777
+ logger.debug(f"Retrying {model_name_litellm} with adjusted temperature {current_temperature}")
2778
+ continue
2779
+
1249
2780
  logger.error(f"[ERROR] Invocation failed for {model_name_litellm} ({error_type}): {e}. Trying next model.")
1250
2781
  # Log more details in verbose mode
1251
2782
  if verbose:
1252
- # import traceback # Not needed if using exc_info=True
1253
2783
  logger.debug(f"Detailed exception traceback for {model_name_litellm}:", exc_info=True)
1254
2784
  break # Break inner loop, try next model candidate
1255
2785
 
@@ -1277,7 +2807,7 @@ if __name__ == "__main__":
1277
2807
  response = llm_invoke(
1278
2808
  prompt="Tell me a short joke about {topic}.",
1279
2809
  input_json={"topic": "programmers"},
1280
- strength=0.5, # Use base model (gpt-4.1-nano)
2810
+ strength=0.5, # Use base model (gpt-5-nano)
1281
2811
  temperature=0.7,
1282
2812
  verbose=True
1283
2813
  )
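A sketch of exercising the new output_schema path added in this version (the schema contents are illustrative):

schema = {
    "type": "object",
    "properties": {
        "setup": {"type": "string"},
        "punchline": {"type": "string"},
    },
    "required": ["setup", "punchline"],
}
response = llm_invoke(
    prompt="Tell me a short joke about {topic}.",
    input_json={"topic": "compilers"},
    strength=0.5,
    output_schema=schema,        # raw JSON schema instead of a Pydantic model
    verbose=True,
)
# response['result'] is a JSON string, validated against the schema when the
# optional jsonschema package is installed.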
@@ -1358,7 +2888,7 @@ if __name__ == "__main__":
1358
2888
  {"role": "system", "content": "You are a helpful assistant."},
1359
2889
  {"role": "user", "content": "What is the capital of France?"}
1360
2890
  ]
1361
- # Strength 0.5 should select gpt-4.1-nano
2891
+ # Strength 0.5 should select gpt-5-nano
1362
2892
  response_messages = llm_invoke(
1363
2893
  messages=custom_messages,
1364
2894
  strength=0.5,