hindsight-api 0.0.21__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
Files changed (48)
  1. hindsight_api/__init__.py +10 -2
  2. hindsight_api/alembic/README +1 -0
  3. hindsight_api/alembic/env.py +146 -0
  4. hindsight_api/alembic/script.py.mako +28 -0
  5. hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +274 -0
  6. hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +70 -0
  7. hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +39 -0
  8. hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +48 -0
  9. hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +62 -0
  10. hindsight_api/alembic/versions/rename_personality_to_disposition.py +65 -0
  11. hindsight_api/api/__init__.py +2 -4
  12. hindsight_api/api/http.py +112 -164
  13. hindsight_api/api/mcp.py +2 -1
  14. hindsight_api/config.py +154 -0
  15. hindsight_api/engine/__init__.py +7 -2
  16. hindsight_api/engine/cross_encoder.py +225 -16
  17. hindsight_api/engine/embeddings.py +198 -19
  18. hindsight_api/engine/entity_resolver.py +56 -29
  19. hindsight_api/engine/llm_wrapper.py +147 -106
  20. hindsight_api/engine/memory_engine.py +337 -192
  21. hindsight_api/engine/response_models.py +15 -17
  22. hindsight_api/engine/retain/bank_utils.py +25 -35
  23. hindsight_api/engine/retain/entity_processing.py +5 -5
  24. hindsight_api/engine/retain/fact_extraction.py +86 -24
  25. hindsight_api/engine/retain/fact_storage.py +1 -1
  26. hindsight_api/engine/retain/link_creation.py +12 -6
  27. hindsight_api/engine/retain/link_utils.py +50 -56
  28. hindsight_api/engine/retain/observation_regeneration.py +264 -0
  29. hindsight_api/engine/retain/orchestrator.py +31 -44
  30. hindsight_api/engine/retain/types.py +14 -0
  31. hindsight_api/engine/search/reranking.py +6 -10
  32. hindsight_api/engine/search/retrieval.py +2 -2
  33. hindsight_api/engine/search/think_utils.py +59 -30
  34. hindsight_api/engine/search/tracer.py +1 -1
  35. hindsight_api/main.py +201 -0
  36. hindsight_api/migrations.py +61 -39
  37. hindsight_api/models.py +1 -2
  38. hindsight_api/pg0.py +17 -36
  39. hindsight_api/server.py +43 -0
  40. {hindsight_api-0.0.21.dist-info → hindsight_api-0.1.1.dist-info}/METADATA +2 -3
  41. hindsight_api-0.1.1.dist-info/RECORD +60 -0
  42. hindsight_api-0.1.1.dist-info/entry_points.txt +2 -0
  43. hindsight_api/cli.py +0 -128
  44. hindsight_api/web/__init__.py +0 -12
  45. hindsight_api/web/server.py +0 -109
  46. hindsight_api-0.0.21.dist-info/RECORD +0 -50
  47. hindsight_api-0.0.21.dist-info/entry_points.txt +0 -2
  48. {hindsight_api-0.0.21.dist-info → hindsight_api-0.1.1.dist-info}/WHEEL +0 -0
hindsight_api/engine/llm_wrapper.py
@@ -5,12 +5,15 @@ import os
 import time
 import asyncio
 from typing import Optional, Any, Dict, List
-from openai import AsyncOpenAI, RateLimitError, APIError, APIStatusError, LengthFinishReasonError
+from openai import AsyncOpenAI, RateLimitError, APIError, APIStatusError, APIConnectionError, LengthFinishReasonError
 from google import genai
 from google.genai import types as genai_types
 from google.genai import errors as genai_errors
 import logging
 
+# Seed applied to every Groq request for deterministic behavior.
+DEFAULT_LLM_SEED = 4242
+
 logger = logging.getLogger(__name__)
 
 # Disable httpx logging
@@ -31,8 +34,12 @@ class OutputTooLongError(Exception):
     pass
 
 
-class LLMConfig:
-    """Configuration for an LLM provider."""
+class LLMProvider:
+    """
+    Unified LLM provider.
+
+    Supports OpenAI, Groq, Ollama (OpenAI-compatible), and Gemini.
+    """
 
     def __init__(
         self,
@@ -40,25 +47,29 @@
         api_key: str,
         base_url: str,
         model: str,
+        reasoning_effort: str = "low",
     ):
         """
-        Initialize LLM configuration.
+        Initialize LLM provider.
 
         Args:
-            provider: Provider name ("openai", "groq", "ollama"). Required.
-            api_key: API key. Required.
-            base_url: Base URL. Required.
-            model: Model name. Required.
+            provider: Provider name ("openai", "groq", "ollama", "gemini").
+            api_key: API key.
+            base_url: Base URL for the API.
+            model: Model name.
+            reasoning_effort: Reasoning effort level for supported providers.
         """
         self.provider = provider.lower()
         self.api_key = api_key
         self.base_url = base_url
         self.model = model
+        self.reasoning_effort = reasoning_effort
 
         # Validate provider
-        if self.provider not in ["openai", "groq", "ollama", "gemini"]:
+        valid_providers = ["openai", "groq", "ollama", "gemini"]
+        if self.provider not in valid_providers:
             raise ValueError(
-                f"Invalid LLM provider: {self.provider}. Must be 'openai', 'groq', 'ollama', or 'gemini'."
+                f"Invalid LLM provider: {self.provider}. Must be one of: {', '.join(valid_providers)}"
             )
 
         # Set default base URLs
@@ -69,24 +80,18 @@
             self.base_url = "http://localhost:11434/v1"
 
         # Validate API key (not needed for ollama)
-        if self.provider not in ["ollama"] and not self.api_key:
-            raise ValueError(
-                f"API key not found for {self.provider}"
-            )
+        if self.provider != "ollama" and not self.api_key:
+            raise ValueError(f"API key not found for {self.provider}")
 
-        # Create client (private - use .call() method instead)
-        # Disable automatic retries - we handle retries in the call() method
+        # Create client based on provider
         if self.provider == "gemini":
            self._gemini_client = genai.Client(api_key=self.api_key)
-            self._client = None  # Not used for Gemini
+            self._client = None
         elif self.provider == "ollama":
             self._client = AsyncOpenAI(api_key="ollama", base_url=self.base_url, max_retries=0)
             self._gemini_client = None
-        elif self.base_url:
-            self._client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url, max_retries=0)
-            self._gemini_client = None
         else:
-            self._client = AsyncOpenAI(api_key=self.api_key, max_retries=0)
+            self._client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url, max_retries=0)
             self._gemini_client = None
 
         logger.info(
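
With the constructor complete, here is a minimal construction sketch against the new signature. The key and model values are illustrative placeholders, not package defaults:

```python
# Hypothetical usage of the renamed class; values are illustrative.
from hindsight_api.engine.llm_wrapper import LLMProvider

provider = LLMProvider(
    provider="groq",
    api_key="gsk_...",            # required for every provider except "ollama"
    base_url="",                  # groq and ollama get a default base URL filled in
    model="openai/gpt-oss-120b",
    reasoning_effort="low",       # new parameter; forwarded to providers that support it
)
```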
@@ -97,125 +102,141 @@ class LLMConfig:
         self,
         messages: List[Dict[str, str]],
         response_format: Optional[Any] = None,
+        max_completion_tokens: Optional[int] = None,
+        temperature: Optional[float] = None,
         scope: str = "memory",
         max_retries: int = 10,
         initial_backoff: float = 1.0,
         max_backoff: float = 60.0,
         skip_validation: bool = False,
-        **kwargs
     ) -> Any:
         """
-        Make an LLM API call with consistent configuration and retry logic.
+        Make an LLM API call with retry logic.
 
         Args:
-            messages: List of message dicts with 'role' and 'content'
-            response_format: Optional Pydantic model for structured output
-            scope: Scope identifier (e.g., 'memory', 'judge') for future tracking
-            max_retries: Maximum number of retry attempts (default: 5)
-            initial_backoff: Initial backoff time in seconds (default: 1.0)
-            max_backoff: Maximum backoff time in seconds (default: 60.0)
-            **kwargs: Additional parameters to pass to the API (temperature, max_tokens, etc.)
+            messages: List of message dicts with 'role' and 'content'.
+            response_format: Optional Pydantic model for structured output.
+            max_completion_tokens: Maximum tokens in response.
+            temperature: Sampling temperature (0.0-2.0).
+            scope: Scope identifier for tracking.
+            max_retries: Maximum retry attempts.
+            initial_backoff: Initial backoff time in seconds.
+            max_backoff: Maximum backoff time in seconds.
+            skip_validation: Return raw JSON without Pydantic validation.
 
         Returns:
-            Parsed response if response_format is provided, otherwise the text content
+            Parsed response if response_format is provided, otherwise text content.
 
         Raises:
-            Exception: Re-raises any API errors after all retries are exhausted
+            OutputTooLongError: If output exceeds token limits.
+            Exception: Re-raises API errors after retries exhausted.
         """
-        # Use global semaphore to limit concurrent requests
         async with _global_llm_semaphore:
             start_time = time.time()
             import json
 
             # Handle Gemini provider separately
             if self.provider == "gemini":
-                return await self._call_gemini(messages, response_format, max_retries, initial_backoff, max_backoff, skip_validation, start_time, **kwargs)
+                return await self._call_gemini(
+                    messages, response_format, max_retries, initial_backoff,
+                    max_backoff, skip_validation, start_time
+                )
 
             call_params = {
                 "model": self.model,
                 "messages": messages,
-                **kwargs
             }
+
+            if max_completion_tokens is not None:
+                call_params["max_completion_tokens"] = max_completion_tokens
+            if temperature is not None:
+                call_params["temperature"] = temperature
+
+            # Provider-specific parameters
             if self.provider == "groq":
+                call_params["seed"] = DEFAULT_LLM_SEED
                 call_params["extra_body"] = {
                     "service_tier": "auto",
-                    "reasoning_effort": "low",  # Reduce reasoning overhead
-                    "include_reasoning": False,  # Disable hidden reasoning tokens
+                    "reasoning_effort": self.reasoning_effort,
+                    "include_reasoning": False,
                 }
 
             last_exception = None
 
             for attempt in range(max_retries + 1):
                 try:
-                    # Use the appropriate response format
                     if response_format is not None:
-                        # Use JSON mode instead of strict parse for flexibility with optional fields
-                        # This allows the LLM to omit optional fields without validation errors
-
-                        # Add schema to the system message
+                        # Add schema to system message for JSON mode
                         if hasattr(response_format, 'model_json_schema'):
                             schema = response_format.model_json_schema()
                             schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"
 
-                            # Add schema to the system message if present, otherwise prepend as user message
                             if call_params['messages'] and call_params['messages'][0].get('role') == 'system':
                                 call_params['messages'][0]['content'] += schema_msg
-                            else:
-                                # No system message, add schema instruction to first user message
-                                if call_params['messages']:
-                                    call_params['messages'][0]['content'] = schema_msg + "\n\n" + call_params['messages'][0]['content']
+                            elif call_params['messages']:
+                                call_params['messages'][0]['content'] = schema_msg + "\n\n" + call_params['messages'][0]['content']
 
                         call_params['response_format'] = {"type": "json_object"}
                         response = await self._client.chat.completions.create(**call_params)
 
-                        # Parse the JSON response
                         content = response.choices[0].message.content
                         json_data = json.loads(content)
 
-                        # Return raw JSON if skip_validation is True, otherwise validate with Pydantic
                         if skip_validation:
                             result = json_data
                         else:
                             result = response_format.model_validate(json_data)
                     else:
-                        # Standard completion and return text content
                        response = await self._client.chat.completions.create(**call_params)
                        result = response.choices[0].message.content
 
-                    # Log call details only if it takes more than 5 seconds
+                    # Log slow calls
                     duration = time.time() - start_time
                     usage = response.usage
                     if duration > 10.0:
                         ratio = max(1, usage.completion_tokens) / usage.prompt_tokens
+                        cached_tokens = 0
+                        if hasattr(usage, 'prompt_tokens_details') and usage.prompt_tokens_details:
+                            cached_tokens = getattr(usage.prompt_tokens_details, 'cached_tokens', 0) or 0
+                        cache_info = f", cached_tokens={cached_tokens}" if cached_tokens > 0 else ""
                         logger.info(
                             f"slow llm call: model={self.provider}/{self.model}, "
                             f"input_tokens={usage.prompt_tokens}, output_tokens={usage.completion_tokens}, "
-                            f"total_tokens={usage.total_tokens}, time={duration:.3f}s, ratio out/in={ratio:.2f}"
+                            f"total_tokens={usage.total_tokens}{cache_info}, time={duration:.3f}s, ratio out/in={ratio:.2f}"
                         )
 
                     return result
 
                 except LengthFinishReasonError as e:
-                    # Output exceeded token limits - raise bridge exception for caller to handle
                     logger.warning(f"LLM output exceeded token limits: {str(e)}")
                     raise OutputTooLongError(
                         f"LLM output exceeded token limits. Input may need to be split into smaller chunks."
                     ) from e
 
+                except APIConnectionError as e:
+                    last_exception = e
+                    if attempt < max_retries:
+                        logger.warning(f"Connection error, retrying... (attempt {attempt + 1}/{max_retries + 1})")
+                        backoff = min(initial_backoff * (2 ** attempt), max_backoff)
+                        await asyncio.sleep(backoff)
+                        continue
+                    else:
+                        logger.error(f"Connection error after {max_retries + 1} attempts: {str(e)}")
+                        raise
+
                 except APIStatusError as e:
+                    # Fast fail on 4xx client errors (except 429 rate limit and 498 which is treated as server error)
+                    if 400 <= e.status_code < 500 and e.status_code not in (429, 498):
+                        logger.error(f"Client error (HTTP {e.status_code}), not retrying: {str(e)}")
+                        raise
+
                     last_exception = e
                     if attempt < max_retries:
-                        # Calculate exponential backoff with jitter
                         backoff = min(initial_backoff * (2 ** attempt), max_backoff)
-                        # Add jitter (±20%)
                         jitter = backoff * 0.2 * (2 * (time.time() % 1) - 1)
                         sleep_time = backoff + jitter
-
-                        # Only log if it's a non-retryable error or final attempt
-                        # Silent retry for common transient errors like capacity exceeded
                         await asyncio.sleep(sleep_time)
                     else:
-                        # Log only on final failed attempt
                         logger.error(f"API error after {max_retries + 1} attempts: {str(e)}")
                         raise
@@ -223,7 +244,6 @@ class LLMConfig:
                     logger.error(f"Unexpected error during LLM call: {type(e).__name__}: {str(e)}")
                     raise
 
-        # This should never be reached, but just in case
         if last_exception:
             raise last_exception
         raise RuntimeError(f"LLM call failed after all retries with no exception captured")
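
With `**kwargs` removed, callers now pass `max_completion_tokens` and `temperature` explicitly. A hypothetical call site; the `FactList` model is invented for illustration and is not part of the package:

```python
from pydantic import BaseModel

class FactList(BaseModel):
    facts: list[str]

async def extract_facts(provider, text: str) -> FactList:
    # response_format triggers JSON mode: the schema is appended to the
    # system message and the reply is validated with Pydantic.
    return await provider.call(
        messages=[
            {"role": "system", "content": "Extract discrete facts."},
            {"role": "user", "content": text},
        ],
        response_format=FactList,
        max_completion_tokens=512,
        temperature=0.0,
    )
```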
@@ -237,13 +257,11 @@ class LLMConfig:
         max_backoff: float,
         skip_validation: bool,
         start_time: float,
-        **kwargs
     ) -> Any:
-        """Handle Gemini-specific API calls using google-genai SDK."""
+        """Handle Gemini-specific API calls."""
         import json
 
         # Convert OpenAI-style messages to Gemini format
-        # Gemini uses 'user' and 'model' roles, and system instructions are separate
         system_instruction = None
         gemini_contents = []
 
@@ -252,7 +270,6 @@ class LLMConfig:
             content = msg.get('content', '')
 
             if role == 'system':
-                # Accumulate system messages as system instruction
                 if system_instruction:
                     system_instruction += "\n\n" + content
                 else:
@@ -262,7 +279,7 @@ class LLMConfig:
                     role="model",
                     parts=[genai_types.Part(text=content)]
                 ))
-            else:  # user or any other role
+            else:
                 gemini_contents.append(genai_types.Content(
                     role="user",
                     parts=[genai_types.Part(text=content)]
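
The hunks above finish the OpenAI-to-Gemini message mapping: system messages accumulate into a separate system instruction, assistant turns become role "model", and everything else becomes role "user". Condensed into a standalone sketch of the same logic:

```python
from google.genai import types as genai_types

def to_gemini(messages: list[dict]) -> tuple:
    # Sketch of the conversion performed in _call_gemini.
    system_instruction, contents = None, []
    for msg in messages:
        role, text = msg.get("role", "user"), msg.get("content", "")
        if role == "system":
            system_instruction = text if system_instruction is None else system_instruction + "\n\n" + text
        else:
            contents.append(genai_types.Content(
                role="model" if role == "assistant" else "user",
                parts=[genai_types.Part(text=text)],
            ))
    return system_instruction, contents
```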
@@ -281,12 +298,9 @@ class LLMConfig:
         config_kwargs = {}
         if system_instruction:
             config_kwargs['system_instruction'] = system_instruction
-        if 'temperature' in kwargs:
-            config_kwargs['temperature'] = kwargs['temperature']
-        if 'max_tokens' in kwargs:
-            config_kwargs['max_output_tokens'] = kwargs['max_tokens']
         if response_format is not None:
             config_kwargs['response_mime_type'] = 'application/json'
+            config_kwargs['response_schema'] = response_format
 
         generation_config = genai_types.GenerateContentConfig(**config_kwargs) if config_kwargs else None
 
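
Dropping the temperature/max_tokens plumbing follows from the `**kwargs` removal; the substantive addition is `response_schema`, which google-genai accepts as a Pydantic model and uses to constrain generation. A minimal sketch, reusing the illustrative `FactList` from earlier:

```python
from pydantic import BaseModel
from google.genai import types as genai_types

class FactList(BaseModel):
    facts: list[str]

# Unlike the OpenAI-compatible path, which injects the JSON schema into the
# prompt and validates client-side, Gemini enforces the schema during decoding.
config = genai_types.GenerateContentConfig(
    response_mime_type="application/json",
    response_schema=FactList,
)
```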
@@ -302,11 +316,24 @@ class LLMConfig:
 
                 content = response.text
 
+                # Handle empty response
+                if content is None:
+                    block_reason = None
+                    if hasattr(response, 'candidates') and response.candidates:
+                        candidate = response.candidates[0]
+                        if hasattr(candidate, 'finish_reason'):
+                            block_reason = candidate.finish_reason
+
+                    if attempt < max_retries:
+                        logger.warning(f"Gemini returned empty response (reason: {block_reason}), retrying...")
+                        backoff = min(initial_backoff * (2 ** attempt), max_backoff)
+                        await asyncio.sleep(backoff)
+                        continue
+                    else:
+                        raise RuntimeError(f"Gemini returned empty response after {max_retries + 1} attempts")
+
                 if response_format is not None:
-                    # Parse the JSON response
                     json_data = json.loads(content)
-
-                    # Return raw JSON if skip_validation is True, otherwise validate with Pydantic
                     if skip_validation:
                         result = json_data
                     else:
@@ -314,7 +341,7 @@ class LLMConfig:
                 else:
                     result = content
 
-                # Log call details only if it takes more than 10 seconds
+                # Log slow calls
                 duration = time.time() - start_time
                 if duration > 10.0 and hasattr(response, 'usage_metadata') and response.usage_metadata:
                     usage = response.usage_metadata
@@ -326,15 +353,30 @@ class LLMConfig:
 
                 return result
 
+            except json.JSONDecodeError as e:
+                last_exception = e
+                if attempt < max_retries:
+                    logger.warning(f"Gemini returned invalid JSON, retrying...")
+                    backoff = min(initial_backoff * (2 ** attempt), max_backoff)
+                    await asyncio.sleep(backoff)
+                    continue
+                else:
+                    logger.error(f"Gemini returned invalid JSON after {max_retries + 1} attempts")
+                    raise
+
             except genai_errors.APIError as e:
-                # Handle rate limits and server errors with retry
-                if e.code in (429, 503, 500):
+                # Fast fail on 4xx client errors (except 429 rate limit)
+                if e.code and 400 <= e.code < 500 and e.code != 429:
+                    logger.error(f"Gemini client error (HTTP {e.code}), not retrying: {str(e)}")
+                    raise
+
+                # Retry on 429 and 5xx
+                if e.code in (429, 500, 502, 503, 504):
                     last_exception = e
                     if attempt < max_retries:
                         backoff = min(initial_backoff * (2 ** attempt), max_backoff)
                         jitter = backoff * 0.2 * (2 * (time.time() % 1) - 1)
-                        sleep_time = backoff + jitter
-                        await asyncio.sleep(sleep_time)
+                        await asyncio.sleep(backoff + jitter)
                     else:
                         logger.error(f"Gemini API error after {max_retries + 1} attempts: {str(e)}")
                         raise
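
Both providers now follow the same shape of policy: fail fast on 4xx client errors, retry 429 and server errors with backoff. A hypothetical predicate summarizing it (the 498 carve-out applies only to the OpenAI-compatible path, and the Gemini branch enumerates 500/502/503/504 rather than all of 5xx):

```python
from typing import Optional

def should_retry(status_code: Optional[int]) -> bool:
    # Simplified summary of the retry classification above; not package code.
    if status_code is None:
        return True                   # connection-level failure: retry
    if status_code in (429, 498):
        return True                   # 429 rate limit; 498 treated as server-side
    return 500 <= status_code < 600   # server errors: retry
```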
@@ -348,57 +390,56 @@ class LLMConfig:
 
         if last_exception:
             raise last_exception
-        raise RuntimeError(f"Gemini call failed after all retries with no exception captured")
+        raise RuntimeError(f"Gemini call failed after all retries")
 
     @classmethod
-    def for_memory(cls) -> "LLMConfig":
-        """Create configuration for memory operations from environment variables."""
+    def for_memory(cls) -> "LLMProvider":
+        """Create provider for memory operations from environment variables."""
         provider = os.getenv("HINDSIGHT_API_LLM_PROVIDER", "groq")
         api_key = os.getenv("HINDSIGHT_API_LLM_API_KEY")
-        base_url = os.getenv("HINDSIGHT_API_LLM_BASE_URL")
+        base_url = os.getenv("HINDSIGHT_API_LLM_BASE_URL", "")
         model = os.getenv("HINDSIGHT_API_LLM_MODEL", "openai/gpt-oss-120b")
 
-        # Set default base URL if not provided
-        if not base_url:
-            if provider == "groq":
-                base_url = "https://api.groq.com/openai/v1"
-            elif provider == "ollama":
-                base_url = "http://localhost:11434/v1"
-            else:
-                base_url = ""
-
         return cls(
             provider=provider,
             api_key=api_key,
             base_url=base_url,
             model=model,
+            reasoning_effort="low"
         )
 
     @classmethod
-    def for_judge(cls) -> "LLMConfig":
-        """
-        Create configuration for judge/evaluator operations from environment variables.
+    def for_answer_generation(cls) -> "LLMProvider":
+        """Create provider for answer generation. Falls back to memory config if not set."""
+        provider = os.getenv("HINDSIGHT_API_ANSWER_LLM_PROVIDER", os.getenv("HINDSIGHT_API_LLM_PROVIDER", "groq"))
+        api_key = os.getenv("HINDSIGHT_API_ANSWER_LLM_API_KEY", os.getenv("HINDSIGHT_API_LLM_API_KEY"))
+        base_url = os.getenv("HINDSIGHT_API_ANSWER_LLM_BASE_URL", os.getenv("HINDSIGHT_API_LLM_BASE_URL", ""))
+        model = os.getenv("HINDSIGHT_API_ANSWER_LLM_MODEL", os.getenv("HINDSIGHT_API_LLM_MODEL", "openai/gpt-oss-120b"))
 
-        Falls back to memory LLM config if judge-specific config not set.
-        """
-        # Check if judge-specific config exists, otherwise fall back to memory config
+        return cls(
+            provider=provider,
+            api_key=api_key,
+            base_url=base_url,
+            model=model,
+            reasoning_effort="high"
+        )
+
+    @classmethod
+    def for_judge(cls) -> "LLMProvider":
+        """Create provider for judge/evaluator operations. Falls back to memory config if not set."""
         provider = os.getenv("HINDSIGHT_API_JUDGE_LLM_PROVIDER", os.getenv("HINDSIGHT_API_LLM_PROVIDER", "groq"))
         api_key = os.getenv("HINDSIGHT_API_JUDGE_LLM_API_KEY", os.getenv("HINDSIGHT_API_LLM_API_KEY"))
-        base_url = os.getenv("HINDSIGHT_API_JUDGE_LLM_BASE_URL", os.getenv("HINDSIGHT_API_LLM_BASE_URL"))
+        base_url = os.getenv("HINDSIGHT_API_JUDGE_LLM_BASE_URL", os.getenv("HINDSIGHT_API_LLM_BASE_URL", ""))
         model = os.getenv("HINDSIGHT_API_JUDGE_LLM_MODEL", os.getenv("HINDSIGHT_API_LLM_MODEL", "openai/gpt-oss-120b"))
 
         return cls(
             provider=provider,
             api_key=api_key,
             base_url=base_url,
             model=model,
+            reasoning_effort="high"
         )
+
+
+# Backwards compatibility alias
+LLMConfig = LLMProvider
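
The three factory classmethods share one lookup pattern: a role-specific HINDSIGHT_API_<ROLE>_LLM_* variable overrides the shared HINDSIGHT_API_LLM_* one, which overrides a hard-coded default. A hypothetical helper expressing that chain (the package spells each lookup out instead):

```python
import os

def resolve_llm_env(role: str, key: str, default: str = "") -> str:
    # e.g. resolve_llm_env("JUDGE", "MODEL", "openai/gpt-oss-120b") checks
    # HINDSIGHT_API_JUDGE_LLM_MODEL, then HINDSIGHT_API_LLM_MODEL,
    # then falls back to the default.
    return os.getenv(
        f"HINDSIGHT_API_{role}_LLM_{key}",
        os.getenv(f"HINDSIGHT_API_LLM_{key}", default),
    )
```

The trailing `LLMConfig = LLMProvider` alias keeps imports written against 0.0.21 working across the rename.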