hindsight-api 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. hindsight_api/__init__.py +10 -2
  2. hindsight_api/alembic/README +1 -0
  3. hindsight_api/alembic/env.py +146 -0
  4. hindsight_api/alembic/script.py.mako +28 -0
  5. hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +274 -0
  6. hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +70 -0
  7. hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +39 -0
  8. hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +48 -0
  9. hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +62 -0
  10. hindsight_api/alembic/versions/rename_personality_to_disposition.py +65 -0
  11. hindsight_api/api/http.py +84 -86
  12. hindsight_api/config.py +154 -0
  13. hindsight_api/engine/__init__.py +7 -2
  14. hindsight_api/engine/cross_encoder.py +219 -15
  15. hindsight_api/engine/embeddings.py +192 -18
  16. hindsight_api/engine/llm_wrapper.py +88 -139
  17. hindsight_api/engine/memory_engine.py +71 -51
  18. hindsight_api/engine/retain/bank_utils.py +2 -2
  19. hindsight_api/engine/retain/fact_extraction.py +1 -1
  20. hindsight_api/engine/search/reranking.py +6 -10
  21. hindsight_api/engine/search/tracer.py +1 -1
  22. hindsight_api/main.py +201 -0
  23. hindsight_api/migrations.py +7 -7
  24. hindsight_api/server.py +43 -0
  25. {hindsight_api-0.1.0.dist-info → hindsight_api-0.1.1.dist-info}/METADATA +1 -1
  26. {hindsight_api-0.1.0.dist-info → hindsight_api-0.1.1.dist-info}/RECORD +28 -19
  27. hindsight_api-0.1.1.dist-info/entry_points.txt +2 -0
  28. hindsight_api/cli.py +0 -127
  29. hindsight_api/web/__init__.py +0 -12
  30. hindsight_api/web/server.py +0 -109
  31. hindsight_api-0.1.0.dist-info/entry_points.txt +0 -2
  32. {hindsight_api-0.1.0.dist-info → hindsight_api-0.1.1.dist-info}/WHEEL +0 -0
hindsight_api/engine/llm_wrapper.py

@@ -34,8 +34,12 @@ class OutputTooLongError(Exception):
     pass


-class LLMConfig:
-    """Configuration for an LLM provider."""
+class LLMProvider:
+    """
+    Unified LLM provider.
+
+    Supports OpenAI, Groq, Ollama (OpenAI-compatible), and Gemini.
+    """

     def __init__(
         self,
@@ -43,16 +47,17 @@ class LLMConfig:
         api_key: str,
         base_url: str,
         model: str,
-        reasoning_effort: str = "low",
+        reasoning_effort: str = "low",
     ):
         """
-        Initialize LLM configuration.
+        Initialize LLM provider.

         Args:
-            provider: Provider name ("openai", "groq", "ollama"). Required.
-            api_key: API key. Required.
-            base_url: Base URL. Required.
-            model: Model name. Required.
+            provider: Provider name ("openai", "groq", "ollama", "gemini").
+            api_key: API key.
+            base_url: Base URL for the API.
+            model: Model name.
+            reasoning_effort: Reasoning effort level for supported providers.
         """
         self.provider = provider.lower()
         self.api_key = api_key
@@ -61,9 +66,10 @@ class LLMConfig:
         self.reasoning_effort = reasoning_effort

         # Validate provider
-        if self.provider not in ["openai", "groq", "ollama", "gemini"]:
+        valid_providers = ["openai", "groq", "ollama", "gemini"]
+        if self.provider not in valid_providers:
             raise ValueError(
-                f"Invalid LLM provider: {self.provider}. Must be 'openai', 'groq', 'ollama', or 'gemini'."
+                f"Invalid LLM provider: {self.provider}. Must be one of: {', '.join(valid_providers)}"
             )

         # Set default base URLs
@@ -74,24 +80,18 @@ class LLMConfig:
             self.base_url = "http://localhost:11434/v1"

         # Validate API key (not needed for ollama)
-        if self.provider not in ["ollama"] and not self.api_key:
-            raise ValueError(
-                f"API key not found for {self.provider}"
-            )
+        if self.provider != "ollama" and not self.api_key:
+            raise ValueError(f"API key not found for {self.provider}")

-        # Create client (private - use .call() method instead)
-        # Disable automatic retries - we handle retries in the call() method
+        # Create client based on provider
         if self.provider == "gemini":
             self._gemini_client = genai.Client(api_key=self.api_key)
-            self._client = None  # Not used for Gemini
+            self._client = None
         elif self.provider == "ollama":
             self._client = AsyncOpenAI(api_key="ollama", base_url=self.base_url, max_retries=0)
             self._gemini_client = None
-        elif self.base_url:
-            self._client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url, max_retries=0)
-            self._gemini_client = None
         else:
-            self._client = AsyncOpenAI(api_key=self.api_key, max_retries=0)
+            self._client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url, max_retries=0)
             self._gemini_client = None

         logger.info(
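With the rename and the collapsed client branches, every non-Gemini, non-Ollama provider now takes the same `AsyncOpenAI(..., base_url=...)` path. A usage sketch, not from the diff itself; the import path follows the file list above, and the key is a placeholder:

```python
from hindsight_api.engine.llm_wrapper import LLMProvider

llm = LLMProvider(
    provider="groq",
    api_key="gsk_placeholder",   # placeholder, not a real key
    base_url="",                 # constructor fills in the provider default
    model="openai/gpt-oss-120b",
    reasoning_effort="low",
)
```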
@@ -102,101 +102,99 @@ class LLMConfig:
         self,
         messages: List[Dict[str, str]],
         response_format: Optional[Any] = None,
+        max_completion_tokens: Optional[int] = None,
+        temperature: Optional[float] = None,
         scope: str = "memory",
         max_retries: int = 10,
         initial_backoff: float = 1.0,
         max_backoff: float = 60.0,
         skip_validation: bool = False,
-        **kwargs
     ) -> Any:
         """
-        Make an LLM API call with consistent configuration and retry logic.
+        Make an LLM API call with retry logic.

         Args:
-            messages: List of message dicts with 'role' and 'content'
-            response_format: Optional Pydantic model for structured output
-            scope: Scope identifier (e.g., 'memory', 'judge') for future tracking
-            max_retries: Maximum number of retry attempts (default: 5)
-            initial_backoff: Initial backoff time in seconds (default: 1.0)
-            max_backoff: Maximum backoff time in seconds (default: 60.0)
-            **kwargs: Additional parameters to pass to the API (temperature, max_tokens, etc.)
+            messages: List of message dicts with 'role' and 'content'.
+            response_format: Optional Pydantic model for structured output.
+            max_completion_tokens: Maximum tokens in response.
+            temperature: Sampling temperature (0.0-2.0).
+            scope: Scope identifier for tracking.
+            max_retries: Maximum retry attempts.
+            initial_backoff: Initial backoff time in seconds.
+            max_backoff: Maximum backoff time in seconds.
+            skip_validation: Return raw JSON without Pydantic validation.

         Returns:
-            Parsed response if response_format is provided, otherwise the text content
+            Parsed response if response_format is provided, otherwise text content.

         Raises:
-            Exception: Re-raises any API errors after all retries are exhausted
+            OutputTooLongError: If output exceeds token limits.
+            Exception: Re-raises API errors after retries exhausted.
         """
-        # Use global semaphore to limit concurrent requests
         async with _global_llm_semaphore:
             start_time = time.time()
             import json

             # Handle Gemini provider separately
             if self.provider == "gemini":
-                return await self._call_gemini(messages, response_format, max_retries, initial_backoff, max_backoff, skip_validation, start_time, **kwargs)
+                return await self._call_gemini(
+                    messages, response_format, max_retries, initial_backoff,
+                    max_backoff, skip_validation, start_time
+                )

             call_params = {
                 "model": self.model,
                 "messages": messages,
-                **kwargs
             }

+            if max_completion_tokens is not None:
+                call_params["max_completion_tokens"] = max_completion_tokens
+            if temperature is not None:
+                call_params["temperature"] = temperature
+
+            # Provider-specific parameters
             if self.provider == "groq":
                 call_params["seed"] = DEFAULT_LLM_SEED
-
-            if self.provider == "groq":
                 call_params["extra_body"] = {
                     "service_tier": "auto",
                     "reasoning_effort": self.reasoning_effort,
-                    "include_reasoning": False,  # Disable hidden reasoning tokens
+                    "include_reasoning": False,
                 }

             last_exception = None

             for attempt in range(max_retries + 1):
                 try:
-                    # Use the appropriate response format
                     if response_format is not None:
-                        # Use JSON mode instead of strict parse for flexibility with optional fields
-                        # This allows the LLM to omit optional fields without validation errors
-
-                        # Add schema to the system message
+                        # Add schema to system message for JSON mode
                         if hasattr(response_format, 'model_json_schema'):
                             schema = response_format.model_json_schema()
                             schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"

-                            # Add schema to the system message if present, otherwise prepend as user message
                             if call_params['messages'] and call_params['messages'][0].get('role') == 'system':
                                 call_params['messages'][0]['content'] += schema_msg
-                            else:
-                                # No system message, add schema instruction to first user message
-                                if call_params['messages']:
-                                    call_params['messages'][0]['content'] = schema_msg + "\n\n" + call_params['messages'][0]['content']
+                            elif call_params['messages']:
+                                call_params['messages'][0]['content'] = schema_msg + "\n\n" + call_params['messages'][0]['content']

                         call_params['response_format'] = {"type": "json_object"}
                         response = await self._client.chat.completions.create(**call_params)

-                        # Parse the JSON response
                         content = response.choices[0].message.content
                         json_data = json.loads(content)

-                        # Return raw JSON if skip_validation is True, otherwise validate with Pydantic
                         if skip_validation:
                             result = json_data
                         else:
                             result = response_format.model_validate(json_data)
                     else:
-                        # Standard completion and return text content
                         response = await self._client.chat.completions.create(**call_params)
                         result = response.choices[0].message.content

-                    # Log call details only if it takes more than 5 seconds
+                    # Log slow calls
                     duration = time.time() - start_time
                     usage = response.usage
                     if duration > 10.0:
                         ratio = max(1, usage.completion_tokens) / usage.prompt_tokens
-                        # Check for cached tokens (OpenAI/Groq may include this)
                         cached_tokens = 0
                         if hasattr(usage, 'prompt_tokens_details') and usage.prompt_tokens_details:
                             cached_tokens = getattr(usage.prompt_tokens_details, 'cached_tokens', 0) or 0
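Because `**kwargs` is gone from the signature, callers that previously forwarded arbitrary parameters (e.g. `max_tokens`) must switch to the two explicit knobs. A sketch of the new calling convention; `FactList` and `extract_facts` are hypothetical:

```python
from pydantic import BaseModel

class FactList(BaseModel):
    facts: list[str]

async def extract_facts(llm) -> FactList:
    # 0.1.0 accepted **kwargs; 0.1.1 exposes only the explicit parameters.
    return await llm.call(
        messages=[
            {"role": "system", "content": "Extract facts as JSON."},
            {"role": "user", "content": "Paris is the capital of France."},
        ],
        response_format=FactList,    # schema is injected into the system message
        max_completion_tokens=512,
        temperature=0.0,
    )
```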
@@ -210,14 +208,12 @@ class LLMConfig:
                     return result

                 except LengthFinishReasonError as e:
-                    # Output exceeded token limits - raise bridge exception for caller to handle
                     logger.warning(f"LLM output exceeded token limits: {str(e)}")
                     raise OutputTooLongError(
                         f"LLM output exceeded token limits. Input may need to be split into smaller chunks."
                     ) from e

                 except APIConnectionError as e:
-                    # Handle connection errors (server disconnected, network issues) with retry
                     last_exception = e
                     if attempt < max_retries:
                         logger.warning(f"Connection error, retrying... (attempt {attempt + 1}/{max_retries + 1})")
@@ -229,19 +225,18 @@ class LLMConfig:
                         raise

                 except APIStatusError as e:
+                    # Fast fail on 4xx client errors (except 429 rate limit and 498 which is treated as server error)
+                    if 400 <= e.status_code < 500 and e.status_code not in (429, 498):
+                        logger.error(f"Client error (HTTP {e.status_code}), not retrying: {str(e)}")
+                        raise
+
                     last_exception = e
                     if attempt < max_retries:
-                        # Calculate exponential backoff with jitter
                         backoff = min(initial_backoff * (2 ** attempt), max_backoff)
-                        # Add jitter (±20%)
                         jitter = backoff * 0.2 * (2 * (time.time() % 1) - 1)
                         sleep_time = backoff + jitter
-
-                        # Only log if it's a non-retryable error or final attempt
-                        # Silent retry for common transient errors like capacity exceeded
                         await asyncio.sleep(sleep_time)
                     else:
-                        # Log only on final failed attempt
                         logger.error(f"API error after {max_retries + 1} attempts: {str(e)}")
                         raise

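For reference, the retry schedule kept here (and reused on the Gemini path below) is exponential backoff capped at `max_backoff`, with roughly ±20% jitter taken from the clock's fractional seconds. A self-contained restatement of the same arithmetic:

```python
import time

def backoff_with_jitter(attempt: int, initial: float = 1.0, cap: float = 60.0) -> float:
    # Same arithmetic as the hunk: exponential growth, capped, then a jitter
    # term in roughly [-20%, +20%) derived from time.time()'s fractional part.
    backoff = min(initial * (2 ** attempt), cap)
    jitter = backoff * 0.2 * (2 * (time.time() % 1) - 1)
    return backoff + jitter

# Attempts 0..5 sleep about 1, 2, 4, 8, 16, 32 seconds, each within +/-20%.
for attempt in range(6):
    s = backoff_with_jitter(attempt)
    assert 0.8 * (2 ** attempt) <= s < 1.2 * (2 ** attempt)
```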
@@ -249,7 +244,6 @@ class LLMConfig:
                     logger.error(f"Unexpected error during LLM call: {type(e).__name__}: {str(e)}")
                     raise

-            # This should never be reached, but just in case
             if last_exception:
                 raise last_exception
             raise RuntimeError(f"LLM call failed after all retries with no exception captured")
@@ -263,13 +257,11 @@ class LLMConfig:
         max_backoff: float,
         skip_validation: bool,
         start_time: float,
-        **kwargs
-    ) -> Any:
-        """Handle Gemini-specific API calls using google-genai SDK."""
+    ) -> Any:
+        """Handle Gemini-specific API calls."""
         import json

         # Convert OpenAI-style messages to Gemini format
-        # Gemini uses 'user' and 'model' roles, and system instructions are separate
         system_instruction = None
         gemini_contents = []

@@ -278,7 +270,6 @@ class LLMConfig:
             content = msg.get('content', '')

             if role == 'system':
-                # Accumulate system messages as system instruction
                 if system_instruction:
                     system_instruction += "\n\n" + content
                 else:
@@ -288,7 +279,7 @@ class LLMConfig:
                     role="model",
                     parts=[genai_types.Part(text=content)]
                 ))
-            else:  # user or any other role
+            else:
                 gemini_contents.append(genai_types.Content(
                     role="user",
                     parts=[genai_types.Part(text=content)]
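Across these three hunks the conversion loop boils down to a role mapping: system messages are folded into a separate system instruction, the branch visible above via `role="model"` (presumably the assistant case) maps to Gemini's `model` role, and everything else falls through to `user`. A minimal sketch with plain dicts standing in for `genai_types.Content` (an illustration, not the package's code):

```python
def split_for_gemini(messages: list[dict]) -> tuple:
    system_instruction = None
    contents = []
    for msg in messages:
        role, content = msg.get('role'), msg.get('content', '')
        if role == 'system':
            # Multiple system messages are concatenated, as in the hunk above
            system_instruction = content if system_instruction is None \
                else system_instruction + "\n\n" + content
        else:
            contents.append({
                "role": "model" if role == 'assistant' else "user",
                "text": content,
            })
    return system_instruction, contents
```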
@@ -307,13 +298,8 @@ class LLMConfig:
         config_kwargs = {}
         if system_instruction:
             config_kwargs['system_instruction'] = system_instruction
-        if 'temperature' in kwargs:
-            config_kwargs['temperature'] = kwargs['temperature']
-        if 'max_tokens' in kwargs:
-            config_kwargs['max_output_tokens'] = kwargs['max_tokens']
         if response_format is not None:
             config_kwargs['response_mime_type'] = 'application/json'
-            # Pass the Pydantic model directly as response_schema for structured output
             config_kwargs['response_schema'] = response_format

         generation_config = genai_types.GenerateContentConfig(**config_kwargs) if config_kwargs else None
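With `**kwargs` removed, `temperature` and `max_tokens` are no longer forwarded to Gemini; the config now carries only the system instruction and, for structured output, the JSON mime type plus the Pydantic model passed directly as `response_schema` (google-genai accepts Pydantic models there). A sketch of the config this code builds; `Sentiment` is a hypothetical model:

```python
from google.genai import types as genai_types
from pydantic import BaseModel

class Sentiment(BaseModel):
    label: str
    score: float

config = genai_types.GenerateContentConfig(
    system_instruction="Classify the sentiment of the user's text.",
    response_mime_type="application/json",
    response_schema=Sentiment,   # Pydantic model passed directly, as above
)
```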
@@ -330,9 +316,8 @@ class LLMConfig:

                 content = response.text

-                # Handle empty/None response (can happen with content filtering or timeouts)
+                # Handle empty response
                 if content is None:
-                    # Check if there's a block reason
                     block_reason = None
                     if hasattr(response, 'candidates') and response.candidates:
                         candidate = response.candidates[0]
@@ -340,18 +325,15 @@ class LLMConfig:
                             block_reason = candidate.finish_reason

                     if attempt < max_retries:
-                        logger.warning(f"Gemini returned empty response (reason: {block_reason}), retrying... (attempt {attempt + 1}/{max_retries + 1})")
+                        logger.warning(f"Gemini returned empty response (reason: {block_reason}), retrying...")
                         backoff = min(initial_backoff * (2 ** attempt), max_backoff)
                         await asyncio.sleep(backoff)
                         continue
                     else:
-                        raise RuntimeError(f"Gemini returned empty response after {max_retries + 1} attempts (reason: {block_reason})")
+                        raise RuntimeError(f"Gemini returned empty response after {max_retries + 1} attempts")

                 if response_format is not None:
-                    # Parse the JSON response
                     json_data = json.loads(content)
-
-                    # Return raw JSON if skip_validation is True, otherwise validate with Pydantic
                     if skip_validation:
                         result = json_data
                     else:
@@ -359,42 +341,42 @@ class LLMConfig:
                 else:
                     result = content

-                # Log call details only if it takes more than 10 seconds
+                # Log slow calls
                 duration = time.time() - start_time
                 if duration > 10.0 and hasattr(response, 'usage_metadata') and response.usage_metadata:
                     usage = response.usage_metadata
-                    # Check for cached tokens (Gemini uses cached_content_token_count)
-                    cached_tokens = getattr(usage, 'cached_content_token_count', 0) or 0
-                    cache_info = f", cached_tokens={cached_tokens}" if cached_tokens > 0 else ""
                     logger.info(
                         f"slow llm call: model={self.provider}/{self.model}, "
-                        f"input_tokens={usage.prompt_token_count}, output_tokens={usage.candidates_token_count}{cache_info}, "
+                        f"input_tokens={usage.prompt_token_count}, output_tokens={usage.candidates_token_count}, "
                         f"time={duration:.3f}s"
                     )

                 return result

             except json.JSONDecodeError as e:
-                # Handle truncated JSON responses (often from MAX_TOKENS) with retry
                 last_exception = e
                 if attempt < max_retries:
-                    logger.warning(f"Gemini returned invalid JSON (truncated response?), retrying... (attempt {attempt + 1}/{max_retries + 1})")
+                    logger.warning(f"Gemini returned invalid JSON, retrying...")
                     backoff = min(initial_backoff * (2 ** attempt), max_backoff)
                     await asyncio.sleep(backoff)
                     continue
                 else:
-                    logger.error(f"Gemini returned invalid JSON after {max_retries + 1} attempts: {str(e)}")
+                    logger.error(f"Gemini returned invalid JSON after {max_retries + 1} attempts")
                     raise

             except genai_errors.APIError as e:
-                # Handle rate limits and server errors with retry
-                if e.code in (429, 503, 500):
+                # Fast fail on 4xx client errors (except 429 rate limit)
+                if e.code and 400 <= e.code < 500 and e.code != 429:
+                    logger.error(f"Gemini client error (HTTP {e.code}), not retrying: {str(e)}")
+                    raise
+
+                # Retry on 429 and 5xx
+                if e.code in (429, 500, 502, 503, 504):
                     last_exception = e
                     if attempt < max_retries:
                         backoff = min(initial_backoff * (2 ** attempt), max_backoff)
                         jitter = backoff * 0.2 * (2 * (time.time() % 1) - 1)
-                        sleep_time = backoff + jitter
-                        await asyncio.sleep(sleep_time)
+                        await asyncio.sleep(backoff + jitter)
                     else:
                         logger.error(f"Gemini API error after {max_retries + 1} attempts: {str(e)}")
                         raise
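The Gemini error policy now mirrors the OpenAI-path handling above. Restated as a small decision function (codes outside both branches fall through to handling below this hunk, which is not shown here):

```python
def gemini_retry_policy(code) -> str:
    # Mirrors the two new branches: 4xx other than 429 fail fast;
    # 429 and common 5xx codes are retried with backoff and jitter.
    if code and 400 <= code < 500 and code != 429:
        return "fail-fast"
    if code in (429, 500, 502, 503, 504):
        return "retry"
    return "other"  # handled outside this hunk

assert gemini_retry_policy(404) == "fail-fast"
assert gemini_retry_policy(429) == "retry"
assert gemini_retry_policy(503) == "retry"
```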
@@ -408,25 +390,16 @@ class LLMConfig:

         if last_exception:
             raise last_exception
-        raise RuntimeError(f"Gemini call failed after all retries with no exception captured")
+        raise RuntimeError(f"Gemini call failed after all retries")

     @classmethod
-    def for_memory(cls) -> "LLMConfig":
-        """Create configuration for memory operations from environment variables."""
+    def for_memory(cls) -> "LLMProvider":
+        """Create provider for memory operations from environment variables."""
         provider = os.getenv("HINDSIGHT_API_LLM_PROVIDER", "groq")
         api_key = os.getenv("HINDSIGHT_API_LLM_API_KEY")
-        base_url = os.getenv("HINDSIGHT_API_LLM_BASE_URL")
+        base_url = os.getenv("HINDSIGHT_API_LLM_BASE_URL", "")
         model = os.getenv("HINDSIGHT_API_LLM_MODEL", "openai/gpt-oss-120b")

-        # Set default base URL if not provided
-        if not base_url:
-            if provider == "groq":
-                base_url = "https://api.groq.com/openai/v1"
-            elif provider == "ollama":
-                base_url = "http://localhost:11434/v1"
-            else:
-                base_url = ""
-
         return cls(
             provider=provider,
             api_key=api_key,
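All three factory methods now leave base-URL defaulting to the constructor: an unset `HINDSIGHT_API_LLM_BASE_URL` becomes `""` and the constructor substitutes the provider default. A sketch of the environment contract (placeholder values; the variable names come from the diff):

```python
import os
from hindsight_api.engine.llm_wrapper import LLMProvider

os.environ["HINDSIGHT_API_LLM_PROVIDER"] = "groq"
os.environ["HINDSIGHT_API_LLM_API_KEY"] = "gsk_placeholder"  # placeholder
os.environ["HINDSIGHT_API_LLM_MODEL"] = "openai/gpt-oss-120b"
# HINDSIGHT_API_LLM_BASE_URL may stay unset: for_memory() passes "" and the
# constructor fills in https://api.groq.com/openai/v1 for groq.

llm = LLMProvider.for_memory()
```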
@@ -436,27 +409,13 @@ class LLMConfig:
         )

     @classmethod
-    def for_answer_generation(cls) -> "LLMConfig":
-        """
-        Create configuration for answer generation operations from environment variables.
-
-        Falls back to memory LLM config if answer-specific config not set.
-        """
-        # Check if answer-specific config exists, otherwise fall back to memory config
+    def for_answer_generation(cls) -> "LLMProvider":
+        """Create provider for answer generation. Falls back to memory config if not set."""
         provider = os.getenv("HINDSIGHT_API_ANSWER_LLM_PROVIDER", os.getenv("HINDSIGHT_API_LLM_PROVIDER", "groq"))
         api_key = os.getenv("HINDSIGHT_API_ANSWER_LLM_API_KEY", os.getenv("HINDSIGHT_API_LLM_API_KEY"))
-        base_url = os.getenv("HINDSIGHT_API_ANSWER_LLM_BASE_URL", os.getenv("HINDSIGHT_API_LLM_BASE_URL"))
+        base_url = os.getenv("HINDSIGHT_API_ANSWER_LLM_BASE_URL", os.getenv("HINDSIGHT_API_LLM_BASE_URL", ""))
         model = os.getenv("HINDSIGHT_API_ANSWER_LLM_MODEL", os.getenv("HINDSIGHT_API_LLM_MODEL", "openai/gpt-oss-120b"))

-        # Set default base URL if not provided
-        if not base_url:
-            if provider == "groq":
-                base_url = "https://api.groq.com/openai/v1"
-            elif provider == "ollama":
-                base_url = "http://localhost:11434/v1"
-            else:
-                base_url = ""
-
         return cls(
             provider=provider,
             api_key=api_key,
@@ -466,27 +425,13 @@ class LLMConfig:
         )

     @classmethod
-    def for_judge(cls) -> "LLMConfig":
-        """
-        Create configuration for judge/evaluator operations from environment variables.
-
-        Falls back to memory LLM config if judge-specific config not set.
-        """
-        # Check if judge-specific config exists, otherwise fall back to memory config
+    def for_judge(cls) -> "LLMProvider":
+        """Create provider for judge/evaluator operations. Falls back to memory config if not set."""
         provider = os.getenv("HINDSIGHT_API_JUDGE_LLM_PROVIDER", os.getenv("HINDSIGHT_API_LLM_PROVIDER", "groq"))
         api_key = os.getenv("HINDSIGHT_API_JUDGE_LLM_API_KEY", os.getenv("HINDSIGHT_API_LLM_API_KEY"))
-        base_url = os.getenv("HINDSIGHT_API_JUDGE_LLM_BASE_URL", os.getenv("HINDSIGHT_API_LLM_BASE_URL"))
+        base_url = os.getenv("HINDSIGHT_API_JUDGE_LLM_BASE_URL", os.getenv("HINDSIGHT_API_LLM_BASE_URL", ""))
         model = os.getenv("HINDSIGHT_API_JUDGE_LLM_MODEL", os.getenv("HINDSIGHT_API_LLM_MODEL", "openai/gpt-oss-120b"))

-        # Set default base URL if not provided
-        if not base_url:
-            if provider == "groq":
-                base_url = "https://api.groq.com/openai/v1"
-            elif provider == "ollama":
-                base_url = "http://localhost:11434/v1"
-            else:
-                base_url = ""
-
         return cls(
             provider=provider,
             api_key=api_key,
@@ -494,3 +439,7 @@ class LLMConfig:
             model=model,
             reasoning_effort="high"
         )
+
+
+# Backwards compatibility alias
+LLMConfig = LLMProvider
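The module-level alias means existing imports keep working unchanged:

```python
from hindsight_api.engine.llm_wrapper import LLMConfig, LLMProvider

assert LLMConfig is LLMProvider  # the alias binds the same class object
```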