hindsight-api 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
Files changed (64)
  1. hindsight_api/__init__.py +10 -9
  2. hindsight_api/alembic/env.py +5 -8
  3. hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +266 -180
  4. hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +32 -32
  5. hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +11 -11
  6. hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +7 -12
  7. hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +23 -15
  8. hindsight_api/alembic/versions/rename_personality_to_disposition.py +30 -21
  9. hindsight_api/api/__init__.py +10 -10
  10. hindsight_api/api/http.py +575 -593
  11. hindsight_api/api/mcp.py +30 -28
  12. hindsight_api/banner.py +13 -6
  13. hindsight_api/config.py +9 -13
  14. hindsight_api/engine/__init__.py +9 -9
  15. hindsight_api/engine/cross_encoder.py +22 -21
  16. hindsight_api/engine/db_utils.py +5 -4
  17. hindsight_api/engine/embeddings.py +22 -21
  18. hindsight_api/engine/entity_resolver.py +81 -75
  19. hindsight_api/engine/llm_wrapper.py +61 -79
  20. hindsight_api/engine/memory_engine.py +603 -625
  21. hindsight_api/engine/query_analyzer.py +100 -97
  22. hindsight_api/engine/response_models.py +105 -106
  23. hindsight_api/engine/retain/__init__.py +9 -16
  24. hindsight_api/engine/retain/bank_utils.py +34 -58
  25. hindsight_api/engine/retain/chunk_storage.py +4 -12
  26. hindsight_api/engine/retain/deduplication.py +9 -28
  27. hindsight_api/engine/retain/embedding_processing.py +4 -11
  28. hindsight_api/engine/retain/embedding_utils.py +3 -4
  29. hindsight_api/engine/retain/entity_processing.py +7 -17
  30. hindsight_api/engine/retain/fact_extraction.py +155 -165
  31. hindsight_api/engine/retain/fact_storage.py +11 -23
  32. hindsight_api/engine/retain/link_creation.py +11 -39
  33. hindsight_api/engine/retain/link_utils.py +166 -95
  34. hindsight_api/engine/retain/observation_regeneration.py +39 -52
  35. hindsight_api/engine/retain/orchestrator.py +72 -62
  36. hindsight_api/engine/retain/types.py +49 -43
  37. hindsight_api/engine/search/__init__.py +5 -5
  38. hindsight_api/engine/search/fusion.py +6 -15
  39. hindsight_api/engine/search/graph_retrieval.py +22 -23
  40. hindsight_api/engine/search/mpfp_retrieval.py +76 -92
  41. hindsight_api/engine/search/observation_utils.py +9 -16
  42. hindsight_api/engine/search/reranking.py +4 -7
  43. hindsight_api/engine/search/retrieval.py +87 -66
  44. hindsight_api/engine/search/scoring.py +5 -7
  45. hindsight_api/engine/search/temporal_extraction.py +8 -11
  46. hindsight_api/engine/search/think_utils.py +115 -39
  47. hindsight_api/engine/search/trace.py +68 -39
  48. hindsight_api/engine/search/tracer.py +44 -35
  49. hindsight_api/engine/search/types.py +20 -17
  50. hindsight_api/engine/task_backend.py +21 -26
  51. hindsight_api/engine/utils.py +25 -10
  52. hindsight_api/main.py +21 -40
  53. hindsight_api/mcp_local.py +190 -0
  54. hindsight_api/metrics.py +44 -30
  55. hindsight_api/migrations.py +10 -8
  56. hindsight_api/models.py +60 -72
  57. hindsight_api/pg0.py +22 -23
  58. hindsight_api/server.py +3 -6
  59. hindsight_api-0.1.7.dist-info/METADATA +178 -0
  60. hindsight_api-0.1.7.dist-info/RECORD +64 -0
  61. {hindsight_api-0.1.5.dist-info → hindsight_api-0.1.7.dist-info}/entry_points.txt +1 -0
  62. hindsight_api-0.1.5.dist-info/METADATA +0 -42
  63. hindsight_api-0.1.5.dist-info/RECORD +0 -63
  64. {hindsight_api-0.1.5.dist-info → hindsight_api-0.1.7.dist-info}/WHEEL +0 -0
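The hunks reproduced below appear to be the diff for hindsight_api/engine/llm_wrapper.py (item 19 above), judging by the module docstring and the LLMProvider class; the other files' changes are not included in this excerpt.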
@@ -1,15 +1,17 @@
  """
  LLM wrapper for unified configuration across providers.
  """
+
+ import asyncio
+ import logging
  import os
  import time
- import asyncio
- from typing import Optional, Any, Dict, List
- from openai import AsyncOpenAI, RateLimitError, APIError, APIStatusError, APIConnectionError, LengthFinishReasonError
+ from typing import Any
+
  from google import genai
- from google.genai import types as genai_types
  from google.genai import errors as genai_errors
- import logging
+ from google.genai import types as genai_types
+ from openai import APIConnectionError, APIStatusError, AsyncOpenAI, LengthFinishReasonError

  # Seed applied to every Groq request for deterministic behavior.
  DEFAULT_LLM_SEED = 4242
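The import reshuffle above is mostly stylistic (imports sorted, unused openai exception types RateLimitError and APIError dropped), but the typing change that recurs throughout the diff is substantive: `Optional`/`Dict`/`List` annotations are replaced with built-in generics and PEP 604 unions, which assume Python 3.9+/3.10+. A minimal sketch of the equivalence (illustrative only, not from the package):

```python
from typing import Any, Dict, List, Optional

# Old style, as removed in the diff:
def call_old(messages: List[Dict[str, str]], response_format: Optional[Any] = None) -> None: ...

# New style, as added (built-in generics need Python 3.9+, "X | None" needs 3.10+):
def call_new(messages: list[dict[str, str]], response_format: Any | None = None) -> None: ...
```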
@@ -31,6 +33,7 @@ class OutputTooLongError(Exception):
  to allow callers to handle output length issues without depending on
  provider-specific implementations.
  """
+
  pass


@@ -68,9 +71,7 @@ class LLMProvider:
  # Validate provider
  valid_providers = ["openai", "groq", "ollama", "gemini"]
  if self.provider not in valid_providers:
- raise ValueError(
- f"Invalid LLM provider: {self.provider}. Must be one of: {', '.join(valid_providers)}"
- )
+ raise ValueError(f"Invalid LLM provider: {self.provider}. Must be one of: {', '.join(valid_providers)}")

  # Set default base URLs
  if not self.base_url:
@@ -106,7 +107,9 @@ class LLMProvider:
  RuntimeError: If the connection test fails.
  """
  try:
- logger.info(f"Verifying LLM: provider={self.provider}, model={self.model}, base_url={self.base_url or 'default'}...")
+ logger.info(
+ f"Verifying LLM: provider={self.provider}, model={self.model}, base_url={self.base_url or 'default'}..."
+ )
  await self.call(
  messages=[{"role": "user", "content": "Say 'ok'"}],
  max_completion_tokens=10,
@@ -117,16 +120,14 @@ class LLMProvider:
  # If we get here without exception, the connection is working
  logger.info(f"LLM verified: {self.provider}/{self.model}")
  except Exception as e:
- raise RuntimeError(
- f"LLM connection verification failed for {self.provider}/{self.model}: {e}"
- ) from e
+ raise RuntimeError(f"LLM connection verification failed for {self.provider}/{self.model}: {e}") from e

  async def call(
  self,
- messages: List[Dict[str, str]],
- response_format: Optional[Any] = None,
- max_completion_tokens: Optional[int] = None,
- temperature: Optional[float] = None,
+ messages: list[dict[str, str]],
+ response_format: Any | None = None,
+ max_completion_tokens: int | None = None,
+ temperature: float | None = None,
  scope: str = "memory",
  max_retries: int = 10,
  initial_backoff: float = 1.0,
@@ -161,8 +162,7 @@ class LLMProvider:
  # Handle Gemini provider separately
  if self.provider == "gemini":
  return await self._call_gemini(
- messages, response_format, max_retries, initial_backoff,
- max_backoff, skip_validation, start_time
+ messages, response_format, max_retries, initial_backoff, max_backoff, skip_validation, start_time
  )

  call_params = {
@@ -213,16 +213,18 @@ class LLMProvider:
  try:
  if response_format is not None:
  # Add schema to system message for JSON mode
- if hasattr(response_format, 'model_json_schema'):
+ if hasattr(response_format, "model_json_schema"):
  schema = response_format.model_json_schema()
  schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"

- if call_params['messages'] and call_params['messages'][0].get('role') == 'system':
- call_params['messages'][0]['content'] += schema_msg
- elif call_params['messages']:
- call_params['messages'][0]['content'] = schema_msg + "\n\n" + call_params['messages'][0]['content']
+ if call_params["messages"] and call_params["messages"][0].get("role") == "system":
+ call_params["messages"][0]["content"] += schema_msg
+ elif call_params["messages"]:
+ call_params["messages"][0]["content"] = (
+ schema_msg + "\n\n" + call_params["messages"][0]["content"]
+ )

- call_params['response_format'] = {"type": "json_object"}
+ call_params["response_format"] = {"type": "json_object"}
  response = await self._client.chat.completions.create(**call_params)

  content = response.choices[0].message.content
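For OpenAI-compatible providers, the hunk above injects a Pydantic model's JSON schema into the system message and then requests plain JSON-object mode. A minimal sketch of that pattern; the Fact model and the sample messages are hypothetical stand-ins, not taken from the package:

```python
import json
from pydantic import BaseModel

class Fact(BaseModel):
    statement: str
    confidence: float

messages = [
    {"role": "system", "content": "Extract one fact from the user's text."},
    {"role": "user", "content": "Ada Lovelace wrote the first published program."},
]

# Append the schema to the system message so the model knows the target shape.
schema = Fact.model_json_schema()
schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"
if messages and messages[0].get("role") == "system":
    messages[0]["content"] += schema_msg
else:
    messages[0]["content"] = schema_msg + "\n\n" + messages[0]["content"]

# The request itself only asks for generic JSON-object output.
call_params = {"messages": messages, "response_format": {"type": "json_object"}}
```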
@@ -242,8 +244,8 @@ class LLMProvider:
  if duration > 10.0:
  ratio = max(1, usage.completion_tokens) / usage.prompt_tokens
  cached_tokens = 0
- if hasattr(usage, 'prompt_tokens_details') and usage.prompt_tokens_details:
- cached_tokens = getattr(usage.prompt_tokens_details, 'cached_tokens', 0) or 0
+ if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details:
+ cached_tokens = getattr(usage.prompt_tokens_details, "cached_tokens", 0) or 0
  cache_info = f", cached_tokens={cached_tokens}" if cached_tokens > 0 else ""
  logger.info(
  f"slow llm call: model={self.provider}/{self.model}, "
@@ -256,15 +258,19 @@ class LLMProvider:
  except LengthFinishReasonError as e:
  logger.warning(f"LLM output exceeded token limits: {str(e)}")
  raise OutputTooLongError(
- f"LLM output exceeded token limits. Input may need to be split into smaller chunks."
+ "LLM output exceeded token limits. Input may need to be split into smaller chunks."
  ) from e

  except APIConnectionError as e:
  last_exception = e
  if attempt < max_retries:
- status_code = getattr(e, 'status_code', None) or getattr(getattr(e, 'response', None), 'status_code', None)
- logger.warning(f"Connection error, retrying... (attempt {attempt + 1}/{max_retries + 1}) - status_code={status_code}, message={e}")
- backoff = min(initial_backoff * (2 ** attempt), max_backoff)
+ status_code = getattr(e, "status_code", None) or getattr(
+ getattr(e, "response", None), "status_code", None
+ )
+ logger.warning(
+ f"Connection error, retrying... (attempt {attempt + 1}/{max_retries + 1}) - status_code={status_code}, message={e}"
+ )
+ backoff = min(initial_backoff * (2**attempt), max_backoff)
  await asyncio.sleep(backoff)
  continue
  else:
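The `OutputTooLongError` raised above wraps the OpenAI SDK's `LengthFinishReasonError` so callers can react to oversized completions without importing provider-specific exceptions. One possible caller-side handling, as hinted by the error message; the `summarize` helper below is hypothetical, not part of the package:

```python
from hindsight_api.engine.llm_wrapper import LLMProvider, OutputTooLongError

async def summarize(llm: LLMProvider, text: str) -> list:
    """Summarize text, splitting the input in half if the output would be too long."""
    try:
        return [await llm.call(messages=[{"role": "user", "content": f"Summarize:\n{text}"}])]
    except OutputTooLongError:
        mid = len(text) // 2
        return [
            await llm.call(messages=[{"role": "user", "content": f"Summarize:\n{half}"}])
            for half in (text[:mid], text[mid:])
        ]
```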
@@ -279,7 +285,7 @@ class LLMProvider:

  last_exception = e
  if attempt < max_retries:
- backoff = min(initial_backoff * (2 ** attempt), max_backoff)
+ backoff = min(initial_backoff * (2**attempt), max_backoff)
  jitter = backoff * 0.2 * (2 * (time.time() % 1) - 1)
  sleep_time = backoff + jitter
  await asyncio.sleep(sleep_time)
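Both retry paths compute the delay as `min(initial_backoff * (2**attempt), max_backoff)` plus a ±20% jitter derived from the sub-second part of the wall clock. A standalone sketch of that loop; `flaky_call` and the default parameters are illustrative, not the package's API:

```python
import asyncio
import time

async def retry_with_backoff(flaky_call, max_retries=10, initial_backoff=1.0, max_backoff=30.0):
    last_exception = None
    for attempt in range(max_retries + 1):
        try:
            return await flaky_call()
        except Exception as e:
            last_exception = e
            if attempt < max_retries:
                # Capped exponential backoff...
                backoff = min(initial_backoff * (2**attempt), max_backoff)
                # ...with +/-20% jitter; time.time() % 1 is a cheap value in [0, 1).
                jitter = backoff * 0.2 * (2 * (time.time() % 1) - 1)
                await asyncio.sleep(backoff + jitter)
    raise last_exception
```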
@@ -293,12 +299,12 @@ class LLMProvider:

  if last_exception:
  raise last_exception
- raise RuntimeError(f"LLM call failed after all retries with no exception captured")
+ raise RuntimeError("LLM call failed after all retries with no exception captured")

  async def _call_gemini(
  self,
- messages: List[Dict[str, str]],
- response_format: Optional[Any],
+ messages: list[dict[str, str]],
+ response_format: Any | None,
  max_retries: int,
  initial_backoff: float,
  max_backoff: float,
@@ -313,27 +319,21 @@ class LLMProvider:
  gemini_contents = []

  for msg in messages:
- role = msg.get('role', 'user')
- content = msg.get('content', '')
+ role = msg.get("role", "user")
+ content = msg.get("content", "")

- if role == 'system':
+ if role == "system":
  if system_instruction:
  system_instruction += "\n\n" + content
  else:
  system_instruction = content
- elif role == 'assistant':
- gemini_contents.append(genai_types.Content(
- role="model",
- parts=[genai_types.Part(text=content)]
- ))
+ elif role == "assistant":
+ gemini_contents.append(genai_types.Content(role="model", parts=[genai_types.Part(text=content)]))
  else:
- gemini_contents.append(genai_types.Content(
- role="user",
- parts=[genai_types.Part(text=content)]
- ))
+ gemini_contents.append(genai_types.Content(role="user", parts=[genai_types.Part(text=content)]))

  # Add JSON schema instruction if response_format is provided
- if response_format is not None and hasattr(response_format, 'model_json_schema'):
+ if response_format is not None and hasattr(response_format, "model_json_schema"):
  schema = response_format.model_json_schema()
  schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"
  if system_instruction:
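The loop above flattens OpenAI-style chat messages into google-genai `Content` objects, folding system messages into `system_instruction` and mapping the `assistant` role to Gemini's `model` role. A condensed sketch of the same conversion, assuming the google-genai SDK; the sample messages are hypothetical:

```python
from google.genai import types as genai_types

messages = [
    {"role": "system", "content": "Answer briefly."},
    {"role": "user", "content": "What is the capital of France?"},
]

system_instruction = None
gemini_contents = []
for msg in messages:
    role = msg.get("role", "user")
    content = msg.get("content", "")
    if role == "system":
        # System messages become (or extend) the system instruction.
        system_instruction = content if not system_instruction else system_instruction + "\n\n" + content
    elif role == "assistant":
        gemini_contents.append(genai_types.Content(role="model", parts=[genai_types.Part(text=content)]))
    else:
        gemini_contents.append(genai_types.Content(role="user", parts=[genai_types.Part(text=content)]))
```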
@@ -344,10 +344,10 @@ class LLMProvider:
  # Build generation config
  config_kwargs = {}
  if system_instruction:
- config_kwargs['system_instruction'] = system_instruction
+ config_kwargs["system_instruction"] = system_instruction
  if response_format is not None:
- config_kwargs['response_mime_type'] = 'application/json'
- config_kwargs['response_schema'] = response_format
+ config_kwargs["response_mime_type"] = "application/json"
+ config_kwargs["response_schema"] = response_format

  generation_config = genai_types.GenerateContentConfig(**config_kwargs) if config_kwargs else None

@@ -366,14 +366,14 @@ class LLMProvider:
  # Handle empty response
  if content is None:
  block_reason = None
- if hasattr(response, 'candidates') and response.candidates:
+ if hasattr(response, "candidates") and response.candidates:
  candidate = response.candidates[0]
- if hasattr(candidate, 'finish_reason'):
+ if hasattr(candidate, "finish_reason"):
  block_reason = candidate.finish_reason

  if attempt < max_retries:
  logger.warning(f"Gemini returned empty response (reason: {block_reason}), retrying...")
- backoff = min(initial_backoff * (2 ** attempt), max_backoff)
+ backoff = min(initial_backoff * (2**attempt), max_backoff)
  await asyncio.sleep(backoff)
  continue
  else:
@@ -390,7 +390,7 @@ class LLMProvider:

  # Log slow calls
  duration = time.time() - start_time
- if duration > 10.0 and hasattr(response, 'usage_metadata') and response.usage_metadata:
+ if duration > 10.0 and hasattr(response, "usage_metadata") and response.usage_metadata:
  usage = response.usage_metadata
  logger.info(
  f"slow llm call: model={self.provider}/{self.model}, "
@@ -403,8 +403,8 @@ class LLMProvider:
  except json.JSONDecodeError as e:
  last_exception = e
  if attempt < max_retries:
- logger.warning(f"Gemini returned invalid JSON, retrying...")
- backoff = min(initial_backoff * (2 ** attempt), max_backoff)
+ logger.warning("Gemini returned invalid JSON, retrying...")
+ backoff = min(initial_backoff * (2**attempt), max_backoff)
  await asyncio.sleep(backoff)
  continue
  else:
@@ -421,7 +421,7 @@ class LLMProvider:
  if e.code in (400, 429, 500, 502, 503, 504) or (e.code and e.code >= 500):
  last_exception = e
  if attempt < max_retries:
- backoff = min(initial_backoff * (2 ** attempt), max_backoff)
+ backoff = min(initial_backoff * (2**attempt), max_backoff)
  jitter = backoff * 0.2 * (2 * (time.time() % 1) - 1)
  await asyncio.sleep(backoff + jitter)
  else:
@@ -437,7 +437,7 @@ class LLMProvider:

  if last_exception:
  raise last_exception
- raise RuntimeError(f"Gemini call failed after all retries")
+ raise RuntimeError("Gemini call failed after all retries")

  @classmethod
  def for_memory(cls) -> "LLMProvider":
@@ -447,13 +447,7 @@ class LLMProvider:
  base_url = os.getenv("HINDSIGHT_API_LLM_BASE_URL", "")
  model = os.getenv("HINDSIGHT_API_LLM_MODEL", "openai/gpt-oss-120b")

- return cls(
- provider=provider,
- api_key=api_key,
- base_url=base_url,
- model=model,
- reasoning_effort="low"
- )
+ return cls(provider=provider, api_key=api_key, base_url=base_url, model=model, reasoning_effort="low")

  @classmethod
  def for_answer_generation(cls) -> "LLMProvider":
@@ -463,13 +457,7 @@ class LLMProvider:
  base_url = os.getenv("HINDSIGHT_API_ANSWER_LLM_BASE_URL", os.getenv("HINDSIGHT_API_LLM_BASE_URL", ""))
  model = os.getenv("HINDSIGHT_API_ANSWER_LLM_MODEL", os.getenv("HINDSIGHT_API_LLM_MODEL", "openai/gpt-oss-120b"))

- return cls(
- provider=provider,
- api_key=api_key,
- base_url=base_url,
- model=model,
- reasoning_effort="high"
- )
+ return cls(provider=provider, api_key=api_key, base_url=base_url, model=model, reasoning_effort="high")

  @classmethod
  def for_judge(cls) -> "LLMProvider":
@@ -479,13 +467,7 @@ class LLMProvider:
  base_url = os.getenv("HINDSIGHT_API_JUDGE_LLM_BASE_URL", os.getenv("HINDSIGHT_API_LLM_BASE_URL", ""))
  model = os.getenv("HINDSIGHT_API_JUDGE_LLM_MODEL", os.getenv("HINDSIGHT_API_LLM_MODEL", "openai/gpt-oss-120b"))

- return cls(
- provider=provider,
- api_key=api_key,
- base_url=base_url,
- model=model,
- reasoning_effort="high"
- )
+ return cls(provider=provider, api_key=api_key, base_url=base_url, model=model, reasoning_effort="high")


  # Backwards compatibility alias
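The three factory classmethods collapse into one-line constructors but keep their distinct `reasoning_effort` settings ("low" for memory operations, "high" for answer generation and judging) and their env-var fallback chains. A usage sketch; only the BASE_URL and MODEL variables are visible in these hunks, and the provider/API-key configuration is assumed to follow the same HINDSIGHT_API_* naming in lines not shown here:

```python
import os
from hindsight_api.engine.llm_wrapper import LLMProvider

# Shared defaults; ANSWER/JUDGE-specific variables override these when set.
os.environ["HINDSIGHT_API_LLM_MODEL"] = "openai/gpt-oss-120b"
os.environ["HINDSIGHT_API_LLM_BASE_URL"] = ""  # empty -> provider default

memory_llm = LLMProvider.for_memory()             # reasoning_effort="low"
answer_llm = LLMProvider.for_answer_generation()  # reasoning_effort="high"
judge_llm = LLMProvider.for_judge()               # reasoning_effort="high"
```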