hindsight-api 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff compares two publicly available versions of the package as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
- hindsight_api/__init__.py +10 -9
- hindsight_api/alembic/env.py +5 -8
- hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +266 -180
- hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +32 -32
- hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +11 -11
- hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +7 -12
- hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +23 -15
- hindsight_api/alembic/versions/rename_personality_to_disposition.py +30 -21
- hindsight_api/api/__init__.py +10 -10
- hindsight_api/api/http.py +575 -593
- hindsight_api/api/mcp.py +30 -28
- hindsight_api/banner.py +13 -6
- hindsight_api/config.py +9 -13
- hindsight_api/engine/__init__.py +9 -9
- hindsight_api/engine/cross_encoder.py +22 -21
- hindsight_api/engine/db_utils.py +5 -4
- hindsight_api/engine/embeddings.py +22 -21
- hindsight_api/engine/entity_resolver.py +81 -75
- hindsight_api/engine/llm_wrapper.py +61 -79
- hindsight_api/engine/memory_engine.py +603 -625
- hindsight_api/engine/query_analyzer.py +100 -97
- hindsight_api/engine/response_models.py +105 -106
- hindsight_api/engine/retain/__init__.py +9 -16
- hindsight_api/engine/retain/bank_utils.py +34 -58
- hindsight_api/engine/retain/chunk_storage.py +4 -12
- hindsight_api/engine/retain/deduplication.py +9 -28
- hindsight_api/engine/retain/embedding_processing.py +4 -11
- hindsight_api/engine/retain/embedding_utils.py +3 -4
- hindsight_api/engine/retain/entity_processing.py +7 -17
- hindsight_api/engine/retain/fact_extraction.py +155 -165
- hindsight_api/engine/retain/fact_storage.py +11 -23
- hindsight_api/engine/retain/link_creation.py +11 -39
- hindsight_api/engine/retain/link_utils.py +166 -95
- hindsight_api/engine/retain/observation_regeneration.py +39 -52
- hindsight_api/engine/retain/orchestrator.py +72 -62
- hindsight_api/engine/retain/types.py +49 -43
- hindsight_api/engine/search/__init__.py +5 -5
- hindsight_api/engine/search/fusion.py +6 -15
- hindsight_api/engine/search/graph_retrieval.py +22 -23
- hindsight_api/engine/search/mpfp_retrieval.py +76 -92
- hindsight_api/engine/search/observation_utils.py +9 -16
- hindsight_api/engine/search/reranking.py +4 -7
- hindsight_api/engine/search/retrieval.py +87 -66
- hindsight_api/engine/search/scoring.py +5 -7
- hindsight_api/engine/search/temporal_extraction.py +8 -11
- hindsight_api/engine/search/think_utils.py +115 -39
- hindsight_api/engine/search/trace.py +68 -39
- hindsight_api/engine/search/tracer.py +44 -35
- hindsight_api/engine/search/types.py +20 -17
- hindsight_api/engine/task_backend.py +21 -26
- hindsight_api/engine/utils.py +25 -10
- hindsight_api/main.py +21 -40
- hindsight_api/mcp_local.py +190 -0
- hindsight_api/metrics.py +44 -30
- hindsight_api/migrations.py +10 -8
- hindsight_api/models.py +60 -72
- hindsight_api/pg0.py +22 -23
- hindsight_api/server.py +3 -6
- {hindsight_api-0.1.5.dist-info → hindsight_api-0.1.6.dist-info}/METADATA +2 -2
- hindsight_api-0.1.6.dist-info/RECORD +64 -0
- {hindsight_api-0.1.5.dist-info → hindsight_api-0.1.6.dist-info}/entry_points.txt +1 -0
- hindsight_api-0.1.5.dist-info/RECORD +0 -63
- {hindsight_api-0.1.5.dist-info → hindsight_api-0.1.6.dist-info}/WHEEL +0 -0

hindsight_api/engine/llm_wrapper.py
@@ -1,15 +1,17 @@
 """
 LLM wrapper for unified configuration across providers.
 """
+
+import asyncio
+import logging
 import os
 import time
-import
-
-from openai import AsyncOpenAI, RateLimitError, APIError, APIStatusError, APIConnectionError, LengthFinishReasonError
+from typing import Any
+
 from google import genai
-from google.genai import types as genai_types
 from google.genai import errors as genai_errors
-import
+from google.genai import types as genai_types
+from openai import APIConnectionError, APIStatusError, AsyncOpenAI, LengthFinishReasonError
 
 # Seed applied to every Groq request for deterministic behavior.
 DEFAULT_LLM_SEED = 4242
@@ -31,6 +33,7 @@ class OutputTooLongError(Exception):
     to allow callers to handle output length issues without depending on
     provider-specific implementations.
     """
+
     pass
 
 
@@ -68,9 +71,7 @@ class LLMProvider:
         # Validate provider
         valid_providers = ["openai", "groq", "ollama", "gemini"]
         if self.provider not in valid_providers:
-            raise ValueError(
-                f"Invalid LLM provider: {self.provider}. Must be one of: {', '.join(valid_providers)}"
-            )
+            raise ValueError(f"Invalid LLM provider: {self.provider}. Must be one of: {', '.join(valid_providers)}")
 
         # Set default base URLs
         if not self.base_url:
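
The hunk above only collapses the multi-line `ValueError` into a single line; the validation itself is unchanged. As a standalone illustration (a hypothetical helper, not the real constructor, whose full signature is not shown in this diff), the check amounts to:

```python
# Hypothetical, self-contained sketch of the provider check shown above.
# In the package the check lives inside LLMProvider.__init__.
VALID_PROVIDERS = ["openai", "groq", "ollama", "gemini"]


def validate_provider(provider: str) -> str:
    if provider not in VALID_PROVIDERS:
        raise ValueError(f"Invalid LLM provider: {provider}. Must be one of: {', '.join(VALID_PROVIDERS)}")
    return provider


validate_provider("groq")          # passes
# validate_provider("anthropic")   # would raise ValueError
```
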
@@ -106,7 +107,9 @@ class LLMProvider:
             RuntimeError: If the connection test fails.
         """
         try:
-            logger.info(
+            logger.info(
+                f"Verifying LLM: provider={self.provider}, model={self.model}, base_url={self.base_url or 'default'}..."
+            )
             await self.call(
                 messages=[{"role": "user", "content": "Say 'ok'"}],
                 max_completion_tokens=10,
@@ -117,16 +120,14 @@ class LLMProvider:
             # If we get here without exception, the connection is working
             logger.info(f"LLM verified: {self.provider}/{self.model}")
         except Exception as e:
-            raise RuntimeError(
-                f"LLM connection verification failed for {self.provider}/{self.model}: {e}"
-            ) from e
+            raise RuntimeError(f"LLM connection verification failed for {self.provider}/{self.model}: {e}") from e
 
     async def call(
         self,
-        messages:
-        response_format:
-        max_completion_tokens:
-        temperature:
+        messages: list[dict[str, str]],
+        response_format: Any | None = None,
+        max_completion_tokens: int | None = None,
+        temperature: float | None = None,
         scope: str = "memory",
         max_retries: int = 10,
         initial_backoff: float = 1.0,
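
The retyped signature uses built-in generics and `Any | None` in place of the older annotations (truncated in this extraction). A sketch of how a caller might use it, assuming a configured provider; `WeatherReport` and the argument values are illustrative, the import path follows the file list above, and only the parameter names and defaults visible in the hunk are taken from the source:

```python
# Illustrative caller for the retyped call() signature; the model class and
# values are made up, and what call() returns (raw text vs. parsed object)
# is not visible in this hunk.
import asyncio

from pydantic import BaseModel

from hindsight_api.engine.llm_wrapper import LLMProvider


class WeatherReport(BaseModel):
    city: str
    summary: str


async def main() -> None:
    provider = LLMProvider.for_memory()  # factory shown later in this diff
    result = await provider.call(
        messages=[
            {"role": "system", "content": "You are a terse weather reporter."},
            {"role": "user", "content": "Weather in Oslo today?"},
        ],
        response_format=WeatherReport,   # Any | None
        max_completion_tokens=200,       # int | None
        temperature=0.2,                 # float | None
        max_retries=3,
        initial_backoff=1.0,
    )
    print(result)


# asyncio.run(main())  # needs provider credentials configured via environment
```
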
@@ -161,8 +162,7 @@ class LLMProvider:
         # Handle Gemini provider separately
         if self.provider == "gemini":
             return await self._call_gemini(
-                messages, response_format, max_retries, initial_backoff,
-                max_backoff, skip_validation, start_time
+                messages, response_format, max_retries, initial_backoff, max_backoff, skip_validation, start_time
             )
 
         call_params = {
@@ -213,16 +213,18 @@ class LLMProvider:
             try:
                 if response_format is not None:
                     # Add schema to system message for JSON mode
-                    if hasattr(response_format,
+                    if hasattr(response_format, "model_json_schema"):
                         schema = response_format.model_json_schema()
                         schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"
 
-                        if call_params[
-                            call_params[
-                        elif call_params[
-                            call_params[
+                        if call_params["messages"] and call_params["messages"][0].get("role") == "system":
+                            call_params["messages"][0]["content"] += schema_msg
+                        elif call_params["messages"]:
+                            call_params["messages"][0]["content"] = (
+                                schema_msg + "\n\n" + call_params["messages"][0]["content"]
+                            )
 
-                    call_params[
+                    call_params["response_format"] = {"type": "json_object"}
                 response = await self._client.chat.completions.create(**call_params)
 
                 content = response.choices[0].message.content
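
For providers driven through the OpenAI-compatible client, structured output is requested by appending the Pydantic model's JSON schema to the system message (or prepending it to the first message) and enabling generic JSON mode, as the hunk shows. A standalone sketch of that step; the helper name and sample messages are illustrative, while `model_json_schema()`, the schema prompt text, and the `{"type": "json_object"}` setting come from the hunk:

```python
# Standalone sketch of the schema-injection step shown above, pulled out of
# the class for illustration.
import json

from pydantic import BaseModel


class Fact(BaseModel):
    subject: str
    predicate: str
    object: str


def inject_schema(messages: list[dict[str, str]], response_format: type[BaseModel]) -> dict:
    schema = response_format.model_json_schema()
    schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"

    if messages and messages[0].get("role") == "system":
        messages[0]["content"] += schema_msg          # append to the existing system prompt
    elif messages:
        messages[0]["content"] = schema_msg + "\n\n" + messages[0]["content"]

    # Generic JSON mode; the schema itself travels in the prompt text.
    return {"messages": messages, "response_format": {"type": "json_object"}}


params = inject_schema(
    [{"role": "system", "content": "Extract facts."}, {"role": "user", "content": "Alice met Bob."}],
    Fact,
)
print(params["messages"][0]["content"][:80])
```
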
@@ -242,8 +244,8 @@ class LLMProvider:
                 if duration > 10.0:
                     ratio = max(1, usage.completion_tokens) / usage.prompt_tokens
                     cached_tokens = 0
-                    if hasattr(usage,
-                        cached_tokens = getattr(usage.prompt_tokens_details,
+                    if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details:
+                        cached_tokens = getattr(usage.prompt_tokens_details, "cached_tokens", 0) or 0
                     cache_info = f", cached_tokens={cached_tokens}" if cached_tokens > 0 else ""
                     logger.info(
                         f"slow llm call: model={self.provider}/{self.model}, "
@@ -256,15 +258,19 @@ class LLMProvider:
             except LengthFinishReasonError as e:
                 logger.warning(f"LLM output exceeded token limits: {str(e)}")
                 raise OutputTooLongError(
-
+                    "LLM output exceeded token limits. Input may need to be split into smaller chunks."
                 ) from e
 
             except APIConnectionError as e:
                 last_exception = e
                 if attempt < max_retries:
-                    status_code = getattr(e,
-
-
+                    status_code = getattr(e, "status_code", None) or getattr(
+                        getattr(e, "response", None), "status_code", None
+                    )
+                    logger.warning(
+                        f"Connection error, retrying... (attempt {attempt + 1}/{max_retries + 1}) - status_code={status_code}, message={e}"
+                    )
+                    backoff = min(initial_backoff * (2**attempt), max_backoff)
                     await asyncio.sleep(backoff)
                     continue
                 else:
@@ -279,7 +285,7 @@ class LLMProvider:
 
                 last_exception = e
                 if attempt < max_retries:
-                    backoff = min(initial_backoff * (2
+                    backoff = min(initial_backoff * (2**attempt), max_backoff)
                     jitter = backoff * 0.2 * (2 * (time.time() % 1) - 1)
                     sleep_time = backoff + jitter
                     await asyncio.sleep(sleep_time)
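
Every retry path in this file computes the same schedule: exponential backoff capped at `max_backoff`, with a roughly +/-20% jitter derived from the fractional wall-clock time. A minimal sketch of that schedule; the `max_backoff` default below is illustrative, since only `initial_backoff=1.0` and `max_retries=10` appear in the diff:

```python
# Sketch of the backoff schedule used in the retry hunks above: exponential
# growth capped at max_backoff, with +/-20% jitter taken from the wall clock.
import time


def backoff_with_jitter(attempt: int, initial_backoff: float = 1.0, max_backoff: float = 60.0) -> float:
    backoff = min(initial_backoff * (2**attempt), max_backoff)
    jitter = backoff * 0.2 * (2 * (time.time() % 1) - 1)  # roughly uniform in [-0.2*b, +0.2*b]
    return backoff + jitter


for attempt in range(5):
    print(attempt, round(backoff_with_jitter(attempt), 2))
```
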
@@ -293,12 +299,12 @@ class LLMProvider:
 
         if last_exception:
             raise last_exception
-        raise RuntimeError(
+        raise RuntimeError("LLM call failed after all retries with no exception captured")
 
     async def _call_gemini(
         self,
-        messages:
-        response_format:
+        messages: list[dict[str, str]],
+        response_format: Any | None,
         max_retries: int,
         initial_backoff: float,
         max_backoff: float,
@@ -313,27 +319,21 @@ class LLMProvider:
         gemini_contents = []
 
         for msg in messages:
-            role = msg.get(
-            content = msg.get(
+            role = msg.get("role", "user")
+            content = msg.get("content", "")
 
-            if role ==
+            if role == "system":
                 if system_instruction:
                     system_instruction += "\n\n" + content
                 else:
                     system_instruction = content
-            elif role ==
-                gemini_contents.append(genai_types.Content(
-                    role="model",
-                    parts=[genai_types.Part(text=content)]
-                ))
+            elif role == "assistant":
+                gemini_contents.append(genai_types.Content(role="model", parts=[genai_types.Part(text=content)]))
             else:
-                gemini_contents.append(genai_types.Content(
-                    role="user",
-                    parts=[genai_types.Part(text=content)]
-                ))
+                gemini_contents.append(genai_types.Content(role="user", parts=[genai_types.Part(text=content)]))
 
         # Add JSON schema instruction if response_format is provided
-        if response_format is not None and hasattr(response_format,
+        if response_format is not None and hasattr(response_format, "model_json_schema"):
             schema = response_format.model_json_schema()
             schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"
             if system_instruction:
@@ -344,10 +344,10 @@ class LLMProvider:
         # Build generation config
         config_kwargs = {}
         if system_instruction:
-            config_kwargs[
+            config_kwargs["system_instruction"] = system_instruction
         if response_format is not None:
-            config_kwargs[
-            config_kwargs[
+            config_kwargs["response_mime_type"] = "application/json"
+            config_kwargs["response_schema"] = response_format
 
         generation_config = genai_types.GenerateContentConfig(**config_kwargs) if config_kwargs else None
 
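
The `_call_gemini` hunks above fold all system messages into a single system instruction, map `assistant` to Gemini's `model` role, treat anything else as `user`, and request JSON output via `response_mime_type`/`response_schema`. A compact sketch of that conversion, assuming the `google-genai` package is installed; the helper name and sample messages are illustrative, while the `Content`/`Part` construction and config keys mirror the hunks:

```python
# Sketch of the OpenAI-style -> Gemini conversion shown above, pulled out of
# the class for illustration. Requires the google-genai package.
from google.genai import types as genai_types


def to_gemini(messages: list[dict[str, str]]) -> tuple[str | None, list[genai_types.Content]]:
    system_instruction: str | None = None
    contents: list[genai_types.Content] = []

    for msg in messages:
        role = msg.get("role", "user")
        content = msg.get("content", "")
        if role == "system":
            # Gemini takes a single system instruction; concatenate if there are several.
            system_instruction = content if system_instruction is None else system_instruction + "\n\n" + content
        elif role == "assistant":
            contents.append(genai_types.Content(role="model", parts=[genai_types.Part(text=content)]))
        else:
            contents.append(genai_types.Content(role="user", parts=[genai_types.Part(text=content)]))

    return system_instruction, contents


system, contents = to_gemini(
    [
        {"role": "system", "content": "Answer briefly."},
        {"role": "user", "content": "Capital of Norway?"},
    ]
)

# Generation config mirroring the hunk: JSON output constrained by a Pydantic schema.
# config = genai_types.GenerateContentConfig(
#     system_instruction=system,
#     response_mime_type="application/json",
#     response_schema=SomePydanticModel,
# )
```
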
@@ -366,14 +366,14 @@ class LLMProvider:
                 # Handle empty response
                 if content is None:
                     block_reason = None
-                    if hasattr(response,
+                    if hasattr(response, "candidates") and response.candidates:
                         candidate = response.candidates[0]
-                        if hasattr(candidate,
+                        if hasattr(candidate, "finish_reason"):
                             block_reason = candidate.finish_reason
 
                     if attempt < max_retries:
                         logger.warning(f"Gemini returned empty response (reason: {block_reason}), retrying...")
-                        backoff = min(initial_backoff * (2
+                        backoff = min(initial_backoff * (2**attempt), max_backoff)
                         await asyncio.sleep(backoff)
                         continue
                     else:
@@ -390,7 +390,7 @@ class LLMProvider:
 
                 # Log slow calls
                 duration = time.time() - start_time
-                if duration > 10.0 and hasattr(response,
+                if duration > 10.0 and hasattr(response, "usage_metadata") and response.usage_metadata:
                     usage = response.usage_metadata
                     logger.info(
                         f"slow llm call: model={self.provider}/{self.model}, "
@@ -403,8 +403,8 @@ class LLMProvider:
             except json.JSONDecodeError as e:
                 last_exception = e
                 if attempt < max_retries:
-                    logger.warning(
-                    backoff = min(initial_backoff * (2
+                    logger.warning("Gemini returned invalid JSON, retrying...")
+                    backoff = min(initial_backoff * (2**attempt), max_backoff)
                     await asyncio.sleep(backoff)
                     continue
                 else:
@@ -421,7 +421,7 @@ class LLMProvider:
                 if e.code in (400, 429, 500, 502, 503, 504) or (e.code and e.code >= 500):
                     last_exception = e
                     if attempt < max_retries:
-                        backoff = min(initial_backoff * (2
+                        backoff = min(initial_backoff * (2**attempt), max_backoff)
                         jitter = backoff * 0.2 * (2 * (time.time() % 1) - 1)
                         await asyncio.sleep(backoff + jitter)
                     else:
@@ -437,7 +437,7 @@ class LLMProvider:
 
         if last_exception:
             raise last_exception
-        raise RuntimeError(
+        raise RuntimeError("Gemini call failed after all retries")
 
     @classmethod
     def for_memory(cls) -> "LLMProvider":
@@ -447,13 +447,7 @@ class LLMProvider:
         base_url = os.getenv("HINDSIGHT_API_LLM_BASE_URL", "")
         model = os.getenv("HINDSIGHT_API_LLM_MODEL", "openai/gpt-oss-120b")
 
-        return cls(
-            provider=provider,
-            api_key=api_key,
-            base_url=base_url,
-            model=model,
-            reasoning_effort="low"
-        )
+        return cls(provider=provider, api_key=api_key, base_url=base_url, model=model, reasoning_effort="low")
 
     @classmethod
     def for_answer_generation(cls) -> "LLMProvider":
@@ -463,13 +457,7 @@ class LLMProvider:
         base_url = os.getenv("HINDSIGHT_API_ANSWER_LLM_BASE_URL", os.getenv("HINDSIGHT_API_LLM_BASE_URL", ""))
         model = os.getenv("HINDSIGHT_API_ANSWER_LLM_MODEL", os.getenv("HINDSIGHT_API_LLM_MODEL", "openai/gpt-oss-120b"))
 
-        return cls(
-            provider=provider,
-            api_key=api_key,
-            base_url=base_url,
-            model=model,
-            reasoning_effort="high"
-        )
+        return cls(provider=provider, api_key=api_key, base_url=base_url, model=model, reasoning_effort="high")
 
     @classmethod
     def for_judge(cls) -> "LLMProvider":
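
The `for_memory` and `for_answer_generation` factories above, and the `for_judge` hunk that follows, share one pattern: role-specific environment variables fall back to the base `HINDSIGHT_API_LLM_*` ones, and only `reasoning_effort` differs ("low" for memory, "high" for the other two). A sketch of that fallback, using only the variable names visible in these hunks; provider and API-key handling sits outside the shown context, and the values below are examples:

```python
# Sketch of the env-var fallback the factory classmethods use. Only the
# BASE_URL and MODEL variable names appear in this diff; values are examples.
import os

os.environ.setdefault("HINDSIGHT_API_LLM_MODEL", "openai/gpt-oss-120b")
os.environ.setdefault("HINDSIGHT_API_ANSWER_LLM_MODEL", "openai/gpt-oss-120b")

base_url = os.getenv("HINDSIGHT_API_LLM_BASE_URL", "")
base_model = os.getenv("HINDSIGHT_API_LLM_MODEL", "openai/gpt-oss-120b")

# Role-specific settings fall back to the base settings.
answer_base_url = os.getenv("HINDSIGHT_API_ANSWER_LLM_BASE_URL", base_url)
answer_model = os.getenv("HINDSIGHT_API_ANSWER_LLM_MODEL", base_model)
judge_base_url = os.getenv("HINDSIGHT_API_JUDGE_LLM_BASE_URL", base_url)
judge_model = os.getenv("HINDSIGHT_API_JUDGE_LLM_MODEL", base_model)

print(base_model, answer_model, judge_model)
# LLMProvider.for_memory() builds with reasoning_effort="low";
# for_answer_generation() and for_judge() build with reasoning_effort="high".
```
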
@@ -479,13 +467,7 @@ class LLMProvider:
         base_url = os.getenv("HINDSIGHT_API_JUDGE_LLM_BASE_URL", os.getenv("HINDSIGHT_API_LLM_BASE_URL", ""))
         model = os.getenv("HINDSIGHT_API_JUDGE_LLM_MODEL", os.getenv("HINDSIGHT_API_LLM_MODEL", "openai/gpt-oss-120b"))
 
-        return cls(
-            provider=provider,
-            api_key=api_key,
-            base_url=base_url,
-            model=model,
-            reasoning_effort="high"
-        )
+        return cls(provider=provider, api_key=api_key, base_url=base_url, model=model, reasoning_effort="high")
 
 
 # Backwards compatibility alias