hindsight-api 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. hindsight_api/__init__.py +2 -0
  2. hindsight_api/alembic/env.py +24 -1
  3. hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +14 -4
  4. hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +54 -13
  5. hindsight_api/alembic/versions/rename_personality_to_disposition.py +18 -7
  6. hindsight_api/api/http.py +234 -228
  7. hindsight_api/api/mcp.py +14 -3
  8. hindsight_api/engine/__init__.py +12 -1
  9. hindsight_api/engine/entity_resolver.py +38 -37
  10. hindsight_api/engine/interface.py +592 -0
  11. hindsight_api/engine/llm_wrapper.py +176 -6
  12. hindsight_api/engine/memory_engine.py +993 -217
  13. hindsight_api/engine/retain/bank_utils.py +13 -12
  14. hindsight_api/engine/retain/chunk_storage.py +3 -2
  15. hindsight_api/engine/retain/fact_storage.py +10 -7
  16. hindsight_api/engine/retain/link_utils.py +17 -16
  17. hindsight_api/engine/retain/observation_regeneration.py +17 -16
  18. hindsight_api/engine/retain/orchestrator.py +2 -3
  19. hindsight_api/engine/retain/types.py +25 -8
  20. hindsight_api/engine/search/graph_retrieval.py +6 -5
  21. hindsight_api/engine/search/mpfp_retrieval.py +8 -7
  22. hindsight_api/engine/search/retrieval.py +12 -11
  23. hindsight_api/engine/search/think_utils.py +1 -1
  24. hindsight_api/engine/search/tracer.py +1 -1
  25. hindsight_api/engine/task_backend.py +32 -0
  26. hindsight_api/extensions/__init__.py +66 -0
  27. hindsight_api/extensions/base.py +81 -0
  28. hindsight_api/extensions/builtin/__init__.py +18 -0
  29. hindsight_api/extensions/builtin/tenant.py +33 -0
  30. hindsight_api/extensions/context.py +110 -0
  31. hindsight_api/extensions/http.py +89 -0
  32. hindsight_api/extensions/loader.py +125 -0
  33. hindsight_api/extensions/operation_validator.py +325 -0
  34. hindsight_api/extensions/tenant.py +63 -0
  35. hindsight_api/main.py +1 -1
  36. hindsight_api/mcp_local.py +7 -1
  37. hindsight_api/migrations.py +54 -10
  38. hindsight_api/models.py +15 -0
  39. hindsight_api/pg0.py +1 -1
  40. {hindsight_api-0.1.10.dist-info → hindsight_api-0.1.12.dist-info}/METADATA +1 -1
  41. hindsight_api-0.1.12.dist-info/RECORD +74 -0
  42. hindsight_api-0.1.10.dist-info/RECORD +0 -64
  43. {hindsight_api-0.1.10.dist-info → hindsight_api-0.1.12.dist-info}/WHEEL +0 -0
  44. {hindsight_api-0.1.10.dist-info → hindsight_api-0.1.12.dist-info}/entry_points.txt +0 -0
@@ -3,11 +3,13 @@ LLM wrapper for unified configuration across providers.
 """
 
 import asyncio
+import json
 import logging
 import os
 import time
 from typing import Any
 
+import httpx
 from google import genai
 from google.genai import errors as genai_errors
 from google.genai import types as genai_types
@@ -96,7 +98,7 @@ class LLMProvider:
         client_kwargs = {"api_key": self.api_key, "max_retries": 0}
         if self.base_url:
             client_kwargs["base_url"] = self.base_url
-        self._client = AsyncOpenAI(**client_kwargs)
+        self._client = AsyncOpenAI(**client_kwargs)  # type: ignore[invalid-argument-type] - dict kwargs
         self._gemini_client = None
 
     async def verify_connection(self) -> None:
@@ -112,7 +114,7 @@ class LLMProvider:
         )
         await self.call(
             messages=[{"role": "user", "content": "Say 'ok'"}],
-            max_completion_tokens=10,
+            max_completion_tokens=100,
             max_retries=2,
             initial_backoff=0.5,
             max_backoff=2.0,
@@ -157,7 +159,6 @@ class LLMProvider:
         """
         async with _global_llm_semaphore:
             start_time = time.time()
-            import json
 
             # Handle Gemini provider separately
             if self.provider == "gemini":
@@ -165,6 +166,20 @@ class LLMProvider:
                     messages, response_format, max_retries, initial_backoff, max_backoff, skip_validation, start_time
                 )
 
+            # Handle Ollama with native API for structured output (better schema enforcement)
+            if self.provider == "ollama" and response_format is not None:
+                return await self._call_ollama_native(
+                    messages,
+                    response_format,
+                    max_completion_tokens,
+                    temperature,
+                    max_retries,
+                    initial_backoff,
+                    max_backoff,
+                    skip_validation,
+                    start_time,
+                )
+
             call_params = {
                 "model": self.model,
                 "messages": messages,
@@ -227,7 +242,31 @@ class LLMProvider:
                     response = await self._client.chat.completions.create(**call_params)
 
                     content = response.choices[0].message.content
-                    json_data = json.loads(content)
+
+                    # Log raw LLM response for debugging JSON parse issues
+                    try:
+                        json_data = json.loads(content)
+                    except json.JSONDecodeError as json_err:
+                        # Truncate content for logging (first 500 and last 200 chars)
+                        content_preview = content[:500] if content else "<empty>"
+                        if content and len(content) > 700:
+                            content_preview = f"{content[:500]}...TRUNCATED...{content[-200:]}"
+                        logger.warning(
+                            f"JSON parse error from LLM response (attempt {attempt + 1}/{max_retries + 1}): {json_err}\n"
+                            f" Model: {self.provider}/{self.model}\n"
+                            f" Content length: {len(content) if content else 0} chars\n"
+                            f" Content preview: {content_preview!r}\n"
+                            f" Finish reason: {response.choices[0].finish_reason if response.choices else 'unknown'}"
+                        )
+                        # Retry on JSON parse errors - LLM may return valid JSON on next attempt
+                        if attempt < max_retries:
+                            backoff = min(initial_backoff * (2**attempt), max_backoff)
+                            await asyncio.sleep(backoff)
+                            last_exception = json_err
+                            continue
+                        else:
+                            logger.error(f"JSON parse error after {max_retries + 1} attempts, giving up")
+                            raise
 
                     if skip_validation:
                         result = json_data
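
The new retry path sleeps for min(initial_backoff * 2**attempt, max_backoff) between attempts. As a rough worked illustration, using the 0.5 s / 2.0 s values that verify_connection passes (other call sites may use different values):

    # Hypothetical illustration of the capped exponential backoff used above
    initial_backoff, max_backoff = 0.5, 2.0
    for attempt in range(4):
        print(attempt, min(initial_backoff * (2 ** attempt), max_backoff))
    # attempt 0 -> 0.5s, 1 -> 1.0s, 2 -> 2.0s, 3 -> 2.0s (capped at max_backoff)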
@@ -300,6 +339,129 @@ class LLMProvider:
                 raise last_exception
             raise RuntimeError("LLM call failed after all retries with no exception captured")
 
+    async def _call_ollama_native(
+        self,
+        messages: list[dict[str, str]],
+        response_format: Any,
+        max_completion_tokens: int | None,
+        temperature: float | None,
+        max_retries: int,
+        initial_backoff: float,
+        max_backoff: float,
+        skip_validation: bool,
+        start_time: float,
+    ) -> Any:
+        """
+        Call Ollama using native API with JSON schema enforcement.
+
+        Ollama's native API supports passing a full JSON schema in the 'format' parameter,
+        which provides better structured output control than the OpenAI-compatible API.
+        """
+        # Get the JSON schema from the Pydantic model
+        schema = response_format.model_json_schema() if hasattr(response_format, "model_json_schema") else None
+
+        # Build the base URL for Ollama's native API
+        # Default OpenAI-compatible URL is http://localhost:11434/v1
+        # Native API is at http://localhost:11434/api/chat
+        base_url = self.base_url or "http://localhost:11434/v1"
+        if base_url.endswith("/v1"):
+            native_url = base_url[:-3] + "/api/chat"
+        else:
+            native_url = base_url.rstrip("/") + "/api/chat"
+
+        # Build request payload
+        payload = {
+            "model": self.model,
+            "messages": messages,
+            "stream": False,
+        }
+
+        # Add schema as format parameter for structured output
+        if schema:
+            payload["format"] = schema
+
+        # Add optional parameters with optimized defaults for Ollama
+        # Benchmarking shows num_ctx=16384 + num_batch=512 is optimal
+        options = {
+            "num_ctx": 16384,  # 16k context window for larger prompts
+            "num_batch": 512,  # Optimal batch size for prompt processing
+        }
+        if max_completion_tokens:
+            options["num_predict"] = max_completion_tokens
+        if temperature is not None:
+            options["temperature"] = temperature
+        payload["options"] = options
+
+        last_exception = None
+
+        async with httpx.AsyncClient(timeout=300.0) as client:
+            for attempt in range(max_retries + 1):
+                try:
+                    response = await client.post(native_url, json=payload)
+                    response.raise_for_status()
+
+                    result = response.json()
+                    content = result.get("message", {}).get("content", "")
+
+                    # Parse JSON response
+                    try:
+                        json_data = json.loads(content)
+                    except json.JSONDecodeError as json_err:
+                        content_preview = content[:500] if content else "<empty>"
+                        if content and len(content) > 700:
+                            content_preview = f"{content[:500]}...TRUNCATED...{content[-200:]}"
+                        logger.warning(
+                            f"Ollama JSON parse error (attempt {attempt + 1}/{max_retries + 1}): {json_err}\n"
+                            f" Model: ollama/{self.model}\n"
+                            f" Content length: {len(content) if content else 0} chars\n"
+                            f" Content preview: {content_preview!r}"
+                        )
+                        if attempt < max_retries:
+                            backoff = min(initial_backoff * (2**attempt), max_backoff)
+                            await asyncio.sleep(backoff)
+                            last_exception = json_err
+                            continue
+                        else:
+                            raise
+
+                    # Validate against Pydantic model or return raw JSON
+                    if skip_validation:
+                        return json_data
+                    else:
+                        return response_format.model_validate(json_data)
+
+                except httpx.HTTPStatusError as e:
+                    last_exception = e
+                    if attempt < max_retries:
+                        logger.warning(
+                            f"Ollama HTTP error (attempt {attempt + 1}/{max_retries + 1}): {e.response.status_code}"
+                        )
+                        backoff = min(initial_backoff * (2**attempt), max_backoff)
+                        await asyncio.sleep(backoff)
+                        continue
+                    else:
+                        logger.error(f"Ollama HTTP error after {max_retries + 1} attempts: {e}")
+                        raise
+
+                except httpx.RequestError as e:
+                    last_exception = e
+                    if attempt < max_retries:
+                        logger.warning(f"Ollama connection error (attempt {attempt + 1}/{max_retries + 1}): {e}")
+                        backoff = min(initial_backoff * (2**attempt), max_backoff)
+                        await asyncio.sleep(backoff)
+                        continue
+                    else:
+                        logger.error(f"Ollama connection error after {max_retries + 1} attempts: {e}")
+                        raise
+
+                except Exception as e:
+                    logger.error(f"Unexpected error during Ollama call: {type(e).__name__}: {e}")
+                    raise
+
+        if last_exception:
+            raise last_exception
+        raise RuntimeError("Ollama call failed after all retries")
+
     async def _call_gemini(
         self,
         messages: list[dict[str, str]],
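
For orientation, here is a minimal standalone sketch of the kind of request _call_ollama_native builds against Ollama's native /api/chat endpoint. The model name, prompt, and Answer schema are illustrative assumptions, and it presumes a local Ollama server recent enough to accept a JSON schema in the "format" field:

    # Minimal sketch (assumed model/prompt): send a chat request with a full JSON schema
    # in "format", then validate the returned message content against the same model.
    import httpx
    from pydantic import BaseModel

    class Answer(BaseModel):
        city: str
        country: str

    payload = {
        "model": "llama3.1",  # assumed local model
        "messages": [{"role": "user", "content": "Where is the Eiffel Tower?"}],
        "stream": False,
        "format": Answer.model_json_schema(),  # full schema, not just "json"
        "options": {"num_ctx": 16384, "num_batch": 512},
    }
    resp = httpx.post("http://localhost:11434/api/chat", json=payload, timeout=300.0)
    resp.raise_for_status()
    answer = Answer.model_validate_json(resp.json()["message"]["content"])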
@@ -311,8 +473,6 @@ class LLMProvider:
         start_time: float,
     ) -> Any:
         """Handle Gemini-specific API calls."""
-        import json
-
         # Convert OpenAI-style messages to Gemini format
         system_instruction = None
         gemini_contents = []
@@ -443,6 +603,8 @@ class LLMProvider:
         """Create provider for memory operations from environment variables."""
         provider = os.getenv("HINDSIGHT_API_LLM_PROVIDER", "groq")
         api_key = os.getenv("HINDSIGHT_API_LLM_API_KEY")
+        if not api_key:
+            raise ValueError("HINDSIGHT_API_LLM_API_KEY environment variable is required")
         base_url = os.getenv("HINDSIGHT_API_LLM_BASE_URL", "")
         model = os.getenv("HINDSIGHT_API_LLM_MODEL", "openai/gpt-oss-120b")
 
@@ -453,6 +615,10 @@ class LLMProvider:
         """Create provider for answer generation. Falls back to memory config if not set."""
         provider = os.getenv("HINDSIGHT_API_ANSWER_LLM_PROVIDER", os.getenv("HINDSIGHT_API_LLM_PROVIDER", "groq"))
         api_key = os.getenv("HINDSIGHT_API_ANSWER_LLM_API_KEY", os.getenv("HINDSIGHT_API_LLM_API_KEY"))
+        if not api_key:
+            raise ValueError(
+                "HINDSIGHT_API_LLM_API_KEY or HINDSIGHT_API_ANSWER_LLM_API_KEY environment variable is required"
+            )
         base_url = os.getenv("HINDSIGHT_API_ANSWER_LLM_BASE_URL", os.getenv("HINDSIGHT_API_LLM_BASE_URL", ""))
         model = os.getenv("HINDSIGHT_API_ANSWER_LLM_MODEL", os.getenv("HINDSIGHT_API_LLM_MODEL", "openai/gpt-oss-120b"))
 
@@ -463,6 +629,10 @@ class LLMProvider:
         """Create provider for judge/evaluator operations. Falls back to memory config if not set."""
         provider = os.getenv("HINDSIGHT_API_JUDGE_LLM_PROVIDER", os.getenv("HINDSIGHT_API_LLM_PROVIDER", "groq"))
         api_key = os.getenv("HINDSIGHT_API_JUDGE_LLM_API_KEY", os.getenv("HINDSIGHT_API_LLM_API_KEY"))
+        if not api_key:
+            raise ValueError(
+                "HINDSIGHT_API_LLM_API_KEY or HINDSIGHT_API_JUDGE_LLM_API_KEY environment variable is required"
+            )
         base_url = os.getenv("HINDSIGHT_API_JUDGE_LLM_BASE_URL", os.getenv("HINDSIGHT_API_LLM_BASE_URL", ""))
         model = os.getenv("HINDSIGHT_API_JUDGE_LLM_MODEL", os.getenv("HINDSIGHT_API_LLM_MODEL", "openai/gpt-oss-120b"))
 
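
All three factory methods now fail fast on a missing key and share the same fallback order: the role-specific HINDSIGHT_API_ANSWER_LLM_* / HINDSIGHT_API_JUDGE_LLM_* variable is read first, with the shared HINDSIGHT_API_LLM_* value as the default. A small sketch of that lookup order (the values are placeholders):

    import os

    # Placeholder values for illustration only
    os.environ["HINDSIGHT_API_LLM_API_KEY"] = "shared-key"
    os.environ["HINDSIGHT_API_JUDGE_LLM_API_KEY"] = "judge-key"

    # Judge provider: the role-specific key wins
    assert os.getenv("HINDSIGHT_API_JUDGE_LLM_API_KEY", os.getenv("HINDSIGHT_API_LLM_API_KEY")) == "judge-key"
    # Answer provider: no role-specific key set, so it falls back to the shared key
    assert os.getenv("HINDSIGHT_API_ANSWER_LLM_API_KEY", os.getenv("HINDSIGHT_API_LLM_API_KEY")) == "shared-key"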